xref: /freebsd/sys/contrib/openzfs/module/icp/asm-arm/sha2/sha256-armv7.S (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1// SPDX-License-Identifier: Apache-2.0
2/*
3 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
4 *
5 * Licensed under the Apache License, Version 2.0 (the "License");
6 * you may not use this file except in compliance with the License.
7 * You may obtain a copy of the License at
8 *
9 *     https://www.apache.org/licenses/LICENSE-2.0
10 *
11 * Unless required by applicable law or agreed to in writing, software
12 * distributed under the License is distributed on an "AS IS" BASIS,
13 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14 * See the License for the specific language governing permissions and
15 * limitations under the License.
16 */
17
18/*
19 * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
20 * - modified assembly to fit into OpenZFS
21 */
22
23#if defined(__arm__)
24
/*
 * Pick the architecture level used by conditional code below
 * (BODY_00_15 byte-load vs word-load paths): fall back to ARMv7
 * when the compiler does not provide __ARM_ARCH.
 */
25#ifndef __ARM_ARCH
26# define __ARM_ARCH__	7
27#else
28# define __ARM_ARCH__	__ARM_ARCH
29#endif
30
/* Assemble for Thumb-2 (unified syntax) when targeted, else classic ARM. */
31#if defined(__thumb2__)
32.syntax unified
33.thumb
34#else
35.code   32
36#endif
37
38.text
39
@ SHA-256 round constants K[0..63] (FIPS 180-4 sec. 4.2.2): first 32 bits
@ of the fractional parts of the cube roots of the first 64 primes.
@ The code below locates this table PC-relative ("sub r14,r3,#256+32"),
@ so it must stay at this fixed distance before zfs_sha256_block_armv7.
40.type	K256,%object
41.align	5
42K256:
43.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
44.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
45.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
46.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
47.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
48.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
49.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
50.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
51.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
52.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
53.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
54.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
55.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
56.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
57.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
58.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
59.size	K256,.-K256
@ NOTE(review): round 31 of the loop detects the end of the table by
@ comparing the low byte of the fetched constant against 0xf2 (last
@ entry 0xc67178f2); this extra zero word presumably pads/guards reads
@ past the table — confirm against the perlasm origin before relying on it.
60.word	0				@ terminator
61
62.align	5
63.globl	zfs_sha256_block_armv7
64.type	zfs_sha256_block_armv7,%function
65zfs_sha256_block_armv7:
66.Lzfs_sha256_block_armv7:
67
68#if __ARM_ARCH__<7 && !defined(__thumb2__)
69	sub	r3,pc,#8		@ zfs_sha256_block_armv7
70#else
71	adr	r3,.Lzfs_sha256_block_armv7
72#endif
73
74	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
75	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
76	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
77	sub	r14,r3,#256+32	@ K256
78	sub	sp,sp,#16*4		@ alloca(X[16])
79.Loop:
80# if __ARM_ARCH__>=7
81	ldr	r2,[r1],#4
82# else
83	ldrb	r2,[r1,#3]
84# endif
85	eor	r3,r5,r6		@ magic
86	eor	r12,r12,r12
87#if __ARM_ARCH__>=7
88	@ ldr	r2,[r1],#4			@ 0
89# if 0==15
90	str	r1,[sp,#17*4]			@ make room for r1
91# endif
92	eor	r0,r8,r8,ror#5
93	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
94	eor	r0,r0,r8,ror#19	@ Sigma1(e)
95# ifndef __ARMEB__
96	rev	r2,r2
97# endif
98#else
99	@ ldrb	r2,[r1,#3]			@ 0
100	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
101	ldrb	r12,[r1,#2]
102	ldrb	r0,[r1,#1]
103	orr	r2,r2,r12,lsl#8
104	ldrb	r12,[r1],#4
105	orr	r2,r2,r0,lsl#16
106# if 0==15
107	str	r1,[sp,#17*4]			@ make room for r1
108# endif
109	eor	r0,r8,r8,ror#5
110	orr	r2,r2,r12,lsl#24
111	eor	r0,r0,r8,ror#19	@ Sigma1(e)
112#endif
113	ldr	r12,[r14],#4			@ *K256++
114	add	r11,r11,r2			@ h+=X[i]
115	str	r2,[sp,#0*4]
116	eor	r2,r9,r10
117	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
118	and	r2,r2,r8
119	add	r11,r11,r12			@ h+=K256[i]
120	eor	r2,r2,r10			@ Ch(e,f,g)
121	eor	r0,r4,r4,ror#11
122	add	r11,r11,r2			@ h+=Ch(e,f,g)
123#if 0==31
124	and	r12,r12,#0xff
125	cmp	r12,#0xf2			@ done?
126#endif
127#if 0<15
128# if __ARM_ARCH__>=7
129	ldr	r2,[r1],#4			@ prefetch
130# else
131	ldrb	r2,[r1,#3]
132# endif
133	eor	r12,r4,r5			@ a^b, b^c in next round
134#else
135	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
136	eor	r12,r4,r5			@ a^b, b^c in next round
137	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
138#endif
139	eor	r0,r0,r4,ror#20	@ Sigma0(a)
140	and	r3,r3,r12			@ (b^c)&=(a^b)
141	add	r7,r7,r11			@ d+=h
142	eor	r3,r3,r5			@ Maj(a,b,c)
143	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
144	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
145#if __ARM_ARCH__>=7
146	@ ldr	r2,[r1],#4			@ 1
147# if 1==15
148	str	r1,[sp,#17*4]			@ make room for r1
149# endif
150	eor	r0,r7,r7,ror#5
151	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
152	eor	r0,r0,r7,ror#19	@ Sigma1(e)
153# ifndef __ARMEB__
154	rev	r2,r2
155# endif
156#else
157	@ ldrb	r2,[r1,#3]			@ 1
158	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
159	ldrb	r3,[r1,#2]
160	ldrb	r0,[r1,#1]
161	orr	r2,r2,r3,lsl#8
162	ldrb	r3,[r1],#4
163	orr	r2,r2,r0,lsl#16
164# if 1==15
165	str	r1,[sp,#17*4]			@ make room for r1
166# endif
167	eor	r0,r7,r7,ror#5
168	orr	r2,r2,r3,lsl#24
169	eor	r0,r0,r7,ror#19	@ Sigma1(e)
170#endif
171	ldr	r3,[r14],#4			@ *K256++
172	add	r10,r10,r2			@ h+=X[i]
173	str	r2,[sp,#1*4]
174	eor	r2,r8,r9
175	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
176	and	r2,r2,r7
177	add	r10,r10,r3			@ h+=K256[i]
178	eor	r2,r2,r9			@ Ch(e,f,g)
179	eor	r0,r11,r11,ror#11
180	add	r10,r10,r2			@ h+=Ch(e,f,g)
181#if 1==31
182	and	r3,r3,#0xff
183	cmp	r3,#0xf2			@ done?
184#endif
185#if 1<15
186# if __ARM_ARCH__>=7
187	ldr	r2,[r1],#4			@ prefetch
188# else
189	ldrb	r2,[r1,#3]
190# endif
191	eor	r3,r11,r4			@ a^b, b^c in next round
192#else
193	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
194	eor	r3,r11,r4			@ a^b, b^c in next round
195	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
196#endif
197	eor	r0,r0,r11,ror#20	@ Sigma0(a)
198	and	r12,r12,r3			@ (b^c)&=(a^b)
199	add	r6,r6,r10			@ d+=h
200	eor	r12,r12,r4			@ Maj(a,b,c)
201	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
202	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
203#if __ARM_ARCH__>=7
204	@ ldr	r2,[r1],#4			@ 2
205# if 2==15
206	str	r1,[sp,#17*4]			@ make room for r1
207# endif
208	eor	r0,r6,r6,ror#5
209	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
210	eor	r0,r0,r6,ror#19	@ Sigma1(e)
211# ifndef __ARMEB__
212	rev	r2,r2
213# endif
214#else
215	@ ldrb	r2,[r1,#3]			@ 2
216	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
217	ldrb	r12,[r1,#2]
218	ldrb	r0,[r1,#1]
219	orr	r2,r2,r12,lsl#8
220	ldrb	r12,[r1],#4
221	orr	r2,r2,r0,lsl#16
222# if 2==15
223	str	r1,[sp,#17*4]			@ make room for r1
224# endif
225	eor	r0,r6,r6,ror#5
226	orr	r2,r2,r12,lsl#24
227	eor	r0,r0,r6,ror#19	@ Sigma1(e)
228#endif
229	ldr	r12,[r14],#4			@ *K256++
230	add	r9,r9,r2			@ h+=X[i]
231	str	r2,[sp,#2*4]
232	eor	r2,r7,r8
233	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
234	and	r2,r2,r6
235	add	r9,r9,r12			@ h+=K256[i]
236	eor	r2,r2,r8			@ Ch(e,f,g)
237	eor	r0,r10,r10,ror#11
238	add	r9,r9,r2			@ h+=Ch(e,f,g)
239#if 2==31
240	and	r12,r12,#0xff
241	cmp	r12,#0xf2			@ done?
242#endif
243#if 2<15
244# if __ARM_ARCH__>=7
245	ldr	r2,[r1],#4			@ prefetch
246# else
247	ldrb	r2,[r1,#3]
248# endif
249	eor	r12,r10,r11			@ a^b, b^c in next round
250#else
251	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
252	eor	r12,r10,r11			@ a^b, b^c in next round
253	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
254#endif
255	eor	r0,r0,r10,ror#20	@ Sigma0(a)
256	and	r3,r3,r12			@ (b^c)&=(a^b)
257	add	r5,r5,r9			@ d+=h
258	eor	r3,r3,r11			@ Maj(a,b,c)
259	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
260	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
261#if __ARM_ARCH__>=7
262	@ ldr	r2,[r1],#4			@ 3
263# if 3==15
264	str	r1,[sp,#17*4]			@ make room for r1
265# endif
266	eor	r0,r5,r5,ror#5
267	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
268	eor	r0,r0,r5,ror#19	@ Sigma1(e)
269# ifndef __ARMEB__
270	rev	r2,r2
271# endif
272#else
273	@ ldrb	r2,[r1,#3]			@ 3
274	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
275	ldrb	r3,[r1,#2]
276	ldrb	r0,[r1,#1]
277	orr	r2,r2,r3,lsl#8
278	ldrb	r3,[r1],#4
279	orr	r2,r2,r0,lsl#16
280# if 3==15
281	str	r1,[sp,#17*4]			@ make room for r1
282# endif
283	eor	r0,r5,r5,ror#5
284	orr	r2,r2,r3,lsl#24
285	eor	r0,r0,r5,ror#19	@ Sigma1(e)
286#endif
287	ldr	r3,[r14],#4			@ *K256++
288	add	r8,r8,r2			@ h+=X[i]
289	str	r2,[sp,#3*4]
290	eor	r2,r6,r7
291	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
292	and	r2,r2,r5
293	add	r8,r8,r3			@ h+=K256[i]
294	eor	r2,r2,r7			@ Ch(e,f,g)
295	eor	r0,r9,r9,ror#11
296	add	r8,r8,r2			@ h+=Ch(e,f,g)
297#if 3==31
298	and	r3,r3,#0xff
299	cmp	r3,#0xf2			@ done?
300#endif
301#if 3<15
302# if __ARM_ARCH__>=7
303	ldr	r2,[r1],#4			@ prefetch
304# else
305	ldrb	r2,[r1,#3]
306# endif
307	eor	r3,r9,r10			@ a^b, b^c in next round
308#else
309	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
310	eor	r3,r9,r10			@ a^b, b^c in next round
311	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
312#endif
313	eor	r0,r0,r9,ror#20	@ Sigma0(a)
314	and	r12,r12,r3			@ (b^c)&=(a^b)
315	add	r4,r4,r8			@ d+=h
316	eor	r12,r12,r10			@ Maj(a,b,c)
317	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
318	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
319#if __ARM_ARCH__>=7
320	@ ldr	r2,[r1],#4			@ 4
321# if 4==15
322	str	r1,[sp,#17*4]			@ make room for r1
323# endif
324	eor	r0,r4,r4,ror#5
325	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
326	eor	r0,r0,r4,ror#19	@ Sigma1(e)
327# ifndef __ARMEB__
328	rev	r2,r2
329# endif
330#else
331	@ ldrb	r2,[r1,#3]			@ 4
332	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
333	ldrb	r12,[r1,#2]
334	ldrb	r0,[r1,#1]
335	orr	r2,r2,r12,lsl#8
336	ldrb	r12,[r1],#4
337	orr	r2,r2,r0,lsl#16
338# if 4==15
339	str	r1,[sp,#17*4]			@ make room for r1
340# endif
341	eor	r0,r4,r4,ror#5
342	orr	r2,r2,r12,lsl#24
343	eor	r0,r0,r4,ror#19	@ Sigma1(e)
344#endif
345	ldr	r12,[r14],#4			@ *K256++
346	add	r7,r7,r2			@ h+=X[i]
347	str	r2,[sp,#4*4]
348	eor	r2,r5,r6
349	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
350	and	r2,r2,r4
351	add	r7,r7,r12			@ h+=K256[i]
352	eor	r2,r2,r6			@ Ch(e,f,g)
353	eor	r0,r8,r8,ror#11
354	add	r7,r7,r2			@ h+=Ch(e,f,g)
355#if 4==31
356	and	r12,r12,#0xff
357	cmp	r12,#0xf2			@ done?
358#endif
359#if 4<15
360# if __ARM_ARCH__>=7
361	ldr	r2,[r1],#4			@ prefetch
362# else
363	ldrb	r2,[r1,#3]
364# endif
365	eor	r12,r8,r9			@ a^b, b^c in next round
366#else
367	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
368	eor	r12,r8,r9			@ a^b, b^c in next round
369	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
370#endif
371	eor	r0,r0,r8,ror#20	@ Sigma0(a)
372	and	r3,r3,r12			@ (b^c)&=(a^b)
373	add	r11,r11,r7			@ d+=h
374	eor	r3,r3,r9			@ Maj(a,b,c)
375	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
376	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
377#if __ARM_ARCH__>=7
378	@ ldr	r2,[r1],#4			@ 5
379# if 5==15
380	str	r1,[sp,#17*4]			@ make room for r1
381# endif
382	eor	r0,r11,r11,ror#5
383	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
384	eor	r0,r0,r11,ror#19	@ Sigma1(e)
385# ifndef __ARMEB__
386	rev	r2,r2
387# endif
388#else
389	@ ldrb	r2,[r1,#3]			@ 5
390	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
391	ldrb	r3,[r1,#2]
392	ldrb	r0,[r1,#1]
393	orr	r2,r2,r3,lsl#8
394	ldrb	r3,[r1],#4
395	orr	r2,r2,r0,lsl#16
396# if 5==15
397	str	r1,[sp,#17*4]			@ make room for r1
398# endif
399	eor	r0,r11,r11,ror#5
400	orr	r2,r2,r3,lsl#24
401	eor	r0,r0,r11,ror#19	@ Sigma1(e)
402#endif
403	ldr	r3,[r14],#4			@ *K256++
404	add	r6,r6,r2			@ h+=X[i]
405	str	r2,[sp,#5*4]
406	eor	r2,r4,r5
407	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
408	and	r2,r2,r11
409	add	r6,r6,r3			@ h+=K256[i]
410	eor	r2,r2,r5			@ Ch(e,f,g)
411	eor	r0,r7,r7,ror#11
412	add	r6,r6,r2			@ h+=Ch(e,f,g)
413#if 5==31
414	and	r3,r3,#0xff
415	cmp	r3,#0xf2			@ done?
416#endif
417#if 5<15
418# if __ARM_ARCH__>=7
419	ldr	r2,[r1],#4			@ prefetch
420# else
421	ldrb	r2,[r1,#3]
422# endif
423	eor	r3,r7,r8			@ a^b, b^c in next round
424#else
425	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
426	eor	r3,r7,r8			@ a^b, b^c in next round
427	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
428#endif
429	eor	r0,r0,r7,ror#20	@ Sigma0(a)
430	and	r12,r12,r3			@ (b^c)&=(a^b)
431	add	r10,r10,r6			@ d+=h
432	eor	r12,r12,r8			@ Maj(a,b,c)
433	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
434	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
435#if __ARM_ARCH__>=7
436	@ ldr	r2,[r1],#4			@ 6
437# if 6==15
438	str	r1,[sp,#17*4]			@ make room for r1
439# endif
440	eor	r0,r10,r10,ror#5
441	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
442	eor	r0,r0,r10,ror#19	@ Sigma1(e)
443# ifndef __ARMEB__
444	rev	r2,r2
445# endif
446#else
447	@ ldrb	r2,[r1,#3]			@ 6
448	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
449	ldrb	r12,[r1,#2]
450	ldrb	r0,[r1,#1]
451	orr	r2,r2,r12,lsl#8
452	ldrb	r12,[r1],#4
453	orr	r2,r2,r0,lsl#16
454# if 6==15
455	str	r1,[sp,#17*4]			@ make room for r1
456# endif
457	eor	r0,r10,r10,ror#5
458	orr	r2,r2,r12,lsl#24
459	eor	r0,r0,r10,ror#19	@ Sigma1(e)
460#endif
461	ldr	r12,[r14],#4			@ *K256++
462	add	r5,r5,r2			@ h+=X[i]
463	str	r2,[sp,#6*4]
464	eor	r2,r11,r4
465	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
466	and	r2,r2,r10
467	add	r5,r5,r12			@ h+=K256[i]
468	eor	r2,r2,r4			@ Ch(e,f,g)
469	eor	r0,r6,r6,ror#11
470	add	r5,r5,r2			@ h+=Ch(e,f,g)
471#if 6==31
472	and	r12,r12,#0xff
473	cmp	r12,#0xf2			@ done?
474#endif
475#if 6<15
476# if __ARM_ARCH__>=7
477	ldr	r2,[r1],#4			@ prefetch
478# else
479	ldrb	r2,[r1,#3]
480# endif
481	eor	r12,r6,r7			@ a^b, b^c in next round
482#else
483	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
484	eor	r12,r6,r7			@ a^b, b^c in next round
485	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
486#endif
487	eor	r0,r0,r6,ror#20	@ Sigma0(a)
488	and	r3,r3,r12			@ (b^c)&=(a^b)
489	add	r9,r9,r5			@ d+=h
490	eor	r3,r3,r7			@ Maj(a,b,c)
491	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
492	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
493#if __ARM_ARCH__>=7
494	@ ldr	r2,[r1],#4			@ 7
495# if 7==15
496	str	r1,[sp,#17*4]			@ make room for r1
497# endif
498	eor	r0,r9,r9,ror#5
499	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
500	eor	r0,r0,r9,ror#19	@ Sigma1(e)
501# ifndef __ARMEB__
502	rev	r2,r2
503# endif
504#else
505	@ ldrb	r2,[r1,#3]			@ 7
506	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
507	ldrb	r3,[r1,#2]
508	ldrb	r0,[r1,#1]
509	orr	r2,r2,r3,lsl#8
510	ldrb	r3,[r1],#4
511	orr	r2,r2,r0,lsl#16
512# if 7==15
513	str	r1,[sp,#17*4]			@ make room for r1
514# endif
515	eor	r0,r9,r9,ror#5
516	orr	r2,r2,r3,lsl#24
517	eor	r0,r0,r9,ror#19	@ Sigma1(e)
518#endif
519	ldr	r3,[r14],#4			@ *K256++
520	add	r4,r4,r2			@ h+=X[i]
521	str	r2,[sp,#7*4]
522	eor	r2,r10,r11
523	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
524	and	r2,r2,r9
525	add	r4,r4,r3			@ h+=K256[i]
526	eor	r2,r2,r11			@ Ch(e,f,g)
527	eor	r0,r5,r5,ror#11
528	add	r4,r4,r2			@ h+=Ch(e,f,g)
529#if 7==31
530	and	r3,r3,#0xff
531	cmp	r3,#0xf2			@ done?
532#endif
533#if 7<15
534# if __ARM_ARCH__>=7
535	ldr	r2,[r1],#4			@ prefetch
536# else
537	ldrb	r2,[r1,#3]
538# endif
539	eor	r3,r5,r6			@ a^b, b^c in next round
540#else
541	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
542	eor	r3,r5,r6			@ a^b, b^c in next round
543	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
544#endif
545	eor	r0,r0,r5,ror#20	@ Sigma0(a)
546	and	r12,r12,r3			@ (b^c)&=(a^b)
547	add	r8,r8,r4			@ d+=h
548	eor	r12,r12,r6			@ Maj(a,b,c)
549	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
550	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
551#if __ARM_ARCH__>=7
552	@ ldr	r2,[r1],#4			@ 8
553# if 8==15
554	str	r1,[sp,#17*4]			@ make room for r1
555# endif
556	eor	r0,r8,r8,ror#5
557	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
558	eor	r0,r0,r8,ror#19	@ Sigma1(e)
559# ifndef __ARMEB__
560	rev	r2,r2
561# endif
562#else
563	@ ldrb	r2,[r1,#3]			@ 8
564	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
565	ldrb	r12,[r1,#2]
566	ldrb	r0,[r1,#1]
567	orr	r2,r2,r12,lsl#8
568	ldrb	r12,[r1],#4
569	orr	r2,r2,r0,lsl#16
570# if 8==15
571	str	r1,[sp,#17*4]			@ make room for r1
572# endif
573	eor	r0,r8,r8,ror#5
574	orr	r2,r2,r12,lsl#24
575	eor	r0,r0,r8,ror#19	@ Sigma1(e)
576#endif
577	ldr	r12,[r14],#4			@ *K256++
578	add	r11,r11,r2			@ h+=X[i]
579	str	r2,[sp,#8*4]
580	eor	r2,r9,r10
581	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
582	and	r2,r2,r8
583	add	r11,r11,r12			@ h+=K256[i]
584	eor	r2,r2,r10			@ Ch(e,f,g)
585	eor	r0,r4,r4,ror#11
586	add	r11,r11,r2			@ h+=Ch(e,f,g)
587#if 8==31
588	and	r12,r12,#0xff
589	cmp	r12,#0xf2			@ done?
590#endif
591#if 8<15
592# if __ARM_ARCH__>=7
593	ldr	r2,[r1],#4			@ prefetch
594# else
595	ldrb	r2,[r1,#3]
596# endif
597	eor	r12,r4,r5			@ a^b, b^c in next round
598#else
599	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
600	eor	r12,r4,r5			@ a^b, b^c in next round
601	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
602#endif
603	eor	r0,r0,r4,ror#20	@ Sigma0(a)
604	and	r3,r3,r12			@ (b^c)&=(a^b)
605	add	r7,r7,r11			@ d+=h
606	eor	r3,r3,r5			@ Maj(a,b,c)
607	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
608	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
609#if __ARM_ARCH__>=7
610	@ ldr	r2,[r1],#4			@ 9
611# if 9==15
612	str	r1,[sp,#17*4]			@ make room for r1
613# endif
614	eor	r0,r7,r7,ror#5
615	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
616	eor	r0,r0,r7,ror#19	@ Sigma1(e)
617# ifndef __ARMEB__
618	rev	r2,r2
619# endif
620#else
621	@ ldrb	r2,[r1,#3]			@ 9
622	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
623	ldrb	r3,[r1,#2]
624	ldrb	r0,[r1,#1]
625	orr	r2,r2,r3,lsl#8
626	ldrb	r3,[r1],#4
627	orr	r2,r2,r0,lsl#16
628# if 9==15
629	str	r1,[sp,#17*4]			@ make room for r1
630# endif
631	eor	r0,r7,r7,ror#5
632	orr	r2,r2,r3,lsl#24
633	eor	r0,r0,r7,ror#19	@ Sigma1(e)
634#endif
635	ldr	r3,[r14],#4			@ *K256++
636	add	r10,r10,r2			@ h+=X[i]
637	str	r2,[sp,#9*4]
638	eor	r2,r8,r9
639	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
640	and	r2,r2,r7
641	add	r10,r10,r3			@ h+=K256[i]
642	eor	r2,r2,r9			@ Ch(e,f,g)
643	eor	r0,r11,r11,ror#11
644	add	r10,r10,r2			@ h+=Ch(e,f,g)
645#if 9==31
646	and	r3,r3,#0xff
647	cmp	r3,#0xf2			@ done?
648#endif
649#if 9<15
650# if __ARM_ARCH__>=7
651	ldr	r2,[r1],#4			@ prefetch
652# else
653	ldrb	r2,[r1,#3]
654# endif
655	eor	r3,r11,r4			@ a^b, b^c in next round
656#else
657	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
658	eor	r3,r11,r4			@ a^b, b^c in next round
659	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
660#endif
661	eor	r0,r0,r11,ror#20	@ Sigma0(a)
662	and	r12,r12,r3			@ (b^c)&=(a^b)
663	add	r6,r6,r10			@ d+=h
664	eor	r12,r12,r4			@ Maj(a,b,c)
665	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
666	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
667#if __ARM_ARCH__>=7
668	@ ldr	r2,[r1],#4			@ 10
669# if 10==15
670	str	r1,[sp,#17*4]			@ make room for r1
671# endif
672	eor	r0,r6,r6,ror#5
673	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
674	eor	r0,r0,r6,ror#19	@ Sigma1(e)
675# ifndef __ARMEB__
676	rev	r2,r2
677# endif
678#else
679	@ ldrb	r2,[r1,#3]			@ 10
680	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
681	ldrb	r12,[r1,#2]
682	ldrb	r0,[r1,#1]
683	orr	r2,r2,r12,lsl#8
684	ldrb	r12,[r1],#4
685	orr	r2,r2,r0,lsl#16
686# if 10==15
687	str	r1,[sp,#17*4]			@ make room for r1
688# endif
689	eor	r0,r6,r6,ror#5
690	orr	r2,r2,r12,lsl#24
691	eor	r0,r0,r6,ror#19	@ Sigma1(e)
692#endif
693	ldr	r12,[r14],#4			@ *K256++
694	add	r9,r9,r2			@ h+=X[i]
695	str	r2,[sp,#10*4]
696	eor	r2,r7,r8
697	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
698	and	r2,r2,r6
699	add	r9,r9,r12			@ h+=K256[i]
700	eor	r2,r2,r8			@ Ch(e,f,g)
701	eor	r0,r10,r10,ror#11
702	add	r9,r9,r2			@ h+=Ch(e,f,g)
703#if 10==31
704	and	r12,r12,#0xff
705	cmp	r12,#0xf2			@ done?
706#endif
707#if 10<15
708# if __ARM_ARCH__>=7
709	ldr	r2,[r1],#4			@ prefetch
710# else
711	ldrb	r2,[r1,#3]
712# endif
713	eor	r12,r10,r11			@ a^b, b^c in next round
714#else
715	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
716	eor	r12,r10,r11			@ a^b, b^c in next round
717	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
718#endif
719	eor	r0,r0,r10,ror#20	@ Sigma0(a)
720	and	r3,r3,r12			@ (b^c)&=(a^b)
721	add	r5,r5,r9			@ d+=h
722	eor	r3,r3,r11			@ Maj(a,b,c)
723	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
724	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
725#if __ARM_ARCH__>=7
726	@ ldr	r2,[r1],#4			@ 11
727# if 11==15
728	str	r1,[sp,#17*4]			@ make room for r1
729# endif
730	eor	r0,r5,r5,ror#5
731	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
732	eor	r0,r0,r5,ror#19	@ Sigma1(e)
733# ifndef __ARMEB__
734	rev	r2,r2
735# endif
736#else
737	@ ldrb	r2,[r1,#3]			@ 11
738	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
739	ldrb	r3,[r1,#2]
740	ldrb	r0,[r1,#1]
741	orr	r2,r2,r3,lsl#8
742	ldrb	r3,[r1],#4
743	orr	r2,r2,r0,lsl#16
744# if 11==15
745	str	r1,[sp,#17*4]			@ make room for r1
746# endif
747	eor	r0,r5,r5,ror#5
748	orr	r2,r2,r3,lsl#24
749	eor	r0,r0,r5,ror#19	@ Sigma1(e)
750#endif
751	ldr	r3,[r14],#4			@ *K256++
752	add	r8,r8,r2			@ h+=X[i]
753	str	r2,[sp,#11*4]
754	eor	r2,r6,r7
755	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
756	and	r2,r2,r5
757	add	r8,r8,r3			@ h+=K256[i]
758	eor	r2,r2,r7			@ Ch(e,f,g)
759	eor	r0,r9,r9,ror#11
760	add	r8,r8,r2			@ h+=Ch(e,f,g)
761#if 11==31
762	and	r3,r3,#0xff
763	cmp	r3,#0xf2			@ done?
764#endif
765#if 11<15
766# if __ARM_ARCH__>=7
767	ldr	r2,[r1],#4			@ prefetch
768# else
769	ldrb	r2,[r1,#3]
770# endif
771	eor	r3,r9,r10			@ a^b, b^c in next round
772#else
773	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
774	eor	r3,r9,r10			@ a^b, b^c in next round
775	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
776#endif
777	eor	r0,r0,r9,ror#20	@ Sigma0(a)
778	and	r12,r12,r3			@ (b^c)&=(a^b)
779	add	r4,r4,r8			@ d+=h
780	eor	r12,r12,r10			@ Maj(a,b,c)
781	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
782	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
783#if __ARM_ARCH__>=7
784	@ ldr	r2,[r1],#4			@ 12
785# if 12==15
786	str	r1,[sp,#17*4]			@ make room for r1
787# endif
788	eor	r0,r4,r4,ror#5
789	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
790	eor	r0,r0,r4,ror#19	@ Sigma1(e)
791# ifndef __ARMEB__
792	rev	r2,r2
793# endif
794#else
795	@ ldrb	r2,[r1,#3]			@ 12
796	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
797	ldrb	r12,[r1,#2]
798	ldrb	r0,[r1,#1]
799	orr	r2,r2,r12,lsl#8
800	ldrb	r12,[r1],#4
801	orr	r2,r2,r0,lsl#16
802# if 12==15
803	str	r1,[sp,#17*4]			@ make room for r1
804# endif
805	eor	r0,r4,r4,ror#5
806	orr	r2,r2,r12,lsl#24
807	eor	r0,r0,r4,ror#19	@ Sigma1(e)
808#endif
809	ldr	r12,[r14],#4			@ *K256++
810	add	r7,r7,r2			@ h+=X[i]
811	str	r2,[sp,#12*4]
812	eor	r2,r5,r6
813	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
814	and	r2,r2,r4
815	add	r7,r7,r12			@ h+=K256[i]
816	eor	r2,r2,r6			@ Ch(e,f,g)
817	eor	r0,r8,r8,ror#11
818	add	r7,r7,r2			@ h+=Ch(e,f,g)
819#if 12==31
820	and	r12,r12,#0xff
821	cmp	r12,#0xf2			@ done?
822#endif
823#if 12<15
824# if __ARM_ARCH__>=7
825	ldr	r2,[r1],#4			@ prefetch
826# else
827	ldrb	r2,[r1,#3]
828# endif
829	eor	r12,r8,r9			@ a^b, b^c in next round
830#else
831	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
832	eor	r12,r8,r9			@ a^b, b^c in next round
833	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
834#endif
835	eor	r0,r0,r8,ror#20	@ Sigma0(a)
836	and	r3,r3,r12			@ (b^c)&=(a^b)
837	add	r11,r11,r7			@ d+=h
838	eor	r3,r3,r9			@ Maj(a,b,c)
839	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
840	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
841#if __ARM_ARCH__>=7
842	@ ldr	r2,[r1],#4			@ 13
843# if 13==15
844	str	r1,[sp,#17*4]			@ make room for r1
845# endif
846	eor	r0,r11,r11,ror#5
847	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
848	eor	r0,r0,r11,ror#19	@ Sigma1(e)
849# ifndef __ARMEB__
850	rev	r2,r2
851# endif
852#else
853	@ ldrb	r2,[r1,#3]			@ 13
854	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
855	ldrb	r3,[r1,#2]
856	ldrb	r0,[r1,#1]
857	orr	r2,r2,r3,lsl#8
858	ldrb	r3,[r1],#4
859	orr	r2,r2,r0,lsl#16
860# if 13==15
861	str	r1,[sp,#17*4]			@ make room for r1
862# endif
863	eor	r0,r11,r11,ror#5
864	orr	r2,r2,r3,lsl#24
865	eor	r0,r0,r11,ror#19	@ Sigma1(e)
866#endif
867	ldr	r3,[r14],#4			@ *K256++
868	add	r6,r6,r2			@ h+=X[i]
869	str	r2,[sp,#13*4]
870	eor	r2,r4,r5
871	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
872	and	r2,r2,r11
873	add	r6,r6,r3			@ h+=K256[i]
874	eor	r2,r2,r5			@ Ch(e,f,g)
875	eor	r0,r7,r7,ror#11
876	add	r6,r6,r2			@ h+=Ch(e,f,g)
877#if 13==31
878	and	r3,r3,#0xff
879	cmp	r3,#0xf2			@ done?
880#endif
881#if 13<15
882# if __ARM_ARCH__>=7
883	ldr	r2,[r1],#4			@ prefetch
884# else
885	ldrb	r2,[r1,#3]
886# endif
887	eor	r3,r7,r8			@ a^b, b^c in next round
888#else
889	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
890	eor	r3,r7,r8			@ a^b, b^c in next round
891	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
892#endif
893	eor	r0,r0,r7,ror#20	@ Sigma0(a)
894	and	r12,r12,r3			@ (b^c)&=(a^b)
895	add	r10,r10,r6			@ d+=h
896	eor	r12,r12,r8			@ Maj(a,b,c)
897	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
898	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
899#if __ARM_ARCH__>=7
900	@ ldr	r2,[r1],#4			@ 14
901# if 14==15
902	str	r1,[sp,#17*4]			@ make room for r1
903# endif
904	eor	r0,r10,r10,ror#5
905	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
906	eor	r0,r0,r10,ror#19	@ Sigma1(e)
907# ifndef __ARMEB__
908	rev	r2,r2
909# endif
910#else
911	@ ldrb	r2,[r1,#3]			@ 14
912	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
913	ldrb	r12,[r1,#2]
914	ldrb	r0,[r1,#1]
915	orr	r2,r2,r12,lsl#8
916	ldrb	r12,[r1],#4
917	orr	r2,r2,r0,lsl#16
918# if 14==15
919	str	r1,[sp,#17*4]			@ make room for r1
920# endif
921	eor	r0,r10,r10,ror#5
922	orr	r2,r2,r12,lsl#24
923	eor	r0,r0,r10,ror#19	@ Sigma1(e)
924#endif
925	ldr	r12,[r14],#4			@ *K256++
926	add	r5,r5,r2			@ h+=X[i]
927	str	r2,[sp,#14*4]
928	eor	r2,r11,r4
929	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
930	and	r2,r2,r10
931	add	r5,r5,r12			@ h+=K256[i]
932	eor	r2,r2,r4			@ Ch(e,f,g)
933	eor	r0,r6,r6,ror#11
934	add	r5,r5,r2			@ h+=Ch(e,f,g)
935#if 14==31
936	and	r12,r12,#0xff
937	cmp	r12,#0xf2			@ done?
938#endif
939#if 14<15
940# if __ARM_ARCH__>=7
941	ldr	r2,[r1],#4			@ prefetch
942# else
943	ldrb	r2,[r1,#3]
944# endif
945	eor	r12,r6,r7			@ a^b, b^c in next round
946#else
947	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
948	eor	r12,r6,r7			@ a^b, b^c in next round
949	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
950#endif
951	eor	r0,r0,r6,ror#20	@ Sigma0(a)
952	and	r3,r3,r12			@ (b^c)&=(a^b)
953	add	r9,r9,r5			@ d+=h
954	eor	r3,r3,r7			@ Maj(a,b,c)
955	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
956	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
957#if __ARM_ARCH__>=7
958	@ ldr	r2,[r1],#4			@ 15
959# if 15==15
960	str	r1,[sp,#17*4]			@ make room for r1
961# endif
962	eor	r0,r9,r9,ror#5
963	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
964	eor	r0,r0,r9,ror#19	@ Sigma1(e)
965# ifndef __ARMEB__
966	rev	r2,r2
967# endif
968#else
969	@ ldrb	r2,[r1,#3]			@ 15
970	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
971	ldrb	r3,[r1,#2]
972	ldrb	r0,[r1,#1]
973	orr	r2,r2,r3,lsl#8
974	ldrb	r3,[r1],#4
975	orr	r2,r2,r0,lsl#16
976# if 15==15
977	str	r1,[sp,#17*4]			@ make room for r1
978# endif
979	eor	r0,r9,r9,ror#5
980	orr	r2,r2,r3,lsl#24
981	eor	r0,r0,r9,ror#19	@ Sigma1(e)
982#endif
983	ldr	r3,[r14],#4			@ *K256++
984	add	r4,r4,r2			@ h+=X[i]
985	str	r2,[sp,#15*4]
986	eor	r2,r10,r11
987	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
988	and	r2,r2,r9
989	add	r4,r4,r3			@ h+=K256[i]
990	eor	r2,r2,r11			@ Ch(e,f,g)
991	eor	r0,r5,r5,ror#11
992	add	r4,r4,r2			@ h+=Ch(e,f,g)
993#if 15==31
994	and	r3,r3,#0xff
995	cmp	r3,#0xf2			@ done?
996#endif
997#if 15<15
998# if __ARM_ARCH__>=7
999	ldr	r2,[r1],#4			@ prefetch
1000# else
1001	ldrb	r2,[r1,#3]
1002# endif
1003	eor	r3,r5,r6			@ a^b, b^c in next round
1004#else
1005	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1006	eor	r3,r5,r6			@ a^b, b^c in next round
1007	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1008#endif
1009	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1010	and	r12,r12,r3			@ (b^c)&=(a^b)
1011	add	r8,r8,r4			@ d+=h
1012	eor	r12,r12,r6			@ Maj(a,b,c)
1013	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1014	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1015.Lrounds_16_xx:
1016	@ ldr	r2,[sp,#1*4]		@ 16
1017	@ ldr	r1,[sp,#14*4]
1018	mov	r0,r2,ror#7
1019	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1020	mov	r12,r1,ror#17
1021	eor	r0,r0,r2,ror#18
1022	eor	r12,r12,r1,ror#19
1023	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1024	ldr	r2,[sp,#0*4]
1025	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1026	ldr	r1,[sp,#9*4]
1027
1028	add	r12,r12,r0
1029	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1030	add	r2,r2,r12
1031	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1032	add	r2,r2,r1			@ X[i]
1033	ldr	r12,[r14],#4			@ *K256++
1034	add	r11,r11,r2			@ h+=X[i]
1035	str	r2,[sp,#0*4]
1036	eor	r2,r9,r10
1037	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1038	and	r2,r2,r8
1039	add	r11,r11,r12			@ h+=K256[i]
1040	eor	r2,r2,r10			@ Ch(e,f,g)
1041	eor	r0,r4,r4,ror#11
1042	add	r11,r11,r2			@ h+=Ch(e,f,g)
1043#if 16==31
1044	and	r12,r12,#0xff
1045	cmp	r12,#0xf2			@ done?
1046#endif
1047#if 16<15
1048# if __ARM_ARCH__>=7
1049	ldr	r2,[r1],#4			@ prefetch
1050# else
1051	ldrb	r2,[r1,#3]
1052# endif
1053	eor	r12,r4,r5			@ a^b, b^c in next round
1054#else
1055	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
1056	eor	r12,r4,r5			@ a^b, b^c in next round
1057	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
1058#endif
1059	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1060	and	r3,r3,r12			@ (b^c)&=(a^b)
1061	add	r7,r7,r11			@ d+=h
1062	eor	r3,r3,r5			@ Maj(a,b,c)
1063	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1064	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1065	@ ldr	r2,[sp,#2*4]		@ 17
1066	@ ldr	r1,[sp,#15*4]
1067	mov	r0,r2,ror#7
1068	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1069	mov	r3,r1,ror#17
1070	eor	r0,r0,r2,ror#18
1071	eor	r3,r3,r1,ror#19
1072	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1073	ldr	r2,[sp,#1*4]
1074	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1075	ldr	r1,[sp,#10*4]
1076
1077	add	r3,r3,r0
1078	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1079	add	r2,r2,r3
1080	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1081	add	r2,r2,r1			@ X[i]
1082	ldr	r3,[r14],#4			@ *K256++
1083	add	r10,r10,r2			@ h+=X[i]
1084	str	r2,[sp,#1*4]
1085	eor	r2,r8,r9
1086	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1087	and	r2,r2,r7
1088	add	r10,r10,r3			@ h+=K256[i]
1089	eor	r2,r2,r9			@ Ch(e,f,g)
1090	eor	r0,r11,r11,ror#11
1091	add	r10,r10,r2			@ h+=Ch(e,f,g)
1092#if 17==31
1093	and	r3,r3,#0xff
1094	cmp	r3,#0xf2			@ done?
1095#endif
1096#if 17<15
1097# if __ARM_ARCH__>=7
1098	ldr	r2,[r1],#4			@ prefetch
1099# else
1100	ldrb	r2,[r1,#3]
1101# endif
1102	eor	r3,r11,r4			@ a^b, b^c in next round
1103#else
1104	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
1105	eor	r3,r11,r4			@ a^b, b^c in next round
1106	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
1107#endif
1108	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1109	and	r12,r12,r3			@ (b^c)&=(a^b)
1110	add	r6,r6,r10			@ d+=h
1111	eor	r12,r12,r4			@ Maj(a,b,c)
1112	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1113	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1114	@ ldr	r2,[sp,#3*4]		@ 18
1115	@ ldr	r1,[sp,#0*4]
1116	mov	r0,r2,ror#7
1117	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1118	mov	r12,r1,ror#17
1119	eor	r0,r0,r2,ror#18
1120	eor	r12,r12,r1,ror#19
1121	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1122	ldr	r2,[sp,#2*4]
1123	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1124	ldr	r1,[sp,#11*4]
1125
1126	add	r12,r12,r0
1127	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1128	add	r2,r2,r12
1129	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1130	add	r2,r2,r1			@ X[i]
1131	ldr	r12,[r14],#4			@ *K256++
1132	add	r9,r9,r2			@ h+=X[i]
1133	str	r2,[sp,#2*4]
1134	eor	r2,r7,r8
1135	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1136	and	r2,r2,r6
1137	add	r9,r9,r12			@ h+=K256[i]
1138	eor	r2,r2,r8			@ Ch(e,f,g)
1139	eor	r0,r10,r10,ror#11
1140	add	r9,r9,r2			@ h+=Ch(e,f,g)
1141#if 18==31
1142	and	r12,r12,#0xff
1143	cmp	r12,#0xf2			@ done?
1144#endif
1145#if 18<15
1146# if __ARM_ARCH__>=7
1147	ldr	r2,[r1],#4			@ prefetch
1148# else
1149	ldrb	r2,[r1,#3]
1150# endif
1151	eor	r12,r10,r11			@ a^b, b^c in next round
1152#else
1153	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
1154	eor	r12,r10,r11			@ a^b, b^c in next round
1155	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
1156#endif
1157	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1158	and	r3,r3,r12			@ (b^c)&=(a^b)
1159	add	r5,r5,r9			@ d+=h
1160	eor	r3,r3,r11			@ Maj(a,b,c)
1161	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1162	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1163	@ ldr	r2,[sp,#4*4]		@ 19
1164	@ ldr	r1,[sp,#1*4]
1165	mov	r0,r2,ror#7
1166	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1167	mov	r3,r1,ror#17
1168	eor	r0,r0,r2,ror#18
1169	eor	r3,r3,r1,ror#19
1170	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1171	ldr	r2,[sp,#3*4]
1172	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1173	ldr	r1,[sp,#12*4]
1174
1175	add	r3,r3,r0
1176	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1177	add	r2,r2,r3
1178	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1179	add	r2,r2,r1			@ X[i]
1180	ldr	r3,[r14],#4			@ *K256++
1181	add	r8,r8,r2			@ h+=X[i]
1182	str	r2,[sp,#3*4]
1183	eor	r2,r6,r7
1184	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1185	and	r2,r2,r5
1186	add	r8,r8,r3			@ h+=K256[i]
1187	eor	r2,r2,r7			@ Ch(e,f,g)
1188	eor	r0,r9,r9,ror#11
1189	add	r8,r8,r2			@ h+=Ch(e,f,g)
1190#if 19==31
1191	and	r3,r3,#0xff
1192	cmp	r3,#0xf2			@ done?
1193#endif
1194#if 19<15
1195# if __ARM_ARCH__>=7
1196	ldr	r2,[r1],#4			@ prefetch
1197# else
1198	ldrb	r2,[r1,#3]
1199# endif
1200	eor	r3,r9,r10			@ a^b, b^c in next round
1201#else
1202	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
1203	eor	r3,r9,r10			@ a^b, b^c in next round
1204	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
1205#endif
1206	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1207	and	r12,r12,r3			@ (b^c)&=(a^b)
1208	add	r4,r4,r8			@ d+=h
1209	eor	r12,r12,r10			@ Maj(a,b,c)
1210	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1211	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1212	@ ldr	r2,[sp,#5*4]		@ 20
1213	@ ldr	r1,[sp,#2*4]
1214	mov	r0,r2,ror#7
1215	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1216	mov	r12,r1,ror#17
1217	eor	r0,r0,r2,ror#18
1218	eor	r12,r12,r1,ror#19
1219	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1220	ldr	r2,[sp,#4*4]
1221	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1222	ldr	r1,[sp,#13*4]
1223
1224	add	r12,r12,r0
1225	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1226	add	r2,r2,r12
1227	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1228	add	r2,r2,r1			@ X[i]
1229	ldr	r12,[r14],#4			@ *K256++
1230	add	r7,r7,r2			@ h+=X[i]
1231	str	r2,[sp,#4*4]
1232	eor	r2,r5,r6
1233	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1234	and	r2,r2,r4
1235	add	r7,r7,r12			@ h+=K256[i]
1236	eor	r2,r2,r6			@ Ch(e,f,g)
1237	eor	r0,r8,r8,ror#11
1238	add	r7,r7,r2			@ h+=Ch(e,f,g)
1239#if 20==31
1240	and	r12,r12,#0xff
1241	cmp	r12,#0xf2			@ done?
1242#endif
1243#if 20<15
1244# if __ARM_ARCH__>=7
1245	ldr	r2,[r1],#4			@ prefetch
1246# else
1247	ldrb	r2,[r1,#3]
1248# endif
1249	eor	r12,r8,r9			@ a^b, b^c in next round
1250#else
1251	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
1252	eor	r12,r8,r9			@ a^b, b^c in next round
1253	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
1254#endif
1255	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1256	and	r3,r3,r12			@ (b^c)&=(a^b)
1257	add	r11,r11,r7			@ d+=h
1258	eor	r3,r3,r9			@ Maj(a,b,c)
1259	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1260	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1261	@ ldr	r2,[sp,#6*4]		@ 21
1262	@ ldr	r1,[sp,#3*4]
1263	mov	r0,r2,ror#7
1264	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1265	mov	r3,r1,ror#17
1266	eor	r0,r0,r2,ror#18
1267	eor	r3,r3,r1,ror#19
1268	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1269	ldr	r2,[sp,#5*4]
1270	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1271	ldr	r1,[sp,#14*4]
1272
1273	add	r3,r3,r0
1274	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1275	add	r2,r2,r3
1276	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1277	add	r2,r2,r1			@ X[i]
1278	ldr	r3,[r14],#4			@ *K256++
1279	add	r6,r6,r2			@ h+=X[i]
1280	str	r2,[sp,#5*4]
1281	eor	r2,r4,r5
1282	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1283	and	r2,r2,r11
1284	add	r6,r6,r3			@ h+=K256[i]
1285	eor	r2,r2,r5			@ Ch(e,f,g)
1286	eor	r0,r7,r7,ror#11
1287	add	r6,r6,r2			@ h+=Ch(e,f,g)
1288#if 21==31
1289	and	r3,r3,#0xff
1290	cmp	r3,#0xf2			@ done?
1291#endif
1292#if 21<15
1293# if __ARM_ARCH__>=7
1294	ldr	r2,[r1],#4			@ prefetch
1295# else
1296	ldrb	r2,[r1,#3]
1297# endif
1298	eor	r3,r7,r8			@ a^b, b^c in next round
1299#else
1300	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
1301	eor	r3,r7,r8			@ a^b, b^c in next round
1302	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
1303#endif
1304	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1305	and	r12,r12,r3			@ (b^c)&=(a^b)
1306	add	r10,r10,r6			@ d+=h
1307	eor	r12,r12,r8			@ Maj(a,b,c)
1308	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1309	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1310	@ ldr	r2,[sp,#7*4]		@ 22
1311	@ ldr	r1,[sp,#4*4]
1312	mov	r0,r2,ror#7
1313	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1314	mov	r12,r1,ror#17
1315	eor	r0,r0,r2,ror#18
1316	eor	r12,r12,r1,ror#19
1317	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1318	ldr	r2,[sp,#6*4]
1319	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1320	ldr	r1,[sp,#15*4]
1321
1322	add	r12,r12,r0
1323	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1324	add	r2,r2,r12
1325	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1326	add	r2,r2,r1			@ X[i]
1327	ldr	r12,[r14],#4			@ *K256++
1328	add	r5,r5,r2			@ h+=X[i]
1329	str	r2,[sp,#6*4]
1330	eor	r2,r11,r4
1331	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1332	and	r2,r2,r10
1333	add	r5,r5,r12			@ h+=K256[i]
1334	eor	r2,r2,r4			@ Ch(e,f,g)
1335	eor	r0,r6,r6,ror#11
1336	add	r5,r5,r2			@ h+=Ch(e,f,g)
1337#if 22==31
1338	and	r12,r12,#0xff
1339	cmp	r12,#0xf2			@ done?
1340#endif
1341#if 22<15
1342# if __ARM_ARCH__>=7
1343	ldr	r2,[r1],#4			@ prefetch
1344# else
1345	ldrb	r2,[r1,#3]
1346# endif
1347	eor	r12,r6,r7			@ a^b, b^c in next round
1348#else
1349	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
1350	eor	r12,r6,r7			@ a^b, b^c in next round
1351	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
1352#endif
1353	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1354	and	r3,r3,r12			@ (b^c)&=(a^b)
1355	add	r9,r9,r5			@ d+=h
1356	eor	r3,r3,r7			@ Maj(a,b,c)
1357	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1358	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1359	@ ldr	r2,[sp,#8*4]		@ 23
1360	@ ldr	r1,[sp,#5*4]
1361	mov	r0,r2,ror#7
1362	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1363	mov	r3,r1,ror#17
1364	eor	r0,r0,r2,ror#18
1365	eor	r3,r3,r1,ror#19
1366	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1367	ldr	r2,[sp,#7*4]
1368	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1369	ldr	r1,[sp,#0*4]
1370
1371	add	r3,r3,r0
1372	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1373	add	r2,r2,r3
1374	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1375	add	r2,r2,r1			@ X[i]
1376	ldr	r3,[r14],#4			@ *K256++
1377	add	r4,r4,r2			@ h+=X[i]
1378	str	r2,[sp,#7*4]
1379	eor	r2,r10,r11
1380	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1381	and	r2,r2,r9
1382	add	r4,r4,r3			@ h+=K256[i]
1383	eor	r2,r2,r11			@ Ch(e,f,g)
1384	eor	r0,r5,r5,ror#11
1385	add	r4,r4,r2			@ h+=Ch(e,f,g)
1386#if 23==31
1387	and	r3,r3,#0xff
1388	cmp	r3,#0xf2			@ done?
1389#endif
1390#if 23<15
1391# if __ARM_ARCH__>=7
1392	ldr	r2,[r1],#4			@ prefetch
1393# else
1394	ldrb	r2,[r1,#3]
1395# endif
1396	eor	r3,r5,r6			@ a^b, b^c in next round
1397#else
1398	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
1399	eor	r3,r5,r6			@ a^b, b^c in next round
1400	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
1401#endif
1402	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1403	and	r12,r12,r3			@ (b^c)&=(a^b)
1404	add	r8,r8,r4			@ d+=h
1405	eor	r12,r12,r6			@ Maj(a,b,c)
1406	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1407	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1408	@ ldr	r2,[sp,#9*4]		@ 24
1409	@ ldr	r1,[sp,#6*4]
1410	mov	r0,r2,ror#7
1411	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
1412	mov	r12,r1,ror#17
1413	eor	r0,r0,r2,ror#18
1414	eor	r12,r12,r1,ror#19
1415	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1416	ldr	r2,[sp,#8*4]
1417	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1418	ldr	r1,[sp,#1*4]
1419
1420	add	r12,r12,r0
1421	eor	r0,r8,r8,ror#5	@ from BODY_00_15
1422	add	r2,r2,r12
1423	eor	r0,r0,r8,ror#19	@ Sigma1(e)
1424	add	r2,r2,r1			@ X[i]
1425	ldr	r12,[r14],#4			@ *K256++
1426	add	r11,r11,r2			@ h+=X[i]
1427	str	r2,[sp,#8*4]
1428	eor	r2,r9,r10
1429	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
1430	and	r2,r2,r8
1431	add	r11,r11,r12			@ h+=K256[i]
1432	eor	r2,r2,r10			@ Ch(e,f,g)
1433	eor	r0,r4,r4,ror#11
1434	add	r11,r11,r2			@ h+=Ch(e,f,g)
1435#if 24==31
1436	and	r12,r12,#0xff
1437	cmp	r12,#0xf2			@ done?
1438#endif
1439#if 24<15
1440# if __ARM_ARCH__>=7
1441	ldr	r2,[r1],#4			@ prefetch
1442# else
1443	ldrb	r2,[r1,#3]
1444# endif
1445	eor	r12,r4,r5			@ a^b, b^c in next round
1446#else
1447	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
1448	eor	r12,r4,r5			@ a^b, b^c in next round
1449	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
1450#endif
1451	eor	r0,r0,r4,ror#20	@ Sigma0(a)
1452	and	r3,r3,r12			@ (b^c)&=(a^b)
1453	add	r7,r7,r11			@ d+=h
1454	eor	r3,r3,r5			@ Maj(a,b,c)
1455	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
1456	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
1457	@ ldr	r2,[sp,#10*4]		@ 25
1458	@ ldr	r1,[sp,#7*4]
1459	mov	r0,r2,ror#7
1460	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
1461	mov	r3,r1,ror#17
1462	eor	r0,r0,r2,ror#18
1463	eor	r3,r3,r1,ror#19
1464	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1465	ldr	r2,[sp,#9*4]
1466	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1467	ldr	r1,[sp,#2*4]
1468
1469	add	r3,r3,r0
1470	eor	r0,r7,r7,ror#5	@ from BODY_00_15
1471	add	r2,r2,r3
1472	eor	r0,r0,r7,ror#19	@ Sigma1(e)
1473	add	r2,r2,r1			@ X[i]
1474	ldr	r3,[r14],#4			@ *K256++
1475	add	r10,r10,r2			@ h+=X[i]
1476	str	r2,[sp,#9*4]
1477	eor	r2,r8,r9
1478	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
1479	and	r2,r2,r7
1480	add	r10,r10,r3			@ h+=K256[i]
1481	eor	r2,r2,r9			@ Ch(e,f,g)
1482	eor	r0,r11,r11,ror#11
1483	add	r10,r10,r2			@ h+=Ch(e,f,g)
1484#if 25==31
1485	and	r3,r3,#0xff
1486	cmp	r3,#0xf2			@ done?
1487#endif
1488#if 25<15
1489# if __ARM_ARCH__>=7
1490	ldr	r2,[r1],#4			@ prefetch
1491# else
1492	ldrb	r2,[r1,#3]
1493# endif
1494	eor	r3,r11,r4			@ a^b, b^c in next round
1495#else
1496	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
1497	eor	r3,r11,r4			@ a^b, b^c in next round
1498	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
1499#endif
1500	eor	r0,r0,r11,ror#20	@ Sigma0(a)
1501	and	r12,r12,r3			@ (b^c)&=(a^b)
1502	add	r6,r6,r10			@ d+=h
1503	eor	r12,r12,r4			@ Maj(a,b,c)
1504	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
1505	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
1506	@ ldr	r2,[sp,#11*4]		@ 26
1507	@ ldr	r1,[sp,#8*4]
1508	mov	r0,r2,ror#7
1509	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
1510	mov	r12,r1,ror#17
1511	eor	r0,r0,r2,ror#18
1512	eor	r12,r12,r1,ror#19
1513	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1514	ldr	r2,[sp,#10*4]
1515	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1516	ldr	r1,[sp,#3*4]
1517
1518	add	r12,r12,r0
1519	eor	r0,r6,r6,ror#5	@ from BODY_00_15
1520	add	r2,r2,r12
1521	eor	r0,r0,r6,ror#19	@ Sigma1(e)
1522	add	r2,r2,r1			@ X[i]
1523	ldr	r12,[r14],#4			@ *K256++
1524	add	r9,r9,r2			@ h+=X[i]
1525	str	r2,[sp,#10*4]
1526	eor	r2,r7,r8
1527	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
1528	and	r2,r2,r6
1529	add	r9,r9,r12			@ h+=K256[i]
1530	eor	r2,r2,r8			@ Ch(e,f,g)
1531	eor	r0,r10,r10,ror#11
1532	add	r9,r9,r2			@ h+=Ch(e,f,g)
1533#if 26==31
1534	and	r12,r12,#0xff
1535	cmp	r12,#0xf2			@ done?
1536#endif
1537#if 26<15
1538# if __ARM_ARCH__>=7
1539	ldr	r2,[r1],#4			@ prefetch
1540# else
1541	ldrb	r2,[r1,#3]
1542# endif
1543	eor	r12,r10,r11			@ a^b, b^c in next round
1544#else
1545	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
1546	eor	r12,r10,r11			@ a^b, b^c in next round
1547	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
1548#endif
1549	eor	r0,r0,r10,ror#20	@ Sigma0(a)
1550	and	r3,r3,r12			@ (b^c)&=(a^b)
1551	add	r5,r5,r9			@ d+=h
1552	eor	r3,r3,r11			@ Maj(a,b,c)
1553	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
1554	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
1555	@ ldr	r2,[sp,#12*4]		@ 27
1556	@ ldr	r1,[sp,#9*4]
1557	mov	r0,r2,ror#7
1558	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
1559	mov	r3,r1,ror#17
1560	eor	r0,r0,r2,ror#18
1561	eor	r3,r3,r1,ror#19
1562	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1563	ldr	r2,[sp,#11*4]
1564	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1565	ldr	r1,[sp,#4*4]
1566
1567	add	r3,r3,r0
1568	eor	r0,r5,r5,ror#5	@ from BODY_00_15
1569	add	r2,r2,r3
1570	eor	r0,r0,r5,ror#19	@ Sigma1(e)
1571	add	r2,r2,r1			@ X[i]
1572	ldr	r3,[r14],#4			@ *K256++
1573	add	r8,r8,r2			@ h+=X[i]
1574	str	r2,[sp,#11*4]
1575	eor	r2,r6,r7
1576	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
1577	and	r2,r2,r5
1578	add	r8,r8,r3			@ h+=K256[i]
1579	eor	r2,r2,r7			@ Ch(e,f,g)
1580	eor	r0,r9,r9,ror#11
1581	add	r8,r8,r2			@ h+=Ch(e,f,g)
1582#if 27==31
1583	and	r3,r3,#0xff
1584	cmp	r3,#0xf2			@ done?
1585#endif
1586#if 27<15
1587# if __ARM_ARCH__>=7
1588	ldr	r2,[r1],#4			@ prefetch
1589# else
1590	ldrb	r2,[r1,#3]
1591# endif
1592	eor	r3,r9,r10			@ a^b, b^c in next round
1593#else
1594	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
1595	eor	r3,r9,r10			@ a^b, b^c in next round
1596	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
1597#endif
1598	eor	r0,r0,r9,ror#20	@ Sigma0(a)
1599	and	r12,r12,r3			@ (b^c)&=(a^b)
1600	add	r4,r4,r8			@ d+=h
1601	eor	r12,r12,r10			@ Maj(a,b,c)
1602	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
1603	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
1604	@ ldr	r2,[sp,#13*4]		@ 28
1605	@ ldr	r1,[sp,#10*4]
1606	mov	r0,r2,ror#7
1607	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
1608	mov	r12,r1,ror#17
1609	eor	r0,r0,r2,ror#18
1610	eor	r12,r12,r1,ror#19
1611	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1612	ldr	r2,[sp,#12*4]
1613	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1614	ldr	r1,[sp,#5*4]
1615
1616	add	r12,r12,r0
1617	eor	r0,r4,r4,ror#5	@ from BODY_00_15
1618	add	r2,r2,r12
1619	eor	r0,r0,r4,ror#19	@ Sigma1(e)
1620	add	r2,r2,r1			@ X[i]
1621	ldr	r12,[r14],#4			@ *K256++
1622	add	r7,r7,r2			@ h+=X[i]
1623	str	r2,[sp,#12*4]
1624	eor	r2,r5,r6
1625	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
1626	and	r2,r2,r4
1627	add	r7,r7,r12			@ h+=K256[i]
1628	eor	r2,r2,r6			@ Ch(e,f,g)
1629	eor	r0,r8,r8,ror#11
1630	add	r7,r7,r2			@ h+=Ch(e,f,g)
1631#if 28==31
1632	and	r12,r12,#0xff
1633	cmp	r12,#0xf2			@ done?
1634#endif
1635#if 28<15
1636# if __ARM_ARCH__>=7
1637	ldr	r2,[r1],#4			@ prefetch
1638# else
1639	ldrb	r2,[r1,#3]
1640# endif
1641	eor	r12,r8,r9			@ a^b, b^c in next round
1642#else
1643	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
1644	eor	r12,r8,r9			@ a^b, b^c in next round
1645	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
1646#endif
1647	eor	r0,r0,r8,ror#20	@ Sigma0(a)
1648	and	r3,r3,r12			@ (b^c)&=(a^b)
1649	add	r11,r11,r7			@ d+=h
1650	eor	r3,r3,r9			@ Maj(a,b,c)
1651	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
1652	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
1653	@ ldr	r2,[sp,#14*4]		@ 29
1654	@ ldr	r1,[sp,#11*4]
1655	mov	r0,r2,ror#7
1656	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
1657	mov	r3,r1,ror#17
1658	eor	r0,r0,r2,ror#18
1659	eor	r3,r3,r1,ror#19
1660	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1661	ldr	r2,[sp,#13*4]
1662	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1663	ldr	r1,[sp,#6*4]
1664
1665	add	r3,r3,r0
1666	eor	r0,r11,r11,ror#5	@ from BODY_00_15
1667	add	r2,r2,r3
1668	eor	r0,r0,r11,ror#19	@ Sigma1(e)
1669	add	r2,r2,r1			@ X[i]
1670	ldr	r3,[r14],#4			@ *K256++
1671	add	r6,r6,r2			@ h+=X[i]
1672	str	r2,[sp,#13*4]
1673	eor	r2,r4,r5
1674	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
1675	and	r2,r2,r11
1676	add	r6,r6,r3			@ h+=K256[i]
1677	eor	r2,r2,r5			@ Ch(e,f,g)
1678	eor	r0,r7,r7,ror#11
1679	add	r6,r6,r2			@ h+=Ch(e,f,g)
1680#if 29==31
1681	and	r3,r3,#0xff
1682	cmp	r3,#0xf2			@ done?
1683#endif
1684#if 29<15
1685# if __ARM_ARCH__>=7
1686	ldr	r2,[r1],#4			@ prefetch
1687# else
1688	ldrb	r2,[r1,#3]
1689# endif
1690	eor	r3,r7,r8			@ a^b, b^c in next round
1691#else
1692	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
1693	eor	r3,r7,r8			@ a^b, b^c in next round
1694	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
1695#endif
1696	eor	r0,r0,r7,ror#20	@ Sigma0(a)
1697	and	r12,r12,r3			@ (b^c)&=(a^b)
1698	add	r10,r10,r6			@ d+=h
1699	eor	r12,r12,r8			@ Maj(a,b,c)
1700	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
1701	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
1702	@ ldr	r2,[sp,#15*4]		@ 30
1703	@ ldr	r1,[sp,#12*4]
1704	mov	r0,r2,ror#7
1705	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
1706	mov	r12,r1,ror#17
1707	eor	r0,r0,r2,ror#18
1708	eor	r12,r12,r1,ror#19
1709	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1710	ldr	r2,[sp,#14*4]
1711	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
1712	ldr	r1,[sp,#7*4]
1713
1714	add	r12,r12,r0
1715	eor	r0,r10,r10,ror#5	@ from BODY_00_15
1716	add	r2,r2,r12
1717	eor	r0,r0,r10,ror#19	@ Sigma1(e)
1718	add	r2,r2,r1			@ X[i]
1719	ldr	r12,[r14],#4			@ *K256++
1720	add	r5,r5,r2			@ h+=X[i]
1721	str	r2,[sp,#14*4]
1722	eor	r2,r11,r4
1723	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
1724	and	r2,r2,r10
1725	add	r5,r5,r12			@ h+=K256[i]
1726	eor	r2,r2,r4			@ Ch(e,f,g)
1727	eor	r0,r6,r6,ror#11
1728	add	r5,r5,r2			@ h+=Ch(e,f,g)
1729#if 30==31
1730	and	r12,r12,#0xff
1731	cmp	r12,#0xf2			@ done?
1732#endif
1733#if 30<15
1734# if __ARM_ARCH__>=7
1735	ldr	r2,[r1],#4			@ prefetch
1736# else
1737	ldrb	r2,[r1,#3]
1738# endif
1739	eor	r12,r6,r7			@ a^b, b^c in next round
1740#else
1741	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
1742	eor	r12,r6,r7			@ a^b, b^c in next round
1743	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
1744#endif
1745	eor	r0,r0,r6,ror#20	@ Sigma0(a)
1746	and	r3,r3,r12			@ (b^c)&=(a^b)
1747	add	r9,r9,r5			@ d+=h
1748	eor	r3,r3,r7			@ Maj(a,b,c)
1749	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
1750	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
1751	@ ldr	r2,[sp,#0*4]		@ 31
1752	@ ldr	r1,[sp,#13*4]
1753	mov	r0,r2,ror#7
1754	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
1755	mov	r3,r1,ror#17
1756	eor	r0,r0,r2,ror#18
1757	eor	r3,r3,r1,ror#19
1758	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
1759	ldr	r2,[sp,#15*4]
1760	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
1761	ldr	r1,[sp,#8*4]
1762
1763	add	r3,r3,r0
1764	eor	r0,r9,r9,ror#5	@ from BODY_00_15
1765	add	r2,r2,r3
1766	eor	r0,r0,r9,ror#19	@ Sigma1(e)
1767	add	r2,r2,r1			@ X[i]
1768	ldr	r3,[r14],#4			@ *K256++
1769	add	r4,r4,r2			@ h+=X[i]
1770	str	r2,[sp,#15*4]
1771	eor	r2,r10,r11
1772	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
1773	and	r2,r2,r9
1774	add	r4,r4,r3			@ h+=K256[i]
1775	eor	r2,r2,r11			@ Ch(e,f,g)
1776	eor	r0,r5,r5,ror#11
1777	add	r4,r4,r2			@ h+=Ch(e,f,g)
1778#if 31==31
1779	and	r3,r3,#0xff
1780	cmp	r3,#0xf2			@ done?
1781#endif
1782#if 31<15
1783# if __ARM_ARCH__>=7
1784	ldr	r2,[r1],#4			@ prefetch
1785# else
1786	ldrb	r2,[r1,#3]
1787# endif
1788	eor	r3,r5,r6			@ a^b, b^c in next round
1789#else
1790	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
1791	eor	r3,r5,r6			@ a^b, b^c in next round
1792	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
1793#endif
1794	eor	r0,r0,r5,ror#20	@ Sigma0(a)
1795	and	r12,r12,r3			@ (b^c)&=(a^b)
1796	add	r8,r8,r4			@ d+=h
1797	eor	r12,r12,r6			@ Maj(a,b,c)
1798	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
1799	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
1800#ifdef	__thumb2__
1801	ite	eq			@ Thumb2 thing, sanity check in ARM
1802#endif
1803	ldreq	r3,[sp,#16*4]		@ pull ctx
1804	bne	.Lrounds_16_xx
1805
1806	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
1807	ldr	r0,[r3,#0]
1808	ldr	r2,[r3,#4]
1809	ldr	r12,[r3,#8]
1810	add	r4,r4,r0
1811	ldr	r0,[r3,#12]
1812	add	r5,r5,r2
1813	ldr	r2,[r3,#16]
1814	add	r6,r6,r12
1815	ldr	r12,[r3,#20]
1816	add	r7,r7,r0
1817	ldr	r0,[r3,#24]
1818	add	r8,r8,r2
1819	ldr	r2,[r3,#28]
1820	add	r9,r9,r12
1821	ldr	r1,[sp,#17*4]		@ pull inp
1822	ldr	r12,[sp,#18*4]		@ pull inp+len
1823	add	r10,r10,r0
1824	add	r11,r11,r2
1825	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
1826	cmp	r1,r12
1827	sub	r14,r14,#256	@ rewind Ktbl
1828	bne	.Loop
1829
1830	add	sp,sp,#19*4	@ destroy frame
1831#if __ARM_ARCH__>=5
1832	ldmia	sp!,{r4-r11,pc}
1833#else
1834	ldmia	sp!,{r4-r11,lr}
1835	tst	lr,#1
1836	moveq	pc,lr			@ be binary compatible with V4, yet
1837	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
1838#endif
1839.size	zfs_sha256_block_armv7,.-zfs_sha256_block_armv7
1840
1841#if __ARM_ARCH__ >= 7
1842.arch	armv7-a
1843.fpu	neon
1844
1845.globl	zfs_sha256_block_neon
1846.type	zfs_sha256_block_neon,%function
1847.align	5
1848.skip	16
1849zfs_sha256_block_neon:
1850.LNEON:
1851	stmdb	sp!,{r4-r12,lr}
1852
1853	sub	r11,sp,#16*4+16
1854	adr	r14,K256
1855	bic	r11,r11,#15		@ align for 128-bit stores
1856	mov	r12,sp
1857	mov	sp,r11			@ alloca
1858	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
1859
1860	vld1.8		{q0},[r1]!
1861	vld1.8		{q1},[r1]!
1862	vld1.8		{q2},[r1]!
1863	vld1.8		{q3},[r1]!
1864	vld1.32		{q8},[r14,:128]!
1865	vld1.32		{q9},[r14,:128]!
1866	vld1.32		{q10},[r14,:128]!
1867	vld1.32		{q11},[r14,:128]!
1868	vrev32.8	q0,q0		@ yes, even on
1869	str		r0,[sp,#64]
1870	vrev32.8	q1,q1		@ big-endian
1871	str		r1,[sp,#68]
1872	mov		r1,sp
1873	vrev32.8	q2,q2
1874	str		r2,[sp,#72]
1875	vrev32.8	q3,q3
1876	str		r12,[sp,#76]		@ save original sp
1877	vadd.i32	q8,q8,q0
1878	vadd.i32	q9,q9,q1
1879	vst1.32		{q8},[r1,:128]!
1880	vadd.i32	q10,q10,q2
1881	vst1.32		{q9},[r1,:128]!
1882	vadd.i32	q11,q11,q3
1883	vst1.32		{q10},[r1,:128]!
1884	vst1.32		{q11},[r1,:128]!
1885
1886	ldmia		r0,{r4-r11}
1887	sub		r1,r1,#64
1888	ldr		r2,[sp,#0]
1889	eor		r12,r12,r12
1890	eor		r3,r5,r6
1891	b		.L_00_48
1892
1893.align	4
1894.L_00_48:
1895	vext.8	q8,q0,q1,#4
1896	add	r11,r11,r2
1897	eor	r2,r9,r10
1898	eor	r0,r8,r8,ror#5
1899	vext.8	q9,q2,q3,#4
1900	add	r4,r4,r12
1901	and	r2,r2,r8
1902	eor	r12,r0,r8,ror#19
1903	vshr.u32	q10,q8,#7
1904	eor	r0,r4,r4,ror#11
1905	eor	r2,r2,r10
1906	vadd.i32	q0,q0,q9
1907	add	r11,r11,r12,ror#6
1908	eor	r12,r4,r5
1909	vshr.u32	q9,q8,#3
1910	eor	r0,r0,r4,ror#20
1911	add	r11,r11,r2
1912	vsli.32	q10,q8,#25
1913	ldr	r2,[sp,#4]
1914	and	r3,r3,r12
1915	vshr.u32	q11,q8,#18
1916	add	r7,r7,r11
1917	add	r11,r11,r0,ror#2
1918	eor	r3,r3,r5
1919	veor	q9,q9,q10
1920	add	r10,r10,r2
1921	vsli.32	q11,q8,#14
1922	eor	r2,r8,r9
1923	eor	r0,r7,r7,ror#5
1924	vshr.u32	d24,d7,#17
1925	add	r11,r11,r3
1926	and	r2,r2,r7
1927	veor	q9,q9,q11
1928	eor	r3,r0,r7,ror#19
1929	eor	r0,r11,r11,ror#11
1930	vsli.32	d24,d7,#15
1931	eor	r2,r2,r9
1932	add	r10,r10,r3,ror#6
1933	vshr.u32	d25,d7,#10
1934	eor	r3,r11,r4
1935	eor	r0,r0,r11,ror#20
1936	vadd.i32	q0,q0,q9
1937	add	r10,r10,r2
1938	ldr	r2,[sp,#8]
1939	veor	d25,d25,d24
1940	and	r12,r12,r3
1941	add	r6,r6,r10
1942	vshr.u32	d24,d7,#19
1943	add	r10,r10,r0,ror#2
1944	eor	r12,r12,r4
1945	vsli.32	d24,d7,#13
1946	add	r9,r9,r2
1947	eor	r2,r7,r8
1948	veor	d25,d25,d24
1949	eor	r0,r6,r6,ror#5
1950	add	r10,r10,r12
1951	vadd.i32	d0,d0,d25
1952	and	r2,r2,r6
1953	eor	r12,r0,r6,ror#19
1954	vshr.u32	d24,d0,#17
1955	eor	r0,r10,r10,ror#11
1956	eor	r2,r2,r8
1957	vsli.32	d24,d0,#15
1958	add	r9,r9,r12,ror#6
1959	eor	r12,r10,r11
1960	vshr.u32	d25,d0,#10
1961	eor	r0,r0,r10,ror#20
1962	add	r9,r9,r2
1963	veor	d25,d25,d24
1964	ldr	r2,[sp,#12]
1965	and	r3,r3,r12
1966	vshr.u32	d24,d0,#19
1967	add	r5,r5,r9
1968	add	r9,r9,r0,ror#2
1969	eor	r3,r3,r11
1970	vld1.32	{q8},[r14,:128]!
1971	add	r8,r8,r2
1972	vsli.32	d24,d0,#13
1973	eor	r2,r6,r7
1974	eor	r0,r5,r5,ror#5
1975	veor	d25,d25,d24
1976	add	r9,r9,r3
1977	and	r2,r2,r5
1978	vadd.i32	d1,d1,d25
1979	eor	r3,r0,r5,ror#19
1980	eor	r0,r9,r9,ror#11
1981	vadd.i32	q8,q8,q0
1982	eor	r2,r2,r7
1983	add	r8,r8,r3,ror#6
1984	eor	r3,r9,r10
1985	eor	r0,r0,r9,ror#20
1986	add	r8,r8,r2
1987	ldr	r2,[sp,#16]
1988	and	r12,r12,r3
1989	add	r4,r4,r8
1990	vst1.32	{q8},[r1,:128]!
1991	add	r8,r8,r0,ror#2
1992	eor	r12,r12,r10
1993	vext.8	q8,q1,q2,#4
1994	add	r7,r7,r2
1995	eor	r2,r5,r6
1996	eor	r0,r4,r4,ror#5
1997	vext.8	q9,q3,q0,#4
1998	add	r8,r8,r12
1999	and	r2,r2,r4
2000	eor	r12,r0,r4,ror#19
2001	vshr.u32	q10,q8,#7
2002	eor	r0,r8,r8,ror#11
2003	eor	r2,r2,r6
2004	vadd.i32	q1,q1,q9
2005	add	r7,r7,r12,ror#6
2006	eor	r12,r8,r9
2007	vshr.u32	q9,q8,#3
2008	eor	r0,r0,r8,ror#20
2009	add	r7,r7,r2
2010	vsli.32	q10,q8,#25
2011	ldr	r2,[sp,#20]
2012	and	r3,r3,r12
2013	vshr.u32	q11,q8,#18
2014	add	r11,r11,r7
2015	add	r7,r7,r0,ror#2
2016	eor	r3,r3,r9
2017	veor	q9,q9,q10
2018	add	r6,r6,r2
2019	vsli.32	q11,q8,#14
2020	eor	r2,r4,r5
2021	eor	r0,r11,r11,ror#5
2022	vshr.u32	d24,d1,#17
2023	add	r7,r7,r3
2024	and	r2,r2,r11
2025	veor	q9,q9,q11
2026	eor	r3,r0,r11,ror#19
2027	eor	r0,r7,r7,ror#11
2028	vsli.32	d24,d1,#15
2029	eor	r2,r2,r5
2030	add	r6,r6,r3,ror#6
2031	vshr.u32	d25,d1,#10
2032	eor	r3,r7,r8
2033	eor	r0,r0,r7,ror#20
2034	vadd.i32	q1,q1,q9
2035	add	r6,r6,r2
2036	ldr	r2,[sp,#24]
2037	veor	d25,d25,d24
2038	and	r12,r12,r3
2039	add	r10,r10,r6
2040	vshr.u32	d24,d1,#19
2041	add	r6,r6,r0,ror#2
2042	eor	r12,r12,r8
2043	vsli.32	d24,d1,#13
2044	add	r5,r5,r2
2045	eor	r2,r11,r4
2046	veor	d25,d25,d24
2047	eor	r0,r10,r10,ror#5
2048	add	r6,r6,r12
2049	vadd.i32	d2,d2,d25
2050	and	r2,r2,r10
2051	eor	r12,r0,r10,ror#19
2052	vshr.u32	d24,d2,#17
2053	eor	r0,r6,r6,ror#11
2054	eor	r2,r2,r4
2055	vsli.32	d24,d2,#15
2056	add	r5,r5,r12,ror#6
2057	eor	r12,r6,r7
2058	vshr.u32	d25,d2,#10
2059	eor	r0,r0,r6,ror#20
2060	add	r5,r5,r2
2061	veor	d25,d25,d24
2062	ldr	r2,[sp,#28]
2063	and	r3,r3,r12
2064	vshr.u32	d24,d2,#19
2065	add	r9,r9,r5
2066	add	r5,r5,r0,ror#2
2067	eor	r3,r3,r7
2068	vld1.32	{q8},[r14,:128]!
2069	add	r4,r4,r2
2070	vsli.32	d24,d2,#13
2071	eor	r2,r10,r11
2072	eor	r0,r9,r9,ror#5
2073	veor	d25,d25,d24
2074	add	r5,r5,r3
2075	and	r2,r2,r9
2076	vadd.i32	d3,d3,d25
2077	eor	r3,r0,r9,ror#19
2078	eor	r0,r5,r5,ror#11
2079	vadd.i32	q8,q8,q1
2080	eor	r2,r2,r11
2081	add	r4,r4,r3,ror#6
2082	eor	r3,r5,r6
2083	eor	r0,r0,r5,ror#20
2084	add	r4,r4,r2
2085	ldr	r2,[sp,#32]
2086	and	r12,r12,r3
2087	add	r8,r8,r4
2088	vst1.32	{q8},[r1,:128]!
2089	add	r4,r4,r0,ror#2
2090	eor	r12,r12,r6
2091	vext.8	q8,q2,q3,#4
2092	add	r11,r11,r2
2093	eor	r2,r9,r10
2094	eor	r0,r8,r8,ror#5
2095	vext.8	q9,q0,q1,#4
2096	add	r4,r4,r12
2097	and	r2,r2,r8
2098	eor	r12,r0,r8,ror#19
2099	vshr.u32	q10,q8,#7
2100	eor	r0,r4,r4,ror#11
2101	eor	r2,r2,r10
2102	vadd.i32	q2,q2,q9
2103	add	r11,r11,r12,ror#6
2104	eor	r12,r4,r5
2105	vshr.u32	q9,q8,#3
2106	eor	r0,r0,r4,ror#20
2107	add	r11,r11,r2
2108	vsli.32	q10,q8,#25
2109	ldr	r2,[sp,#36]
2110	and	r3,r3,r12
2111	vshr.u32	q11,q8,#18
2112	add	r7,r7,r11
2113	add	r11,r11,r0,ror#2
2114	eor	r3,r3,r5
2115	veor	q9,q9,q10
2116	add	r10,r10,r2
2117	vsli.32	q11,q8,#14
2118	eor	r2,r8,r9
2119	eor	r0,r7,r7,ror#5
2120	vshr.u32	d24,d3,#17
2121	add	r11,r11,r3
2122	and	r2,r2,r7
2123	veor	q9,q9,q11
2124	eor	r3,r0,r7,ror#19
2125	eor	r0,r11,r11,ror#11
2126	vsli.32	d24,d3,#15
2127	eor	r2,r2,r9
2128	add	r10,r10,r3,ror#6
2129	vshr.u32	d25,d3,#10
2130	eor	r3,r11,r4
2131	eor	r0,r0,r11,ror#20
2132	vadd.i32	q2,q2,q9
2133	add	r10,r10,r2
2134	ldr	r2,[sp,#40]
2135	veor	d25,d25,d24
2136	and	r12,r12,r3
2137	add	r6,r6,r10
2138	vshr.u32	d24,d3,#19
2139	add	r10,r10,r0,ror#2
2140	eor	r12,r12,r4
2141	vsli.32	d24,d3,#13
2142	add	r9,r9,r2
2143	eor	r2,r7,r8
2144	veor	d25,d25,d24
2145	eor	r0,r6,r6,ror#5
2146	add	r10,r10,r12
2147	vadd.i32	d4,d4,d25
2148	and	r2,r2,r6
2149	eor	r12,r0,r6,ror#19
2150	vshr.u32	d24,d4,#17
2151	eor	r0,r10,r10,ror#11
2152	eor	r2,r2,r8
2153	vsli.32	d24,d4,#15
2154	add	r9,r9,r12,ror#6
2155	eor	r12,r10,r11
2156	vshr.u32	d25,d4,#10
2157	eor	r0,r0,r10,ror#20
2158	add	r9,r9,r2
2159	veor	d25,d25,d24
2160	ldr	r2,[sp,#44]
2161	and	r3,r3,r12
2162	vshr.u32	d24,d4,#19
2163	add	r5,r5,r9
2164	add	r9,r9,r0,ror#2
2165	eor	r3,r3,r11
2166	vld1.32	{q8},[r14,:128]!
2167	add	r8,r8,r2
2168	vsli.32	d24,d4,#13
2169	eor	r2,r6,r7
2170	eor	r0,r5,r5,ror#5
2171	veor	d25,d25,d24
2172	add	r9,r9,r3
2173	and	r2,r2,r5
2174	vadd.i32	d5,d5,d25
2175	eor	r3,r0,r5,ror#19
2176	eor	r0,r9,r9,ror#11
2177	vadd.i32	q8,q8,q2
2178	eor	r2,r2,r7
2179	add	r8,r8,r3,ror#6
2180	eor	r3,r9,r10
2181	eor	r0,r0,r9,ror#20
2182	add	r8,r8,r2
2183	ldr	r2,[sp,#48]
2184	and	r12,r12,r3
2185	add	r4,r4,r8
2186	vst1.32	{q8},[r1,:128]!
2187	add	r8,r8,r0,ror#2
2188	eor	r12,r12,r10
2189	vext.8	q8,q3,q0,#4
2190	add	r7,r7,r2
2191	eor	r2,r5,r6
2192	eor	r0,r4,r4,ror#5
2193	vext.8	q9,q1,q2,#4
2194	add	r8,r8,r12
2195	and	r2,r2,r4
2196	eor	r12,r0,r4,ror#19
2197	vshr.u32	q10,q8,#7
2198	eor	r0,r8,r8,ror#11
2199	eor	r2,r2,r6
2200	vadd.i32	q3,q3,q9
2201	add	r7,r7,r12,ror#6
2202	eor	r12,r8,r9
2203	vshr.u32	q9,q8,#3
2204	eor	r0,r0,r8,ror#20
2205	add	r7,r7,r2
2206	vsli.32	q10,q8,#25
2207	ldr	r2,[sp,#52]
2208	and	r3,r3,r12
2209	vshr.u32	q11,q8,#18
2210	add	r11,r11,r7
2211	add	r7,r7,r0,ror#2
2212	eor	r3,r3,r9
2213	veor	q9,q9,q10
2214	add	r6,r6,r2
2215	vsli.32	q11,q8,#14
2216	eor	r2,r4,r5
2217	eor	r0,r11,r11,ror#5
2218	vshr.u32	d24,d5,#17
2219	add	r7,r7,r3
2220	and	r2,r2,r11
2221	veor	q9,q9,q11
2222	eor	r3,r0,r11,ror#19
2223	eor	r0,r7,r7,ror#11
2224	vsli.32	d24,d5,#15
2225	eor	r2,r2,r5
2226	add	r6,r6,r3,ror#6
2227	vshr.u32	d25,d5,#10
2228	eor	r3,r7,r8
2229	eor	r0,r0,r7,ror#20
2230	vadd.i32	q3,q3,q9
2231	add	r6,r6,r2
2232	ldr	r2,[sp,#56]
2233	veor	d25,d25,d24
2234	and	r12,r12,r3
2235	add	r10,r10,r6
2236	vshr.u32	d24,d5,#19
2237	add	r6,r6,r0,ror#2
2238	eor	r12,r12,r8
2239	vsli.32	d24,d5,#13
2240	add	r5,r5,r2
2241	eor	r2,r11,r4
2242	veor	d25,d25,d24
2243	eor	r0,r10,r10,ror#5
2244	add	r6,r6,r12
2245	vadd.i32	d6,d6,d25
2246	and	r2,r2,r10
2247	eor	r12,r0,r10,ror#19
2248	vshr.u32	d24,d6,#17
2249	eor	r0,r6,r6,ror#11
2250	eor	r2,r2,r4
2251	vsli.32	d24,d6,#15
2252	add	r5,r5,r12,ror#6
2253	eor	r12,r6,r7
2254	vshr.u32	d25,d6,#10
2255	eor	r0,r0,r6,ror#20
2256	add	r5,r5,r2
2257	veor	d25,d25,d24
2258	ldr	r2,[sp,#60]
2259	and	r3,r3,r12
2260	vshr.u32	d24,d6,#19
2261	add	r9,r9,r5
2262	add	r5,r5,r0,ror#2
2263	eor	r3,r3,r7
2264	vld1.32	{q8},[r14,:128]!
2265	add	r4,r4,r2
2266	vsli.32	d24,d6,#13
2267	eor	r2,r10,r11
2268	eor	r0,r9,r9,ror#5
2269	veor	d25,d25,d24
2270	add	r5,r5,r3
2271	and	r2,r2,r9
2272	vadd.i32	d7,d7,d25
2273	eor	r3,r0,r9,ror#19
2274	eor	r0,r5,r5,ror#11
2275	vadd.i32	q8,q8,q3
2276	eor	r2,r2,r11
2277	add	r4,r4,r3,ror#6
2278	eor	r3,r5,r6
2279	eor	r0,r0,r5,ror#20
2280	add	r4,r4,r2
2281	ldr	r2,[r14]
2282	and	r12,r12,r3
2283	add	r8,r8,r4
2284	vst1.32	{q8},[r1,:128]!
2285	add	r4,r4,r0,ror#2
2286	eor	r12,r12,r6
2287	teq	r2,#0				@ check for K256 terminator
2288	ldr	r2,[sp,#0]
2289	sub	r1,r1,#64
2290	bne	.L_00_48
2291
2292	ldr		r1,[sp,#68]
2293	ldr		r0,[sp,#72]
2294	sub		r14,r14,#256	@ rewind r14
2295	teq		r1,r0
2296	it		eq
2297	subeq		r1,r1,#64		@ avoid SEGV
2298	vld1.8		{q0},[r1]!		@ load next input block
2299	vld1.8		{q1},[r1]!
2300	vld1.8		{q2},[r1]!
2301	vld1.8		{q3},[r1]!
2302	it		ne
2303	strne		r1,[sp,#68]
2304	mov		r1,sp
2305	add	r11,r11,r2
2306	eor	r2,r9,r10
2307	eor	r0,r8,r8,ror#5
2308	add	r4,r4,r12
2309	vld1.32	{q8},[r14,:128]!
2310	and	r2,r2,r8
2311	eor	r12,r0,r8,ror#19
2312	eor	r0,r4,r4,ror#11
2313	eor	r2,r2,r10
2314	vrev32.8	q0,q0
2315	add	r11,r11,r12,ror#6
2316	eor	r12,r4,r5
2317	eor	r0,r0,r4,ror#20
2318	add	r11,r11,r2
2319	vadd.i32	q8,q8,q0
2320	ldr	r2,[sp,#4]
2321	and	r3,r3,r12
2322	add	r7,r7,r11
2323	add	r11,r11,r0,ror#2
2324	eor	r3,r3,r5
2325	add	r10,r10,r2
2326	eor	r2,r8,r9
2327	eor	r0,r7,r7,ror#5
2328	add	r11,r11,r3
2329	and	r2,r2,r7
2330	eor	r3,r0,r7,ror#19
2331	eor	r0,r11,r11,ror#11
2332	eor	r2,r2,r9
2333	add	r10,r10,r3,ror#6
2334	eor	r3,r11,r4
2335	eor	r0,r0,r11,ror#20
2336	add	r10,r10,r2
2337	ldr	r2,[sp,#8]
2338	and	r12,r12,r3
2339	add	r6,r6,r10
2340	add	r10,r10,r0,ror#2
2341	eor	r12,r12,r4
2342	add	r9,r9,r2
2343	eor	r2,r7,r8
2344	eor	r0,r6,r6,ror#5
2345	add	r10,r10,r12
2346	and	r2,r2,r6
2347	eor	r12,r0,r6,ror#19
2348	eor	r0,r10,r10,ror#11
2349	eor	r2,r2,r8
2350	add	r9,r9,r12,ror#6
2351	eor	r12,r10,r11
2352	eor	r0,r0,r10,ror#20
2353	add	r9,r9,r2
2354	ldr	r2,[sp,#12]
2355	and	r3,r3,r12
2356	add	r5,r5,r9
2357	add	r9,r9,r0,ror#2
2358	eor	r3,r3,r11
2359	add	r8,r8,r2
2360	eor	r2,r6,r7
2361	eor	r0,r5,r5,ror#5
2362	add	r9,r9,r3
2363	and	r2,r2,r5
2364	eor	r3,r0,r5,ror#19
2365	eor	r0,r9,r9,ror#11
2366	eor	r2,r2,r7
2367	add	r8,r8,r3,ror#6
2368	eor	r3,r9,r10
2369	eor	r0,r0,r9,ror#20
2370	add	r8,r8,r2
2371	ldr	r2,[sp,#16]
2372	and	r12,r12,r3
2373	add	r4,r4,r8
2374	add	r8,r8,r0,ror#2
2375	eor	r12,r12,r10
2376	vst1.32	{q8},[r1,:128]!
2377	add	r7,r7,r2
2378	eor	r2,r5,r6
2379	eor	r0,r4,r4,ror#5
2380	add	r8,r8,r12
2381	vld1.32	{q8},[r14,:128]!
2382	and	r2,r2,r4
2383	eor	r12,r0,r4,ror#19
2384	eor	r0,r8,r8,ror#11
2385	eor	r2,r2,r6
2386	vrev32.8	q1,q1
2387	add	r7,r7,r12,ror#6
2388	eor	r12,r8,r9
2389	eor	r0,r0,r8,ror#20
2390	add	r7,r7,r2
2391	vadd.i32	q8,q8,q1
2392	ldr	r2,[sp,#20]
2393	and	r3,r3,r12
2394	add	r11,r11,r7
2395	add	r7,r7,r0,ror#2
2396	eor	r3,r3,r9
2397	add	r6,r6,r2
2398	eor	r2,r4,r5
2399	eor	r0,r11,r11,ror#5
2400	add	r7,r7,r3
2401	and	r2,r2,r11
2402	eor	r3,r0,r11,ror#19
2403	eor	r0,r7,r7,ror#11
2404	eor	r2,r2,r5
2405	add	r6,r6,r3,ror#6
2406	eor	r3,r7,r8
2407	eor	r0,r0,r7,ror#20
2408	add	r6,r6,r2
2409	ldr	r2,[sp,#24]
2410	and	r12,r12,r3
2411	add	r10,r10,r6
2412	add	r6,r6,r0,ror#2
2413	eor	r12,r12,r8
2414	add	r5,r5,r2
2415	eor	r2,r11,r4
2416	eor	r0,r10,r10,ror#5
2417	add	r6,r6,r12
2418	and	r2,r2,r10
2419	eor	r12,r0,r10,ror#19
2420	eor	r0,r6,r6,ror#11
2421	eor	r2,r2,r4
2422	add	r5,r5,r12,ror#6
2423	eor	r12,r6,r7
2424	eor	r0,r0,r6,ror#20
2425	add	r5,r5,r2
2426	ldr	r2,[sp,#28]
2427	and	r3,r3,r12
2428	add	r9,r9,r5
2429	add	r5,r5,r0,ror#2
2430	eor	r3,r3,r7
2431	add	r4,r4,r2
2432	eor	r2,r10,r11
2433	eor	r0,r9,r9,ror#5
2434	add	r5,r5,r3
2435	and	r2,r2,r9
2436	eor	r3,r0,r9,ror#19
2437	eor	r0,r5,r5,ror#11
2438	eor	r2,r2,r11
2439	add	r4,r4,r3,ror#6
2440	eor	r3,r5,r6
2441	eor	r0,r0,r5,ror#20
2442	add	r4,r4,r2
2443	ldr	r2,[sp,#32]
2444	and	r12,r12,r3
2445	add	r8,r8,r4
2446	add	r4,r4,r0,ror#2
2447	eor	r12,r12,r6
2448	vst1.32	{q8},[r1,:128]!
2449	add	r11,r11,r2
2450	eor	r2,r9,r10
2451	eor	r0,r8,r8,ror#5
2452	add	r4,r4,r12
2453	vld1.32	{q8},[r14,:128]!
2454	and	r2,r2,r8
2455	eor	r12,r0,r8,ror#19
2456	eor	r0,r4,r4,ror#11
2457	eor	r2,r2,r10
2458	vrev32.8	q2,q2
2459	add	r11,r11,r12,ror#6
2460	eor	r12,r4,r5
2461	eor	r0,r0,r4,ror#20
2462	add	r11,r11,r2
2463	vadd.i32	q8,q8,q2
2464	ldr	r2,[sp,#36]
2465	and	r3,r3,r12
2466	add	r7,r7,r11
2467	add	r11,r11,r0,ror#2
2468	eor	r3,r3,r5
2469	add	r10,r10,r2
2470	eor	r2,r8,r9
2471	eor	r0,r7,r7,ror#5
2472	add	r11,r11,r3
2473	and	r2,r2,r7
2474	eor	r3,r0,r7,ror#19
2475	eor	r0,r11,r11,ror#11
2476	eor	r2,r2,r9
2477	add	r10,r10,r3,ror#6
2478	eor	r3,r11,r4
2479	eor	r0,r0,r11,ror#20
2480	add	r10,r10,r2
2481	ldr	r2,[sp,#40]
2482	and	r12,r12,r3
2483	add	r6,r6,r10
2484	add	r10,r10,r0,ror#2
2485	eor	r12,r12,r4
2486	add	r9,r9,r2
2487	eor	r2,r7,r8
2488	eor	r0,r6,r6,ror#5
2489	add	r10,r10,r12
2490	and	r2,r2,r6
2491	eor	r12,r0,r6,ror#19
2492	eor	r0,r10,r10,ror#11
2493	eor	r2,r2,r8
2494	add	r9,r9,r12,ror#6
2495	eor	r12,r10,r11
2496	eor	r0,r0,r10,ror#20
2497	add	r9,r9,r2
2498	ldr	r2,[sp,#44]
2499	and	r3,r3,r12
2500	add	r5,r5,r9
2501	add	r9,r9,r0,ror#2
2502	eor	r3,r3,r11
2503	add	r8,r8,r2
2504	eor	r2,r6,r7
2505	eor	r0,r5,r5,ror#5
2506	add	r9,r9,r3
2507	and	r2,r2,r5
2508	eor	r3,r0,r5,ror#19
2509	eor	r0,r9,r9,ror#11
2510	eor	r2,r2,r7
2511	add	r8,r8,r3,ror#6
2512	eor	r3,r9,r10
2513	eor	r0,r0,r9,ror#20
2514	add	r8,r8,r2
2515	ldr	r2,[sp,#48]
2516	and	r12,r12,r3
2517	add	r4,r4,r8
2518	add	r8,r8,r0,ror#2
2519	eor	r12,r12,r10
2520	vst1.32	{q8},[r1,:128]!
2521	add	r7,r7,r2
2522	eor	r2,r5,r6
2523	eor	r0,r4,r4,ror#5
2524	add	r8,r8,r12
2525	vld1.32	{q8},[r14,:128]!
2526	and	r2,r2,r4
2527	eor	r12,r0,r4,ror#19
2528	eor	r0,r8,r8,ror#11
2529	eor	r2,r2,r6
2530	vrev32.8	q3,q3
2531	add	r7,r7,r12,ror#6
2532	eor	r12,r8,r9
2533	eor	r0,r0,r8,ror#20
2534	add	r7,r7,r2
2535	vadd.i32	q8,q8,q3
2536	ldr	r2,[sp,#52]
2537	and	r3,r3,r12
2538	add	r11,r11,r7
2539	add	r7,r7,r0,ror#2
2540	eor	r3,r3,r9
2541	add	r6,r6,r2
2542	eor	r2,r4,r5
2543	eor	r0,r11,r11,ror#5
2544	add	r7,r7,r3
2545	and	r2,r2,r11
2546	eor	r3,r0,r11,ror#19
2547	eor	r0,r7,r7,ror#11
2548	eor	r2,r2,r5
2549	add	r6,r6,r3,ror#6
2550	eor	r3,r7,r8
2551	eor	r0,r0,r7,ror#20
2552	add	r6,r6,r2
2553	ldr	r2,[sp,#56]
2554	and	r12,r12,r3
2555	add	r10,r10,r6
2556	add	r6,r6,r0,ror#2
2557	eor	r12,r12,r8
2558	add	r5,r5,r2
2559	eor	r2,r11,r4
2560	eor	r0,r10,r10,ror#5
2561	add	r6,r6,r12
2562	and	r2,r2,r10
2563	eor	r12,r0,r10,ror#19
2564	eor	r0,r6,r6,ror#11
2565	eor	r2,r2,r4
2566	add	r5,r5,r12,ror#6
2567	eor	r12,r6,r7
2568	eor	r0,r0,r6,ror#20
2569	add	r5,r5,r2
2570	ldr	r2,[sp,#60]
2571	and	r3,r3,r12
2572	add	r9,r9,r5
2573	add	r5,r5,r0,ror#2
2574	eor	r3,r3,r7
2575	add	r4,r4,r2
2576	eor	r2,r10,r11
2577	eor	r0,r9,r9,ror#5
2578	add	r5,r5,r3
2579	and	r2,r2,r9
2580	eor	r3,r0,r9,ror#19
2581	eor	r0,r5,r5,ror#11
2582	eor	r2,r2,r11
2583	add	r4,r4,r3,ror#6
2584	eor	r3,r5,r6
2585	eor	r0,r0,r5,ror#20
2586	add	r4,r4,r2
2587	ldr	r2,[sp,#64]
2588	and	r12,r12,r3
2589	add	r8,r8,r4
2590	add	r4,r4,r0,ror#2
2591	eor	r12,r12,r6
2592	vst1.32	{q8},[r1,:128]!
2593	ldr	r0,[r2,#0]
2594	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
2595	ldr	r12,[r2,#4]
2596	ldr	r3,[r2,#8]
2597	ldr	r1,[r2,#12]
2598	add	r4,r4,r0			@ accumulate
2599	ldr	r0,[r2,#16]
2600	add	r5,r5,r12
2601	ldr	r12,[r2,#20]
2602	add	r6,r6,r3
2603	ldr	r3,[r2,#24]
2604	add	r7,r7,r1
2605	ldr	r1,[r2,#28]
2606	add	r8,r8,r0
2607	str	r4,[r2],#4
2608	add	r9,r9,r12
2609	str	r5,[r2],#4
2610	add	r10,r10,r3
2611	str	r6,[r2],#4
2612	add	r11,r11,r1
2613	str	r7,[r2],#4
2614	stmia	r2,{r8-r11}
2615
2616	ittte	ne
2617	movne	r1,sp
2618	ldrne	r2,[sp,#0]
2619	eorne	r12,r12,r12
2620	ldreq	sp,[sp,#76]			@ restore original sp
2621	itt	ne
2622	eorne	r3,r5,r6
2623	bne	.L_00_48
2624
2625	ldmia	sp!,{r4-r12,pc}
2626.size	zfs_sha256_block_neon,.-zfs_sha256_block_neon
2627
@ INST(a,b,c,d) emits the four bytes a,b,c,d of an instruction encoding
@ literally -- presumably so the ARMv8 sha256* crypto instructions can be
@ assembled even by toolchains that lack their mnemonics (each use site
@ below carries an @-comment naming the intended mnemonic).  Under
@ Thumb-2 the same instruction is laid out as two halfwords with the
@ byte pairs swapped, and 0x0c is OR-ed into the second byte.
@ NOTE(review): the byte reordering and |0xc adjustment are taken on
@ faith from upstream OpenSSL; confirm against the Arm ARM T32 crypto
@ instruction encodings if this is ever modified.
# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif
2633
@-----------------------------------------------------------------------
@ zfs_sha256_block_armv8(state, inp, num)
@
@ SHA-256 block transform for AArch32 cores that implement the ARMv8
@ Cryptography Extension.  The sha256h/sha256h2/sha256su0/sha256su1
@ instructions are emitted as raw encodings through the INST macro;
@ the @-comment on each INST line gives the mnemonic it stands for.
@
@ In:   r0 = pointer to the eight 32-bit hash state words
@       r1 = input data
@       r2 = number of 64-byte blocks to process
@
@ NOTE(review): r3 is adjusted by `sub r3,r3,#256+32` below and is
@ thereafter used as the K256 round-constant table pointer, i.e. it is
@ assumed to enter holding K256+256+32 -- confirm how callers establish
@ r3 before relying on this entry point.
@-----------------------------------------------------------------------
.globl	zfs_sha256_block_armv8
.type	zfs_sha256_block_armv8,%function
.align	5
zfs_sha256_block_armv8:
.LARMv8:
	@ Load the eight 32-bit state words into q0-q1.
	vld1.32	{q0,q1},[r0]
	@ Rebase r3 onto K256 (see NOTE in the header above).
	sub	r3,r3,#256+32
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

.align	4
.Loop_v8:
	@ One 64-byte block per iteration: load it into q8-q11 and
	@ byte-swap within each 32-bit word (message words are
	@ big-endian); start streaming round constants from [r3].
	vld1.8		{q8-q9},[r1]!
	vld1.8		{q10-q11},[r1]!
	vld1.32		{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov		q14,q0	@ offload
	vmov		q15,q1
	@ Z=1 when this was the last block; the flag is consumed by the
	@ it/bne at the bottom of the loop (the vector ops in between do
	@ not write the condition flags).
	teq		r1,r2
	@ Rounds 0-47: twelve groups, each doing one quad-round
	@ (sha256h/sha256h2 with constants+message in q12/q13) while
	@ extending the message schedule (sha256su0/sha256su1).
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	@ Rounds 48-63: four final quad-rounds, no further schedule
	@ updates needed.
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32		{q13},[r3]
	vadd.i32	q12,q12,q10
	@ Point r3 back at K256 for the next block iteration.
	sub		r3,r3,#256-16	@ rewind
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	@ Add the state saved in q14/q15 back in, then loop while input
	@ remains (Z flag set by the teq near the top of the loop).
	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it		ne
	bne		.Loop_v8

	@ Write the updated hash state back to the context.
	vst1.32		{q0,q1},[r0]

	bx	lr		@ bx lr
.size	zfs_sha256_block_armv8,.-zfs_sha256_block_armv8
2773
2774#endif // #if __ARM_ARCH__ >= 7
2775#endif // #if defined(__arm__)
2776