/*
 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
 *
 * Licensed under the Apache License, Version 2.0 (the "License");
 * you may not use this file except in compliance with the License.
 * You may obtain a copy of the License at
 *
 *     https://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

/*
 * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
 * - modified assembly to fit into OpenZFS
 */
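
/*
 * SHA-256 block transform for 32-bit ARM, following the layout of
 * OpenSSL's sha256-armv4 perlasm module: the K256 constant table, an
 * integer-only entry point (zfs_sha256_block_armv7) and a NEON entry
 * point (zfs_sha256_block_neon).
 */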

#if defined(__arm__)

#ifndef __ARM_ARCH
# define __ARM_ARCH__	7
#else
# define __ARM_ARCH__	__ARM_ARCH
#endif

#if defined(__thumb2__)
.syntax unified
.thumb
#else
.code   32
#endif

.text

.type	K256,%object
.align	5
K256:
.word	0x428a2f98,0x71374491,0xb5c0fbcf,0xe9b5dba5
.word	0x3956c25b,0x59f111f1,0x923f82a4,0xab1c5ed5
.word	0xd807aa98,0x12835b01,0x243185be,0x550c7dc3
.word	0x72be5d74,0x80deb1fe,0x9bdc06a7,0xc19bf174
.word	0xe49b69c1,0xefbe4786,0x0fc19dc6,0x240ca1cc
.word	0x2de92c6f,0x4a7484aa,0x5cb0a9dc,0x76f988da
.word	0x983e5152,0xa831c66d,0xb00327c8,0xbf597fc7
.word	0xc6e00bf3,0xd5a79147,0x06ca6351,0x14292967
.word	0x27b70a85,0x2e1b2138,0x4d2c6dfc,0x53380d13
.word	0x650a7354,0x766a0abb,0x81c2c92e,0x92722c85
.word	0xa2bfe8a1,0xa81a664b,0xc24b8b70,0xc76c51a3
.word	0xd192e819,0xd6990624,0xf40e3585,0x106aa070
.word	0x19a4c116,0x1e376c08,0x2748774c,0x34b0bcb5
.word	0x391c0cb3,0x4ed8aa4a,0x5b9cca4f,0x682e6ff3
.word	0x748f82ee,0x78a5636f,0x84c87814,0x8cc70208
.word	0x90befffa,0xa4506ceb,0xbef9a3f7,0xc67178f2
.size	K256,.-K256
.word	0				@ terminator

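/*
 * Entry point, in effect:
 *	void zfs_sha256_block_armv7(uint32_t state[8], const void *in,
 *	    size_t num_blocks)
 * r0 = state, r1 = input, r2 = number of 64-byte blocks (turned into
 * an end-of-input pointer below).  The working variables a..h live in
 * r4..r11, r14 walks the K256 table, and r0/r2/r3/r12 are scratch.
 */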
.align	5
.globl	zfs_sha256_block_armv7
.type	zfs_sha256_block_armv7,%function
zfs_sha256_block_armv7:
.Lzfs_sha256_block_armv7:

#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ zfs_sha256_block_armv7
#else
	adr	r3,.Lzfs_sha256_block_armv7
#endif

	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	stmdb	sp!,{r0,r1,r2,r4-r11,lr}
	ldmia	r0,{r4,r5,r6,r7,r8,r9,r10,r11}
	sub	r14,r3,#256+32	@ K256
	sub	sp,sp,#16*4		@ alloca(X[16])
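@ Frame layout after the prologue: sp..sp+15*4 hold the 16-word message
@ schedule X[], while the stmdb above leaves the ctx pointer at
@ [sp,#16*4], the input pointer at [sp,#17*4] and the end-of-input
@ pointer at [sp,#18*4] (see the "pull ctx"/"pull inp" loads below).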
.Loop:
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6		@ magic
	eor	r12,r12,r12
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 0
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 0
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 0==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 0==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 0<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 1
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 1
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 1==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 1==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 1<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 2
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 2
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 2==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 2==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 2<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 3
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 3
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 3==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 3==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 3<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 4
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 4
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 4==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 4==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 4<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 5
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 5==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 5==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 5<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 6
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 6
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 6==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 6==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 6<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 7
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 7==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 7==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 7<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 8
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 8
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 8==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r8,r8,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 8==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 8<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 9
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 9
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 9==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r7,r7,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 9==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 9<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 10
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 10
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 10==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r6,r6,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 10==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 10<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 11
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 11
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 11==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r5,r5,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 11==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 11<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 12
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 12
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 12==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r4,r4,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 12==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 12<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 13
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 13
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 13==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r11,r11,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 13==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 13<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 14
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 14
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	ldrb	r12,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r12,lsl#8
	ldrb	r12,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 14==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r10,r10,ror#5
	orr	r2,r2,r12,lsl#24
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
#endif
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 14==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 14<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
#if __ARM_ARCH__>=7
	@ ldr	r2,[r1],#4			@ 15
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
# ifndef __ARMEB__
	rev	r2,r2
# endif
#else
	@ ldrb	r2,[r1,#3]			@ 15
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	ldrb	r3,[r1,#2]
	ldrb	r0,[r1,#1]
	orr	r2,r2,r3,lsl#8
	ldrb	r3,[r1],#4
	orr	r2,r2,r0,lsl#16
# if 15==15
	str	r1,[sp,#17*4]			@ make room for r1
# endif
	eor	r0,r9,r9,ror#5
	orr	r2,r2,r3,lsl#24
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
#endif
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 15==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 15<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
.Lrounds_16_xx:
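@ Rounds 16..63: each round first extends the message schedule in place,
@ X[i&15] += sigma0(X[(i+1)&15]) + X[(i+9)&15] + sigma1(X[(i+14)&15]),
@ with sigma0(x) = ror(x,7)^ror(x,18)^(x>>3) and
@ sigma1(x) = ror(x,17)^ror(x,19)^(x>>10), then applies the same
@ Sigma1/Ch/Sigma0/Maj round function as rounds 0..15 above.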
	@ ldr	r2,[sp,#1*4]		@ 16
	@ ldr	r1,[sp,#14*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#0*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#9*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#0*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 16==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 16<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#2*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#15*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#2*4]		@ 17
	@ ldr	r1,[sp,#15*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#1*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#10*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#1*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 17==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 17<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#3*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#0*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#3*4]		@ 18
	@ ldr	r1,[sp,#0*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#2*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#11*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#2*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 18==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 18<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#4*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#1*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#4*4]		@ 19
	@ ldr	r1,[sp,#1*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#3*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#12*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#3*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 19==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 19<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#5*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#2*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#5*4]		@ 20
	@ ldr	r1,[sp,#2*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#4*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#13*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#4*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 20==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 20<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#6*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#3*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#6*4]		@ 21
	@ ldr	r1,[sp,#3*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#5*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#14*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#5*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 21==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 21<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#7*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#4*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#7*4]		@ 22
	@ ldr	r1,[sp,#4*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#6*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#15*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#6*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 22==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 22<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#8*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#5*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#8*4]		@ 23
	@ ldr	r1,[sp,#5*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#7*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#0*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#7*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 23==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 23<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#9*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#6*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#9*4]		@ 24
	@ ldr	r1,[sp,#6*4]
	mov	r0,r2,ror#7
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#8*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#1*4]

	add	r12,r12,r0
	eor	r0,r8,r8,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r8,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r11,r11,r2			@ h+=X[i]
	str	r2,[sp,#8*4]
	eor	r2,r9,r10
	add	r11,r11,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r8
	add	r11,r11,r12			@ h+=K256[i]
	eor	r2,r2,r10			@ Ch(e,f,g)
	eor	r0,r4,r4,ror#11
	add	r11,r11,r2			@ h+=Ch(e,f,g)
#if 24==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 24<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r4,r5			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#10*4]		@ from future BODY_16_xx
	eor	r12,r4,r5			@ a^b, b^c in next round
	ldr	r1,[sp,#7*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r4,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r7,r7,r11			@ d+=h
	eor	r3,r3,r5			@ Maj(a,b,c)
	add	r11,r11,r0,ror#2	@ h+=Sigma0(a)
	@ add	r11,r11,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#10*4]		@ 25
	@ ldr	r1,[sp,#7*4]
	mov	r0,r2,ror#7
	add	r11,r11,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#9*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#2*4]

	add	r3,r3,r0
	eor	r0,r7,r7,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r7,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r10,r10,r2			@ h+=X[i]
	str	r2,[sp,#9*4]
	eor	r2,r8,r9
	add	r10,r10,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r7
	add	r10,r10,r3			@ h+=K256[i]
	eor	r2,r2,r9			@ Ch(e,f,g)
	eor	r0,r11,r11,ror#11
	add	r10,r10,r2			@ h+=Ch(e,f,g)
#if 25==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 25<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r11,r4			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#11*4]		@ from future BODY_16_xx
	eor	r3,r11,r4			@ a^b, b^c in next round
	ldr	r1,[sp,#8*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r11,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r6,r6,r10			@ d+=h
	eor	r12,r12,r4			@ Maj(a,b,c)
	add	r10,r10,r0,ror#2	@ h+=Sigma0(a)
	@ add	r10,r10,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#11*4]		@ 26
	@ ldr	r1,[sp,#8*4]
	mov	r0,r2,ror#7
	add	r10,r10,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#10*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#3*4]

	add	r12,r12,r0
	eor	r0,r6,r6,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r6,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r9,r9,r2			@ h+=X[i]
	str	r2,[sp,#10*4]
	eor	r2,r7,r8
	add	r9,r9,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r6
	add	r9,r9,r12			@ h+=K256[i]
	eor	r2,r2,r8			@ Ch(e,f,g)
	eor	r0,r10,r10,ror#11
	add	r9,r9,r2			@ h+=Ch(e,f,g)
#if 26==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 26<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r10,r11			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#12*4]		@ from future BODY_16_xx
	eor	r12,r10,r11			@ a^b, b^c in next round
	ldr	r1,[sp,#9*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r10,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r5,r5,r9			@ d+=h
	eor	r3,r3,r11			@ Maj(a,b,c)
	add	r9,r9,r0,ror#2	@ h+=Sigma0(a)
	@ add	r9,r9,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#12*4]		@ 27
	@ ldr	r1,[sp,#9*4]
	mov	r0,r2,ror#7
	add	r9,r9,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#11*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#4*4]

	add	r3,r3,r0
	eor	r0,r5,r5,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r5,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r8,r8,r2			@ h+=X[i]
	str	r2,[sp,#11*4]
	eor	r2,r6,r7
	add	r8,r8,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r5
	add	r8,r8,r3			@ h+=K256[i]
	eor	r2,r2,r7			@ Ch(e,f,g)
	eor	r0,r9,r9,ror#11
	add	r8,r8,r2			@ h+=Ch(e,f,g)
#if 27==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 27<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r9,r10			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#13*4]		@ from future BODY_16_xx
	eor	r3,r9,r10			@ a^b, b^c in next round
	ldr	r1,[sp,#10*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r9,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r4,r4,r8			@ d+=h
	eor	r12,r12,r10			@ Maj(a,b,c)
	add	r8,r8,r0,ror#2	@ h+=Sigma0(a)
	@ add	r8,r8,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#13*4]		@ 28
	@ ldr	r1,[sp,#10*4]
	mov	r0,r2,ror#7
	add	r8,r8,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#12*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#5*4]

	add	r12,r12,r0
	eor	r0,r4,r4,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r4,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r7,r7,r2			@ h+=X[i]
	str	r2,[sp,#12*4]
	eor	r2,r5,r6
	add	r7,r7,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r4
	add	r7,r7,r12			@ h+=K256[i]
	eor	r2,r2,r6			@ Ch(e,f,g)
	eor	r0,r8,r8,ror#11
	add	r7,r7,r2			@ h+=Ch(e,f,g)
#if 28==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 28<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r8,r9			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#14*4]		@ from future BODY_16_xx
	eor	r12,r8,r9			@ a^b, b^c in next round
	ldr	r1,[sp,#11*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r8,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r11,r11,r7			@ d+=h
	eor	r3,r3,r9			@ Maj(a,b,c)
	add	r7,r7,r0,ror#2	@ h+=Sigma0(a)
	@ add	r7,r7,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#14*4]		@ 29
	@ ldr	r1,[sp,#11*4]
	mov	r0,r2,ror#7
	add	r7,r7,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#13*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#6*4]

	add	r3,r3,r0
	eor	r0,r11,r11,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r11,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r6,r6,r2			@ h+=X[i]
	str	r2,[sp,#13*4]
	eor	r2,r4,r5
	add	r6,r6,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r11
	add	r6,r6,r3			@ h+=K256[i]
	eor	r2,r2,r5			@ Ch(e,f,g)
	eor	r0,r7,r7,ror#11
	add	r6,r6,r2			@ h+=Ch(e,f,g)
#if 29==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 29<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r7,r8			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#15*4]		@ from future BODY_16_xx
	eor	r3,r7,r8			@ a^b, b^c in next round
	ldr	r1,[sp,#12*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r7,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r10,r10,r6			@ d+=h
	eor	r12,r12,r8			@ Maj(a,b,c)
	add	r6,r6,r0,ror#2	@ h+=Sigma0(a)
	@ add	r6,r6,r12			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#15*4]		@ 30
	@ ldr	r1,[sp,#12*4]
	mov	r0,r2,ror#7
	add	r6,r6,r12			@ h+=Maj(a,b,c) from the past
	mov	r12,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r12,r12,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#14*4]
	eor	r12,r12,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#7*4]

	add	r12,r12,r0
	eor	r0,r10,r10,ror#5	@ from BODY_00_15
	add	r2,r2,r12
	eor	r0,r0,r10,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r12,[r14],#4			@ *K256++
	add	r5,r5,r2			@ h+=X[i]
	str	r2,[sp,#14*4]
	eor	r2,r11,r4
	add	r5,r5,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r10
	add	r5,r5,r12			@ h+=K256[i]
	eor	r2,r2,r4			@ Ch(e,f,g)
	eor	r0,r6,r6,ror#11
	add	r5,r5,r2			@ h+=Ch(e,f,g)
#if 30==31
	and	r12,r12,#0xff
	cmp	r12,#0xf2			@ done?
#endif
#if 30<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r12,r6,r7			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#0*4]		@ from future BODY_16_xx
	eor	r12,r6,r7			@ a^b, b^c in next round
	ldr	r1,[sp,#13*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r6,ror#20	@ Sigma0(a)
	and	r3,r3,r12			@ (b^c)&=(a^b)
	add	r9,r9,r5			@ d+=h
	eor	r3,r3,r7			@ Maj(a,b,c)
	add	r5,r5,r0,ror#2	@ h+=Sigma0(a)
	@ add	r5,r5,r3			@ h+=Maj(a,b,c)
	@ ldr	r2,[sp,#0*4]		@ 31
	@ ldr	r1,[sp,#13*4]
	mov	r0,r2,ror#7
	add	r5,r5,r3			@ h+=Maj(a,b,c) from the past
	mov	r3,r1,ror#17
	eor	r0,r0,r2,ror#18
	eor	r3,r3,r1,ror#19
	eor	r0,r0,r2,lsr#3	@ sigma0(X[i+1])
	ldr	r2,[sp,#15*4]
	eor	r3,r3,r1,lsr#10	@ sigma1(X[i+14])
	ldr	r1,[sp,#8*4]

	add	r3,r3,r0
	eor	r0,r9,r9,ror#5	@ from BODY_00_15
	add	r2,r2,r3
	eor	r0,r0,r9,ror#19	@ Sigma1(e)
	add	r2,r2,r1			@ X[i]
	ldr	r3,[r14],#4			@ *K256++
	add	r4,r4,r2			@ h+=X[i]
	str	r2,[sp,#15*4]
	eor	r2,r10,r11
	add	r4,r4,r0,ror#6	@ h+=Sigma1(e)
	and	r2,r2,r9
	add	r4,r4,r3			@ h+=K256[i]
	eor	r2,r2,r11			@ Ch(e,f,g)
	eor	r0,r5,r5,ror#11
	add	r4,r4,r2			@ h+=Ch(e,f,g)
#if 31==31
	and	r3,r3,#0xff
	cmp	r3,#0xf2			@ done?
#endif
#if 31<15
# if __ARM_ARCH__>=7
	ldr	r2,[r1],#4			@ prefetch
# else
	ldrb	r2,[r1,#3]
# endif
	eor	r3,r5,r6			@ a^b, b^c in next round
#else
	ldr	r2,[sp,#1*4]		@ from future BODY_16_xx
	eor	r3,r5,r6			@ a^b, b^c in next round
	ldr	r1,[sp,#14*4]	@ from future BODY_16_xx
#endif
	eor	r0,r0,r5,ror#20	@ Sigma0(a)
	and	r12,r12,r3			@ (b^c)&=(a^b)
	add	r8,r8,r4			@ d+=h
	eor	r12,r12,r6			@ Maj(a,b,c)
	add	r4,r4,r0,ror#2	@ h+=Sigma0(a)
	@ add	r4,r4,r12			@ h+=Maj(a,b,c)
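@ Loop control: in round 31 above, the low byte of the K256 word just
@ loaded is compared against 0xf2, the low byte of the final constant
@ 0xc67178f2, so the flags say whether all 64 rounds have consumed
@ the table.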
#ifdef	__thumb2__
	ite	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r3,[sp,#16*4]		@ pull ctx
	bne	.Lrounds_16_xx

	add	r4,r4,r12		@ h+=Maj(a,b,c) from the past
	ldr	r0,[r3,#0]
	ldr	r2,[r3,#4]
	ldr	r12,[r3,#8]
	add	r4,r4,r0
	ldr	r0,[r3,#12]
	add	r5,r5,r2
	ldr	r2,[r3,#16]
	add	r6,r6,r12
	ldr	r12,[r3,#20]
	add	r7,r7,r0
	ldr	r0,[r3,#24]
	add	r8,r8,r2
	ldr	r2,[r3,#28]
	add	r9,r9,r12
	ldr	r1,[sp,#17*4]		@ pull inp
	ldr	r12,[sp,#18*4]		@ pull inp+len
	add	r10,r10,r0
	add	r11,r11,r2
	stmia	r3,{r4,r5,r6,r7,r8,r9,r10,r11}
	cmp	r1,r12
	sub	r14,r14,#256	@ rewind Ktbl
	bne	.Loop

	add	sp,sp,#19*4	@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4-r11,pc}
#else
	ldmia	sp!,{r4-r11,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
	.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	zfs_sha256_block_armv7,.-zfs_sha256_block_armv7

.arch	armv7-a
.fpu	neon

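/*
 * NEON variant, same arguments as above.  Roughly: the sigma rotates of
 * the message schedule are vectorized four words at a time (vshr/vsli
 * pairs implement the rotates), and the K256[i]+X[i] sums are staged in
 * the stack buffer while the scalar round function runs interleaved on
 * r4..r11.
 */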
.globl	zfs_sha256_block_neon
.type	zfs_sha256_block_neon,%function
.align	5
.skip	16
zfs_sha256_block_neon:
.LNEON:
	stmdb	sp!,{r4-r12,lr}

	sub	r11,sp,#16*4+16
#if __ARM_ARCH__ >=7
	adr	r14,K256
#else
	ldr	r14,=K256
#endif
	bic	r11,r11,#15		@ align for 128-bit stores
	mov	r12,sp
	mov	sp,r11			@ alloca
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp

	vld1.8		{q0},[r1]!
	vld1.8		{q1},[r1]!
	vld1.8		{q2},[r1]!
	vld1.8		{q3},[r1]!
	vld1.32		{q8},[r14,:128]!
	vld1.32		{q9},[r14,:128]!
	vld1.32		{q10},[r14,:128]!
	vld1.32		{q11},[r14,:128]!
	vrev32.8	q0,q0		@ yes, even on
	str		r0,[sp,#64]
	vrev32.8	q1,q1		@ big-endian
	str		r1,[sp,#68]
	mov		r1,sp
	vrev32.8	q2,q2
	str		r2,[sp,#72]
	vrev32.8	q3,q3
	str		r12,[sp,#76]		@ save original sp
	vadd.i32	q8,q8,q0
	vadd.i32	q9,q9,q1
	vst1.32		{q8},[r1,:128]!
	vadd.i32	q10,q10,q2
	vst1.32		{q9},[r1,:128]!
	vadd.i32	q11,q11,q3
	vst1.32		{q10},[r1,:128]!
	vst1.32		{q11},[r1,:128]!

	ldmia		r0,{r4-r11}
	sub		r1,r1,#64
	ldr		r2,[sp,#0]
	eor		r12,r12,r12
	eor		r3,r5,r6
	b		.L_00_48

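@ Main NEON loop: each pass runs sixteen scalar rounds against K+X sums
@ already staged on the stack while computing the next sixteen schedule
@ words and their K256 sums with NEON; the zero word after K256 (the
@ terminator above, checked via "teq r2,#0" below) ends the loop.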
1895.align	4
1896.L_00_48:
1897	vext.8	q8,q0,q1,#4
1898	add	r11,r11,r2
1899	eor	r2,r9,r10
1900	eor	r0,r8,r8,ror#5
1901	vext.8	q9,q2,q3,#4
1902	add	r4,r4,r12
1903	and	r2,r2,r8
1904	eor	r12,r0,r8,ror#19
1905	vshr.u32	q10,q8,#7
1906	eor	r0,r4,r4,ror#11
1907	eor	r2,r2,r10
1908	vadd.i32	q0,q0,q9
1909	add	r11,r11,r12,ror#6
1910	eor	r12,r4,r5
1911	vshr.u32	q9,q8,#3
1912	eor	r0,r0,r4,ror#20
1913	add	r11,r11,r2
1914	vsli.32	q10,q8,#25
1915	ldr	r2,[sp,#4]
1916	and	r3,r3,r12
1917	vshr.u32	q11,q8,#18
1918	add	r7,r7,r11
1919	add	r11,r11,r0,ror#2
1920	eor	r3,r3,r5
1921	veor	q9,q9,q10
1922	add	r10,r10,r2
1923	vsli.32	q11,q8,#14
1924	eor	r2,r8,r9
1925	eor	r0,r7,r7,ror#5
1926	vshr.u32	d24,d7,#17
1927	add	r11,r11,r3
1928	and	r2,r2,r7
1929	veor	q9,q9,q11
1930	eor	r3,r0,r7,ror#19
1931	eor	r0,r11,r11,ror#11
1932	vsli.32	d24,d7,#15
1933	eor	r2,r2,r9
1934	add	r10,r10,r3,ror#6
1935	vshr.u32	d25,d7,#10
1936	eor	r3,r11,r4
1937	eor	r0,r0,r11,ror#20
1938	vadd.i32	q0,q0,q9
1939	add	r10,r10,r2
1940	ldr	r2,[sp,#8]
1941	veor	d25,d25,d24
1942	and	r12,r12,r3
1943	add	r6,r6,r10
1944	vshr.u32	d24,d7,#19
1945	add	r10,r10,r0,ror#2
1946	eor	r12,r12,r4
1947	vsli.32	d24,d7,#13
1948	add	r9,r9,r2
1949	eor	r2,r7,r8
1950	veor	d25,d25,d24
1951	eor	r0,r6,r6,ror#5
1952	add	r10,r10,r12
1953	vadd.i32	d0,d0,d25
1954	and	r2,r2,r6
1955	eor	r12,r0,r6,ror#19
1956	vshr.u32	d24,d0,#17
1957	eor	r0,r10,r10,ror#11
1958	eor	r2,r2,r8
1959	vsli.32	d24,d0,#15
1960	add	r9,r9,r12,ror#6
1961	eor	r12,r10,r11
1962	vshr.u32	d25,d0,#10
1963	eor	r0,r0,r10,ror#20
1964	add	r9,r9,r2
1965	veor	d25,d25,d24
1966	ldr	r2,[sp,#12]
1967	and	r3,r3,r12
1968	vshr.u32	d24,d0,#19
1969	add	r5,r5,r9
1970	add	r9,r9,r0,ror#2
1971	eor	r3,r3,r11
1972	vld1.32	{q8},[r14,:128]!
1973	add	r8,r8,r2
1974	vsli.32	d24,d0,#13
1975	eor	r2,r6,r7
1976	eor	r0,r5,r5,ror#5
1977	veor	d25,d25,d24
1978	add	r9,r9,r3
1979	and	r2,r2,r5
1980	vadd.i32	d1,d1,d25
1981	eor	r3,r0,r5,ror#19
1982	eor	r0,r9,r9,ror#11
1983	vadd.i32	q8,q8,q0
1984	eor	r2,r2,r7
1985	add	r8,r8,r3,ror#6
1986	eor	r3,r9,r10
1987	eor	r0,r0,r9,ror#20
1988	add	r8,r8,r2
1989	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q1,q2,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q3,q0,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q1,q1,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d1,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d1,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d1,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q1,q1,q9
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d1,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d1,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d2,d2,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d2,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d2,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d2,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	vshr.u32	d24,d2,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d2,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d3,d3,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q1
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vext.8	q8,q2,q3,#4
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	vext.8	q9,q0,q1,#4
	add	r4,r4,r12
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vadd.i32	q2,q2,q9
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	vshr.u32	q9,q8,#3
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	veor	q9,q9,q10
	add	r10,r10,r2
	vsli.32	q11,q8,#14
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	vshr.u32	d24,d3,#17
	add	r11,r11,r3
	and	r2,r2,r7
	veor	q9,q9,q11
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	vsli.32	d24,d3,#15
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	vshr.u32	d25,d3,#10
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	vadd.i32	q2,q2,q9
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r6,r6,r10
	vshr.u32	d24,d3,#19
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	vsli.32	d24,d3,#13
	add	r9,r9,r2
	eor	r2,r7,r8
	veor	d25,d25,d24
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	vadd.i32	d4,d4,d25
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	vshr.u32	d24,d4,#17
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	vsli.32	d24,d4,#15
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	vshr.u32	d25,d4,#10
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	vshr.u32	d24,d4,#19
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	vld1.32	{q8},[r14,:128]!
	add	r8,r8,r2
	vsli.32	d24,d4,#13
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	veor	d25,d25,d24
	add	r9,r9,r3
	and	r2,r2,r5
	vadd.i32	d5,d5,d25
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	vadd.i32	q8,q8,q2
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vext.8	q8,q3,q0,#4
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	vext.8	q9,q1,q2,#4
	add	r8,r8,r12
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	vshr.u32	q10,q8,#7
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vadd.i32	q3,q3,q9
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	vshr.u32	q9,q8,#3
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vsli.32	q10,q8,#25
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	vshr.u32	q11,q8,#18
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	veor	q9,q9,q10
	add	r6,r6,r2
	vsli.32	q11,q8,#14
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	vshr.u32	d24,d5,#17
	add	r7,r7,r3
	and	r2,r2,r11
	veor	q9,q9,q11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	vsli.32	d24,d5,#15
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	vshr.u32	d25,d5,#10
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	vadd.i32	q3,q3,q9
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	veor	d25,d25,d24
	and	r12,r12,r3
	add	r10,r10,r6
	vshr.u32	d24,d5,#19
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	vsli.32	d24,d5,#13
	add	r5,r5,r2
	eor	r2,r11,r4
	veor	d25,d25,d24
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	vadd.i32	d6,d6,d25
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	vshr.u32	d24,d6,#17
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	vsli.32	d24,d6,#15
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	vshr.u32	d25,d6,#10
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	veor	d25,d25,d24
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	vshr.u32	d24,d6,#19
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	vld1.32	{q8},[r14,:128]!
	add	r4,r4,r2
	vsli.32	d24,d6,#13
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	veor	d25,d25,d24
	add	r5,r5,r3
	and	r2,r2,r9
	vadd.i32	d7,d7,d25
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	vadd.i32	q8,q8,q3
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[r14]
	and	r12,r12,r3
	add	r8,r8,r4
	vst1.32	{q8},[r1,:128]!
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	teq	r2,#0				@ check for K256 terminator
	ldr	r2,[sp,#0]
	sub	r1,r1,#64
	bne	.L_00_48

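@ All 48 scheduled rounds are done (the K256 terminator was hit).
@ Reload the saved input and input-end pointers; if this was the
@ final block, step the input pointer back one block so the loads
@ below stay inside the buffer (the redundant data is never used).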
	ldr		r1,[sp,#68]
	ldr		r0,[sp,#72]
	sub		r14,r14,#256	@ rewind r14 to K256
	teq		r1,r0
	it		eq
	subeq		r1,r1,#64		@ avoid SEGV
	vld1.8		{q0},[r1]!		@ load next input block
	vld1.8		{q1},[r1]!
	vld1.8		{q2},[r1]!
	vld1.8		{q3},[r1]!
	it		ne
	strne		r1,[sp,#68]
	mov		r1,sp
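	@ Last 16 rounds: the integer pipeline consumes this block's
	@ precomputed W[i]+K[i] from the stack while NEON byte-swaps
	@ the freshly loaded block and stores its first W+K values
	@ into the same scratch area for the next iteration.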
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q0,q0
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q0
	ldr	r2,[sp,#4]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#8]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#12]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#16]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q1,q1
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q1
	ldr	r2,[sp,#20]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#24]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#28]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#32]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
	add	r11,r11,r2
	eor	r2,r9,r10
	eor	r0,r8,r8,ror#5
	add	r4,r4,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r8
	eor	r12,r0,r8,ror#19
	eor	r0,r4,r4,ror#11
	eor	r2,r2,r10
	vrev32.8	q2,q2
	add	r11,r11,r12,ror#6
	eor	r12,r4,r5
	eor	r0,r0,r4,ror#20
	add	r11,r11,r2
	vadd.i32	q8,q8,q2
	ldr	r2,[sp,#36]
	and	r3,r3,r12
	add	r7,r7,r11
	add	r11,r11,r0,ror#2
	eor	r3,r3,r5
	add	r10,r10,r2
	eor	r2,r8,r9
	eor	r0,r7,r7,ror#5
	add	r11,r11,r3
	and	r2,r2,r7
	eor	r3,r0,r7,ror#19
	eor	r0,r11,r11,ror#11
	eor	r2,r2,r9
	add	r10,r10,r3,ror#6
	eor	r3,r11,r4
	eor	r0,r0,r11,ror#20
	add	r10,r10,r2
	ldr	r2,[sp,#40]
	and	r12,r12,r3
	add	r6,r6,r10
	add	r10,r10,r0,ror#2
	eor	r12,r12,r4
	add	r9,r9,r2
	eor	r2,r7,r8
	eor	r0,r6,r6,ror#5
	add	r10,r10,r12
	and	r2,r2,r6
	eor	r12,r0,r6,ror#19
	eor	r0,r10,r10,ror#11
	eor	r2,r2,r8
	add	r9,r9,r12,ror#6
	eor	r12,r10,r11
	eor	r0,r0,r10,ror#20
	add	r9,r9,r2
	ldr	r2,[sp,#44]
	and	r3,r3,r12
	add	r5,r5,r9
	add	r9,r9,r0,ror#2
	eor	r3,r3,r11
	add	r8,r8,r2
	eor	r2,r6,r7
	eor	r0,r5,r5,ror#5
	add	r9,r9,r3
	and	r2,r2,r5
	eor	r3,r0,r5,ror#19
	eor	r0,r9,r9,ror#11
	eor	r2,r2,r7
	add	r8,r8,r3,ror#6
	eor	r3,r9,r10
	eor	r0,r0,r9,ror#20
	add	r8,r8,r2
	ldr	r2,[sp,#48]
	and	r12,r12,r3
	add	r4,r4,r8
	add	r8,r8,r0,ror#2
	eor	r12,r12,r10
	vst1.32	{q8},[r1,:128]!
	add	r7,r7,r2
	eor	r2,r5,r6
	eor	r0,r4,r4,ror#5
	add	r8,r8,r12
	vld1.32	{q8},[r14,:128]!
	and	r2,r2,r4
	eor	r12,r0,r4,ror#19
	eor	r0,r8,r8,ror#11
	eor	r2,r2,r6
	vrev32.8	q3,q3
	add	r7,r7,r12,ror#6
	eor	r12,r8,r9
	eor	r0,r0,r8,ror#20
	add	r7,r7,r2
	vadd.i32	q8,q8,q3
	ldr	r2,[sp,#52]
	and	r3,r3,r12
	add	r11,r11,r7
	add	r7,r7,r0,ror#2
	eor	r3,r3,r9
	add	r6,r6,r2
	eor	r2,r4,r5
	eor	r0,r11,r11,ror#5
	add	r7,r7,r3
	and	r2,r2,r11
	eor	r3,r0,r11,ror#19
	eor	r0,r7,r7,ror#11
	eor	r2,r2,r5
	add	r6,r6,r3,ror#6
	eor	r3,r7,r8
	eor	r0,r0,r7,ror#20
	add	r6,r6,r2
	ldr	r2,[sp,#56]
	and	r12,r12,r3
	add	r10,r10,r6
	add	r6,r6,r0,ror#2
	eor	r12,r12,r8
	add	r5,r5,r2
	eor	r2,r11,r4
	eor	r0,r10,r10,ror#5
	add	r6,r6,r12
	and	r2,r2,r10
	eor	r12,r0,r10,ror#19
	eor	r0,r6,r6,ror#11
	eor	r2,r2,r4
	add	r5,r5,r12,ror#6
	eor	r12,r6,r7
	eor	r0,r0,r6,ror#20
	add	r5,r5,r2
	ldr	r2,[sp,#60]
	and	r3,r3,r12
	add	r9,r9,r5
	add	r5,r5,r0,ror#2
	eor	r3,r3,r7
	add	r4,r4,r2
	eor	r2,r10,r11
	eor	r0,r9,r9,ror#5
	add	r5,r5,r3
	and	r2,r2,r9
	eor	r3,r0,r9,ror#19
	eor	r0,r5,r5,ror#11
	eor	r2,r2,r11
	add	r4,r4,r3,ror#6
	eor	r3,r5,r6
	eor	r0,r0,r5,ror#20
	add	r4,r4,r2
	ldr	r2,[sp,#64]
	and	r12,r12,r3
	add	r8,r8,r4
	add	r4,r4,r0,ror#2
	eor	r12,r12,r6
	vst1.32	{q8},[r1,:128]!
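	@ r2 points at the hash state (saved context pointer); fold the
	@ working variables a..h, held in r4-r11, back into H[0..7].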
	ldr	r0,[r2,#0]
	add	r4,r4,r12			@ h+=Maj(a,b,c) from the past
	ldr	r12,[r2,#4]
	ldr	r3,[r2,#8]
	ldr	r1,[r2,#12]
	add	r4,r4,r0			@ accumulate
	ldr	r0,[r2,#16]
	add	r5,r5,r12
	ldr	r12,[r2,#20]
	add	r6,r6,r3
	ldr	r3,[r2,#24]
	add	r7,r7,r1
	ldr	r1,[r2,#28]
	add	r8,r8,r0
	str	r4,[r2],#4
	add	r9,r9,r12
	str	r5,[r2],#4
	add	r10,r10,r3
	str	r6,[r2],#4
	add	r11,r11,r1
	str	r7,[r2],#4
	stmia	r2,{r8-r11}

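	@ NE/EQ still reflect the earlier "teq r1,r0": NE means more
	@ input remains, so reset the scratch pointer and loop again;
	@ EQ restores the caller's sp and returns below.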
	ittte	ne
	movne	r1,sp
	ldrne	r2,[sp,#0]
	eorne	r12,r12,r12
	ldreq	sp,[sp,#76]			@ restore original sp
	itt	ne
	eorne	r3,r5,r6
	bne	.L_00_48

	ldmia	sp!,{r4-r12,pc}
.size	zfs_sha256_block_neon,.-zfs_sha256_block_neon

# if defined(__thumb2__)
#  define INST(a,b,c,d)	.byte	c,d|0xc,a,b
# else
#  define INST(a,b,c,d)	.byte	a,b,c,d
# endif
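
@ INST() emits the ARMv8 SHA-256 instructions as raw opcode bytes so
@ that assemblers without crypto-extension support can still build
@ this file; in Thumb-2 mode the two halfwords are swapped and the
@ 0xf3 prefix byte becomes 0xff (d|0xc), matching the Thumb-2 NEON
@ encoding.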

.globl	zfs_sha256_block_armv8
.type	zfs_sha256_block_armv8,%function
.align	5
zfs_sha256_block_armv8:
.LARMv8:
	vld1.32	{q0,q1},[r0]
	sub	r3,r3,#256+32
	add	r2,r1,r2,lsl#6	@ len to point at the end of inp
	b	.Loop_v8

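@ One iteration per 64-byte block: load and byte-reverse the block,
@ then run sixteen 4-round groups of sha256h/sha256h2, with
@ sha256su0/sha256su1 extending the message schedule through the
@ first twelve groups.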
.align	4
.Loop_v8:
	vld1.8		{q8-q9},[r1]!
	vld1.8		{q10-q11},[r1]!
	vld1.32		{q12},[r3]!
	vrev32.8	q8,q8
	vrev32.8	q9,q9
	vrev32.8	q10,q10
	vrev32.8	q11,q11
	vmov		q14,q0	@ offload
	vmov		q15,q1
	teq		r1,r2
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	INST(0xe2,0x03,0xfa,0xf3)	@ sha256su0 q8,q9
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe6,0x0c,0x64,0xf3)	@ sha256su1 q8,q10,q11
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	INST(0xe4,0x23,0xfa,0xf3)	@ sha256su0 q9,q10
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe0,0x2c,0x66,0xf3)	@ sha256su1 q9,q11,q8
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q10
	INST(0xe6,0x43,0xfa,0xf3)	@ sha256su0 q10,q11
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12
	INST(0xe2,0x4c,0x60,0xf3)	@ sha256su1 q10,q8,q9
	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q11
	INST(0xe0,0x63,0xfa,0xf3)	@ sha256su0 q11,q8
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13
	INST(0xe4,0x6c,0x62,0xf3)	@ sha256su1 q11,q9,q10
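	@ The message schedule is complete; the remaining four groups
	@ only perform the sha256h/sha256h2 compression steps.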
	vld1.32		{q13},[r3]!
	vadd.i32	q12,q12,q8
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vld1.32		{q12},[r3]!
	vadd.i32	q13,q13,q9
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

	vld1.32		{q13},[r3]
	vadd.i32	q12,q12,q10
	sub		r3,r3,#256-16	@ rewind
	vmov		q2,q0
	INST(0x68,0x0c,0x02,0xf3)	@ sha256h q0,q1,q12
	INST(0x68,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q12

	vadd.i32	q13,q13,q11
	vmov		q2,q0
	INST(0x6a,0x0c,0x02,0xf3)	@ sha256h q0,q1,q13
	INST(0x6a,0x2c,0x14,0xf3)	@ sha256h2 q1,q2,q13

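	@ Merge this block's result into the running hash; flags from
	@ "teq r1,r2" above decide whether to loop for another block.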
	vadd.i32	q0,q0,q14
	vadd.i32	q1,q1,q15
	it		ne
	bne		.Loop_v8

	vst1.32		{q0,q1},[r0]

	bx	lr		@ return
.size	zfs_sha256_block_armv8,.-zfs_sha256_block_armv8

#endif
