/* Do not modify. This file is auto-generated from sha512-armv4.pl. */
@ Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved.
@
@ Licensed under the Apache License 2.0 (the "License").  You may not use
@ this file except in compliance with the License.  You can obtain a copy
@ in the file LICENSE in the source distribution or at
@ https://www.openssl.org/source/license.html


@ ====================================================================
@ Written by Andy Polyakov <appro@openssl.org> for the OpenSSL
@ project. The module is, however, dual licensed under OpenSSL and
@ CRYPTOGAMS licenses depending on where you obtain it. For further
@ details see http://www.openssl.org/~appro/cryptogams/.
@
@ Permission to use under GPL terms is granted.
@ ====================================================================

@ SHA512 block procedure for ARMv4. September 2007.

@ This code is ~4.5 (four and a half) times faster than code generated
@ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue
@ Xscale PXA250 core].
@
@ July 2010.
@
@ Rescheduling for dual-issue pipeline resulted in 6% improvement on
@ Cortex A8 core and ~40 cycles per processed byte.

@ February 2011.
@
@ Profiler-assisted and platform-specific optimization resulted in 7%
@ improvement on Cortex A8 core and ~38 cycles per byte.

@ March 2011.
@
@ Add NEON implementation. On Cortex A8 it was measured to process
@ one byte in 23.3 cycles or ~60% faster than integer-only code.

@ August 2012.
@
@ Improve NEON performance by 12% on Snapdragon S4. In absolute
@ terms it's 22.6 cycles per byte, which is a disappointing result.
@ Technical writers asserted that the 3-way S4 pipeline can sustain
@ multiple NEON instructions per cycle, but dual NEON issue could
@ not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html
@ for further details. On a side note Cortex-A15 processes one byte in
@ 16 cycles.

@ Byte order [in]dependence. =========================================
@
@ Originally caller was expected to maintain specific *dword* order in
@ h[0-7], namely with most significant dword at *lower* address, which
@ was reflected in below two parameters as 0 and 4. Now caller is
@ expected to maintain native byte order for whole 64-bit values.
#ifndef __KERNEL__
# include "arm_arch.h"
# define VFP_ABI_PUSH	vstmdb	sp!,{d8-d15}
# define VFP_ABI_POP	vldmia	sp!,{d8-d15}
#else
# define __ARM_ARCH__ __LINUX_ARM_ARCH__
# define __ARM_MAX_ARCH__ 7
# define VFP_ABI_PUSH
# define VFP_ABI_POP
#endif

#ifdef __ARMEL__
# define LO 0
# define HI 4
# define WORD64(hi0,lo0,hi1,lo1)	.word	lo0,hi0, lo1,hi1
#else
# define HI 0
# define LO 4
# define WORD64(hi0,lo0,hi1,lo1)	.word	hi0,lo0, hi1,lo1
#endif

#if defined(__thumb2__)
.syntax	unified
.thumb
# define adrl adr
#else
.code	32
#endif

.text

.type	K512,%object
.align	5
K512:
	WORD64(0x428a2f98,0xd728ae22,	0x71374491,0x23ef65cd)
	WORD64(0xb5c0fbcf,0xec4d3b2f,	0xe9b5dba5,0x8189dbbc)
	WORD64(0x3956c25b,0xf348b538,	0x59f111f1,0xb605d019)
	WORD64(0x923f82a4,0xaf194f9b,	0xab1c5ed5,0xda6d8118)
	WORD64(0xd807aa98,0xa3030242,	0x12835b01,0x45706fbe)
	WORD64(0x243185be,0x4ee4b28c,	0x550c7dc3,0xd5ffb4e2)
	WORD64(0x72be5d74,0xf27b896f,	0x80deb1fe,0x3b1696b1)
	WORD64(0x9bdc06a7,0x25c71235,	0xc19bf174,0xcf692694)
	WORD64(0xe49b69c1,0x9ef14ad2,	0xefbe4786,0x384f25e3)
	WORD64(0x0fc19dc6,0x8b8cd5b5,	0x240ca1cc,0x77ac9c65)
	WORD64(0x2de92c6f,0x592b0275,	0x4a7484aa,0x6ea6e483)
	WORD64(0x5cb0a9dc,0xbd41fbd4,	0x76f988da,0x831153b5)
	WORD64(0x983e5152,0xee66dfab,	0xa831c66d,0x2db43210)
	WORD64(0xb00327c8,0x98fb213f,	0xbf597fc7,0xbeef0ee4)
	WORD64(0xc6e00bf3,0x3da88fc2,	0xd5a79147,0x930aa725)
	WORD64(0x06ca6351,0xe003826f,	0x14292967,0x0a0e6e70)
	WORD64(0x27b70a85,0x46d22ffc,	0x2e1b2138,0x5c26c926)
	WORD64(0x4d2c6dfc,0x5ac42aed,	0x53380d13,0x9d95b3df)
	WORD64(0x650a7354,0x8baf63de,	0x766a0abb,0x3c77b2a8)
	WORD64(0x81c2c92e,0x47edaee6,	0x92722c85,0x1482353b)
	WORD64(0xa2bfe8a1,0x4cf10364,	0xa81a664b,0xbc423001)
	WORD64(0xc24b8b70,0xd0f89791,	0xc76c51a3,0x0654be30)
	WORD64(0xd192e819,0xd6ef5218,	0xd6990624,0x5565a910)
	WORD64(0xf40e3585,0x5771202a,	0x106aa070,0x32bbd1b8)
	WORD64(0x19a4c116,0xb8d2d0c8,	0x1e376c08,0x5141ab53)
	WORD64(0x2748774c,0xdf8eeb99,	0x34b0bcb5,0xe19b48a8)
	WORD64(0x391c0cb3,0xc5c95a63,	0x4ed8aa4a,0xe3418acb)
	WORD64(0x5b9cca4f,0x7763e373,	0x682e6ff3,0xd6b2b8a3)
	WORD64(0x748f82ee,0x5defb2fc,	0x78a5636f,0x43172f60)
	WORD64(0x84c87814,0xa1f0ab72,	0x8cc70208,0x1a6439ec)
	WORD64(0x90befffa,0x23631e28,	0xa4506ceb,0xde82bde9)
	WORD64(0xbef9a3f7,0xb2c67915,	0xc67178f2,0xe372532b)
	WORD64(0xca273ece,0xea26619c,	0xd186b8c7,0x21c0c207)
	WORD64(0xeada7dd6,0xcde0eb1e,	0xf57d4f7f,0xee6ed178)
	WORD64(0x06f067aa,0x72176fba,	0x0a637dc5,0xa2c898a6)
	WORD64(0x113f9804,0xbef90dae,	0x1b710b35,0x131c471b)
	WORD64(0x28db77f5,0x23047d84,	0x32caab7b,0x40c72493)
	WORD64(0x3c9ebe0a,0x15c9bebc,	0x431d67c4,0x9c100d4c)
	WORD64(0x4cc5d4be,0xcb3e42b6,	0x597f299c,0xfc657e2a)
	WORD64(0x5fcb6fab,0x3ad6faec,	0x6c44198c,0x4a475817)
.size	K512,.-K512
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
.LOPENSSL_armcap:
# ifdef	_WIN32
.word	OPENSSL_armcap_P
# else
.word	OPENSSL_armcap_P-.Lsha512_block_data_order
# endif
.skip	32-4
#else
.skip	32
#endif

.globl	sha512_block_data_order
.type	sha512_block_data_order,%function
sha512_block_data_order:
.Lsha512_block_data_order:
#if __ARM_ARCH__<7 && !defined(__thumb2__)
	sub	r3,pc,#8		@ sha512_block_data_order
#else
	adr	r3,.Lsha512_block_data_order
#endif
#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
	ldr	r12,.LOPENSSL_armcap
# if !defined(_WIN32)
	ldr	r12,[r3,r12]		@ OPENSSL_armcap_P
# endif
# if defined(__APPLE__) || defined(_WIN32)
	ldr	r12,[r12]
# endif
	tst	r12,#ARMV7_NEON
	bne	.LNEON
#endif
	add	r2,r1,r2,lsl#7	@ len to point at the end of inp
	stmdb	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	sub	r14,r3,#672		@ K512
	sub	sp,sp,#9*8

	ldr	r7,[r0,#32+LO]
	ldr	r8,[r0,#32+HI]
	ldr	r9, [r0,#48+LO]
	ldr	r10, [r0,#48+HI]
	ldr	r11, [r0,#56+LO]
	ldr	r12, [r0,#56+HI]
.Loop:
	str	r9, [sp,#48+0]
	str	r10, [sp,#48+4]
	str	r11, [sp,#56+0]
	str	r12, [sp,#56+4]
	ldr	r5,[r0,#0+LO]
	ldr	r6,[r0,#0+HI]
	ldr	r3,[r0,#8+LO]
	ldr	r4,[r0,#8+HI]
	ldr	r9, [r0,#16+LO]
	ldr	r10, [r0,#16+HI]
	ldr	r11, [r0,#24+LO]
	ldr	r12, [r0,#24+HI]
	str	r3,[sp,#8+0]
	str	r4,[sp,#8+4]
	str	r9, [sp,#16+0]
	str	r10, [sp,#16+4]
	str	r11, [sp,#24+0]
	str	r12, [sp,#24+4]
	ldr	r3,[r0,#40+LO]
	ldr	r4,[r0,#40+HI]
	str	r3,[sp,#40+0]
	str	r4,[sp,#40+4]

.L00_15:
#if __ARM_ARCH__<7
	ldrb	r3,[r1,#7]
	ldrb	r9, [r1,#6]
	ldrb	r10, [r1,#5]
	ldrb	r11, [r1,#4]
	ldrb	r4,[r1,#3]
	ldrb	r12, [r1,#2]
	orr	r3,r3,r9,lsl#8
	ldrb	r9, [r1,#1]
	orr	r3,r3,r10,lsl#16
	ldrb	r10, [r1],#8
	orr	r3,r3,r11,lsl#24
	orr	r4,r4,r12,lsl#8
	orr	r4,r4,r9,lsl#16
	orr	r4,r4,r10,lsl#24
#else
	ldr	r3,[r1,#4]
	ldr	r4,[r1],#8
#ifdef __ARMEL__
	rev	r3,r3
	rev	r4,r4
#endif
#endif
	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
	mov	r9,r7,lsr#14
	str	r3,[sp,#64+0]
	mov	r10,r8,lsr#14
	str	r4,[sp,#64+4]
	eor	r9,r9,r8,lsl#18
	ldr	r11,[sp,#56+0]	@ h.lo
	eor	r10,r10,r7,lsl#18
	ldr	r12,[sp,#56+4]	@ h.hi
	eor	r9,r9,r7,lsr#18
	eor	r10,r10,r8,lsr#18
	eor	r9,r9,r8,lsl#14
	eor	r10,r10,r7,lsl#14
	eor	r9,r9,r8,lsr#9
	eor	r10,r10,r7,lsr#9
	eor	r9,r9,r7,lsl#23
	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
	adds	r3,r3,r9
	ldr	r9,[sp,#40+0]	@ f.lo
	adc	r4,r4,r10		@ T += Sigma1(e)
	ldr	r10,[sp,#40+4]	@ f.hi
	adds	r3,r3,r11
	ldr	r11,[sp,#48+0]	@ g.lo
	adc	r4,r4,r12		@ T += h
	ldr	r12,[sp,#48+4]	@ g.hi

	eor	r9,r9,r11
	str	r7,[sp,#32+0]
	eor	r10,r10,r12
	str	r8,[sp,#32+4]
	and	r9,r9,r7
	str	r5,[sp,#0+0]
	and	r10,r10,r8
	str	r6,[sp,#0+4]
	eor	r9,r9,r11
	ldr	r11,[r14,#LO]	@ K[i].lo
	eor	r10,r10,r12		@ Ch(e,f,g)
	ldr	r12,[r14,#HI]	@ K[i].hi

	adds	r3,r3,r9
	ldr	r7,[sp,#24+0]	@ d.lo
	adc	r4,r4,r10		@ T += Ch(e,f,g)
	ldr	r8,[sp,#24+4]	@ d.hi
	adds	r3,r3,r11
	and	r9,r11,#0xff
	adc	r4,r4,r12		@ T += K[i]
	adds	r7,r7,r3
	ldr	r11,[sp,#8+0]	@ b.lo
	adc	r8,r8,r4		@ d += T
	teq	r9,#148

	ldr	r12,[sp,#16+0]	@ c.lo
#ifdef	__thumb2__
	it	eq			@ Thumb2 thing, sanity check in ARM
#endif
	orreq	r14,r14,#1
	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
	mov	r9,r5,lsr#28
	mov	r10,r6,lsr#28
	eor	r9,r9,r6,lsl#4
	eor	r10,r10,r5,lsl#4
	eor	r9,r9,r6,lsr#2
	eor	r10,r10,r5,lsr#2
	eor	r9,r9,r5,lsl#30
	eor	r10,r10,r6,lsl#30
	eor	r9,r9,r6,lsr#7
	eor	r10,r10,r5,lsr#7
	eor	r9,r9,r5,lsl#25
	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
	adds	r3,r3,r9
	and	r9,r5,r11
	adc	r4,r4,r10		@ T += Sigma0(a)

	ldr	r10,[sp,#8+4]	@ b.hi
	orr	r5,r5,r11
	ldr	r11,[sp,#16+4]	@ c.hi
	and	r5,r5,r12
	and	r12,r6,r10
	orr	r6,r6,r10
	orr	r5,r5,r9		@ Maj(a,b,c).lo
	and	r6,r6,r11
	adds	r5,r5,r3
	orr	r6,r6,r12		@ Maj(a,b,c).hi
	sub	sp,sp,#8
	adc	r6,r6,r4		@ h += T
	tst	r14,#1
	add	r14,r14,#8
	tst	r14,#1
	beq	.L00_15
	ldr	r9,[sp,#184+0]
	ldr	r10,[sp,#184+4]
	bic	r14,r14,#1
.L16_79:
	@ sigma0(x)	(ROTR((x),1)  ^ ROTR((x),8)  ^ ((x)>>7))
	@ LO		lo>>1^hi<<31  ^ lo>>8^hi<<24 ^ lo>>7^hi<<25
	@ HI		hi>>1^lo<<31  ^ hi>>8^lo<<24 ^ hi>>7
	mov	r3,r9,lsr#1
	ldr	r11,[sp,#80+0]
	mov	r4,r10,lsr#1
	ldr	r12,[sp,#80+4]
	eor	r3,r3,r10,lsl#31
	eor	r4,r4,r9,lsl#31
	eor	r3,r3,r9,lsr#8
	eor	r4,r4,r10,lsr#8
	eor	r3,r3,r10,lsl#24
	eor	r4,r4,r9,lsl#24
	eor	r3,r3,r9,lsr#7
	eor	r4,r4,r10,lsr#7
	eor	r3,r3,r10,lsl#25

	@ sigma1(x)	(ROTR((x),19) ^ ROTR((x),61) ^ ((x)>>6))
	@ LO		lo>>19^hi<<13 ^ hi>>29^lo<<3 ^ lo>>6^hi<<26
	@ HI		hi>>19^lo<<13 ^ lo>>29^hi<<3 ^ hi>>6
	mov	r9,r11,lsr#19
	mov	r10,r12,lsr#19
	eor	r9,r9,r12,lsl#13
	eor	r10,r10,r11,lsl#13
	eor	r9,r9,r12,lsr#29
	eor	r10,r10,r11,lsr#29
	eor	r9,r9,r11,lsl#3
	eor	r10,r10,r12,lsl#3
	eor	r9,r9,r11,lsr#6
	eor	r10,r10,r12,lsr#6
	ldr	r11,[sp,#120+0]
	eor	r9,r9,r12,lsl#26

	ldr	r12,[sp,#120+4]
	adds	r3,r3,r9
	ldr	r9,[sp,#192+0]
	adc	r4,r4,r10

	ldr	r10,[sp,#192+4]
	adds	r3,r3,r11
	adc	r4,r4,r12
	adds	r3,r3,r9
	adc	r4,r4,r10
	@ Sigma1(x)	(ROTR((x),14) ^ ROTR((x),18)  ^ ROTR((x),41))
	@ LO		lo>>14^hi<<18 ^ lo>>18^hi<<14 ^ hi>>9^lo<<23
	@ HI		hi>>14^lo<<18 ^ hi>>18^lo<<14 ^ lo>>9^hi<<23
	mov	r9,r7,lsr#14
	str	r3,[sp,#64+0]
	mov	r10,r8,lsr#14
	str	r4,[sp,#64+4]
	eor	r9,r9,r8,lsl#18
	ldr	r11,[sp,#56+0]	@ h.lo
	eor	r10,r10,r7,lsl#18
	ldr	r12,[sp,#56+4]	@ h.hi
	eor	r9,r9,r7,lsr#18
	eor	r10,r10,r8,lsr#18
	eor	r9,r9,r8,lsl#14
	eor	r10,r10,r7,lsl#14
	eor	r9,r9,r8,lsr#9
	eor	r10,r10,r7,lsr#9
	eor	r9,r9,r7,lsl#23
	eor	r10,r10,r8,lsl#23	@ Sigma1(e)
	adds	r3,r3,r9
	ldr	r9,[sp,#40+0]	@ f.lo
	adc	r4,r4,r10		@ T += Sigma1(e)
	ldr	r10,[sp,#40+4]	@ f.hi
	adds	r3,r3,r11
	ldr	r11,[sp,#48+0]	@ g.lo
	adc	r4,r4,r12		@ T += h
	ldr	r12,[sp,#48+4]	@ g.hi

	eor	r9,r9,r11
	str	r7,[sp,#32+0]
	eor	r10,r10,r12
	str	r8,[sp,#32+4]
	and	r9,r9,r7
	str	r5,[sp,#0+0]
	and	r10,r10,r8
	str	r6,[sp,#0+4]
	eor	r9,r9,r11
	ldr	r11,[r14,#LO]	@ K[i].lo
	eor	r10,r10,r12		@ Ch(e,f,g)
	ldr	r12,[r14,#HI]	@ K[i].hi

	adds	r3,r3,r9
	ldr	r7,[sp,#24+0]	@ d.lo
	adc	r4,r4,r10		@ T += Ch(e,f,g)
	ldr	r8,[sp,#24+4]	@ d.hi
	adds	r3,r3,r11
	and	r9,r11,#0xff
	adc	r4,r4,r12		@ T += K[i]
	adds	r7,r7,r3
	ldr	r11,[sp,#8+0]	@ b.lo
	adc	r8,r8,r4		@ d += T
	teq	r9,#23

	ldr	r12,[sp,#16+0]	@ c.lo
#ifdef	__thumb2__
	it	eq			@ Thumb2 thing, sanity check in ARM
#endif
	orreq	r14,r14,#1
	@ Sigma0(x)	(ROTR((x),28) ^ ROTR((x),34) ^ ROTR((x),39))
	@ LO		lo>>28^hi<<4  ^ hi>>2^lo<<30 ^ hi>>7^lo<<25
	@ HI		hi>>28^lo<<4  ^ lo>>2^hi<<30 ^ lo>>7^hi<<25
	mov	r9,r5,lsr#28
	mov	r10,r6,lsr#28
	eor	r9,r9,r6,lsl#4
	eor	r10,r10,r5,lsl#4
	eor	r9,r9,r6,lsr#2
	eor	r10,r10,r5,lsr#2
	eor	r9,r9,r5,lsl#30
	eor	r10,r10,r6,lsl#30
	eor	r9,r9,r6,lsr#7
	eor	r10,r10,r5,lsr#7
	eor	r9,r9,r5,lsl#25
	eor	r10,r10,r6,lsl#25	@ Sigma0(a)
	adds	r3,r3,r9
	and	r9,r5,r11
	adc	r4,r4,r10		@ T += Sigma0(a)

	ldr	r10,[sp,#8+4]	@ b.hi
	orr	r5,r5,r11
	ldr	r11,[sp,#16+4]	@ c.hi
	and	r5,r5,r12
	and	r12,r6,r10
	orr	r6,r6,r10
	orr	r5,r5,r9		@ Maj(a,b,c).lo
	and	r6,r6,r11
	adds	r5,r5,r3
	orr	r6,r6,r12		@ Maj(a,b,c).hi
	sub	sp,sp,#8
	adc	r6,r6,r4		@ h += T
	tst	r14,#1
	add	r14,r14,#8
#ifdef	__thumb2__
	ittt	eq			@ Thumb2 thing, sanity check in ARM
#endif
	ldreq	r9,[sp,#184+0]
	ldreq	r10,[sp,#184+4]
	beq	.L16_79
	bic	r14,r14,#1

	ldr	r3,[sp,#8+0]
	ldr	r4,[sp,#8+4]
	ldr	r9, [r0,#0+LO]
	ldr	r10, [r0,#0+HI]
	ldr	r11, [r0,#8+LO]
	ldr	r12, [r0,#8+HI]
	adds	r9,r5,r9
	str	r9, [r0,#0+LO]
	adc	r10,r6,r10
	str	r10, [r0,#0+HI]
	adds	r11,r3,r11
	str	r11, [r0,#8+LO]
	adc	r12,r4,r12
	str	r12, [r0,#8+HI]

	ldr	r5,[sp,#16+0]
	ldr	r6,[sp,#16+4]
	ldr	r3,[sp,#24+0]
	ldr	r4,[sp,#24+4]
	ldr	r9, [r0,#16+LO]
	ldr	r10, [r0,#16+HI]
	ldr	r11, [r0,#24+LO]
	ldr	r12, [r0,#24+HI]
	adds	r9,r5,r9
	str	r9, [r0,#16+LO]
	adc	r10,r6,r10
	str	r10, [r0,#16+HI]
	adds	r11,r3,r11
	str	r11, [r0,#24+LO]
	adc	r12,r4,r12
	str	r12, [r0,#24+HI]

	ldr	r3,[sp,#40+0]
	ldr	r4,[sp,#40+4]
	ldr	r9, [r0,#32+LO]
	ldr	r10, [r0,#32+HI]
	ldr	r11, [r0,#40+LO]
	ldr	r12, [r0,#40+HI]
	adds	r7,r7,r9
	str	r7,[r0,#32+LO]
	adc	r8,r8,r10
	str	r8,[r0,#32+HI]
	adds	r11,r3,r11
	str	r11, [r0,#40+LO]
	adc	r12,r4,r12
	str	r12, [r0,#40+HI]

	ldr	r5,[sp,#48+0]
	ldr	r6,[sp,#48+4]
	ldr	r3,[sp,#56+0]
	ldr	r4,[sp,#56+4]
	ldr	r9, [r0,#48+LO]
	ldr	r10, [r0,#48+HI]
	ldr	r11, [r0,#56+LO]
	ldr	r12, [r0,#56+HI]
	adds	r9,r5,r9
	str	r9, [r0,#48+LO]
	adc	r10,r6,r10
	str	r10, [r0,#48+HI]
	adds	r11,r3,r11
	str	r11, [r0,#56+LO]
	adc	r12,r4,r12
	str	r12, [r0,#56+HI]

	add	sp,sp,#640
	sub	r14,r14,#640

	teq	r1,r2
	bne	.Loop

	add	sp,sp,#8*9		@ destroy frame
#if __ARM_ARCH__>=5
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,pc}
#else
	ldmia	sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr}
	tst	lr,#1
	moveq	pc,lr			@ be binary compatible with V4, yet
.word	0xe12fff1e			@ interoperable with Thumb ISA:-)
#endif
.size	sha512_block_data_order,.-sha512_block_data_order
#if __ARM_MAX_ARCH__>=7
.arch	armv7-a
.fpu	neon

.globl	sha512_block_data_order_neon
.type	sha512_block_data_order_neon,%function
.align	4
sha512_block_data_order_neon:
.LNEON:
	dmb	@ errata #451034 on early Cortex A8
	add	r2,r1,r2,lsl#7	@ len to point at the end of inp
	adr	r3,K512
	VFP_ABI_PUSH
	vldmia	r0,{d16,d17,d18,d19,d20,d21,d22,d23}		@ load context
.Loop_neon:
	vshr.u64	d24,d20,#14	@ 0
#if 0<16
	vld1.64	{d0},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d20,#18
#if 0>0
	vadd.i64	d16,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d20,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d20,#50
	vsli.64	d25,d20,#46
	vmov	d29,d20
	vsli.64	d26,d20,#23
#if 0<16 && defined(__ARMEL__)
	vrev64.8	d0,d0
#endif
	veor	d25,d24
	vbsl	d29,d21,d22		@ Ch(e,f,g)
	vshr.u64	d24,d16,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d23
	vshr.u64	d25,d16,#34
	vsli.64	d24,d16,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d16,#39
	vadd.i64	d28,d0
	vsli.64	d25,d16,#30
	veor	d30,d16,d17
	vsli.64	d26,d16,#25
	veor	d23,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d18,d17		@ Maj(a,b,c)
	veor	d23,d26			@ Sigma0(a)
	vadd.i64	d19,d27
	vadd.i64	d30,d27
	@ vadd.i64	d23,d30
	vshr.u64	d24,d19,#14	@ 1
#if 1<16
	vld1.64	{d1},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d19,#18
#if 1>0
	vadd.i64	d23,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d19,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d19,#50
	vsli.64	d25,d19,#46
	vmov	d29,d19
	vsli.64	d26,d19,#23
#if 1<16 && defined(__ARMEL__)
	vrev64.8	d1,d1
#endif
	veor	d25,d24
	vbsl	d29,d20,d21		@ Ch(e,f,g)
	vshr.u64	d24,d23,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d22
	vshr.u64	d25,d23,#34
	vsli.64	d24,d23,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d23,#39
	vadd.i64	d28,d1
	vsli.64	d25,d23,#30
	veor	d30,d23,d16
	vsli.64	d26,d23,#25
	veor	d22,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d17,d16		@ Maj(a,b,c)
	veor	d22,d26			@ Sigma0(a)
	vadd.i64	d18,d27
	vadd.i64	d30,d27
	@ vadd.i64	d22,d30
	vshr.u64	d24,d18,#14	@ 2
#if 2<16
	vld1.64	{d2},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d18,#18
#if 2>0
	vadd.i64	d22,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d18,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d18,#50
	vsli.64	d25,d18,#46
	vmov	d29,d18
	vsli.64	d26,d18,#23
#if 2<16 && defined(__ARMEL__)
	vrev64.8	d2,d2
#endif
	veor	d25,d24
	vbsl	d29,d19,d20		@ Ch(e,f,g)
	vshr.u64	d24,d22,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d21
	vshr.u64	d25,d22,#34
	vsli.64	d24,d22,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d22,#39
	vadd.i64	d28,d2
	vsli.64	d25,d22,#30
	veor	d30,d22,d23
	vsli.64	d26,d22,#25
	veor	d21,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d16,d23		@ Maj(a,b,c)
	veor	d21,d26			@ Sigma0(a)
	vadd.i64	d17,d27
	vadd.i64	d30,d27
	@ vadd.i64	d21,d30
	vshr.u64	d24,d17,#14	@ 3
#if 3<16
	vld1.64	{d3},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d17,#18
#if 3>0
	vadd.i64	d21,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d17,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d17,#50
	vsli.64	d25,d17,#46
	vmov	d29,d17
	vsli.64	d26,d17,#23
#if 3<16 && defined(__ARMEL__)
	vrev64.8	d3,d3
#endif
	veor	d25,d24
	vbsl	d29,d18,d19		@ Ch(e,f,g)
	vshr.u64	d24,d21,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d20
	vshr.u64	d25,d21,#34
	vsli.64	d24,d21,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d21,#39
	vadd.i64	d28,d3
	vsli.64	d25,d21,#30
	veor	d30,d21,d22
	vsli.64	d26,d21,#25
	veor	d20,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d23,d22		@ Maj(a,b,c)
	veor	d20,d26			@ Sigma0(a)
	vadd.i64	d16,d27
	vadd.i64	d30,d27
	@ vadd.i64	d20,d30
	vshr.u64	d24,d16,#14	@ 4
#if 4<16
	vld1.64	{d4},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d16,#18
#if 4>0
	vadd.i64	d20,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d16,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d16,#50
	vsli.64	d25,d16,#46
	vmov	d29,d16
	vsli.64	d26,d16,#23
#if 4<16 && defined(__ARMEL__)
	vrev64.8	d4,d4
#endif
	veor	d25,d24
	vbsl	d29,d17,d18		@ Ch(e,f,g)
	vshr.u64	d24,d20,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d19
	vshr.u64	d25,d20,#34
	vsli.64	d24,d20,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d20,#39
	vadd.i64	d28,d4
	vsli.64	d25,d20,#30
	veor	d30,d20,d21
	vsli.64	d26,d20,#25
	veor	d19,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d22,d21		@ Maj(a,b,c)
	veor	d19,d26			@ Sigma0(a)
	vadd.i64	d23,d27
	vadd.i64	d30,d27
	@ vadd.i64	d19,d30
	vshr.u64	d24,d23,#14	@ 5
#if 5<16
	vld1.64	{d5},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d23,#18
#if 5>0
	vadd.i64	d19,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d23,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d23,#50
	vsli.64	d25,d23,#46
	vmov	d29,d23
	vsli.64	d26,d23,#23
#if 5<16 && defined(__ARMEL__)
	vrev64.8	d5,d5
#endif
	veor	d25,d24
	vbsl	d29,d16,d17		@ Ch(e,f,g)
	vshr.u64	d24,d19,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d18
	vshr.u64	d25,d19,#34
	vsli.64	d24,d19,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d19,#39
	vadd.i64	d28,d5
	vsli.64	d25,d19,#30
	veor	d30,d19,d20
	vsli.64	d26,d19,#25
	veor	d18,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d21,d20		@ Maj(a,b,c)
	veor	d18,d26			@ Sigma0(a)
	vadd.i64	d22,d27
	vadd.i64	d30,d27
	@ vadd.i64	d18,d30
	vshr.u64	d24,d22,#14	@ 6
#if 6<16
	vld1.64	{d6},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d22,#18
#if 6>0
	vadd.i64	d18,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d22,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d22,#50
	vsli.64	d25,d22,#46
	vmov	d29,d22
	vsli.64	d26,d22,#23
#if 6<16 && defined(__ARMEL__)
	vrev64.8	d6,d6
#endif
	veor	d25,d24
	vbsl	d29,d23,d16		@ Ch(e,f,g)
	vshr.u64	d24,d18,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d17
	vshr.u64	d25,d18,#34
	vsli.64	d24,d18,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d18,#39
	vadd.i64	d28,d6
	vsli.64	d25,d18,#30
	veor	d30,d18,d19
	vsli.64	d26,d18,#25
	veor	d17,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d20,d19		@ Maj(a,b,c)
	veor	d17,d26			@ Sigma0(a)
	vadd.i64	d21,d27
	vadd.i64	d30,d27
	@ vadd.i64	d17,d30
	vshr.u64	d24,d21,#14	@ 7
#if 7<16
	vld1.64	{d7},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d21,#18
#if 7>0
	vadd.i64	d17,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d21,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d21,#50
	vsli.64	d25,d21,#46
	vmov	d29,d21
	vsli.64	d26,d21,#23
#if 7<16 && defined(__ARMEL__)
	vrev64.8	d7,d7
#endif
	veor	d25,d24
	vbsl	d29,d22,d23		@ Ch(e,f,g)
	vshr.u64	d24,d17,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d16
	vshr.u64	d25,d17,#34
	vsli.64	d24,d17,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d17,#39
	vadd.i64	d28,d7
	vsli.64	d25,d17,#30
	veor	d30,d17,d18
	vsli.64	d26,d17,#25
	veor	d16,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d19,d18		@ Maj(a,b,c)
	veor	d16,d26			@ Sigma0(a)
	vadd.i64	d20,d27
	vadd.i64	d30,d27
	@ vadd.i64	d16,d30
	vshr.u64	d24,d20,#14	@ 8
#if 8<16
	vld1.64	{d8},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d20,#18
#if 8>0
	vadd.i64	d16,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d20,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d20,#50
	vsli.64	d25,d20,#46
	vmov	d29,d20
	vsli.64	d26,d20,#23
#if 8<16 && defined(__ARMEL__)
	vrev64.8	d8,d8
#endif
	veor	d25,d24
	vbsl	d29,d21,d22		@ Ch(e,f,g)
	vshr.u64	d24,d16,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d23
	vshr.u64	d25,d16,#34
	vsli.64	d24,d16,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d16,#39
	vadd.i64	d28,d8
	vsli.64	d25,d16,#30
	veor	d30,d16,d17
	vsli.64	d26,d16,#25
	veor	d23,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d18,d17		@ Maj(a,b,c)
	veor	d23,d26			@ Sigma0(a)
	vadd.i64	d19,d27
	vadd.i64	d30,d27
	@ vadd.i64	d23,d30
	vshr.u64	d24,d19,#14	@ 9
#if 9<16
	vld1.64	{d9},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d19,#18
#if 9>0
	vadd.i64	d23,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d19,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d19,#50
	vsli.64	d25,d19,#46
	vmov	d29,d19
	vsli.64	d26,d19,#23
#if 9<16 && defined(__ARMEL__)
	vrev64.8	d9,d9
#endif
	veor	d25,d24
	vbsl	d29,d20,d21		@ Ch(e,f,g)
	vshr.u64	d24,d23,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d22
	vshr.u64	d25,d23,#34
	vsli.64	d24,d23,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d23,#39
	vadd.i64	d28,d9
	vsli.64	d25,d23,#30
	veor	d30,d23,d16
	vsli.64	d26,d23,#25
	veor	d22,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d17,d16		@ Maj(a,b,c)
	veor	d22,d26			@ Sigma0(a)
	vadd.i64	d18,d27
	vadd.i64	d30,d27
	@ vadd.i64	d22,d30
	vshr.u64	d24,d18,#14	@ 10
#if 10<16
	vld1.64	{d10},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d18,#18
#if 10>0
	vadd.i64	d22,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d18,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d18,#50
	vsli.64	d25,d18,#46
	vmov	d29,d18
	vsli.64	d26,d18,#23
#if 10<16 && defined(__ARMEL__)
	vrev64.8	d10,d10
#endif
	veor	d25,d24
	vbsl	d29,d19,d20		@ Ch(e,f,g)
	vshr.u64	d24,d22,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d21
	vshr.u64	d25,d22,#34
	vsli.64	d24,d22,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d22,#39
	vadd.i64	d28,d10
	vsli.64	d25,d22,#30
	veor	d30,d22,d23
	vsli.64	d26,d22,#25
	veor	d21,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d16,d23		@ Maj(a,b,c)
	veor	d21,d26			@ Sigma0(a)
	vadd.i64	d17,d27
	vadd.i64	d30,d27
	@ vadd.i64	d21,d30
	vshr.u64	d24,d17,#14	@ 11
#if 11<16
	vld1.64	{d11},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d17,#18
#if 11>0
	vadd.i64	d21,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d17,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d17,#50
	vsli.64	d25,d17,#46
	vmov	d29,d17
	vsli.64	d26,d17,#23
#if 11<16 && defined(__ARMEL__)
	vrev64.8	d11,d11
#endif
	veor	d25,d24
	vbsl	d29,d18,d19		@ Ch(e,f,g)
	vshr.u64	d24,d21,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d20
	vshr.u64	d25,d21,#34
	vsli.64	d24,d21,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d21,#39
	vadd.i64	d28,d11
	vsli.64	d25,d21,#30
	veor	d30,d21,d22
	vsli.64	d26,d21,#25
	veor	d20,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d23,d22		@ Maj(a,b,c)
	veor	d20,d26			@ Sigma0(a)
	vadd.i64	d16,d27
	vadd.i64	d30,d27
	@ vadd.i64	d20,d30
	vshr.u64	d24,d16,#14	@ 12
#if 12<16
	vld1.64	{d12},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d16,#18
#if 12>0
	vadd.i64	d20,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d16,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d16,#50
	vsli.64	d25,d16,#46
	vmov	d29,d16
	vsli.64	d26,d16,#23
#if 12<16 && defined(__ARMEL__)
	vrev64.8	d12,d12
#endif
	veor	d25,d24
	vbsl	d29,d17,d18		@ Ch(e,f,g)
	vshr.u64	d24,d20,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d19
	vshr.u64	d25,d20,#34
	vsli.64	d24,d20,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d20,#39
	vadd.i64	d28,d12
	vsli.64	d25,d20,#30
	veor	d30,d20,d21
	vsli.64	d26,d20,#25
	veor	d19,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d22,d21		@ Maj(a,b,c)
	veor	d19,d26			@ Sigma0(a)
	vadd.i64	d23,d27
	vadd.i64	d30,d27
	@ vadd.i64	d19,d30
	vshr.u64	d24,d23,#14	@ 13
#if 13<16
	vld1.64	{d13},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d23,#18
#if 13>0
	vadd.i64	d19,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d23,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d23,#50
	vsli.64	d25,d23,#46
	vmov	d29,d23
	vsli.64	d26,d23,#23
#if 13<16 && defined(__ARMEL__)
	vrev64.8	d13,d13
#endif
	veor	d25,d24
	vbsl	d29,d16,d17		@ Ch(e,f,g)
	vshr.u64	d24,d19,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d18
	vshr.u64	d25,d19,#34
	vsli.64	d24,d19,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d19,#39
	vadd.i64	d28,d13
	vsli.64	d25,d19,#30
	veor	d30,d19,d20
	vsli.64	d26,d19,#25
	veor	d18,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d21,d20		@ Maj(a,b,c)
	veor	d18,d26			@ Sigma0(a)
	vadd.i64	d22,d27
	vadd.i64	d30,d27
	@ vadd.i64	d18,d30
	vshr.u64	d24,d22,#14	@ 14
#if 14<16
	vld1.64	{d14},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d22,#18
#if 14>0
	vadd.i64	d18,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d22,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d22,#50
	vsli.64	d25,d22,#46
	vmov	d29,d22
	vsli.64	d26,d22,#23
#if 14<16 && defined(__ARMEL__)
	vrev64.8	d14,d14
#endif
	veor	d25,d24
	vbsl	d29,d23,d16		@ Ch(e,f,g)
	vshr.u64	d24,d18,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d17
	vshr.u64	d25,d18,#34
	vsli.64	d24,d18,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d18,#39
	vadd.i64	d28,d14
	vsli.64	d25,d18,#30
	veor	d30,d18,d19
	vsli.64	d26,d18,#25
	veor	d17,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d20,d19		@ Maj(a,b,c)
	veor	d17,d26			@ Sigma0(a)
	vadd.i64	d21,d27
	vadd.i64	d30,d27
	@ vadd.i64	d17,d30
	vshr.u64	d24,d21,#14	@ 15
#if 15<16
	vld1.64	{d15},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d21,#18
#if 15>0
	vadd.i64	d17,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d21,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d21,#50
	vsli.64	d25,d21,#46
	vmov	d29,d21
	vsli.64	d26,d21,#23
#if 15<16 && defined(__ARMEL__)
	vrev64.8	d15,d15
#endif
	veor	d25,d24
	vbsl	d29,d22,d23		@ Ch(e,f,g)
	vshr.u64	d24,d17,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d16
	vshr.u64	d25,d17,#34
	vsli.64	d24,d17,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d17,#39
	vadd.i64	d28,d15
	vsli.64	d25,d17,#30
	veor	d30,d17,d18
	vsli.64	d26,d17,#25
	veor	d16,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d19,d18		@ Maj(a,b,c)
	veor	d16,d26			@ Sigma0(a)
	vadd.i64	d20,d27
	vadd.i64	d30,d27
	@ vadd.i64	d16,d30
	mov	r12,#4
.L16_79_neon:
	subs	r12,#1
	vshr.u64	q12,q7,#19
	vshr.u64	q13,q7,#61
	vadd.i64	d16,d30			@ h+=Maj from the past
	vshr.u64	q15,q7,#6
	vsli.64	q12,q7,#45
	vext.8	q14,q0,q1,#8	@ X[i+1]
	vsli.64	q13,q7,#3
	veor	q15,q12
	vshr.u64	q12,q14,#1
	veor	q15,q13				@ sigma1(X[i+14])
	vshr.u64	q13,q14,#8
	vadd.i64	q0,q15
	vshr.u64	q15,q14,#7
	vsli.64	q12,q14,#63
	vsli.64	q13,q14,#56
	vext.8	q14,q4,q5,#8	@ X[i+9]
	veor	q15,q12
	vshr.u64	d24,d20,#14		@ from NEON_00_15
	vadd.i64	q0,q14
	vshr.u64	d25,d20,#18		@ from NEON_00_15
	veor	q15,q13				@ sigma0(X[i+1])
	vshr.u64	d26,d20,#41		@ from NEON_00_15
	vadd.i64	q0,q15
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d20,#50
	vsli.64	d25,d20,#46
	vmov	d29,d20
	vsli.64	d26,d20,#23
#if 16<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d21,d22		@ Ch(e,f,g)
	vshr.u64	d24,d16,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d23
	vshr.u64	d25,d16,#34
	vsli.64	d24,d16,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d16,#39
	vadd.i64	d28,d0
	vsli.64	d25,d16,#30
	veor	d30,d16,d17
	vsli.64	d26,d16,#25
	veor	d23,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d18,d17		@ Maj(a,b,c)
	veor	d23,d26			@ Sigma0(a)
	vadd.i64	d19,d27
	vadd.i64	d30,d27
	@ vadd.i64	d23,d30
	vshr.u64	d24,d19,#14	@ 17
#if 17<16
	vld1.64	{d1},[r1]!	@ handles unaligned
#endif
	vshr.u64	d25,d19,#18
#if 17>0
	vadd.i64	d23,d30			@ h+=Maj from the past
#endif
	vshr.u64	d26,d19,#41
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d19,#50
	vsli.64	d25,d19,#46
	vmov	d29,d19
	vsli.64	d26,d19,#23
#if 17<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d20,d21		@ Ch(e,f,g)
	vshr.u64	d24,d23,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d22
	vshr.u64	d25,d23,#34
	vsli.64	d24,d23,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d23,#39
	vadd.i64	d28,d1
	vsli.64	d25,d23,#30
	veor	d30,d23,d16
	vsli.64	d26,d23,#25
	veor	d22,d24,d25
	vadd.i64	d27,d28
	vbsl	d30,d17,d16		@ Maj(a,b,c)
	veor	d22,d26			@ Sigma0(a)
	vadd.i64	d18,d27
	vadd.i64	d30,d27
	@ vadd.i64	d22,d30
	vshr.u64	q12,q0,#19
	vshr.u64	q13,q0,#61
	vadd.i64	d22,d30			@ h+=Maj from the past
	vshr.u64	q15,q0,#6
	vsli.64	q12,q0,#45
	vext.8	q14,q1,q2,#8	@ X[i+1]
	vsli.64	q13,q0,#3
	veor	q15,q12
	vshr.u64	q12,q14,#1
	veor	q15,q13				@ sigma1(X[i+14])
	vshr.u64	q13,q14,#8
	vadd.i64	q1,q15
	vshr.u64	q15,q14,#7
	vsli.64	q12,q14,#63
	vsli.64	q13,q14,#56
	vext.8	q14,q5,q6,#8	@ X[i+9]
	veor	q15,q12
	vshr.u64	d24,d18,#14		@ from NEON_00_15
	vadd.i64	q1,q14
	vshr.u64	d25,d18,#18		@ from NEON_00_15
	veor	q15,q13				@ sigma0(X[i+1])
	vshr.u64	d26,d18,#41		@ from NEON_00_15
	vadd.i64	q1,q15
	vld1.64	{d28},[r3,:64]!	@ K[i++]
	vsli.64	d24,d18,#50
	vsli.64	d25,d18,#46
	vmov	d29,d18
	vsli.64	d26,d18,#23
#if 18<16 && defined(__ARMEL__)
	vrev64.8	,
#endif
	veor	d25,d24
	vbsl	d29,d19,d20		@ Ch(e,f,g)
	vshr.u64	d24,d22,#28
	veor	d26,d25			@ Sigma1(e)
	vadd.i64	d27,d29,d21
	vshr.u64	d25,d22,#34
	vsli.64	d24,d22,#36
	vadd.i64	d27,d26
	vshr.u64	d26,d22,#39
1279bc3d5698SJohn Baldwin	vadd.i64	d28,d2
1280bc3d5698SJohn Baldwin	vsli.64	d25,d22,#30
1281bc3d5698SJohn Baldwin	veor	d30,d22,d23
1282bc3d5698SJohn Baldwin	vsli.64	d26,d22,#25
1283bc3d5698SJohn Baldwin	veor	d21,d24,d25
1284bc3d5698SJohn Baldwin	vadd.i64	d27,d28
1285bc3d5698SJohn Baldwin	vbsl	d30,d16,d23		@ Maj(a,b,c)
1286bc3d5698SJohn Baldwin	veor	d21,d26			@ Sigma0(a)
1287bc3d5698SJohn Baldwin	vadd.i64	d17,d27
1288bc3d5698SJohn Baldwin	vadd.i64	d30,d27
1289bc3d5698SJohn Baldwin	@ vadd.i64	d21,d30
1290bc3d5698SJohn Baldwin	vshr.u64	d24,d17,#14	@ 19
1291bc3d5698SJohn Baldwin#if 19<16
1292bc3d5698SJohn Baldwin	vld1.64	{d3},[r1]!	@ handles unaligned
1293bc3d5698SJohn Baldwin#endif
1294bc3d5698SJohn Baldwin	vshr.u64	d25,d17,#18
1295bc3d5698SJohn Baldwin#if 19>0
1296bc3d5698SJohn Baldwin	vadd.i64	d21,d30			@ h+=Maj from the past
1297bc3d5698SJohn Baldwin#endif
1298bc3d5698SJohn Baldwin	vshr.u64	d26,d17,#41
1299bc3d5698SJohn Baldwin	vld1.64	{d28},[r3,:64]!	@ K[i++]
1300bc3d5698SJohn Baldwin	vsli.64	d24,d17,#50
1301bc3d5698SJohn Baldwin	vsli.64	d25,d17,#46
1302bc3d5698SJohn Baldwin	vmov	d29,d17
1303bc3d5698SJohn Baldwin	vsli.64	d26,d17,#23
1304bc3d5698SJohn Baldwin#if 19<16 && defined(__ARMEL__)
1305bc3d5698SJohn Baldwin	vrev64.8	,
1306bc3d5698SJohn Baldwin#endif
1307bc3d5698SJohn Baldwin	veor	d25,d24
1308bc3d5698SJohn Baldwin	vbsl	d29,d18,d19		@ Ch(e,f,g)
1309bc3d5698SJohn Baldwin	vshr.u64	d24,d21,#28
1310bc3d5698SJohn Baldwin	veor	d26,d25			@ Sigma1(e)
1311bc3d5698SJohn Baldwin	vadd.i64	d27,d29,d20
1312bc3d5698SJohn Baldwin	vshr.u64	d25,d21,#34
1313bc3d5698SJohn Baldwin	vsli.64	d24,d21,#36
1314bc3d5698SJohn Baldwin	vadd.i64	d27,d26
1315bc3d5698SJohn Baldwin	vshr.u64	d26,d21,#39
1316bc3d5698SJohn Baldwin	vadd.i64	d28,d3
1317bc3d5698SJohn Baldwin	vsli.64	d25,d21,#30
1318bc3d5698SJohn Baldwin	veor	d30,d21,d22
1319bc3d5698SJohn Baldwin	vsli.64	d26,d21,#25
1320bc3d5698SJohn Baldwin	veor	d20,d24,d25
1321bc3d5698SJohn Baldwin	vadd.i64	d27,d28
1322bc3d5698SJohn Baldwin	vbsl	d30,d23,d22		@ Maj(a,b,c)
1323bc3d5698SJohn Baldwin	veor	d20,d26			@ Sigma0(a)
1324bc3d5698SJohn Baldwin	vadd.i64	d16,d27
1325bc3d5698SJohn Baldwin	vadd.i64	d30,d27
1326bc3d5698SJohn Baldwin	@ vadd.i64	d20,d30
1327bc3d5698SJohn Baldwin	vshr.u64	q12,q1,#19
1328bc3d5698SJohn Baldwin	vshr.u64	q13,q1,#61
1329bc3d5698SJohn Baldwin	vadd.i64	d20,d30			@ h+=Maj from the past
1330bc3d5698SJohn Baldwin	vshr.u64	q15,q1,#6
1331bc3d5698SJohn Baldwin	vsli.64	q12,q1,#45
1332bc3d5698SJohn Baldwin	vext.8	q14,q2,q3,#8	@ X[i+1]
1333bc3d5698SJohn Baldwin	vsli.64	q13,q1,#3
1334bc3d5698SJohn Baldwin	veor	q15,q12
1335bc3d5698SJohn Baldwin	vshr.u64	q12,q14,#1
1336bc3d5698SJohn Baldwin	veor	q15,q13				@ sigma1(X[i+14])
1337bc3d5698SJohn Baldwin	vshr.u64	q13,q14,#8
1338bc3d5698SJohn Baldwin	vadd.i64	q2,q15
1339bc3d5698SJohn Baldwin	vshr.u64	q15,q14,#7
1340bc3d5698SJohn Baldwin	vsli.64	q12,q14,#63
1341bc3d5698SJohn Baldwin	vsli.64	q13,q14,#56
1342bc3d5698SJohn Baldwin	vext.8	q14,q6,q7,#8	@ X[i+9]
1343bc3d5698SJohn Baldwin	veor	q15,q12
1344bc3d5698SJohn Baldwin	vshr.u64	d24,d16,#14		@ from NEON_00_15
1345bc3d5698SJohn Baldwin	vadd.i64	q2,q14
1346bc3d5698SJohn Baldwin	vshr.u64	d25,d16,#18		@ from NEON_00_15
1347bc3d5698SJohn Baldwin	veor	q15,q13				@ sigma0(X[i+1])
1348bc3d5698SJohn Baldwin	vshr.u64	d26,d16,#41		@ from NEON_00_15
1349bc3d5698SJohn Baldwin	vadd.i64	q2,q15
1350bc3d5698SJohn Baldwin	vld1.64	{d28},[r3,:64]!	@ K[i++]
1351bc3d5698SJohn Baldwin	vsli.64	d24,d16,#50
1352bc3d5698SJohn Baldwin	vsli.64	d25,d16,#46
1353bc3d5698SJohn Baldwin	vmov	d29,d16
1354bc3d5698SJohn Baldwin	vsli.64	d26,d16,#23
1355bc3d5698SJohn Baldwin#if 20<16 && defined(__ARMEL__)
1356bc3d5698SJohn Baldwin	vrev64.8	,
1357bc3d5698SJohn Baldwin#endif
1358bc3d5698SJohn Baldwin	veor	d25,d24
1359bc3d5698SJohn Baldwin	vbsl	d29,d17,d18		@ Ch(e,f,g)
1360bc3d5698SJohn Baldwin	vshr.u64	d24,d20,#28
1361bc3d5698SJohn Baldwin	veor	d26,d25			@ Sigma1(e)
1362bc3d5698SJohn Baldwin	vadd.i64	d27,d29,d19
1363bc3d5698SJohn Baldwin	vshr.u64	d25,d20,#34
1364bc3d5698SJohn Baldwin	vsli.64	d24,d20,#36
1365bc3d5698SJohn Baldwin	vadd.i64	d27,d26
1366bc3d5698SJohn Baldwin	vshr.u64	d26,d20,#39
1367bc3d5698SJohn Baldwin	vadd.i64	d28,d4
1368bc3d5698SJohn Baldwin	vsli.64	d25,d20,#30
1369bc3d5698SJohn Baldwin	veor	d30,d20,d21
1370bc3d5698SJohn Baldwin	vsli.64	d26,d20,#25
1371bc3d5698SJohn Baldwin	veor	d19,d24,d25
1372bc3d5698SJohn Baldwin	vadd.i64	d27,d28
1373bc3d5698SJohn Baldwin	vbsl	d30,d22,d21		@ Maj(a,b,c)
1374bc3d5698SJohn Baldwin	veor	d19,d26			@ Sigma0(a)
1375bc3d5698SJohn Baldwin	vadd.i64	d23,d27
1376bc3d5698SJohn Baldwin	vadd.i64	d30,d27
1377bc3d5698SJohn Baldwin	@ vadd.i64	d19,d30
1378bc3d5698SJohn Baldwin	vshr.u64	d24,d23,#14	@ 21
1379bc3d5698SJohn Baldwin#if 21<16
1380bc3d5698SJohn Baldwin	vld1.64	{d5},[r1]!	@ handles unaligned
1381bc3d5698SJohn Baldwin#endif
1382bc3d5698SJohn Baldwin	vshr.u64	d25,d23,#18
1383bc3d5698SJohn Baldwin#if 21>0
1384bc3d5698SJohn Baldwin	vadd.i64	d19,d30			@ h+=Maj from the past
1385bc3d5698SJohn Baldwin#endif
1386bc3d5698SJohn Baldwin	vshr.u64	d26,d23,#41
1387bc3d5698SJohn Baldwin	vld1.64	{d28},[r3,:64]!	@ K[i++]
1388bc3d5698SJohn Baldwin	vsli.64	d24,d23,#50
1389bc3d5698SJohn Baldwin	vsli.64	d25,d23,#46
1390bc3d5698SJohn Baldwin	vmov	d29,d23
1391bc3d5698SJohn Baldwin	vsli.64	d26,d23,#23
1392bc3d5698SJohn Baldwin#if 21<16 && defined(__ARMEL__)
1393bc3d5698SJohn Baldwin	vrev64.8	,
1394bc3d5698SJohn Baldwin#endif
1395bc3d5698SJohn Baldwin	veor	d25,d24
1396bc3d5698SJohn Baldwin	vbsl	d29,d16,d17		@ Ch(e,f,g)
1397bc3d5698SJohn Baldwin	vshr.u64	d24,d19,#28
1398bc3d5698SJohn Baldwin	veor	d26,d25			@ Sigma1(e)
1399bc3d5698SJohn Baldwin	vadd.i64	d27,d29,d18
1400bc3d5698SJohn Baldwin	vshr.u64	d25,d19,#34
1401bc3d5698SJohn Baldwin	vsli.64	d24,d19,#36
1402bc3d5698SJohn Baldwin	vadd.i64	d27,d26
1403bc3d5698SJohn Baldwin	vshr.u64	d26,d19,#39
1404bc3d5698SJohn Baldwin	vadd.i64	d28,d5
1405bc3d5698SJohn Baldwin	vsli.64	d25,d19,#30
1406bc3d5698SJohn Baldwin	veor	d30,d19,d20
1407bc3d5698SJohn Baldwin	vsli.64	d26,d19,#25
1408bc3d5698SJohn Baldwin	veor	d18,d24,d25
1409bc3d5698SJohn Baldwin	vadd.i64	d27,d28
1410bc3d5698SJohn Baldwin	vbsl	d30,d21,d20		@ Maj(a,b,c)
1411bc3d5698SJohn Baldwin	veor	d18,d26			@ Sigma0(a)
1412bc3d5698SJohn Baldwin	vadd.i64	d22,d27
1413bc3d5698SJohn Baldwin	vadd.i64	d30,d27
1414bc3d5698SJohn Baldwin	@ vadd.i64	d18,d30
1415bc3d5698SJohn Baldwin	vshr.u64	q12,q2,#19
1416bc3d5698SJohn Baldwin	vshr.u64	q13,q2,#61
1417bc3d5698SJohn Baldwin	vadd.i64	d18,d30			@ h+=Maj from the past
1418bc3d5698SJohn Baldwin	vshr.u64	q15,q2,#6
1419bc3d5698SJohn Baldwin	vsli.64	q12,q2,#45
1420bc3d5698SJohn Baldwin	vext.8	q14,q3,q4,#8	@ X[i+1]
1421bc3d5698SJohn Baldwin	vsli.64	q13,q2,#3
1422bc3d5698SJohn Baldwin	veor	q15,q12
1423bc3d5698SJohn Baldwin	vshr.u64	q12,q14,#1
1424bc3d5698SJohn Baldwin	veor	q15,q13				@ sigma1(X[i+14])
1425bc3d5698SJohn Baldwin	vshr.u64	q13,q14,#8
1426bc3d5698SJohn Baldwin	vadd.i64	q3,q15
1427bc3d5698SJohn Baldwin	vshr.u64	q15,q14,#7
1428bc3d5698SJohn Baldwin	vsli.64	q12,q14,#63
1429bc3d5698SJohn Baldwin	vsli.64	q13,q14,#56
1430bc3d5698SJohn Baldwin	vext.8	q14,q7,q0,#8	@ X[i+9]
1431bc3d5698SJohn Baldwin	veor	q15,q12
1432bc3d5698SJohn Baldwin	vshr.u64	d24,d22,#14		@ from NEON_00_15
1433bc3d5698SJohn Baldwin	vadd.i64	q3,q14
1434bc3d5698SJohn Baldwin	vshr.u64	d25,d22,#18		@ from NEON_00_15
1435bc3d5698SJohn Baldwin	veor	q15,q13				@ sigma0(X[i+1])
1436bc3d5698SJohn Baldwin	vshr.u64	d26,d22,#41		@ from NEON_00_15
1437bc3d5698SJohn Baldwin	vadd.i64	q3,q15
1438bc3d5698SJohn Baldwin	vld1.64	{d28},[r3,:64]!	@ K[i++]
1439bc3d5698SJohn Baldwin	vsli.64	d24,d22,#50
1440bc3d5698SJohn Baldwin	vsli.64	d25,d22,#46
1441bc3d5698SJohn Baldwin	vmov	d29,d22
1442bc3d5698SJohn Baldwin	vsli.64	d26,d22,#23
1443bc3d5698SJohn Baldwin#if 22<16 && defined(__ARMEL__)
1444bc3d5698SJohn Baldwin	vrev64.8	,
1445bc3d5698SJohn Baldwin#endif
1446bc3d5698SJohn Baldwin	veor	d25,d24
1447bc3d5698SJohn Baldwin	vbsl	d29,d23,d16		@ Ch(e,f,g)
1448bc3d5698SJohn Baldwin	vshr.u64	d24,d18,#28
1449bc3d5698SJohn Baldwin	veor	d26,d25			@ Sigma1(e)
1450bc3d5698SJohn Baldwin	vadd.i64	d27,d29,d17
1451bc3d5698SJohn Baldwin	vshr.u64	d25,d18,#34
1452bc3d5698SJohn Baldwin	vsli.64	d24,d18,#36
1453bc3d5698SJohn Baldwin	vadd.i64	d27,d26
1454bc3d5698SJohn Baldwin	vshr.u64	d26,d18,#39
1455bc3d5698SJohn Baldwin	vadd.i64	d28,d6
1456bc3d5698SJohn Baldwin	vsli.64	d25,d18,#30
1457bc3d5698SJohn Baldwin	veor	d30,d18,d19
1458bc3d5698SJohn Baldwin	vsli.64	d26,d18,#25
1459bc3d5698SJohn Baldwin	veor	d17,d24,d25
1460bc3d5698SJohn Baldwin	vadd.i64	d27,d28
1461bc3d5698SJohn Baldwin	vbsl	d30,d20,d19		@ Maj(a,b,c)
1462bc3d5698SJohn Baldwin	veor	d17,d26			@ Sigma0(a)
1463bc3d5698SJohn Baldwin	vadd.i64	d21,d27
1464bc3d5698SJohn Baldwin	vadd.i64	d30,d27
1465bc3d5698SJohn Baldwin	@ vadd.i64	d17,d30
1466bc3d5698SJohn Baldwin	vshr.u64	d24,d21,#14	@ 23
1467bc3d5698SJohn Baldwin#if 23<16
1468bc3d5698SJohn Baldwin	vld1.64	{d7},[r1]!	@ handles unaligned
1469bc3d5698SJohn Baldwin#endif
1470bc3d5698SJohn Baldwin	vshr.u64	d25,d21,#18
1471bc3d5698SJohn Baldwin#if 23>0
1472bc3d5698SJohn Baldwin	vadd.i64	d17,d30			@ h+=Maj from the past
1473bc3d5698SJohn Baldwin#endif
1474bc3d5698SJohn Baldwin	vshr.u64	d26,d21,#41
1475bc3d5698SJohn Baldwin	vld1.64	{d28},[r3,:64]!	@ K[i++]
1476bc3d5698SJohn Baldwin	vsli.64	d24,d21,#50
1477bc3d5698SJohn Baldwin	vsli.64	d25,d21,#46
1478bc3d5698SJohn Baldwin	vmov	d29,d21
1479bc3d5698SJohn Baldwin	vsli.64	d26,d21,#23
1480bc3d5698SJohn Baldwin#if 23<16 && defined(__ARMEL__)
1481bc3d5698SJohn Baldwin	vrev64.8	,
1482bc3d5698SJohn Baldwin#endif
1483bc3d5698SJohn Baldwin	veor	d25,d24
1484bc3d5698SJohn Baldwin	vbsl	d29,d22,d23		@ Ch(e,f,g)
1485bc3d5698SJohn Baldwin	vshr.u64	d24,d17,#28
1486bc3d5698SJohn Baldwin	veor	d26,d25			@ Sigma1(e)
1487bc3d5698SJohn Baldwin	vadd.i64	d27,d29,d16
1488bc3d5698SJohn Baldwin	vshr.u64	d25,d17,#34
1489bc3d5698SJohn Baldwin	vsli.64	d24,d17,#36
1490bc3d5698SJohn Baldwin	vadd.i64	d27,d26
1491bc3d5698SJohn Baldwin	vshr.u64	d26,d17,#39
1492bc3d5698SJohn Baldwin	vadd.i64	d28,d7
1493bc3d5698SJohn Baldwin	vsli.64	d25,d17,#30
1494bc3d5698SJohn Baldwin	veor	d30,d17,d18
1495bc3d5698SJohn Baldwin	vsli.64	d26,d17,#25
1496bc3d5698SJohn Baldwin	veor	d16,d24,d25
1497bc3d5698SJohn Baldwin	vadd.i64	d27,d28
1498bc3d5698SJohn Baldwin	vbsl	d30,d19,d18		@ Maj(a,b,c)
1499bc3d5698SJohn Baldwin	veor	d16,d26			@ Sigma0(a)
1500bc3d5698SJohn Baldwin	vadd.i64	d20,d27
1501bc3d5698SJohn Baldwin	vadd.i64	d30,d27
1502bc3d5698SJohn Baldwin	@ vadd.i64	d16,d30
1503bc3d5698SJohn Baldwin	vshr.u64	q12,q3,#19
1504bc3d5698SJohn Baldwin	vshr.u64	q13,q3,#61
1505bc3d5698SJohn Baldwin	vadd.i64	d16,d30			@ h+=Maj from the past
1506bc3d5698SJohn Baldwin	vshr.u64	q15,q3,#6
1507bc3d5698SJohn Baldwin	vsli.64	q12,q3,#45
1508bc3d5698SJohn Baldwin	vext.8	q14,q4,q5,#8	@ X[i+1]
1509bc3d5698SJohn Baldwin	vsli.64	q13,q3,#3
1510bc3d5698SJohn Baldwin	veor	q15,q12
1511bc3d5698SJohn Baldwin	vshr.u64	q12,q14,#1
1512bc3d5698SJohn Baldwin	veor	q15,q13				@ sigma1(X[i+14])
1513bc3d5698SJohn Baldwin	vshr.u64	q13,q14,#8
1514bc3d5698SJohn Baldwin	vadd.i64	q4,q15
1515bc3d5698SJohn Baldwin	vshr.u64	q15,q14,#7
1516bc3d5698SJohn Baldwin	vsli.64	q12,q14,#63
1517bc3d5698SJohn Baldwin	vsli.64	q13,q14,#56
1518bc3d5698SJohn Baldwin	vext.8	q14,q0,q1,#8	@ X[i+9]
1519bc3d5698SJohn Baldwin	veor	q15,q12
1520bc3d5698SJohn Baldwin	vshr.u64	d24,d20,#14		@ from NEON_00_15
1521bc3d5698SJohn Baldwin	vadd.i64	q4,q14
1522bc3d5698SJohn Baldwin	vshr.u64	d25,d20,#18		@ from NEON_00_15
1523bc3d5698SJohn Baldwin	veor	q15,q13				@ sigma0(X[i+1])
1524bc3d5698SJohn Baldwin	vshr.u64	d26,d20,#41		@ from NEON_00_15
1525bc3d5698SJohn Baldwin	vadd.i64	q4,q15
1526bc3d5698SJohn Baldwin	vld1.64	{d28},[r3,:64]!	@ K[i++]
1527bc3d5698SJohn Baldwin	vsli.64	d24,d20,#50
1528bc3d5698SJohn Baldwin	vsli.64	d25,d20,#46
1529bc3d5698SJohn Baldwin	vmov	d29,d20
1530bc3d5698SJohn Baldwin	vsli.64	d26,d20,#23
1531bc3d5698SJohn Baldwin#if 24<16 && defined(__ARMEL__)
1532bc3d5698SJohn Baldwin	vrev64.8	,
1533bc3d5698SJohn Baldwin#endif
1534bc3d5698SJohn Baldwin	veor	d25,d24
1535bc3d5698SJohn Baldwin	vbsl	d29,d21,d22		@ Ch(e,f,g)
1536bc3d5698SJohn Baldwin	vshr.u64	d24,d16,#28
1537bc3d5698SJohn Baldwin	veor	d26,d25			@ Sigma1(e)
1538bc3d5698SJohn Baldwin	vadd.i64	d27,d29,d23
1539bc3d5698SJohn Baldwin	vshr.u64	d25,d16,#34
1540bc3d5698SJohn Baldwin	vsli.64	d24,d16,#36
1541bc3d5698SJohn Baldwin	vadd.i64	d27,d26
1542bc3d5698SJohn Baldwin	vshr.u64	d26,d16,#39
1543bc3d5698SJohn Baldwin	vadd.i64	d28,d8
1544bc3d5698SJohn Baldwin	vsli.64	d25,d16,#30
1545bc3d5698SJohn Baldwin	veor	d30,d16,d17
1546bc3d5698SJohn Baldwin	vsli.64	d26,d16,#25
1547bc3d5698SJohn Baldwin	veor	d23,d24,d25
1548bc3d5698SJohn Baldwin	vadd.i64	d27,d28
1549bc3d5698SJohn Baldwin	vbsl	d30,d18,d17		@ Maj(a,b,c)
1550bc3d5698SJohn Baldwin	veor	d23,d26			@ Sigma0(a)
1551bc3d5698SJohn Baldwin	vadd.i64	d19,d27
1552bc3d5698SJohn Baldwin	vadd.i64	d30,d27
1553bc3d5698SJohn Baldwin	@ vadd.i64	d23,d30
1554bc3d5698SJohn Baldwin	vshr.u64	d24,d19,#14	@ 25
1555bc3d5698SJohn Baldwin#if 25<16
1556bc3d5698SJohn Baldwin	vld1.64	{d9},[r1]!	@ handles unaligned
1557bc3d5698SJohn Baldwin#endif
1558bc3d5698SJohn Baldwin	vshr.u64	d25,d19,#18
1559bc3d5698SJohn Baldwin#if 25>0
1560bc3d5698SJohn Baldwin	vadd.i64	d23,d30			@ h+=Maj from the past
1561bc3d5698SJohn Baldwin#endif
1562bc3d5698SJohn Baldwin	vshr.u64	d26,d19,#41
1563bc3d5698SJohn Baldwin	vld1.64	{d28},[r3,:64]!	@ K[i++]
1564bc3d5698SJohn Baldwin	vsli.64	d24,d19,#50
1565bc3d5698SJohn Baldwin	vsli.64	d25,d19,#46
1566bc3d5698SJohn Baldwin	vmov	d29,d19
1567bc3d5698SJohn Baldwin	vsli.64	d26,d19,#23
1568bc3d5698SJohn Baldwin#if 25<16 && defined(__ARMEL__)
1569bc3d5698SJohn Baldwin	vrev64.8	,
1570bc3d5698SJohn Baldwin#endif
1571bc3d5698SJohn Baldwin	veor	d25,d24
1572bc3d5698SJohn Baldwin	vbsl	d29,d20,d21		@ Ch(e,f,g)
1573bc3d5698SJohn Baldwin	vshr.u64	d24,d23,#28
1574bc3d5698SJohn Baldwin	veor	d26,d25			@ Sigma1(e)
1575bc3d5698SJohn Baldwin	vadd.i64	d27,d29,d22
1576bc3d5698SJohn Baldwin	vshr.u64	d25,d23,#34
1577bc3d5698SJohn Baldwin	vsli.64	d24,d23,#36
1578bc3d5698SJohn Baldwin	vadd.i64	d27,d26
1579bc3d5698SJohn Baldwin	vshr.u64	d26,d23,#39
1580bc3d5698SJohn Baldwin	vadd.i64	d28,d9
1581bc3d5698SJohn Baldwin	vsli.64	d25,d23,#30
1582bc3d5698SJohn Baldwin	veor	d30,d23,d16
1583bc3d5698SJohn Baldwin	vsli.64	d26,d23,#25
1584bc3d5698SJohn Baldwin	veor	d22,d24,d25
1585bc3d5698SJohn Baldwin	vadd.i64	d27,d28
1586bc3d5698SJohn Baldwin	vbsl	d30,d17,d16		@ Maj(a,b,c)
1587bc3d5698SJohn Baldwin	veor	d22,d26			@ Sigma0(a)
1588bc3d5698SJohn Baldwin	vadd.i64	d18,d27
1589bc3d5698SJohn Baldwin	vadd.i64	d30,d27
1590bc3d5698SJohn Baldwin	@ vadd.i64	d22,d30
1591bc3d5698SJohn Baldwin	vshr.u64	q12,q4,#19
1592bc3d5698SJohn Baldwin	vshr.u64	q13,q4,#61
1593bc3d5698SJohn Baldwin	vadd.i64	d22,d30			@ h+=Maj from the past
1594bc3d5698SJohn Baldwin	vshr.u64	q15,q4,#6
1595bc3d5698SJohn Baldwin	vsli.64	q12,q4,#45
1596bc3d5698SJohn Baldwin	vext.8	q14,q5,q6,#8	@ X[i+1]
1597bc3d5698SJohn Baldwin	vsli.64	q13,q4,#3
1598bc3d5698SJohn Baldwin	veor	q15,q12
1599bc3d5698SJohn Baldwin	vshr.u64	q12,q14,#1
1600bc3d5698SJohn Baldwin	veor	q15,q13				@ sigma1(X[i+14])
1601bc3d5698SJohn Baldwin	vshr.u64	q13,q14,#8
1602bc3d5698SJohn Baldwin	vadd.i64	q5,q15
1603bc3d5698SJohn Baldwin	vshr.u64	q15,q14,#7
1604bc3d5698SJohn Baldwin	vsli.64	q12,q14,#63
1605bc3d5698SJohn Baldwin	vsli.64	q13,q14,#56
1606bc3d5698SJohn Baldwin	vext.8	q14,q1,q2,#8	@ X[i+9]
1607bc3d5698SJohn Baldwin	veor	q15,q12
1608bc3d5698SJohn Baldwin	vshr.u64	d24,d18,#14		@ from NEON_00_15
1609bc3d5698SJohn Baldwin	vadd.i64	q5,q14
1610bc3d5698SJohn Baldwin	vshr.u64	d25,d18,#18		@ from NEON_00_15
1611bc3d5698SJohn Baldwin	veor	q15,q13				@ sigma0(X[i+1])
1612bc3d5698SJohn Baldwin	vshr.u64	d26,d18,#41		@ from NEON_00_15
1613bc3d5698SJohn Baldwin	vadd.i64	q5,q15
1614bc3d5698SJohn Baldwin	vld1.64	{d28},[r3,:64]!	@ K[i++]
1615bc3d5698SJohn Baldwin	vsli.64	d24,d18,#50
1616bc3d5698SJohn Baldwin	vsli.64	d25,d18,#46
1617bc3d5698SJohn Baldwin	vmov	d29,d18
1618bc3d5698SJohn Baldwin	vsli.64	d26,d18,#23
1619bc3d5698SJohn Baldwin#if 26<16 && defined(__ARMEL__)
1620bc3d5698SJohn Baldwin	vrev64.8	,
1621bc3d5698SJohn Baldwin#endif
1622bc3d5698SJohn Baldwin	veor	d25,d24
1623bc3d5698SJohn Baldwin	vbsl	d29,d19,d20		@ Ch(e,f,g)
1624bc3d5698SJohn Baldwin	vshr.u64	d24,d22,#28
1625bc3d5698SJohn Baldwin	veor	d26,d25			@ Sigma1(e)
1626bc3d5698SJohn Baldwin	vadd.i64	d27,d29,d21
1627bc3d5698SJohn Baldwin	vshr.u64	d25,d22,#34
1628bc3d5698SJohn Baldwin	vsli.64	d24,d22,#36
1629bc3d5698SJohn Baldwin	vadd.i64	d27,d26
1630bc3d5698SJohn Baldwin	vshr.u64	d26,d22,#39
1631bc3d5698SJohn Baldwin	vadd.i64	d28,d10
1632bc3d5698SJohn Baldwin	vsli.64	d25,d22,#30
1633bc3d5698SJohn Baldwin	veor	d30,d22,d23
1634bc3d5698SJohn Baldwin	vsli.64	d26,d22,#25
1635bc3d5698SJohn Baldwin	veor	d21,d24,d25
1636bc3d5698SJohn Baldwin	vadd.i64	d27,d28
1637bc3d5698SJohn Baldwin	vbsl	d30,d16,d23		@ Maj(a,b,c)
1638bc3d5698SJohn Baldwin	veor	d21,d26			@ Sigma0(a)
1639bc3d5698SJohn Baldwin	vadd.i64	d17,d27
1640bc3d5698SJohn Baldwin	vadd.i64	d30,d27
1641bc3d5698SJohn Baldwin	@ vadd.i64	d21,d30
1642bc3d5698SJohn Baldwin	vshr.u64	d24,d17,#14	@ 27
1643bc3d5698SJohn Baldwin#if 27<16
1644bc3d5698SJohn Baldwin	vld1.64	{d11},[r1]!	@ handles unaligned
1645bc3d5698SJohn Baldwin#endif
1646bc3d5698SJohn Baldwin	vshr.u64	d25,d17,#18
1647bc3d5698SJohn Baldwin#if 27>0
1648bc3d5698SJohn Baldwin	vadd.i64	d21,d30			@ h+=Maj from the past
1649bc3d5698SJohn Baldwin#endif
1650bc3d5698SJohn Baldwin	vshr.u64	d26,d17,#41
1651bc3d5698SJohn Baldwin	vld1.64	{d28},[r3,:64]!	@ K[i++]
1652bc3d5698SJohn Baldwin	vsli.64	d24,d17,#50
1653bc3d5698SJohn Baldwin	vsli.64	d25,d17,#46
1654bc3d5698SJohn Baldwin	vmov	d29,d17
1655bc3d5698SJohn Baldwin	vsli.64	d26,d17,#23
1656bc3d5698SJohn Baldwin#if 27<16 && defined(__ARMEL__)
1657bc3d5698SJohn Baldwin	vrev64.8	,
1658bc3d5698SJohn Baldwin#endif
1659bc3d5698SJohn Baldwin	veor	d25,d24
1660bc3d5698SJohn Baldwin	vbsl	d29,d18,d19		@ Ch(e,f,g)
1661bc3d5698SJohn Baldwin	vshr.u64	d24,d21,#28
1662bc3d5698SJohn Baldwin	veor	d26,d25			@ Sigma1(e)
1663bc3d5698SJohn Baldwin	vadd.i64	d27,d29,d20
1664bc3d5698SJohn Baldwin	vshr.u64	d25,d21,#34
1665bc3d5698SJohn Baldwin	vsli.64	d24,d21,#36
1666bc3d5698SJohn Baldwin	vadd.i64	d27,d26
1667bc3d5698SJohn Baldwin	vshr.u64	d26,d21,#39
1668bc3d5698SJohn Baldwin	vadd.i64	d28,d11
1669bc3d5698SJohn Baldwin	vsli.64	d25,d21,#30
1670bc3d5698SJohn Baldwin	veor	d30,d21,d22
1671bc3d5698SJohn Baldwin	vsli.64	d26,d21,#25
1672bc3d5698SJohn Baldwin	veor	d20,d24,d25
1673bc3d5698SJohn Baldwin	vadd.i64	d27,d28
1674bc3d5698SJohn Baldwin	vbsl	d30,d23,d22		@ Maj(a,b,c)
1675bc3d5698SJohn Baldwin	veor	d20,d26			@ Sigma0(a)
1676bc3d5698SJohn Baldwin	vadd.i64	d16,d27
1677bc3d5698SJohn Baldwin	vadd.i64	d30,d27
1678bc3d5698SJohn Baldwin	@ vadd.i64	d20,d30
1679bc3d5698SJohn Baldwin	vshr.u64	q12,q5,#19
1680bc3d5698SJohn Baldwin	vshr.u64	q13,q5,#61
1681bc3d5698SJohn Baldwin	vadd.i64	d20,d30			@ h+=Maj from the past
1682bc3d5698SJohn Baldwin	vshr.u64	q15,q5,#6
1683bc3d5698SJohn Baldwin	vsli.64	q12,q5,#45
1684bc3d5698SJohn Baldwin	vext.8	q14,q6,q7,#8	@ X[i+1]
1685bc3d5698SJohn Baldwin	vsli.64	q13,q5,#3
1686bc3d5698SJohn Baldwin	veor	q15,q12
1687bc3d5698SJohn Baldwin	vshr.u64	q12,q14,#1
1688bc3d5698SJohn Baldwin	veor	q15,q13				@ sigma1(X[i+14])
1689bc3d5698SJohn Baldwin	vshr.u64	q13,q14,#8
1690bc3d5698SJohn Baldwin	vadd.i64	q6,q15
1691bc3d5698SJohn Baldwin	vshr.u64	q15,q14,#7
1692bc3d5698SJohn Baldwin	vsli.64	q12,q14,#63
1693bc3d5698SJohn Baldwin	vsli.64	q13,q14,#56
1694bc3d5698SJohn Baldwin	vext.8	q14,q2,q3,#8	@ X[i+9]
1695bc3d5698SJohn Baldwin	veor	q15,q12
1696bc3d5698SJohn Baldwin	vshr.u64	d24,d16,#14		@ from NEON_00_15
1697bc3d5698SJohn Baldwin	vadd.i64	q6,q14
1698bc3d5698SJohn Baldwin	vshr.u64	d25,d16,#18		@ from NEON_00_15
1699bc3d5698SJohn Baldwin	veor	q15,q13				@ sigma0(X[i+1])
1700bc3d5698SJohn Baldwin	vshr.u64	d26,d16,#41		@ from NEON_00_15
1701bc3d5698SJohn Baldwin	vadd.i64	q6,q15
1702bc3d5698SJohn Baldwin	vld1.64	{d28},[r3,:64]!	@ K[i++]
1703bc3d5698SJohn Baldwin	vsli.64	d24,d16,#50
1704bc3d5698SJohn Baldwin	vsli.64	d25,d16,#46
1705bc3d5698SJohn Baldwin	vmov	d29,d16
1706bc3d5698SJohn Baldwin	vsli.64	d26,d16,#23
1707bc3d5698SJohn Baldwin#if 28<16 && defined(__ARMEL__)
1708bc3d5698SJohn Baldwin	vrev64.8	,
1709bc3d5698SJohn Baldwin#endif
1710bc3d5698SJohn Baldwin	veor	d25,d24
1711bc3d5698SJohn Baldwin	vbsl	d29,d17,d18		@ Ch(e,f,g)
1712bc3d5698SJohn Baldwin	vshr.u64	d24,d20,#28
1713bc3d5698SJohn Baldwin	veor	d26,d25			@ Sigma1(e)
1714bc3d5698SJohn Baldwin	vadd.i64	d27,d29,d19
1715bc3d5698SJohn Baldwin	vshr.u64	d25,d20,#34
1716bc3d5698SJohn Baldwin	vsli.64	d24,d20,#36
1717bc3d5698SJohn Baldwin	vadd.i64	d27,d26
1718bc3d5698SJohn Baldwin	vshr.u64	d26,d20,#39
1719bc3d5698SJohn Baldwin	vadd.i64	d28,d12
1720bc3d5698SJohn Baldwin	vsli.64	d25,d20,#30
1721bc3d5698SJohn Baldwin	veor	d30,d20,d21
1722bc3d5698SJohn Baldwin	vsli.64	d26,d20,#25
1723bc3d5698SJohn Baldwin	veor	d19,d24,d25
1724bc3d5698SJohn Baldwin	vadd.i64	d27,d28
1725bc3d5698SJohn Baldwin	vbsl	d30,d22,d21		@ Maj(a,b,c)
1726bc3d5698SJohn Baldwin	veor	d19,d26			@ Sigma0(a)
1727bc3d5698SJohn Baldwin	vadd.i64	d23,d27
1728bc3d5698SJohn Baldwin	vadd.i64	d30,d27
1729bc3d5698SJohn Baldwin	@ vadd.i64	d19,d30
1730bc3d5698SJohn Baldwin	vshr.u64	d24,d23,#14	@ 29
1731bc3d5698SJohn Baldwin#if 29<16
1732bc3d5698SJohn Baldwin	vld1.64	{d13},[r1]!	@ handles unaligned
1733bc3d5698SJohn Baldwin#endif
1734bc3d5698SJohn Baldwin	vshr.u64	d25,d23,#18
1735bc3d5698SJohn Baldwin#if 29>0
1736bc3d5698SJohn Baldwin	vadd.i64	d19,d30			@ h+=Maj from the past
1737bc3d5698SJohn Baldwin#endif
1738bc3d5698SJohn Baldwin	vshr.u64	d26,d23,#41
1739bc3d5698SJohn Baldwin	vld1.64	{d28},[r3,:64]!	@ K[i++]
1740bc3d5698SJohn Baldwin	vsli.64	d24,d23,#50
1741bc3d5698SJohn Baldwin	vsli.64	d25,d23,#46
1742bc3d5698SJohn Baldwin	vmov	d29,d23
1743bc3d5698SJohn Baldwin	vsli.64	d26,d23,#23
1744bc3d5698SJohn Baldwin#if 29<16 && defined(__ARMEL__)
1745bc3d5698SJohn Baldwin	vrev64.8	,
1746bc3d5698SJohn Baldwin#endif
1747bc3d5698SJohn Baldwin	veor	d25,d24
1748bc3d5698SJohn Baldwin	vbsl	d29,d16,d17		@ Ch(e,f,g)
1749bc3d5698SJohn Baldwin	vshr.u64	d24,d19,#28
1750bc3d5698SJohn Baldwin	veor	d26,d25			@ Sigma1(e)
1751bc3d5698SJohn Baldwin	vadd.i64	d27,d29,d18
1752bc3d5698SJohn Baldwin	vshr.u64	d25,d19,#34
1753bc3d5698SJohn Baldwin	vsli.64	d24,d19,#36
1754bc3d5698SJohn Baldwin	vadd.i64	d27,d26
1755bc3d5698SJohn Baldwin	vshr.u64	d26,d19,#39
1756bc3d5698SJohn Baldwin	vadd.i64	d28,d13
1757bc3d5698SJohn Baldwin	vsli.64	d25,d19,#30
1758bc3d5698SJohn Baldwin	veor	d30,d19,d20
1759bc3d5698SJohn Baldwin	vsli.64	d26,d19,#25
1760bc3d5698SJohn Baldwin	veor	d18,d24,d25
1761bc3d5698SJohn Baldwin	vadd.i64	d27,d28
1762bc3d5698SJohn Baldwin	vbsl	d30,d21,d20		@ Maj(a,b,c)
1763bc3d5698SJohn Baldwin	veor	d18,d26			@ Sigma0(a)
1764bc3d5698SJohn Baldwin	vadd.i64	d22,d27
1765bc3d5698SJohn Baldwin	vadd.i64	d30,d27
1766bc3d5698SJohn Baldwin	@ vadd.i64	d18,d30
1767bc3d5698SJohn Baldwin	vshr.u64	q12,q6,#19
1768bc3d5698SJohn Baldwin	vshr.u64	q13,q6,#61
1769bc3d5698SJohn Baldwin	vadd.i64	d18,d30			@ h+=Maj from the past
1770bc3d5698SJohn Baldwin	vshr.u64	q15,q6,#6
1771bc3d5698SJohn Baldwin	vsli.64	q12,q6,#45
1772bc3d5698SJohn Baldwin	vext.8	q14,q7,q0,#8	@ X[i+1]
1773bc3d5698SJohn Baldwin	vsli.64	q13,q6,#3
1774bc3d5698SJohn Baldwin	veor	q15,q12
1775bc3d5698SJohn Baldwin	vshr.u64	q12,q14,#1
1776bc3d5698SJohn Baldwin	veor	q15,q13				@ sigma1(X[i+14])
1777bc3d5698SJohn Baldwin	vshr.u64	q13,q14,#8
1778bc3d5698SJohn Baldwin	vadd.i64	q7,q15
1779bc3d5698SJohn Baldwin	vshr.u64	q15,q14,#7
1780bc3d5698SJohn Baldwin	vsli.64	q12,q14,#63
1781bc3d5698SJohn Baldwin	vsli.64	q13,q14,#56
1782bc3d5698SJohn Baldwin	vext.8	q14,q3,q4,#8	@ X[i+9]
1783bc3d5698SJohn Baldwin	veor	q15,q12
1784bc3d5698SJohn Baldwin	vshr.u64	d24,d22,#14		@ from NEON_00_15
1785bc3d5698SJohn Baldwin	vadd.i64	q7,q14
1786bc3d5698SJohn Baldwin	vshr.u64	d25,d22,#18		@ from NEON_00_15
1787bc3d5698SJohn Baldwin	veor	q15,q13				@ sigma0(X[i+1])
1788bc3d5698SJohn Baldwin	vshr.u64	d26,d22,#41		@ from NEON_00_15
1789bc3d5698SJohn Baldwin	vadd.i64	q7,q15
1790bc3d5698SJohn Baldwin	vld1.64	{d28},[r3,:64]!	@ K[i++]
1791bc3d5698SJohn Baldwin	vsli.64	d24,d22,#50
1792bc3d5698SJohn Baldwin	vsli.64	d25,d22,#46
1793bc3d5698SJohn Baldwin	vmov	d29,d22
1794bc3d5698SJohn Baldwin	vsli.64	d26,d22,#23
1795bc3d5698SJohn Baldwin#if 30<16 && defined(__ARMEL__)
1796bc3d5698SJohn Baldwin	vrev64.8	,
1797bc3d5698SJohn Baldwin#endif
1798bc3d5698SJohn Baldwin	veor	d25,d24
1799bc3d5698SJohn Baldwin	vbsl	d29,d23,d16		@ Ch(e,f,g)
1800bc3d5698SJohn Baldwin	vshr.u64	d24,d18,#28
1801bc3d5698SJohn Baldwin	veor	d26,d25			@ Sigma1(e)
1802bc3d5698SJohn Baldwin	vadd.i64	d27,d29,d17
1803bc3d5698SJohn Baldwin	vshr.u64	d25,d18,#34
1804bc3d5698SJohn Baldwin	vsli.64	d24,d18,#36
1805bc3d5698SJohn Baldwin	vadd.i64	d27,d26
1806bc3d5698SJohn Baldwin	vshr.u64	d26,d18,#39
1807bc3d5698SJohn Baldwin	vadd.i64	d28,d14
1808bc3d5698SJohn Baldwin	vsli.64	d25,d18,#30
1809bc3d5698SJohn Baldwin	veor	d30,d18,d19
1810bc3d5698SJohn Baldwin	vsli.64	d26,d18,#25
1811bc3d5698SJohn Baldwin	veor	d17,d24,d25
1812bc3d5698SJohn Baldwin	vadd.i64	d27,d28
1813bc3d5698SJohn Baldwin	vbsl	d30,d20,d19		@ Maj(a,b,c)
1814bc3d5698SJohn Baldwin	veor	d17,d26			@ Sigma0(a)
1815bc3d5698SJohn Baldwin	vadd.i64	d21,d27
1816bc3d5698SJohn Baldwin	vadd.i64	d30,d27
1817bc3d5698SJohn Baldwin	@ vadd.i64	d17,d30
1818bc3d5698SJohn Baldwin	vshr.u64	d24,d21,#14	@ 31
1819bc3d5698SJohn Baldwin#if 31<16
1820bc3d5698SJohn Baldwin	vld1.64	{d15},[r1]!	@ handles unaligned
1821bc3d5698SJohn Baldwin#endif
1822bc3d5698SJohn Baldwin	vshr.u64	d25,d21,#18
1823bc3d5698SJohn Baldwin#if 31>0
1824bc3d5698SJohn Baldwin	vadd.i64	d17,d30			@ h+=Maj from the past
1825bc3d5698SJohn Baldwin#endif
1826bc3d5698SJohn Baldwin	vshr.u64	d26,d21,#41
1827bc3d5698SJohn Baldwin	vld1.64	{d28},[r3,:64]!	@ K[i++]
1828bc3d5698SJohn Baldwin	vsli.64	d24,d21,#50
1829bc3d5698SJohn Baldwin	vsli.64	d25,d21,#46
1830bc3d5698SJohn Baldwin	vmov	d29,d21
1831bc3d5698SJohn Baldwin	vsli.64	d26,d21,#23
1832bc3d5698SJohn Baldwin#if 31<16 && defined(__ARMEL__)
1833bc3d5698SJohn Baldwin	vrev64.8	,
1834bc3d5698SJohn Baldwin#endif
1835bc3d5698SJohn Baldwin	veor	d25,d24
1836bc3d5698SJohn Baldwin	vbsl	d29,d22,d23		@ Ch(e,f,g)
1837bc3d5698SJohn Baldwin	vshr.u64	d24,d17,#28
1838bc3d5698SJohn Baldwin	veor	d26,d25			@ Sigma1(e)
1839bc3d5698SJohn Baldwin	vadd.i64	d27,d29,d16
1840bc3d5698SJohn Baldwin	vshr.u64	d25,d17,#34
1841bc3d5698SJohn Baldwin	vsli.64	d24,d17,#36
1842bc3d5698SJohn Baldwin	vadd.i64	d27,d26
1843bc3d5698SJohn Baldwin	vshr.u64	d26,d17,#39
1844bc3d5698SJohn Baldwin	vadd.i64	d28,d15
1845bc3d5698SJohn Baldwin	vsli.64	d25,d17,#30
1846bc3d5698SJohn Baldwin	veor	d30,d17,d18
1847bc3d5698SJohn Baldwin	vsli.64	d26,d17,#25
1848bc3d5698SJohn Baldwin	veor	d16,d24,d25
1849bc3d5698SJohn Baldwin	vadd.i64	d27,d28
1850bc3d5698SJohn Baldwin	vbsl	d30,d19,d18		@ Maj(a,b,c)
1851bc3d5698SJohn Baldwin	veor	d16,d26			@ Sigma0(a)
1852bc3d5698SJohn Baldwin	vadd.i64	d20,d27
1853bc3d5698SJohn Baldwin	vadd.i64	d30,d27
1854bc3d5698SJohn Baldwin	@ vadd.i64	d16,d30
1855bc3d5698SJohn Baldwin	bne	.L16_79_neon
1856bc3d5698SJohn Baldwin
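@ End of block: fold in the last deferred Maj term, then add the working
@ variables a-h (d16-d23) to the hash state loaded from the context at [r0]
@ and store the result back.  r3 is rewound by 640 bytes (80 K512 constants
@ of 8 bytes each) and the outer loop repeats until the input pointer r1
@ reaches the limit in r2.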
1857bc3d5698SJohn Baldwin	vadd.i64	d16,d30		@ h+=Maj from the past
1858bc3d5698SJohn Baldwin	vldmia	r0,{d24,d25,d26,d27,d28,d29,d30,d31}	@ load context to temp
1859bc3d5698SJohn Baldwin	vadd.i64	q8,q12		@ vectorized accumulate
1860bc3d5698SJohn Baldwin	vadd.i64	q9,q13
1861bc3d5698SJohn Baldwin	vadd.i64	q10,q14
1862bc3d5698SJohn Baldwin	vadd.i64	q11,q15
1863bc3d5698SJohn Baldwin	vstmia	r0,{d16,d17,d18,d19,d20,d21,d22,d23}	@ save context
1864bc3d5698SJohn Baldwin	teq	r1,r2
1865bc3d5698SJohn Baldwin	sub	r3,#640	@ rewind K512
1866bc3d5698SJohn Baldwin	bne	.Loop_neon
1867bc3d5698SJohn Baldwin
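@ Restore the callee-saved VFP registers pushed on entry and return.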
1868bc3d5698SJohn Baldwin	VFP_ABI_POP
1869bc3d5698SJohn Baldwin	bx	lr				@ .word	0xe12fff1e
1870bc3d5698SJohn Baldwin.size	sha512_block_data_order_neon,.-sha512_block_data_order_neon
1871bc3d5698SJohn Baldwin#endif
1872bc3d5698SJohn Baldwin.byte	83,72,65,53,49,50,32,98,108,111,99,107,32,116,114,97,110,115,102,111,114,109,32,102,111,114,32,65,82,77,118,52,47,78,69,79,78,44,32,67,82,89,80,84,79,71,65,77,83,32,98,121,32,60,97,112,112,114,111,64,111,112,101,110,115,115,108,46,111,114,103,62,0
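@ The .byte string above spells out, in ASCII:
@ "SHA512 block transform for ARMv4/NEON, CRYPTOGAMS by <appro@openssl.org>"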
1873bc3d5698SJohn Baldwin.align	2
1874bc3d5698SJohn Baldwin.align	2
1875bc3d5698SJohn Baldwin#if __ARM_MAX_ARCH__>=7 && !defined(__KERNEL__)
1876bc3d5698SJohn Baldwin.comm	OPENSSL_armcap_P,4,4
1877bc3d5698SJohn Baldwin#endif