xref: /freebsd/sys/contrib/openzfs/module/icp/asm-aarch64/sha2/sha512-armv8.S (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1*61145dc2SMartin Matuska// SPDX-License-Identifier: Apache-2.0
22a58b312SMartin Matuska/*
32a58b312SMartin Matuska * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved.
42a58b312SMartin Matuska *
52a58b312SMartin Matuska * Licensed under the Apache License, Version 2.0 (the "License");
62a58b312SMartin Matuska * you may not use this file except in compliance with the License.
72a58b312SMartin Matuska * You may obtain a copy of the License at
82a58b312SMartin Matuska *
92a58b312SMartin Matuska *     https://www.apache.org/licenses/LICENSE-2.0
102a58b312SMartin Matuska *
112a58b312SMartin Matuska * Unless required by applicable law or agreed to in writing, software
122a58b312SMartin Matuska * distributed under the License is distributed on an "AS IS" BASIS,
132a58b312SMartin Matuska * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
142a58b312SMartin Matuska * See the License for the specific language governing permissions and
152a58b312SMartin Matuska * limitations under the License.
162a58b312SMartin Matuska */
172a58b312SMartin Matuska
182a58b312SMartin Matuska/*
192a58b312SMartin Matuska * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de>
202a58b312SMartin Matuska * - modified assembly to fit into OpenZFS
212a58b312SMartin Matuska */
222a58b312SMartin Matuska
232a58b312SMartin Matuska#if defined(__aarch64__)
242a58b312SMartin Matuska
// GNU property note (ELF .note.gnu.property) for this object file.
// Field meanings below follow the AArch64 ELF ABI layout of a
// NT_GNU_PROPERTY_TYPE_0 note; the directives themselves only emit raw words.
251719886fSMartin Matuska	.section	.note.gnu.property,"a",@note
261719886fSMartin Matuska	.p2align	3			// align the note to 2^3 = 8 bytes
271719886fSMartin Matuska	.word	4			// namesz: length of "GNU" including the NUL
281719886fSMartin Matuska	.word	16			// descsz: 16 bytes of descriptor follow the name
291719886fSMartin Matuska	.word	5			// note type 5 (NT_GNU_PROPERTY_TYPE_0)
301719886fSMartin Matuska	.asciz	"GNU"			// note owner name, NUL-terminated
311719886fSMartin Matuska	.word	3221225472		// 0xc0000000: property type (GNU_PROPERTY_AARCH64_FEATURE_1_AND)
321719886fSMartin Matuska	.word	4			// property data size in bytes
331719886fSMartin Matuska	.word	3			// feature bits 0 and 1 set (BTI | PAC per the ABI)
341719886fSMartin Matuska	.word	0			// padding word; brings the descriptor to 16 bytes
352a58b312SMartin Matuska.text
362a58b312SMartin Matuska
// .LK512: table of 80 64-bit round constants (40 rows of two .quad values),
// followed by one zero .quad that marks the end of the table.
// The values are the SHA-512 round constants K[0..79] as listed in FIPS 180-4.
// zfs_sha512_block_armv7 takes the table address with "adr x30,.LK512" and
// walks it with post-incremented 8-byte loads ("ldr ...,[x30],#8").
// The table lives in .text so that the PC-relative adr can reach it.
// .align 6 requests 2^6 = 64-byte alignment (power-of-two form on AArch64 gas).
372a58b312SMartin Matuska.align	6
382a58b312SMartin Matuska.type	.LK512,%object
392a58b312SMartin Matuska.LK512:
402a58b312SMartin Matuska	.quad	0x428a2f98d728ae22,0x7137449123ef65cd	// K[0], K[1]
412a58b312SMartin Matuska	.quad	0xb5c0fbcfec4d3b2f,0xe9b5dba58189dbbc	// K[2], K[3]
422a58b312SMartin Matuska	.quad	0x3956c25bf348b538,0x59f111f1b605d019	// K[4], K[5]
432a58b312SMartin Matuska	.quad	0x923f82a4af194f9b,0xab1c5ed5da6d8118	// K[6], K[7]
442a58b312SMartin Matuska	.quad	0xd807aa98a3030242,0x12835b0145706fbe	// K[8], K[9]
452a58b312SMartin Matuska	.quad	0x243185be4ee4b28c,0x550c7dc3d5ffb4e2	// K[10], K[11]
462a58b312SMartin Matuska	.quad	0x72be5d74f27b896f,0x80deb1fe3b1696b1	// K[12], K[13]
472a58b312SMartin Matuska	.quad	0x9bdc06a725c71235,0xc19bf174cf692694	// K[14], K[15]
482a58b312SMartin Matuska	.quad	0xe49b69c19ef14ad2,0xefbe4786384f25e3	// K[16], K[17]
492a58b312SMartin Matuska	.quad	0x0fc19dc68b8cd5b5,0x240ca1cc77ac9c65	// K[18], K[19]
502a58b312SMartin Matuska	.quad	0x2de92c6f592b0275,0x4a7484aa6ea6e483	// K[20], K[21]
512a58b312SMartin Matuska	.quad	0x5cb0a9dcbd41fbd4,0x76f988da831153b5	// K[22], K[23]
522a58b312SMartin Matuska	.quad	0x983e5152ee66dfab,0xa831c66d2db43210	// K[24], K[25]
532a58b312SMartin Matuska	.quad	0xb00327c898fb213f,0xbf597fc7beef0ee4	// K[26], K[27]
542a58b312SMartin Matuska	.quad	0xc6e00bf33da88fc2,0xd5a79147930aa725	// K[28], K[29]
552a58b312SMartin Matuska	.quad	0x06ca6351e003826f,0x142929670a0e6e70	// K[30], K[31]
562a58b312SMartin Matuska	.quad	0x27b70a8546d22ffc,0x2e1b21385c26c926	// K[32], K[33]
572a58b312SMartin Matuska	.quad	0x4d2c6dfc5ac42aed,0x53380d139d95b3df	// K[34], K[35]
582a58b312SMartin Matuska	.quad	0x650a73548baf63de,0x766a0abb3c77b2a8	// K[36], K[37]
592a58b312SMartin Matuska	.quad	0x81c2c92e47edaee6,0x92722c851482353b	// K[38], K[39]
602a58b312SMartin Matuska	.quad	0xa2bfe8a14cf10364,0xa81a664bbc423001	// K[40], K[41]
612a58b312SMartin Matuska	.quad	0xc24b8b70d0f89791,0xc76c51a30654be30	// K[42], K[43]
622a58b312SMartin Matuska	.quad	0xd192e819d6ef5218,0xd69906245565a910	// K[44], K[45]
632a58b312SMartin Matuska	.quad	0xf40e35855771202a,0x106aa07032bbd1b8	// K[46], K[47]
642a58b312SMartin Matuska	.quad	0x19a4c116b8d2d0c8,0x1e376c085141ab53	// K[48], K[49]
652a58b312SMartin Matuska	.quad	0x2748774cdf8eeb99,0x34b0bcb5e19b48a8	// K[50], K[51]
662a58b312SMartin Matuska	.quad	0x391c0cb3c5c95a63,0x4ed8aa4ae3418acb	// K[52], K[53]
672a58b312SMartin Matuska	.quad	0x5b9cca4f7763e373,0x682e6ff3d6b2b8a3	// K[54], K[55]
682a58b312SMartin Matuska	.quad	0x748f82ee5defb2fc,0x78a5636f43172f60	// K[56], K[57]
692a58b312SMartin Matuska	.quad	0x84c87814a1f0ab72,0x8cc702081a6439ec	// K[58], K[59]
702a58b312SMartin Matuska	.quad	0x90befffa23631e28,0xa4506cebde82bde9	// K[60], K[61]
712a58b312SMartin Matuska	.quad	0xbef9a3f7b2c67915,0xc67178f2e372532b	// K[62], K[63]
722a58b312SMartin Matuska	.quad	0xca273eceea26619c,0xd186b8c721c0c207	// K[64], K[65]
732a58b312SMartin Matuska	.quad	0xeada7dd6cde0eb1e,0xf57d4f7fee6ed178	// K[66], K[67]
742a58b312SMartin Matuska	.quad	0x06f067aa72176fba,0x0a637dc5a2c898a6	// K[68], K[69]
752a58b312SMartin Matuska	.quad	0x113f9804bef90dae,0x1b710b35131c471b	// K[70], K[71]
762a58b312SMartin Matuska	.quad	0x28db77f523047d84,0x32caab7b40c72493	// K[72], K[73]
772a58b312SMartin Matuska	.quad	0x3c9ebe0a15c9bebc,0x431d67c49c100d4c	// K[74], K[75]
782a58b312SMartin Matuska	.quad	0x4cc5d4becb3e42b6,0x597f299cfc657e2a	// K[76], K[77]
792a58b312SMartin Matuska	.quad	0x5fcb6fab3ad6faec,0x6c44198c4a475817	// K[78], K[79]
802a58b312SMartin Matuska	.quad	0	// terminator: zero entry after K[79] marks the end of the table
812a58b312SMartin Matuska.size	.LK512,.-.LK512
822a58b312SMartin Matuska
832a58b312SMartin Matuska.globl	zfs_sha512_block_armv7
842a58b312SMartin Matuska.type	zfs_sha512_block_armv7,%function
852a58b312SMartin Matuska.align	6
862a58b312SMartin Matuskazfs_sha512_block_armv7:
87f7f4bd06SMartin Matuska	hint	#34					// bti c
882a58b312SMartin Matuska	stp	x29,x30,[sp,#-128]!
892a58b312SMartin Matuska	add	x29,sp,#0
902a58b312SMartin Matuska
912a58b312SMartin Matuska	stp	x19,x20,[sp,#16]
922a58b312SMartin Matuska	stp	x21,x22,[sp,#32]
932a58b312SMartin Matuska	stp	x23,x24,[sp,#48]
942a58b312SMartin Matuska	stp	x25,x26,[sp,#64]
952a58b312SMartin Matuska	stp	x27,x28,[sp,#80]
962a58b312SMartin Matuska	sub	sp,sp,#4*8
972a58b312SMartin Matuska
982a58b312SMartin Matuska	ldp	x20,x21,[x0]				// load context
992a58b312SMartin Matuska	ldp	x22,x23,[x0,#2*8]
1002a58b312SMartin Matuska	ldp	x24,x25,[x0,#4*8]
1012a58b312SMartin Matuska	add	x2,x1,x2,lsl#7	// end of input
1022a58b312SMartin Matuska	ldp	x26,x27,[x0,#6*8]
1032a58b312SMartin Matuska	adr	x30,.LK512
1042a58b312SMartin Matuska	stp	x0,x2,[x29,#96]
1052a58b312SMartin Matuska
1062a58b312SMartin Matuska.Loop:
1072a58b312SMartin Matuska	ldp	x3,x4,[x1],#2*8
1082a58b312SMartin Matuska	ldr	x19,[x30],#8			// *K++
1092a58b312SMartin Matuska	eor	x28,x21,x22				// magic seed
1102a58b312SMartin Matuska	str	x1,[x29,#112]
1112a58b312SMartin Matuska#ifndef	__AARCH64EB__
1122a58b312SMartin Matuska	rev	x3,x3			// 0
1132a58b312SMartin Matuska#endif
1142a58b312SMartin Matuska	ror	x16,x24,#14
1152a58b312SMartin Matuska	add	x27,x27,x19			// h+=K[i]
1162a58b312SMartin Matuska	eor	x6,x24,x24,ror#23
1172a58b312SMartin Matuska	and	x17,x25,x24
1182a58b312SMartin Matuska	bic	x19,x26,x24
1192a58b312SMartin Matuska	add	x27,x27,x3			// h+=X[i]
1202a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
1212a58b312SMartin Matuska	eor	x19,x20,x21			// a^b, b^c in next round
1222a58b312SMartin Matuska	eor	x16,x16,x6,ror#18	// Sigma1(e)
1232a58b312SMartin Matuska	ror	x6,x20,#28
1242a58b312SMartin Matuska	add	x27,x27,x17			// h+=Ch(e,f,g)
1252a58b312SMartin Matuska	eor	x17,x20,x20,ror#5
1262a58b312SMartin Matuska	add	x27,x27,x16			// h+=Sigma1(e)
1272a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
1282a58b312SMartin Matuska	add	x23,x23,x27			// d+=h
1292a58b312SMartin Matuska	eor	x28,x28,x21			// Maj(a,b,c)
1302a58b312SMartin Matuska	eor	x17,x6,x17,ror#34	// Sigma0(a)
1312a58b312SMartin Matuska	add	x27,x27,x28			// h+=Maj(a,b,c)
1322a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
1332a58b312SMartin Matuska	//add	x27,x27,x17			// h+=Sigma0(a)
1342a58b312SMartin Matuska#ifndef	__AARCH64EB__
1352a58b312SMartin Matuska	rev	x4,x4			// 1
1362a58b312SMartin Matuska#endif
1372a58b312SMartin Matuska	ldp	x5,x6,[x1],#2*8
1382a58b312SMartin Matuska	add	x27,x27,x17			// h+=Sigma0(a)
1392a58b312SMartin Matuska	ror	x16,x23,#14
1402a58b312SMartin Matuska	add	x26,x26,x28			// h+=K[i]
1412a58b312SMartin Matuska	eor	x7,x23,x23,ror#23
1422a58b312SMartin Matuska	and	x17,x24,x23
1432a58b312SMartin Matuska	bic	x28,x25,x23
1442a58b312SMartin Matuska	add	x26,x26,x4			// h+=X[i]
1452a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
1462a58b312SMartin Matuska	eor	x28,x27,x20			// a^b, b^c in next round
1472a58b312SMartin Matuska	eor	x16,x16,x7,ror#18	// Sigma1(e)
1482a58b312SMartin Matuska	ror	x7,x27,#28
1492a58b312SMartin Matuska	add	x26,x26,x17			// h+=Ch(e,f,g)
1502a58b312SMartin Matuska	eor	x17,x27,x27,ror#5
1512a58b312SMartin Matuska	add	x26,x26,x16			// h+=Sigma1(e)
1522a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
1532a58b312SMartin Matuska	add	x22,x22,x26			// d+=h
1542a58b312SMartin Matuska	eor	x19,x19,x20			// Maj(a,b,c)
1552a58b312SMartin Matuska	eor	x17,x7,x17,ror#34	// Sigma0(a)
1562a58b312SMartin Matuska	add	x26,x26,x19			// h+=Maj(a,b,c)
1572a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
1582a58b312SMartin Matuska	//add	x26,x26,x17			// h+=Sigma0(a)
1592a58b312SMartin Matuska#ifndef	__AARCH64EB__
1602a58b312SMartin Matuska	rev	x5,x5			// 2
1612a58b312SMartin Matuska#endif
1622a58b312SMartin Matuska	add	x26,x26,x17			// h+=Sigma0(a)
1632a58b312SMartin Matuska	ror	x16,x22,#14
1642a58b312SMartin Matuska	add	x25,x25,x19			// h+=K[i]
1652a58b312SMartin Matuska	eor	x8,x22,x22,ror#23
1662a58b312SMartin Matuska	and	x17,x23,x22
1672a58b312SMartin Matuska	bic	x19,x24,x22
1682a58b312SMartin Matuska	add	x25,x25,x5			// h+=X[i]
1692a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
1702a58b312SMartin Matuska	eor	x19,x26,x27			// a^b, b^c in next round
1712a58b312SMartin Matuska	eor	x16,x16,x8,ror#18	// Sigma1(e)
1722a58b312SMartin Matuska	ror	x8,x26,#28
1732a58b312SMartin Matuska	add	x25,x25,x17			// h+=Ch(e,f,g)
1742a58b312SMartin Matuska	eor	x17,x26,x26,ror#5
1752a58b312SMartin Matuska	add	x25,x25,x16			// h+=Sigma1(e)
1762a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
1772a58b312SMartin Matuska	add	x21,x21,x25			// d+=h
1782a58b312SMartin Matuska	eor	x28,x28,x27			// Maj(a,b,c)
1792a58b312SMartin Matuska	eor	x17,x8,x17,ror#34	// Sigma0(a)
1802a58b312SMartin Matuska	add	x25,x25,x28			// h+=Maj(a,b,c)
1812a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
1822a58b312SMartin Matuska	//add	x25,x25,x17			// h+=Sigma0(a)
1832a58b312SMartin Matuska#ifndef	__AARCH64EB__
1842a58b312SMartin Matuska	rev	x6,x6			// 3
1852a58b312SMartin Matuska#endif
1862a58b312SMartin Matuska	ldp	x7,x8,[x1],#2*8
1872a58b312SMartin Matuska	add	x25,x25,x17			// h+=Sigma0(a)
1882a58b312SMartin Matuska	ror	x16,x21,#14
1892a58b312SMartin Matuska	add	x24,x24,x28			// h+=K[i]
1902a58b312SMartin Matuska	eor	x9,x21,x21,ror#23
1912a58b312SMartin Matuska	and	x17,x22,x21
1922a58b312SMartin Matuska	bic	x28,x23,x21
1932a58b312SMartin Matuska	add	x24,x24,x6			// h+=X[i]
1942a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
1952a58b312SMartin Matuska	eor	x28,x25,x26			// a^b, b^c in next round
1962a58b312SMartin Matuska	eor	x16,x16,x9,ror#18	// Sigma1(e)
1972a58b312SMartin Matuska	ror	x9,x25,#28
1982a58b312SMartin Matuska	add	x24,x24,x17			// h+=Ch(e,f,g)
1992a58b312SMartin Matuska	eor	x17,x25,x25,ror#5
2002a58b312SMartin Matuska	add	x24,x24,x16			// h+=Sigma1(e)
2012a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
2022a58b312SMartin Matuska	add	x20,x20,x24			// d+=h
2032a58b312SMartin Matuska	eor	x19,x19,x26			// Maj(a,b,c)
2042a58b312SMartin Matuska	eor	x17,x9,x17,ror#34	// Sigma0(a)
2052a58b312SMartin Matuska	add	x24,x24,x19			// h+=Maj(a,b,c)
2062a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
2072a58b312SMartin Matuska	//add	x24,x24,x17			// h+=Sigma0(a)
2082a58b312SMartin Matuska#ifndef	__AARCH64EB__
2092a58b312SMartin Matuska	rev	x7,x7			// 4
2102a58b312SMartin Matuska#endif
2112a58b312SMartin Matuska	add	x24,x24,x17			// h+=Sigma0(a)
2122a58b312SMartin Matuska	ror	x16,x20,#14
2132a58b312SMartin Matuska	add	x23,x23,x19			// h+=K[i]
2142a58b312SMartin Matuska	eor	x10,x20,x20,ror#23
2152a58b312SMartin Matuska	and	x17,x21,x20
2162a58b312SMartin Matuska	bic	x19,x22,x20
2172a58b312SMartin Matuska	add	x23,x23,x7			// h+=X[i]
2182a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
2192a58b312SMartin Matuska	eor	x19,x24,x25			// a^b, b^c in next round
2202a58b312SMartin Matuska	eor	x16,x16,x10,ror#18	// Sigma1(e)
2212a58b312SMartin Matuska	ror	x10,x24,#28
2222a58b312SMartin Matuska	add	x23,x23,x17			// h+=Ch(e,f,g)
2232a58b312SMartin Matuska	eor	x17,x24,x24,ror#5
2242a58b312SMartin Matuska	add	x23,x23,x16			// h+=Sigma1(e)
2252a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
2262a58b312SMartin Matuska	add	x27,x27,x23			// d+=h
2272a58b312SMartin Matuska	eor	x28,x28,x25			// Maj(a,b,c)
2282a58b312SMartin Matuska	eor	x17,x10,x17,ror#34	// Sigma0(a)
2292a58b312SMartin Matuska	add	x23,x23,x28			// h+=Maj(a,b,c)
2302a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
2312a58b312SMartin Matuska	//add	x23,x23,x17			// h+=Sigma0(a)
2322a58b312SMartin Matuska#ifndef	__AARCH64EB__
2332a58b312SMartin Matuska	rev	x8,x8			// 5
2342a58b312SMartin Matuska#endif
2352a58b312SMartin Matuska	ldp	x9,x10,[x1],#2*8
2362a58b312SMartin Matuska	add	x23,x23,x17			// h+=Sigma0(a)
2372a58b312SMartin Matuska	ror	x16,x27,#14
2382a58b312SMartin Matuska	add	x22,x22,x28			// h+=K[i]
2392a58b312SMartin Matuska	eor	x11,x27,x27,ror#23
2402a58b312SMartin Matuska	and	x17,x20,x27
2412a58b312SMartin Matuska	bic	x28,x21,x27
2422a58b312SMartin Matuska	add	x22,x22,x8			// h+=X[i]
2432a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
2442a58b312SMartin Matuska	eor	x28,x23,x24			// a^b, b^c in next round
2452a58b312SMartin Matuska	eor	x16,x16,x11,ror#18	// Sigma1(e)
2462a58b312SMartin Matuska	ror	x11,x23,#28
2472a58b312SMartin Matuska	add	x22,x22,x17			// h+=Ch(e,f,g)
2482a58b312SMartin Matuska	eor	x17,x23,x23,ror#5
2492a58b312SMartin Matuska	add	x22,x22,x16			// h+=Sigma1(e)
2502a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
2512a58b312SMartin Matuska	add	x26,x26,x22			// d+=h
2522a58b312SMartin Matuska	eor	x19,x19,x24			// Maj(a,b,c)
2532a58b312SMartin Matuska	eor	x17,x11,x17,ror#34	// Sigma0(a)
2542a58b312SMartin Matuska	add	x22,x22,x19			// h+=Maj(a,b,c)
2552a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
2562a58b312SMartin Matuska	//add	x22,x22,x17			// h+=Sigma0(a)
2572a58b312SMartin Matuska#ifndef	__AARCH64EB__
2582a58b312SMartin Matuska	rev	x9,x9			// 6
2592a58b312SMartin Matuska#endif
2602a58b312SMartin Matuska	add	x22,x22,x17			// h+=Sigma0(a)
2612a58b312SMartin Matuska	ror	x16,x26,#14
2622a58b312SMartin Matuska	add	x21,x21,x19			// h+=K[i]
2632a58b312SMartin Matuska	eor	x12,x26,x26,ror#23
2642a58b312SMartin Matuska	and	x17,x27,x26
2652a58b312SMartin Matuska	bic	x19,x20,x26
2662a58b312SMartin Matuska	add	x21,x21,x9			// h+=X[i]
2672a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
2682a58b312SMartin Matuska	eor	x19,x22,x23			// a^b, b^c in next round
2692a58b312SMartin Matuska	eor	x16,x16,x12,ror#18	// Sigma1(e)
2702a58b312SMartin Matuska	ror	x12,x22,#28
2712a58b312SMartin Matuska	add	x21,x21,x17			// h+=Ch(e,f,g)
2722a58b312SMartin Matuska	eor	x17,x22,x22,ror#5
2732a58b312SMartin Matuska	add	x21,x21,x16			// h+=Sigma1(e)
2742a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
2752a58b312SMartin Matuska	add	x25,x25,x21			// d+=h
2762a58b312SMartin Matuska	eor	x28,x28,x23			// Maj(a,b,c)
2772a58b312SMartin Matuska	eor	x17,x12,x17,ror#34	// Sigma0(a)
2782a58b312SMartin Matuska	add	x21,x21,x28			// h+=Maj(a,b,c)
2792a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
2802a58b312SMartin Matuska	//add	x21,x21,x17			// h+=Sigma0(a)
2812a58b312SMartin Matuska#ifndef	__AARCH64EB__
2822a58b312SMartin Matuska	rev	x10,x10			// 7
2832a58b312SMartin Matuska#endif
2842a58b312SMartin Matuska	ldp	x11,x12,[x1],#2*8
2852a58b312SMartin Matuska	add	x21,x21,x17			// h+=Sigma0(a)
2862a58b312SMartin Matuska	ror	x16,x25,#14
2872a58b312SMartin Matuska	add	x20,x20,x28			// h+=K[i]
2882a58b312SMartin Matuska	eor	x13,x25,x25,ror#23
2892a58b312SMartin Matuska	and	x17,x26,x25
2902a58b312SMartin Matuska	bic	x28,x27,x25
2912a58b312SMartin Matuska	add	x20,x20,x10			// h+=X[i]
2922a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
2932a58b312SMartin Matuska	eor	x28,x21,x22			// a^b, b^c in next round
2942a58b312SMartin Matuska	eor	x16,x16,x13,ror#18	// Sigma1(e)
2952a58b312SMartin Matuska	ror	x13,x21,#28
2962a58b312SMartin Matuska	add	x20,x20,x17			// h+=Ch(e,f,g)
2972a58b312SMartin Matuska	eor	x17,x21,x21,ror#5
2982a58b312SMartin Matuska	add	x20,x20,x16			// h+=Sigma1(e)
2992a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
3002a58b312SMartin Matuska	add	x24,x24,x20			// d+=h
3012a58b312SMartin Matuska	eor	x19,x19,x22			// Maj(a,b,c)
3022a58b312SMartin Matuska	eor	x17,x13,x17,ror#34	// Sigma0(a)
3032a58b312SMartin Matuska	add	x20,x20,x19			// h+=Maj(a,b,c)
3042a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
3052a58b312SMartin Matuska	//add	x20,x20,x17			// h+=Sigma0(a)
3062a58b312SMartin Matuska#ifndef	__AARCH64EB__
3072a58b312SMartin Matuska	rev	x11,x11			// 8
3082a58b312SMartin Matuska#endif
3092a58b312SMartin Matuska	add	x20,x20,x17			// h+=Sigma0(a)
3102a58b312SMartin Matuska	ror	x16,x24,#14
3112a58b312SMartin Matuska	add	x27,x27,x19			// h+=K[i]
3122a58b312SMartin Matuska	eor	x14,x24,x24,ror#23
3132a58b312SMartin Matuska	and	x17,x25,x24
3142a58b312SMartin Matuska	bic	x19,x26,x24
3152a58b312SMartin Matuska	add	x27,x27,x11			// h+=X[i]
3162a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
3172a58b312SMartin Matuska	eor	x19,x20,x21			// a^b, b^c in next round
3182a58b312SMartin Matuska	eor	x16,x16,x14,ror#18	// Sigma1(e)
3192a58b312SMartin Matuska	ror	x14,x20,#28
3202a58b312SMartin Matuska	add	x27,x27,x17			// h+=Ch(e,f,g)
3212a58b312SMartin Matuska	eor	x17,x20,x20,ror#5
3222a58b312SMartin Matuska	add	x27,x27,x16			// h+=Sigma1(e)
3232a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
3242a58b312SMartin Matuska	add	x23,x23,x27			// d+=h
3252a58b312SMartin Matuska	eor	x28,x28,x21			// Maj(a,b,c)
3262a58b312SMartin Matuska	eor	x17,x14,x17,ror#34	// Sigma0(a)
3272a58b312SMartin Matuska	add	x27,x27,x28			// h+=Maj(a,b,c)
3282a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
3292a58b312SMartin Matuska	//add	x27,x27,x17			// h+=Sigma0(a)
3302a58b312SMartin Matuska#ifndef	__AARCH64EB__
3312a58b312SMartin Matuska	rev	x12,x12			// 9
3322a58b312SMartin Matuska#endif
3332a58b312SMartin Matuska	ldp	x13,x14,[x1],#2*8
3342a58b312SMartin Matuska	add	x27,x27,x17			// h+=Sigma0(a)
3352a58b312SMartin Matuska	ror	x16,x23,#14
3362a58b312SMartin Matuska	add	x26,x26,x28			// h+=K[i]
3372a58b312SMartin Matuska	eor	x15,x23,x23,ror#23
3382a58b312SMartin Matuska	and	x17,x24,x23
3392a58b312SMartin Matuska	bic	x28,x25,x23
3402a58b312SMartin Matuska	add	x26,x26,x12			// h+=X[i]
3412a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
3422a58b312SMartin Matuska	eor	x28,x27,x20			// a^b, b^c in next round
3432a58b312SMartin Matuska	eor	x16,x16,x15,ror#18	// Sigma1(e)
3442a58b312SMartin Matuska	ror	x15,x27,#28
3452a58b312SMartin Matuska	add	x26,x26,x17			// h+=Ch(e,f,g)
3462a58b312SMartin Matuska	eor	x17,x27,x27,ror#5
3472a58b312SMartin Matuska	add	x26,x26,x16			// h+=Sigma1(e)
3482a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
3492a58b312SMartin Matuska	add	x22,x22,x26			// d+=h
3502a58b312SMartin Matuska	eor	x19,x19,x20			// Maj(a,b,c)
3512a58b312SMartin Matuska	eor	x17,x15,x17,ror#34	// Sigma0(a)
3522a58b312SMartin Matuska	add	x26,x26,x19			// h+=Maj(a,b,c)
3532a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
3542a58b312SMartin Matuska	//add	x26,x26,x17			// h+=Sigma0(a)
3552a58b312SMartin Matuska#ifndef	__AARCH64EB__
3562a58b312SMartin Matuska	rev	x13,x13			// 10
3572a58b312SMartin Matuska#endif
3582a58b312SMartin Matuska	add	x26,x26,x17			// h+=Sigma0(a)
3592a58b312SMartin Matuska	ror	x16,x22,#14
3602a58b312SMartin Matuska	add	x25,x25,x19			// h+=K[i]
3612a58b312SMartin Matuska	eor	x0,x22,x22,ror#23
3622a58b312SMartin Matuska	and	x17,x23,x22
3632a58b312SMartin Matuska	bic	x19,x24,x22
3642a58b312SMartin Matuska	add	x25,x25,x13			// h+=X[i]
3652a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
3662a58b312SMartin Matuska	eor	x19,x26,x27			// a^b, b^c in next round
3672a58b312SMartin Matuska	eor	x16,x16,x0,ror#18	// Sigma1(e)
3682a58b312SMartin Matuska	ror	x0,x26,#28
3692a58b312SMartin Matuska	add	x25,x25,x17			// h+=Ch(e,f,g)
3702a58b312SMartin Matuska	eor	x17,x26,x26,ror#5
3712a58b312SMartin Matuska	add	x25,x25,x16			// h+=Sigma1(e)
3722a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
3732a58b312SMartin Matuska	add	x21,x21,x25			// d+=h
3742a58b312SMartin Matuska	eor	x28,x28,x27			// Maj(a,b,c)
3752a58b312SMartin Matuska	eor	x17,x0,x17,ror#34	// Sigma0(a)
3762a58b312SMartin Matuska	add	x25,x25,x28			// h+=Maj(a,b,c)
3772a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
3782a58b312SMartin Matuska	//add	x25,x25,x17			// h+=Sigma0(a)
3792a58b312SMartin Matuska#ifndef	__AARCH64EB__
3802a58b312SMartin Matuska	rev	x14,x14			// 11
3812a58b312SMartin Matuska#endif
3822a58b312SMartin Matuska	ldp	x15,x0,[x1],#2*8
3832a58b312SMartin Matuska	add	x25,x25,x17			// h+=Sigma0(a)
3842a58b312SMartin Matuska	str	x6,[sp,#24]
3852a58b312SMartin Matuska	ror	x16,x21,#14
3862a58b312SMartin Matuska	add	x24,x24,x28			// h+=K[i]
3872a58b312SMartin Matuska	eor	x6,x21,x21,ror#23
3882a58b312SMartin Matuska	and	x17,x22,x21
3892a58b312SMartin Matuska	bic	x28,x23,x21
3902a58b312SMartin Matuska	add	x24,x24,x14			// h+=X[i]
3912a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
3922a58b312SMartin Matuska	eor	x28,x25,x26			// a^b, b^c in next round
3932a58b312SMartin Matuska	eor	x16,x16,x6,ror#18	// Sigma1(e)
3942a58b312SMartin Matuska	ror	x6,x25,#28
3952a58b312SMartin Matuska	add	x24,x24,x17			// h+=Ch(e,f,g)
3962a58b312SMartin Matuska	eor	x17,x25,x25,ror#5
3972a58b312SMartin Matuska	add	x24,x24,x16			// h+=Sigma1(e)
3982a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
3992a58b312SMartin Matuska	add	x20,x20,x24			// d+=h
4002a58b312SMartin Matuska	eor	x19,x19,x26			// Maj(a,b,c)
4012a58b312SMartin Matuska	eor	x17,x6,x17,ror#34	// Sigma0(a)
4022a58b312SMartin Matuska	add	x24,x24,x19			// h+=Maj(a,b,c)
4032a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
4042a58b312SMartin Matuska	//add	x24,x24,x17			// h+=Sigma0(a)
4052a58b312SMartin Matuska#ifndef	__AARCH64EB__
4062a58b312SMartin Matuska	rev	x15,x15			// 12
4072a58b312SMartin Matuska#endif
4082a58b312SMartin Matuska	add	x24,x24,x17			// h+=Sigma0(a)
4092a58b312SMartin Matuska	str	x7,[sp,#0]
4102a58b312SMartin Matuska	ror	x16,x20,#14
4112a58b312SMartin Matuska	add	x23,x23,x19			// h+=K[i]
4122a58b312SMartin Matuska	eor	x7,x20,x20,ror#23
4132a58b312SMartin Matuska	and	x17,x21,x20
4142a58b312SMartin Matuska	bic	x19,x22,x20
4152a58b312SMartin Matuska	add	x23,x23,x15			// h+=X[i]
4162a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
4172a58b312SMartin Matuska	eor	x19,x24,x25			// a^b, b^c in next round
4182a58b312SMartin Matuska	eor	x16,x16,x7,ror#18	// Sigma1(e)
4192a58b312SMartin Matuska	ror	x7,x24,#28
4202a58b312SMartin Matuska	add	x23,x23,x17			// h+=Ch(e,f,g)
4212a58b312SMartin Matuska	eor	x17,x24,x24,ror#5
4222a58b312SMartin Matuska	add	x23,x23,x16			// h+=Sigma1(e)
4232a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
4242a58b312SMartin Matuska	add	x27,x27,x23			// d+=h
4252a58b312SMartin Matuska	eor	x28,x28,x25			// Maj(a,b,c)
4262a58b312SMartin Matuska	eor	x17,x7,x17,ror#34	// Sigma0(a)
4272a58b312SMartin Matuska	add	x23,x23,x28			// h+=Maj(a,b,c)
4282a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
4292a58b312SMartin Matuska	//add	x23,x23,x17			// h+=Sigma0(a)
4302a58b312SMartin Matuska#ifndef	__AARCH64EB__
4312a58b312SMartin Matuska	rev	x0,x0			// 13
4322a58b312SMartin Matuska#endif
4332a58b312SMartin Matuska	ldp	x1,x2,[x1]
4342a58b312SMartin Matuska	add	x23,x23,x17			// h+=Sigma0(a)
4352a58b312SMartin Matuska	str	x8,[sp,#8]
4362a58b312SMartin Matuska	ror	x16,x27,#14
4372a58b312SMartin Matuska	add	x22,x22,x28			// h+=K[i]
4382a58b312SMartin Matuska	eor	x8,x27,x27,ror#23
4392a58b312SMartin Matuska	and	x17,x20,x27
4402a58b312SMartin Matuska	bic	x28,x21,x27
4412a58b312SMartin Matuska	add	x22,x22,x0			// h+=X[i]
4422a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
4432a58b312SMartin Matuska	eor	x28,x23,x24			// a^b, b^c in next round
4442a58b312SMartin Matuska	eor	x16,x16,x8,ror#18	// Sigma1(e)
4452a58b312SMartin Matuska	ror	x8,x23,#28
4462a58b312SMartin Matuska	add	x22,x22,x17			// h+=Ch(e,f,g)
4472a58b312SMartin Matuska	eor	x17,x23,x23,ror#5
4482a58b312SMartin Matuska	add	x22,x22,x16			// h+=Sigma1(e)
4492a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
4502a58b312SMartin Matuska	add	x26,x26,x22			// d+=h
4512a58b312SMartin Matuska	eor	x19,x19,x24			// Maj(a,b,c)
4522a58b312SMartin Matuska	eor	x17,x8,x17,ror#34	// Sigma0(a)
4532a58b312SMartin Matuska	add	x22,x22,x19			// h+=Maj(a,b,c)
4542a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
4552a58b312SMartin Matuska	//add	x22,x22,x17			// h+=Sigma0(a)
4562a58b312SMartin Matuska#ifndef	__AARCH64EB__
4572a58b312SMartin Matuska	rev	x1,x1			// 14
4582a58b312SMartin Matuska#endif
4592a58b312SMartin Matuska	ldr	x6,[sp,#24]
4602a58b312SMartin Matuska	add	x22,x22,x17			// h+=Sigma0(a)
4612a58b312SMartin Matuska	str	x9,[sp,#16]
4622a58b312SMartin Matuska	ror	x16,x26,#14
4632a58b312SMartin Matuska	add	x21,x21,x19			// h+=K[i]
4642a58b312SMartin Matuska	eor	x9,x26,x26,ror#23
4652a58b312SMartin Matuska	and	x17,x27,x26
4662a58b312SMartin Matuska	bic	x19,x20,x26
4672a58b312SMartin Matuska	add	x21,x21,x1			// h+=X[i]
4682a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
4692a58b312SMartin Matuska	eor	x19,x22,x23			// a^b, b^c in next round
4702a58b312SMartin Matuska	eor	x16,x16,x9,ror#18	// Sigma1(e)
4712a58b312SMartin Matuska	ror	x9,x22,#28
4722a58b312SMartin Matuska	add	x21,x21,x17			// h+=Ch(e,f,g)
4732a58b312SMartin Matuska	eor	x17,x22,x22,ror#5
4742a58b312SMartin Matuska	add	x21,x21,x16			// h+=Sigma1(e)
4752a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
4762a58b312SMartin Matuska	add	x25,x25,x21			// d+=h
4772a58b312SMartin Matuska	eor	x28,x28,x23			// Maj(a,b,c)
4782a58b312SMartin Matuska	eor	x17,x9,x17,ror#34	// Sigma0(a)
4792a58b312SMartin Matuska	add	x21,x21,x28			// h+=Maj(a,b,c)
4802a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
4812a58b312SMartin Matuska	//add	x21,x21,x17			// h+=Sigma0(a)
4822a58b312SMartin Matuska#ifndef	__AARCH64EB__
4832a58b312SMartin Matuska	rev	x2,x2			// 15
4842a58b312SMartin Matuska#endif
4852a58b312SMartin Matuska	ldr	x7,[sp,#0]
4862a58b312SMartin Matuska	add	x21,x21,x17			// h+=Sigma0(a)
4872a58b312SMartin Matuska	str	x10,[sp,#24]
4882a58b312SMartin Matuska	ror	x16,x25,#14
4892a58b312SMartin Matuska	add	x20,x20,x28			// h+=K[i]
4902a58b312SMartin Matuska	ror	x9,x4,#1
4912a58b312SMartin Matuska	and	x17,x26,x25
4922a58b312SMartin Matuska	ror	x8,x1,#19
4932a58b312SMartin Matuska	bic	x28,x27,x25
4942a58b312SMartin Matuska	ror	x10,x21,#28
4952a58b312SMartin Matuska	add	x20,x20,x2			// h+=X[i]
4962a58b312SMartin Matuska	eor	x16,x16,x25,ror#18
4972a58b312SMartin Matuska	eor	x9,x9,x4,ror#8
4982a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
4992a58b312SMartin Matuska	eor	x28,x21,x22			// a^b, b^c in next round
5002a58b312SMartin Matuska	eor	x16,x16,x25,ror#41	// Sigma1(e)
5012a58b312SMartin Matuska	eor	x10,x10,x21,ror#34
5022a58b312SMartin Matuska	add	x20,x20,x17			// h+=Ch(e,f,g)
5032a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
5042a58b312SMartin Matuska	eor	x8,x8,x1,ror#61
5052a58b312SMartin Matuska	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
5062a58b312SMartin Matuska	add	x20,x20,x16			// h+=Sigma1(e)
5072a58b312SMartin Matuska	eor	x19,x19,x22			// Maj(a,b,c)
5082a58b312SMartin Matuska	eor	x17,x10,x21,ror#39	// Sigma0(a)
5092a58b312SMartin Matuska	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
5102a58b312SMartin Matuska	add	x3,x3,x12
5112a58b312SMartin Matuska	add	x24,x24,x20			// d+=h
5122a58b312SMartin Matuska	add	x20,x20,x19			// h+=Maj(a,b,c)
5132a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
5142a58b312SMartin Matuska	add	x3,x3,x9
5152a58b312SMartin Matuska	add	x20,x20,x17			// h+=Sigma0(a)
5162a58b312SMartin Matuska	add	x3,x3,x8
5172a58b312SMartin Matuska.Loop_16_xx:
5182a58b312SMartin Matuska	ldr	x8,[sp,#8]
5192a58b312SMartin Matuska	str	x11,[sp,#0]
5202a58b312SMartin Matuska	ror	x16,x24,#14
5212a58b312SMartin Matuska	add	x27,x27,x19			// h+=K[i]
5222a58b312SMartin Matuska	ror	x10,x5,#1
5232a58b312SMartin Matuska	and	x17,x25,x24
5242a58b312SMartin Matuska	ror	x9,x2,#19
5252a58b312SMartin Matuska	bic	x19,x26,x24
5262a58b312SMartin Matuska	ror	x11,x20,#28
5272a58b312SMartin Matuska	add	x27,x27,x3			// h+=X[i]
5282a58b312SMartin Matuska	eor	x16,x16,x24,ror#18
5292a58b312SMartin Matuska	eor	x10,x10,x5,ror#8
5302a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
5312a58b312SMartin Matuska	eor	x19,x20,x21			// a^b, b^c in next round
5322a58b312SMartin Matuska	eor	x16,x16,x24,ror#41	// Sigma1(e)
5332a58b312SMartin Matuska	eor	x11,x11,x20,ror#34
5342a58b312SMartin Matuska	add	x27,x27,x17			// h+=Ch(e,f,g)
5352a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
5362a58b312SMartin Matuska	eor	x9,x9,x2,ror#61
5372a58b312SMartin Matuska	eor	x10,x10,x5,lsr#7	// sigma0(X[i+1])
5382a58b312SMartin Matuska	add	x27,x27,x16			// h+=Sigma1(e)
5392a58b312SMartin Matuska	eor	x28,x28,x21			// Maj(a,b,c)
5402a58b312SMartin Matuska	eor	x17,x11,x20,ror#39	// Sigma0(a)
5412a58b312SMartin Matuska	eor	x9,x9,x2,lsr#6	// sigma1(X[i+14])
5422a58b312SMartin Matuska	add	x4,x4,x13
5432a58b312SMartin Matuska	add	x23,x23,x27			// d+=h
5442a58b312SMartin Matuska	add	x27,x27,x28			// h+=Maj(a,b,c)
5452a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
5462a58b312SMartin Matuska	add	x4,x4,x10
5472a58b312SMartin Matuska	add	x27,x27,x17			// h+=Sigma0(a)
5482a58b312SMartin Matuska	add	x4,x4,x9
5492a58b312SMartin Matuska	ldr	x9,[sp,#16]
5502a58b312SMartin Matuska	str	x12,[sp,#8]
5512a58b312SMartin Matuska	ror	x16,x23,#14
5522a58b312SMartin Matuska	add	x26,x26,x28			// h+=K[i]
5532a58b312SMartin Matuska	ror	x11,x6,#1
5542a58b312SMartin Matuska	and	x17,x24,x23
5552a58b312SMartin Matuska	ror	x10,x3,#19
5562a58b312SMartin Matuska	bic	x28,x25,x23
5572a58b312SMartin Matuska	ror	x12,x27,#28
5582a58b312SMartin Matuska	add	x26,x26,x4			// h+=X[i]
5592a58b312SMartin Matuska	eor	x16,x16,x23,ror#18
5602a58b312SMartin Matuska	eor	x11,x11,x6,ror#8
5612a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
5622a58b312SMartin Matuska	eor	x28,x27,x20			// a^b, b^c in next round
5632a58b312SMartin Matuska	eor	x16,x16,x23,ror#41	// Sigma1(e)
5642a58b312SMartin Matuska	eor	x12,x12,x27,ror#34
5652a58b312SMartin Matuska	add	x26,x26,x17			// h+=Ch(e,f,g)
5662a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
5672a58b312SMartin Matuska	eor	x10,x10,x3,ror#61
5682a58b312SMartin Matuska	eor	x11,x11,x6,lsr#7	// sigma0(X[i+1])
5692a58b312SMartin Matuska	add	x26,x26,x16			// h+=Sigma1(e)
5702a58b312SMartin Matuska	eor	x19,x19,x20			// Maj(a,b,c)
5712a58b312SMartin Matuska	eor	x17,x12,x27,ror#39	// Sigma0(a)
5722a58b312SMartin Matuska	eor	x10,x10,x3,lsr#6	// sigma1(X[i+14])
5732a58b312SMartin Matuska	add	x5,x5,x14
5742a58b312SMartin Matuska	add	x22,x22,x26			// d+=h
5752a58b312SMartin Matuska	add	x26,x26,x19			// h+=Maj(a,b,c)
5762a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
5772a58b312SMartin Matuska	add	x5,x5,x11
5782a58b312SMartin Matuska	add	x26,x26,x17			// h+=Sigma0(a)
5792a58b312SMartin Matuska	add	x5,x5,x10
5802a58b312SMartin Matuska	ldr	x10,[sp,#24]
5812a58b312SMartin Matuska	str	x13,[sp,#16]
5822a58b312SMartin Matuska	ror	x16,x22,#14
5832a58b312SMartin Matuska	add	x25,x25,x19			// h+=K[i]
5842a58b312SMartin Matuska	ror	x12,x7,#1
5852a58b312SMartin Matuska	and	x17,x23,x22
5862a58b312SMartin Matuska	ror	x11,x4,#19
5872a58b312SMartin Matuska	bic	x19,x24,x22
5882a58b312SMartin Matuska	ror	x13,x26,#28
5892a58b312SMartin Matuska	add	x25,x25,x5			// h+=X[i]
5902a58b312SMartin Matuska	eor	x16,x16,x22,ror#18
5912a58b312SMartin Matuska	eor	x12,x12,x7,ror#8
5922a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
5932a58b312SMartin Matuska	eor	x19,x26,x27			// a^b, b^c in next round
5942a58b312SMartin Matuska	eor	x16,x16,x22,ror#41	// Sigma1(e)
5952a58b312SMartin Matuska	eor	x13,x13,x26,ror#34
5962a58b312SMartin Matuska	add	x25,x25,x17			// h+=Ch(e,f,g)
5972a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
5982a58b312SMartin Matuska	eor	x11,x11,x4,ror#61
5992a58b312SMartin Matuska	eor	x12,x12,x7,lsr#7	// sigma0(X[i+1])
6002a58b312SMartin Matuska	add	x25,x25,x16			// h+=Sigma1(e)
6012a58b312SMartin Matuska	eor	x28,x28,x27			// Maj(a,b,c)
6022a58b312SMartin Matuska	eor	x17,x13,x26,ror#39	// Sigma0(a)
6032a58b312SMartin Matuska	eor	x11,x11,x4,lsr#6	// sigma1(X[i+14])
6042a58b312SMartin Matuska	add	x6,x6,x15
6052a58b312SMartin Matuska	add	x21,x21,x25			// d+=h
6062a58b312SMartin Matuska	add	x25,x25,x28			// h+=Maj(a,b,c)
6072a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
6082a58b312SMartin Matuska	add	x6,x6,x12
6092a58b312SMartin Matuska	add	x25,x25,x17			// h+=Sigma0(a)
6102a58b312SMartin Matuska	add	x6,x6,x11
6112a58b312SMartin Matuska	ldr	x11,[sp,#0]
6122a58b312SMartin Matuska	str	x14,[sp,#24]
6132a58b312SMartin Matuska	ror	x16,x21,#14
6142a58b312SMartin Matuska	add	x24,x24,x28			// h+=K[i]
6152a58b312SMartin Matuska	ror	x13,x8,#1
6162a58b312SMartin Matuska	and	x17,x22,x21
6172a58b312SMartin Matuska	ror	x12,x5,#19
6182a58b312SMartin Matuska	bic	x28,x23,x21
6192a58b312SMartin Matuska	ror	x14,x25,#28
6202a58b312SMartin Matuska	add	x24,x24,x6			// h+=X[i]
6212a58b312SMartin Matuska	eor	x16,x16,x21,ror#18
6222a58b312SMartin Matuska	eor	x13,x13,x8,ror#8
6232a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
6242a58b312SMartin Matuska	eor	x28,x25,x26			// a^b, b^c in next round
6252a58b312SMartin Matuska	eor	x16,x16,x21,ror#41	// Sigma1(e)
6262a58b312SMartin Matuska	eor	x14,x14,x25,ror#34
6272a58b312SMartin Matuska	add	x24,x24,x17			// h+=Ch(e,f,g)
6282a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
6292a58b312SMartin Matuska	eor	x12,x12,x5,ror#61
6302a58b312SMartin Matuska	eor	x13,x13,x8,lsr#7	// sigma0(X[i+1])
6312a58b312SMartin Matuska	add	x24,x24,x16			// h+=Sigma1(e)
6322a58b312SMartin Matuska	eor	x19,x19,x26			// Maj(a,b,c)
6332a58b312SMartin Matuska	eor	x17,x14,x25,ror#39	// Sigma0(a)
6342a58b312SMartin Matuska	eor	x12,x12,x5,lsr#6	// sigma1(X[i+14])
6352a58b312SMartin Matuska	add	x7,x7,x0
6362a58b312SMartin Matuska	add	x20,x20,x24			// d+=h
6372a58b312SMartin Matuska	add	x24,x24,x19			// h+=Maj(a,b,c)
6382a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
6392a58b312SMartin Matuska	add	x7,x7,x13
6402a58b312SMartin Matuska	add	x24,x24,x17			// h+=Sigma0(a)
6412a58b312SMartin Matuska	add	x7,x7,x12
6422a58b312SMartin Matuska	ldr	x12,[sp,#8]
6432a58b312SMartin Matuska	str	x15,[sp,#0]
6442a58b312SMartin Matuska	ror	x16,x20,#14
6452a58b312SMartin Matuska	add	x23,x23,x19			// h+=K[i]
6462a58b312SMartin Matuska	ror	x14,x9,#1
6472a58b312SMartin Matuska	and	x17,x21,x20
6482a58b312SMartin Matuska	ror	x13,x6,#19
6492a58b312SMartin Matuska	bic	x19,x22,x20
6502a58b312SMartin Matuska	ror	x15,x24,#28
6512a58b312SMartin Matuska	add	x23,x23,x7			// h+=X[i]
6522a58b312SMartin Matuska	eor	x16,x16,x20,ror#18
6532a58b312SMartin Matuska	eor	x14,x14,x9,ror#8
6542a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
6552a58b312SMartin Matuska	eor	x19,x24,x25			// a^b, b^c in next round
6562a58b312SMartin Matuska	eor	x16,x16,x20,ror#41	// Sigma1(e)
6572a58b312SMartin Matuska	eor	x15,x15,x24,ror#34
6582a58b312SMartin Matuska	add	x23,x23,x17			// h+=Ch(e,f,g)
6592a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
6602a58b312SMartin Matuska	eor	x13,x13,x6,ror#61
6612a58b312SMartin Matuska	eor	x14,x14,x9,lsr#7	// sigma0(X[i+1])
6622a58b312SMartin Matuska	add	x23,x23,x16			// h+=Sigma1(e)
6632a58b312SMartin Matuska	eor	x28,x28,x25			// Maj(a,b,c)
6642a58b312SMartin Matuska	eor	x17,x15,x24,ror#39	// Sigma0(a)
6652a58b312SMartin Matuska	eor	x13,x13,x6,lsr#6	// sigma1(X[i+14])
6662a58b312SMartin Matuska	add	x8,x8,x1
6672a58b312SMartin Matuska	add	x27,x27,x23			// d+=h
6682a58b312SMartin Matuska	add	x23,x23,x28			// h+=Maj(a,b,c)
6692a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
6702a58b312SMartin Matuska	add	x8,x8,x14
6712a58b312SMartin Matuska	add	x23,x23,x17			// h+=Sigma0(a)
6722a58b312SMartin Matuska	add	x8,x8,x13
6732a58b312SMartin Matuska	ldr	x13,[sp,#16]
6742a58b312SMartin Matuska	str	x0,[sp,#8]
6752a58b312SMartin Matuska	ror	x16,x27,#14
6762a58b312SMartin Matuska	add	x22,x22,x28			// h+=K[i]
6772a58b312SMartin Matuska	ror	x15,x10,#1
6782a58b312SMartin Matuska	and	x17,x20,x27
6792a58b312SMartin Matuska	ror	x14,x7,#19
6802a58b312SMartin Matuska	bic	x28,x21,x27
6812a58b312SMartin Matuska	ror	x0,x23,#28
6822a58b312SMartin Matuska	add	x22,x22,x8			// h+=X[i]
6832a58b312SMartin Matuska	eor	x16,x16,x27,ror#18
6842a58b312SMartin Matuska	eor	x15,x15,x10,ror#8
6852a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
6862a58b312SMartin Matuska	eor	x28,x23,x24			// a^b, b^c in next round
6872a58b312SMartin Matuska	eor	x16,x16,x27,ror#41	// Sigma1(e)
6882a58b312SMartin Matuska	eor	x0,x0,x23,ror#34
6892a58b312SMartin Matuska	add	x22,x22,x17			// h+=Ch(e,f,g)
6902a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
6912a58b312SMartin Matuska	eor	x14,x14,x7,ror#61
6922a58b312SMartin Matuska	eor	x15,x15,x10,lsr#7	// sigma0(X[i+1])
6932a58b312SMartin Matuska	add	x22,x22,x16			// h+=Sigma1(e)
6942a58b312SMartin Matuska	eor	x19,x19,x24			// Maj(a,b,c)
6952a58b312SMartin Matuska	eor	x17,x0,x23,ror#39	// Sigma0(a)
6962a58b312SMartin Matuska	eor	x14,x14,x7,lsr#6	// sigma1(X[i+14])
6972a58b312SMartin Matuska	add	x9,x9,x2
6982a58b312SMartin Matuska	add	x26,x26,x22			// d+=h
6992a58b312SMartin Matuska	add	x22,x22,x19			// h+=Maj(a,b,c)
7002a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
7012a58b312SMartin Matuska	add	x9,x9,x15
7022a58b312SMartin Matuska	add	x22,x22,x17			// h+=Sigma0(a)
7032a58b312SMartin Matuska	add	x9,x9,x14
7042a58b312SMartin Matuska	ldr	x14,[sp,#24]
7052a58b312SMartin Matuska	str	x1,[sp,#16]
7062a58b312SMartin Matuska	ror	x16,x26,#14
7072a58b312SMartin Matuska	add	x21,x21,x19			// h+=K[i]
7082a58b312SMartin Matuska	ror	x0,x11,#1
7092a58b312SMartin Matuska	and	x17,x27,x26
7102a58b312SMartin Matuska	ror	x15,x8,#19
7112a58b312SMartin Matuska	bic	x19,x20,x26
7122a58b312SMartin Matuska	ror	x1,x22,#28
7132a58b312SMartin Matuska	add	x21,x21,x9			// h+=X[i]
7142a58b312SMartin Matuska	eor	x16,x16,x26,ror#18
7152a58b312SMartin Matuska	eor	x0,x0,x11,ror#8
7162a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
7172a58b312SMartin Matuska	eor	x19,x22,x23			// a^b, b^c in next round
7182a58b312SMartin Matuska	eor	x16,x16,x26,ror#41	// Sigma1(e)
7192a58b312SMartin Matuska	eor	x1,x1,x22,ror#34
7202a58b312SMartin Matuska	add	x21,x21,x17			// h+=Ch(e,f,g)
7212a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
7222a58b312SMartin Matuska	eor	x15,x15,x8,ror#61
7232a58b312SMartin Matuska	eor	x0,x0,x11,lsr#7	// sigma0(X[i+1])
7242a58b312SMartin Matuska	add	x21,x21,x16			// h+=Sigma1(e)
7252a58b312SMartin Matuska	eor	x28,x28,x23			// Maj(a,b,c)
7262a58b312SMartin Matuska	eor	x17,x1,x22,ror#39	// Sigma0(a)
7272a58b312SMartin Matuska	eor	x15,x15,x8,lsr#6	// sigma1(X[i+14])
7282a58b312SMartin Matuska	add	x10,x10,x3
7292a58b312SMartin Matuska	add	x25,x25,x21			// d+=h
7302a58b312SMartin Matuska	add	x21,x21,x28			// h+=Maj(a,b,c)
7312a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
7322a58b312SMartin Matuska	add	x10,x10,x0
7332a58b312SMartin Matuska	add	x21,x21,x17			// h+=Sigma0(a)
7342a58b312SMartin Matuska	add	x10,x10,x15
7352a58b312SMartin Matuska	ldr	x15,[sp,#0]
7362a58b312SMartin Matuska	str	x2,[sp,#24]
7372a58b312SMartin Matuska	ror	x16,x25,#14
7382a58b312SMartin Matuska	add	x20,x20,x28			// h+=K[i]
7392a58b312SMartin Matuska	ror	x1,x12,#1
7402a58b312SMartin Matuska	and	x17,x26,x25
7412a58b312SMartin Matuska	ror	x0,x9,#19
7422a58b312SMartin Matuska	bic	x28,x27,x25
7432a58b312SMartin Matuska	ror	x2,x21,#28
7442a58b312SMartin Matuska	add	x20,x20,x10			// h+=X[i]
7452a58b312SMartin Matuska	eor	x16,x16,x25,ror#18
7462a58b312SMartin Matuska	eor	x1,x1,x12,ror#8
7472a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
7482a58b312SMartin Matuska	eor	x28,x21,x22			// a^b, b^c in next round
7492a58b312SMartin Matuska	eor	x16,x16,x25,ror#41	// Sigma1(e)
7502a58b312SMartin Matuska	eor	x2,x2,x21,ror#34
7512a58b312SMartin Matuska	add	x20,x20,x17			// h+=Ch(e,f,g)
7522a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
7532a58b312SMartin Matuska	eor	x0,x0,x9,ror#61
7542a58b312SMartin Matuska	eor	x1,x1,x12,lsr#7	// sigma0(X[i+1])
7552a58b312SMartin Matuska	add	x20,x20,x16			// h+=Sigma1(e)
7562a58b312SMartin Matuska	eor	x19,x19,x22			// Maj(a,b,c)
7572a58b312SMartin Matuska	eor	x17,x2,x21,ror#39	// Sigma0(a)
7582a58b312SMartin Matuska	eor	x0,x0,x9,lsr#6	// sigma1(X[i+14])
7592a58b312SMartin Matuska	add	x11,x11,x4
7602a58b312SMartin Matuska	add	x24,x24,x20			// d+=h
7612a58b312SMartin Matuska	add	x20,x20,x19			// h+=Maj(a,b,c)
7622a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
7632a58b312SMartin Matuska	add	x11,x11,x1
7642a58b312SMartin Matuska	add	x20,x20,x17			// h+=Sigma0(a)
7652a58b312SMartin Matuska	add	x11,x11,x0
7662a58b312SMartin Matuska	ldr	x0,[sp,#8]
7672a58b312SMartin Matuska	str	x3,[sp,#0]
7682a58b312SMartin Matuska	ror	x16,x24,#14
7692a58b312SMartin Matuska	add	x27,x27,x19			// h+=K[i]
7702a58b312SMartin Matuska	ror	x2,x13,#1
7712a58b312SMartin Matuska	and	x17,x25,x24
7722a58b312SMartin Matuska	ror	x1,x10,#19
7732a58b312SMartin Matuska	bic	x19,x26,x24
7742a58b312SMartin Matuska	ror	x3,x20,#28
7752a58b312SMartin Matuska	add	x27,x27,x11			// h+=X[i]
7762a58b312SMartin Matuska	eor	x16,x16,x24,ror#18
7772a58b312SMartin Matuska	eor	x2,x2,x13,ror#8
7782a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
7792a58b312SMartin Matuska	eor	x19,x20,x21			// a^b, b^c in next round
7802a58b312SMartin Matuska	eor	x16,x16,x24,ror#41	// Sigma1(e)
7812a58b312SMartin Matuska	eor	x3,x3,x20,ror#34
7822a58b312SMartin Matuska	add	x27,x27,x17			// h+=Ch(e,f,g)
7832a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
7842a58b312SMartin Matuska	eor	x1,x1,x10,ror#61
7852a58b312SMartin Matuska	eor	x2,x2,x13,lsr#7	// sigma0(X[i+1])
7862a58b312SMartin Matuska	add	x27,x27,x16			// h+=Sigma1(e)
7872a58b312SMartin Matuska	eor	x28,x28,x21			// Maj(a,b,c)
7882a58b312SMartin Matuska	eor	x17,x3,x20,ror#39	// Sigma0(a)
7892a58b312SMartin Matuska	eor	x1,x1,x10,lsr#6	// sigma1(X[i+14])
7902a58b312SMartin Matuska	add	x12,x12,x5
7912a58b312SMartin Matuska	add	x23,x23,x27			// d+=h
7922a58b312SMartin Matuska	add	x27,x27,x28			// h+=Maj(a,b,c)
7932a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
7942a58b312SMartin Matuska	add	x12,x12,x2
7952a58b312SMartin Matuska	add	x27,x27,x17			// h+=Sigma0(a)
7962a58b312SMartin Matuska	add	x12,x12,x1
7972a58b312SMartin Matuska	ldr	x1,[sp,#16]
7982a58b312SMartin Matuska	str	x4,[sp,#8]
7992a58b312SMartin Matuska	ror	x16,x23,#14
8002a58b312SMartin Matuska	add	x26,x26,x28			// h+=K[i]
8012a58b312SMartin Matuska	ror	x3,x14,#1
8022a58b312SMartin Matuska	and	x17,x24,x23
8032a58b312SMartin Matuska	ror	x2,x11,#19
8042a58b312SMartin Matuska	bic	x28,x25,x23
8052a58b312SMartin Matuska	ror	x4,x27,#28
8062a58b312SMartin Matuska	add	x26,x26,x12			// h+=X[i]
8072a58b312SMartin Matuska	eor	x16,x16,x23,ror#18
8082a58b312SMartin Matuska	eor	x3,x3,x14,ror#8
8092a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
8102a58b312SMartin Matuska	eor	x28,x27,x20			// a^b, b^c in next round
8112a58b312SMartin Matuska	eor	x16,x16,x23,ror#41	// Sigma1(e)
8122a58b312SMartin Matuska	eor	x4,x4,x27,ror#34
8132a58b312SMartin Matuska	add	x26,x26,x17			// h+=Ch(e,f,g)
8142a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
8152a58b312SMartin Matuska	eor	x2,x2,x11,ror#61
8162a58b312SMartin Matuska	eor	x3,x3,x14,lsr#7	// sigma0(X[i+1])
8172a58b312SMartin Matuska	add	x26,x26,x16			// h+=Sigma1(e)
8182a58b312SMartin Matuska	eor	x19,x19,x20			// Maj(a,b,c)
8192a58b312SMartin Matuska	eor	x17,x4,x27,ror#39	// Sigma0(a)
8202a58b312SMartin Matuska	eor	x2,x2,x11,lsr#6	// sigma1(X[i+14])
8212a58b312SMartin Matuska	add	x13,x13,x6
8222a58b312SMartin Matuska	add	x22,x22,x26			// d+=h
8232a58b312SMartin Matuska	add	x26,x26,x19			// h+=Maj(a,b,c)
8242a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
8252a58b312SMartin Matuska	add	x13,x13,x3
8262a58b312SMartin Matuska	add	x26,x26,x17			// h+=Sigma0(a)
8272a58b312SMartin Matuska	add	x13,x13,x2
8282a58b312SMartin Matuska	ldr	x2,[sp,#24]
8292a58b312SMartin Matuska	str	x5,[sp,#16]
8302a58b312SMartin Matuska	ror	x16,x22,#14
8312a58b312SMartin Matuska	add	x25,x25,x19			// h+=K[i]
8322a58b312SMartin Matuska	ror	x4,x15,#1
8332a58b312SMartin Matuska	and	x17,x23,x22
8342a58b312SMartin Matuska	ror	x3,x12,#19
8352a58b312SMartin Matuska	bic	x19,x24,x22
8362a58b312SMartin Matuska	ror	x5,x26,#28
8372a58b312SMartin Matuska	add	x25,x25,x13			// h+=X[i]
8382a58b312SMartin Matuska	eor	x16,x16,x22,ror#18
8392a58b312SMartin Matuska	eor	x4,x4,x15,ror#8
8402a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
8412a58b312SMartin Matuska	eor	x19,x26,x27			// a^b, b^c in next round
8422a58b312SMartin Matuska	eor	x16,x16,x22,ror#41	// Sigma1(e)
8432a58b312SMartin Matuska	eor	x5,x5,x26,ror#34
8442a58b312SMartin Matuska	add	x25,x25,x17			// h+=Ch(e,f,g)
8452a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
8462a58b312SMartin Matuska	eor	x3,x3,x12,ror#61
8472a58b312SMartin Matuska	eor	x4,x4,x15,lsr#7	// sigma0(X[i+1])
8482a58b312SMartin Matuska	add	x25,x25,x16			// h+=Sigma1(e)
8492a58b312SMartin Matuska	eor	x28,x28,x27			// Maj(a,b,c)
8502a58b312SMartin Matuska	eor	x17,x5,x26,ror#39	// Sigma0(a)
8512a58b312SMartin Matuska	eor	x3,x3,x12,lsr#6	// sigma1(X[i+14])
8522a58b312SMartin Matuska	add	x14,x14,x7
8532a58b312SMartin Matuska	add	x21,x21,x25			// d+=h
8542a58b312SMartin Matuska	add	x25,x25,x28			// h+=Maj(a,b,c)
8552a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
8562a58b312SMartin Matuska	add	x14,x14,x4
8572a58b312SMartin Matuska	add	x25,x25,x17			// h+=Sigma0(a)
8582a58b312SMartin Matuska	add	x14,x14,x3
8592a58b312SMartin Matuska	ldr	x3,[sp,#0]
8602a58b312SMartin Matuska	str	x6,[sp,#24]
8612a58b312SMartin Matuska	ror	x16,x21,#14
8622a58b312SMartin Matuska	add	x24,x24,x28			// h+=K[i]
8632a58b312SMartin Matuska	ror	x5,x0,#1
8642a58b312SMartin Matuska	and	x17,x22,x21
8652a58b312SMartin Matuska	ror	x4,x13,#19
8662a58b312SMartin Matuska	bic	x28,x23,x21
8672a58b312SMartin Matuska	ror	x6,x25,#28
8682a58b312SMartin Matuska	add	x24,x24,x14			// h+=X[i]
8692a58b312SMartin Matuska	eor	x16,x16,x21,ror#18
8702a58b312SMartin Matuska	eor	x5,x5,x0,ror#8
8712a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
8722a58b312SMartin Matuska	eor	x28,x25,x26			// a^b, b^c in next round
8732a58b312SMartin Matuska	eor	x16,x16,x21,ror#41	// Sigma1(e)
8742a58b312SMartin Matuska	eor	x6,x6,x25,ror#34
8752a58b312SMartin Matuska	add	x24,x24,x17			// h+=Ch(e,f,g)
8762a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
8772a58b312SMartin Matuska	eor	x4,x4,x13,ror#61
8782a58b312SMartin Matuska	eor	x5,x5,x0,lsr#7	// sigma0(X[i+1])
8792a58b312SMartin Matuska	add	x24,x24,x16			// h+=Sigma1(e)
8802a58b312SMartin Matuska	eor	x19,x19,x26			// Maj(a,b,c)
8812a58b312SMartin Matuska	eor	x17,x6,x25,ror#39	// Sigma0(a)
8822a58b312SMartin Matuska	eor	x4,x4,x13,lsr#6	// sigma1(X[i+14])
8832a58b312SMartin Matuska	add	x15,x15,x8
8842a58b312SMartin Matuska	add	x20,x20,x24			// d+=h
8852a58b312SMartin Matuska	add	x24,x24,x19			// h+=Maj(a,b,c)
8862a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
8872a58b312SMartin Matuska	add	x15,x15,x5
8882a58b312SMartin Matuska	add	x24,x24,x17			// h+=Sigma0(a)
8892a58b312SMartin Matuska	add	x15,x15,x4
8902a58b312SMartin Matuska	ldr	x4,[sp,#8]
8912a58b312SMartin Matuska	str	x7,[sp,#0]
8922a58b312SMartin Matuska	ror	x16,x20,#14
8932a58b312SMartin Matuska	add	x23,x23,x19			// h+=K[i]
8942a58b312SMartin Matuska	ror	x6,x1,#1
8952a58b312SMartin Matuska	and	x17,x21,x20
8962a58b312SMartin Matuska	ror	x5,x14,#19
8972a58b312SMartin Matuska	bic	x19,x22,x20
8982a58b312SMartin Matuska	ror	x7,x24,#28
8992a58b312SMartin Matuska	add	x23,x23,x15			// h+=X[i]
9002a58b312SMartin Matuska	eor	x16,x16,x20,ror#18
9012a58b312SMartin Matuska	eor	x6,x6,x1,ror#8
9022a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
9032a58b312SMartin Matuska	eor	x19,x24,x25			// a^b, b^c in next round
9042a58b312SMartin Matuska	eor	x16,x16,x20,ror#41	// Sigma1(e)
9052a58b312SMartin Matuska	eor	x7,x7,x24,ror#34
9062a58b312SMartin Matuska	add	x23,x23,x17			// h+=Ch(e,f,g)
9072a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
9082a58b312SMartin Matuska	eor	x5,x5,x14,ror#61
9092a58b312SMartin Matuska	eor	x6,x6,x1,lsr#7	// sigma0(X[i+1])
9102a58b312SMartin Matuska	add	x23,x23,x16			// h+=Sigma1(e)
9112a58b312SMartin Matuska	eor	x28,x28,x25			// Maj(a,b,c)
9122a58b312SMartin Matuska	eor	x17,x7,x24,ror#39	// Sigma0(a)
9132a58b312SMartin Matuska	eor	x5,x5,x14,lsr#6	// sigma1(X[i+14])
9142a58b312SMartin Matuska	add	x0,x0,x9
9152a58b312SMartin Matuska	add	x27,x27,x23			// d+=h
9162a58b312SMartin Matuska	add	x23,x23,x28			// h+=Maj(a,b,c)
9172a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
9182a58b312SMartin Matuska	add	x0,x0,x6
9192a58b312SMartin Matuska	add	x23,x23,x17			// h+=Sigma0(a)
9202a58b312SMartin Matuska	add	x0,x0,x5
9212a58b312SMartin Matuska	ldr	x5,[sp,#16]
9222a58b312SMartin Matuska	str	x8,[sp,#8]
9232a58b312SMartin Matuska	ror	x16,x27,#14
9242a58b312SMartin Matuska	add	x22,x22,x28			// h+=K[i]
9252a58b312SMartin Matuska	ror	x7,x2,#1
9262a58b312SMartin Matuska	and	x17,x20,x27
9272a58b312SMartin Matuska	ror	x6,x15,#19
9282a58b312SMartin Matuska	bic	x28,x21,x27
9292a58b312SMartin Matuska	ror	x8,x23,#28
9302a58b312SMartin Matuska	add	x22,x22,x0			// h+=X[i]
9312a58b312SMartin Matuska	eor	x16,x16,x27,ror#18
9322a58b312SMartin Matuska	eor	x7,x7,x2,ror#8
9332a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
9342a58b312SMartin Matuska	eor	x28,x23,x24			// a^b, b^c in next round
9352a58b312SMartin Matuska	eor	x16,x16,x27,ror#41	// Sigma1(e)
9362a58b312SMartin Matuska	eor	x8,x8,x23,ror#34
9372a58b312SMartin Matuska	add	x22,x22,x17			// h+=Ch(e,f,g)
9382a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
9392a58b312SMartin Matuska	eor	x6,x6,x15,ror#61
9402a58b312SMartin Matuska	eor	x7,x7,x2,lsr#7	// sigma0(X[i+1])
9412a58b312SMartin Matuska	add	x22,x22,x16			// h+=Sigma1(e)
9422a58b312SMartin Matuska	eor	x19,x19,x24			// Maj(a,b,c)
9432a58b312SMartin Matuska	eor	x17,x8,x23,ror#39	// Sigma0(a)
9442a58b312SMartin Matuska	eor	x6,x6,x15,lsr#6	// sigma1(X[i+14])
9452a58b312SMartin Matuska	add	x1,x1,x10
9462a58b312SMartin Matuska	add	x26,x26,x22			// d+=h
9472a58b312SMartin Matuska	add	x22,x22,x19			// h+=Maj(a,b,c)
9482a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
9492a58b312SMartin Matuska	add	x1,x1,x7
9502a58b312SMartin Matuska	add	x22,x22,x17			// h+=Sigma0(a)
9512a58b312SMartin Matuska	add	x1,x1,x6
9522a58b312SMartin Matuska	ldr	x6,[sp,#24]
9532a58b312SMartin Matuska	str	x9,[sp,#16]
9542a58b312SMartin Matuska	ror	x16,x26,#14
9552a58b312SMartin Matuska	add	x21,x21,x19			// h+=K[i]
9562a58b312SMartin Matuska	ror	x8,x3,#1
9572a58b312SMartin Matuska	and	x17,x27,x26
9582a58b312SMartin Matuska	ror	x7,x0,#19
9592a58b312SMartin Matuska	bic	x19,x20,x26
9602a58b312SMartin Matuska	ror	x9,x22,#28
9612a58b312SMartin Matuska	add	x21,x21,x1			// h+=X[i]
9622a58b312SMartin Matuska	eor	x16,x16,x26,ror#18
9632a58b312SMartin Matuska	eor	x8,x8,x3,ror#8
9642a58b312SMartin Matuska	orr	x17,x17,x19			// Ch(e,f,g)
9652a58b312SMartin Matuska	eor	x19,x22,x23			// a^b, b^c in next round
9662a58b312SMartin Matuska	eor	x16,x16,x26,ror#41	// Sigma1(e)
9672a58b312SMartin Matuska	eor	x9,x9,x22,ror#34
9682a58b312SMartin Matuska	add	x21,x21,x17			// h+=Ch(e,f,g)
9692a58b312SMartin Matuska	and	x28,x28,x19			// (b^c)&=(a^b)
9702a58b312SMartin Matuska	eor	x7,x7,x0,ror#61
9712a58b312SMartin Matuska	eor	x8,x8,x3,lsr#7	// sigma0(X[i+1])
9722a58b312SMartin Matuska	add	x21,x21,x16			// h+=Sigma1(e)
9732a58b312SMartin Matuska	eor	x28,x28,x23			// Maj(a,b,c)
9742a58b312SMartin Matuska	eor	x17,x9,x22,ror#39	// Sigma0(a)
9752a58b312SMartin Matuska	eor	x7,x7,x0,lsr#6	// sigma1(X[i+14])
9762a58b312SMartin Matuska	add	x2,x2,x11
9772a58b312SMartin Matuska	add	x25,x25,x21			// d+=h
9782a58b312SMartin Matuska	add	x21,x21,x28			// h+=Maj(a,b,c)
9792a58b312SMartin Matuska	ldr	x28,[x30],#8		// *K++, x19 in next round
9802a58b312SMartin Matuska	add	x2,x2,x8
9812a58b312SMartin Matuska	add	x21,x21,x17			// h+=Sigma0(a)
9822a58b312SMartin Matuska	add	x2,x2,x7
9832a58b312SMartin Matuska	ldr	x7,[sp,#0]
9842a58b312SMartin Matuska	str	x10,[sp,#24]
9852a58b312SMartin Matuska	ror	x16,x25,#14
9862a58b312SMartin Matuska	add	x20,x20,x28			// h+=K[i]
9872a58b312SMartin Matuska	ror	x9,x4,#1
9882a58b312SMartin Matuska	and	x17,x26,x25
9892a58b312SMartin Matuska	ror	x8,x1,#19
9902a58b312SMartin Matuska	bic	x28,x27,x25
9912a58b312SMartin Matuska	ror	x10,x21,#28
9922a58b312SMartin Matuska	add	x20,x20,x2			// h+=X[i]
9932a58b312SMartin Matuska	eor	x16,x16,x25,ror#18
9942a58b312SMartin Matuska	eor	x9,x9,x4,ror#8
9952a58b312SMartin Matuska	orr	x17,x17,x28			// Ch(e,f,g)
9962a58b312SMartin Matuska	eor	x28,x21,x22			// a^b, b^c in next round
9972a58b312SMartin Matuska	eor	x16,x16,x25,ror#41	// Sigma1(e)
9982a58b312SMartin Matuska	eor	x10,x10,x21,ror#34
9992a58b312SMartin Matuska	add	x20,x20,x17			// h+=Ch(e,f,g)
10002a58b312SMartin Matuska	and	x19,x19,x28			// (b^c)&=(a^b)
10012a58b312SMartin Matuska	eor	x8,x8,x1,ror#61
10022a58b312SMartin Matuska	eor	x9,x9,x4,lsr#7	// sigma0(X[i+1])
10032a58b312SMartin Matuska	add	x20,x20,x16			// h+=Sigma1(e)
10042a58b312SMartin Matuska	eor	x19,x19,x22			// Maj(a,b,c)
10052a58b312SMartin Matuska	eor	x17,x10,x21,ror#39	// Sigma0(a)
10062a58b312SMartin Matuska	eor	x8,x8,x1,lsr#6	// sigma1(X[i+14])
10072a58b312SMartin Matuska	add	x3,x3,x12
10082a58b312SMartin Matuska	add	x24,x24,x20			// d+=h
10092a58b312SMartin Matuska	add	x20,x20,x19			// h+=Maj(a,b,c)
10102a58b312SMartin Matuska	ldr	x19,[x30],#8		// *K++, x28 in next round
10112a58b312SMartin Matuska	add	x3,x3,x9
10122a58b312SMartin Matuska	add	x20,x20,x17			// h+=Sigma0(a)
10132a58b312SMartin Matuska	add	x3,x3,x8
10142a58b312SMartin Matuska	cbnz	x19,.Loop_16_xx
10152a58b312SMartin Matuska
10162a58b312SMartin Matuska	ldp	x0,x2,[x29,#96]
10172a58b312SMartin Matuska	ldr	x1,[x29,#112]
10182a58b312SMartin Matuska	sub	x30,x30,#648		// rewind
10192a58b312SMartin Matuska
10202a58b312SMartin Matuska	ldp	x3,x4,[x0]
10212a58b312SMartin Matuska	ldp	x5,x6,[x0,#2*8]
10222a58b312SMartin Matuska	add	x1,x1,#14*8			// advance input pointer
10232a58b312SMartin Matuska	ldp	x7,x8,[x0,#4*8]
10242a58b312SMartin Matuska	add	x20,x20,x3
10252a58b312SMartin Matuska	ldp	x9,x10,[x0,#6*8]
10262a58b312SMartin Matuska	add	x21,x21,x4
10272a58b312SMartin Matuska	add	x22,x22,x5
10282a58b312SMartin Matuska	add	x23,x23,x6
10292a58b312SMartin Matuska	stp	x20,x21,[x0]
10302a58b312SMartin Matuska	add	x24,x24,x7
10312a58b312SMartin Matuska	add	x25,x25,x8
10322a58b312SMartin Matuska	stp	x22,x23,[x0,#2*8]
10332a58b312SMartin Matuska	add	x26,x26,x9
10342a58b312SMartin Matuska	add	x27,x27,x10
10352a58b312SMartin Matuska	cmp	x1,x2
10362a58b312SMartin Matuska	stp	x24,x25,[x0,#4*8]
10372a58b312SMartin Matuska	stp	x26,x27,[x0,#6*8]
10382a58b312SMartin Matuska	b.ne	.Loop
10392a58b312SMartin Matuska
10402a58b312SMartin Matuska	ldp	x19,x20,[x29,#16]
10412a58b312SMartin Matuska	add	sp,sp,#4*8
10422a58b312SMartin Matuska	ldp	x21,x22,[x29,#32]
10432a58b312SMartin Matuska	ldp	x23,x24,[x29,#48]
10442a58b312SMartin Matuska	ldp	x25,x26,[x29,#64]
10452a58b312SMartin Matuska	ldp	x27,x28,[x29,#80]
10462a58b312SMartin Matuska	ldp	x29,x30,[sp],#128
10472a58b312SMartin Matuska	ret
10482a58b312SMartin Matuska.size	zfs_sha512_block_armv7,.-zfs_sha512_block_armv7
10492a58b312SMartin Matuska
10502a58b312SMartin Matuska
10512a58b312SMartin Matuska.globl	zfs_sha512_block_armv8
10522a58b312SMartin Matuska.type	zfs_sha512_block_armv8,%function
10532a58b312SMartin Matuska.align	6
10542a58b312SMartin Matuskazfs_sha512_block_armv8:
1055f7f4bd06SMartin Matuska	hint		#34				// bti c
10562a58b312SMartin Matuska.Lv8_entry:
10572a58b312SMartin Matuska	// Armv8.3-A PAuth: even though x30 is pushed to stack it is not popped later
10582a58b312SMartin Matuska	stp		x29,x30,[sp,#-16]!
10592a58b312SMartin Matuska	add		x29,sp,#0
10602a58b312SMartin Matuska
10612a58b312SMartin Matuska	ld1		{v16.16b-v19.16b},[x1],#64	// load input
10622a58b312SMartin Matuska	ld1		{v20.16b-v23.16b},[x1],#64
10632a58b312SMartin Matuska
10642a58b312SMartin Matuska	ld1		{v0.2d-v3.2d},[x0]		// load context
10652a58b312SMartin Matuska	adr		x3,.LK512
10662a58b312SMartin Matuska
10672a58b312SMartin Matuska	rev64		v16.16b,v16.16b
10682a58b312SMartin Matuska	rev64		v17.16b,v17.16b
10692a58b312SMartin Matuska	rev64		v18.16b,v18.16b
10702a58b312SMartin Matuska	rev64		v19.16b,v19.16b
10712a58b312SMartin Matuska	rev64		v20.16b,v20.16b
10722a58b312SMartin Matuska	rev64		v21.16b,v21.16b
10732a58b312SMartin Matuska	rev64		v22.16b,v22.16b
10742a58b312SMartin Matuska	rev64		v23.16b,v23.16b
10752a58b312SMartin Matuska	b		.Loop_hw
10762a58b312SMartin Matuska
10772a58b312SMartin Matuska.align	4
10782a58b312SMartin Matuska.Loop_hw:
10792a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
10802a58b312SMartin Matuska	subs		x2,x2,#1
10812a58b312SMartin Matuska	sub		x4,x1,#128
10822a58b312SMartin Matuska	orr		v26.16b,v0.16b,v0.16b			// offload
10832a58b312SMartin Matuska	orr		v27.16b,v1.16b,v1.16b
10842a58b312SMartin Matuska	orr		v28.16b,v2.16b,v2.16b
10852a58b312SMartin Matuska	orr		v29.16b,v3.16b,v3.16b
10862a58b312SMartin Matuska	csel		x1,x1,x4,ne			// conditional rewind
10872a58b312SMartin Matuska	add		v24.2d,v24.2d,v16.2d
10882a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
10892a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
10902a58b312SMartin Matuska	ext		v5.16b,v2.16b,v3.16b,#8
10912a58b312SMartin Matuska	ext		v6.16b,v1.16b,v2.16b,#8
10922a58b312SMartin Matuska	add		v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
10932a58b312SMartin Matuska	 .inst	0xcec08230	//sha512su0 v16.16b,v17.16b
10942a58b312SMartin Matuska	 ext		v7.16b,v20.16b,v21.16b,#8
10952a58b312SMartin Matuska	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
10962a58b312SMartin Matuska	 .inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
10972a58b312SMartin Matuska	add		v4.2d,v1.2d,v3.2d		// "D + T1"
10982a58b312SMartin Matuska	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
10992a58b312SMartin Matuska	add		v25.2d,v25.2d,v17.2d
11002a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
11012a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
11022a58b312SMartin Matuska	ext		v5.16b,v4.16b,v2.16b,#8
11032a58b312SMartin Matuska	ext		v6.16b,v0.16b,v4.16b,#8
11042a58b312SMartin Matuska	add		v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
11052a58b312SMartin Matuska	 .inst	0xcec08251	//sha512su0 v17.16b,v18.16b
11062a58b312SMartin Matuska	 ext		v7.16b,v21.16b,v22.16b,#8
11072a58b312SMartin Matuska	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
11082a58b312SMartin Matuska	 .inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
11092a58b312SMartin Matuska	add		v1.2d,v0.2d,v2.2d		// "D + T1"
11102a58b312SMartin Matuska	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
11112a58b312SMartin Matuska	add		v24.2d,v24.2d,v18.2d
11122a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
11132a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
11142a58b312SMartin Matuska	ext		v5.16b,v1.16b,v4.16b,#8
11152a58b312SMartin Matuska	ext		v6.16b,v3.16b,v1.16b,#8
11162a58b312SMartin Matuska	add		v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
11172a58b312SMartin Matuska	 .inst	0xcec08272	//sha512su0 v18.16b,v19.16b
11182a58b312SMartin Matuska	 ext		v7.16b,v22.16b,v23.16b,#8
11192a58b312SMartin Matuska	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
11202a58b312SMartin Matuska	 .inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
11212a58b312SMartin Matuska	add		v0.2d,v3.2d,v4.2d		// "D + T1"
11222a58b312SMartin Matuska	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
11232a58b312SMartin Matuska	add		v25.2d,v25.2d,v19.2d
11242a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
11252a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
11262a58b312SMartin Matuska	ext		v5.16b,v0.16b,v1.16b,#8
11272a58b312SMartin Matuska	ext		v6.16b,v2.16b,v0.16b,#8
11282a58b312SMartin Matuska	add		v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
11292a58b312SMartin Matuska	 .inst	0xcec08293	//sha512su0 v19.16b,v20.16b
11302a58b312SMartin Matuska	 ext		v7.16b,v23.16b,v16.16b,#8
11312a58b312SMartin Matuska	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
11322a58b312SMartin Matuska	 .inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
11332a58b312SMartin Matuska	add		v3.2d,v2.2d,v1.2d		// "D + T1"
11342a58b312SMartin Matuska	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
11352a58b312SMartin Matuska	add		v24.2d,v24.2d,v20.2d
11362a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
11372a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
11382a58b312SMartin Matuska	ext		v5.16b,v3.16b,v0.16b,#8
11392a58b312SMartin Matuska	ext		v6.16b,v4.16b,v3.16b,#8
11402a58b312SMartin Matuska	add		v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
11412a58b312SMartin Matuska	 .inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
11422a58b312SMartin Matuska	 ext		v7.16b,v16.16b,v17.16b,#8
11432a58b312SMartin Matuska	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
11442a58b312SMartin Matuska	 .inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
11452a58b312SMartin Matuska	add		v2.2d,v4.2d,v0.2d		// "D + T1"
11462a58b312SMartin Matuska	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
11472a58b312SMartin Matuska	add		v25.2d,v25.2d,v21.2d
11482a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
11492a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
11502a58b312SMartin Matuska	ext		v5.16b,v2.16b,v3.16b,#8
11512a58b312SMartin Matuska	ext		v6.16b,v1.16b,v2.16b,#8
11522a58b312SMartin Matuska	add		v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
11532a58b312SMartin Matuska	 .inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
11542a58b312SMartin Matuska	 ext		v7.16b,v17.16b,v18.16b,#8
11552a58b312SMartin Matuska	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
11562a58b312SMartin Matuska	 .inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
11572a58b312SMartin Matuska	add		v4.2d,v1.2d,v3.2d		// "D + T1"
11582a58b312SMartin Matuska	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
11592a58b312SMartin Matuska	add		v24.2d,v24.2d,v22.2d
11602a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
11612a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
11622a58b312SMartin Matuska	ext		v5.16b,v4.16b,v2.16b,#8
11632a58b312SMartin Matuska	ext		v6.16b,v0.16b,v4.16b,#8
11642a58b312SMartin Matuska	add		v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
11652a58b312SMartin Matuska	 .inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
11662a58b312SMartin Matuska	 ext		v7.16b,v18.16b,v19.16b,#8
11672a58b312SMartin Matuska	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
11682a58b312SMartin Matuska	 .inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
11692a58b312SMartin Matuska	add		v1.2d,v0.2d,v2.2d		// "D + T1"
11702a58b312SMartin Matuska	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
11712a58b312SMartin Matuska	add		v25.2d,v25.2d,v23.2d
11722a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
11732a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
11742a58b312SMartin Matuska	ext		v5.16b,v1.16b,v4.16b,#8
11752a58b312SMartin Matuska	ext		v6.16b,v3.16b,v1.16b,#8
11762a58b312SMartin Matuska	add		v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
11772a58b312SMartin Matuska	 .inst	0xcec08217	//sha512su0 v23.16b,v16.16b
11782a58b312SMartin Matuska	 ext		v7.16b,v19.16b,v20.16b,#8
11792a58b312SMartin Matuska	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
11802a58b312SMartin Matuska	 .inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
11812a58b312SMartin Matuska	add		v0.2d,v3.2d,v4.2d		// "D + T1"
11822a58b312SMartin Matuska	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
11832a58b312SMartin Matuska	add		v24.2d,v24.2d,v16.2d
11842a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
11852a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
11862a58b312SMartin Matuska	ext		v5.16b,v0.16b,v1.16b,#8
11872a58b312SMartin Matuska	ext		v6.16b,v2.16b,v0.16b,#8
11882a58b312SMartin Matuska	add		v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
11892a58b312SMartin Matuska	 .inst	0xcec08230	//sha512su0 v16.16b,v17.16b
11902a58b312SMartin Matuska	 ext		v7.16b,v20.16b,v21.16b,#8
11912a58b312SMartin Matuska	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
11922a58b312SMartin Matuska	 .inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
11932a58b312SMartin Matuska	add		v3.2d,v2.2d,v1.2d		// "D + T1"
11942a58b312SMartin Matuska	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
11952a58b312SMartin Matuska	add		v25.2d,v25.2d,v17.2d
11962a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
11972a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
11982a58b312SMartin Matuska	ext		v5.16b,v3.16b,v0.16b,#8
11992a58b312SMartin Matuska	ext		v6.16b,v4.16b,v3.16b,#8
12002a58b312SMartin Matuska	add		v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
12012a58b312SMartin Matuska	 .inst	0xcec08251	//sha512su0 v17.16b,v18.16b
12022a58b312SMartin Matuska	 ext		v7.16b,v21.16b,v22.16b,#8
12032a58b312SMartin Matuska	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
12042a58b312SMartin Matuska	 .inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
12052a58b312SMartin Matuska	add		v2.2d,v4.2d,v0.2d		// "D + T1"
12062a58b312SMartin Matuska	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
12072a58b312SMartin Matuska	add		v24.2d,v24.2d,v18.2d
12082a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
12092a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
12102a58b312SMartin Matuska	ext		v5.16b,v2.16b,v3.16b,#8
12112a58b312SMartin Matuska	ext		v6.16b,v1.16b,v2.16b,#8
12122a58b312SMartin Matuska	add		v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
12132a58b312SMartin Matuska	 .inst	0xcec08272	//sha512su0 v18.16b,v19.16b
12142a58b312SMartin Matuska	 ext		v7.16b,v22.16b,v23.16b,#8
12152a58b312SMartin Matuska	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
12162a58b312SMartin Matuska	 .inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
12172a58b312SMartin Matuska	add		v4.2d,v1.2d,v3.2d		// "D + T1"
12182a58b312SMartin Matuska	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
12192a58b312SMartin Matuska	add		v25.2d,v25.2d,v19.2d
12202a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
12212a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
12222a58b312SMartin Matuska	ext		v5.16b,v4.16b,v2.16b,#8
12232a58b312SMartin Matuska	ext		v6.16b,v0.16b,v4.16b,#8
12242a58b312SMartin Matuska	add		v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
12252a58b312SMartin Matuska	 .inst	0xcec08293	//sha512su0 v19.16b,v20.16b
12262a58b312SMartin Matuska	 ext		v7.16b,v23.16b,v16.16b,#8
12272a58b312SMartin Matuska	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
12282a58b312SMartin Matuska	 .inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
12292a58b312SMartin Matuska	add		v1.2d,v0.2d,v2.2d		// "D + T1"
12302a58b312SMartin Matuska	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
12312a58b312SMartin Matuska	add		v24.2d,v24.2d,v20.2d
12322a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
12332a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
12342a58b312SMartin Matuska	ext		v5.16b,v1.16b,v4.16b,#8
12352a58b312SMartin Matuska	ext		v6.16b,v3.16b,v1.16b,#8
12362a58b312SMartin Matuska	add		v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
12372a58b312SMartin Matuska	 .inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
12382a58b312SMartin Matuska	 ext		v7.16b,v16.16b,v17.16b,#8
12392a58b312SMartin Matuska	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
12402a58b312SMartin Matuska	 .inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
12412a58b312SMartin Matuska	add		v0.2d,v3.2d,v4.2d		// "D + T1"
12422a58b312SMartin Matuska	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
12432a58b312SMartin Matuska	add		v25.2d,v25.2d,v21.2d
12442a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
12452a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
12462a58b312SMartin Matuska	ext		v5.16b,v0.16b,v1.16b,#8
12472a58b312SMartin Matuska	ext		v6.16b,v2.16b,v0.16b,#8
12482a58b312SMartin Matuska	add		v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
12492a58b312SMartin Matuska	 .inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
12502a58b312SMartin Matuska	 ext		v7.16b,v17.16b,v18.16b,#8
12512a58b312SMartin Matuska	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
12522a58b312SMartin Matuska	 .inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
12532a58b312SMartin Matuska	add		v3.2d,v2.2d,v1.2d		// "D + T1"
12542a58b312SMartin Matuska	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
12552a58b312SMartin Matuska	add		v24.2d,v24.2d,v22.2d
12562a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
12572a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
12582a58b312SMartin Matuska	ext		v5.16b,v3.16b,v0.16b,#8
12592a58b312SMartin Matuska	ext		v6.16b,v4.16b,v3.16b,#8
12602a58b312SMartin Matuska	add		v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
12612a58b312SMartin Matuska	 .inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
12622a58b312SMartin Matuska	 ext		v7.16b,v18.16b,v19.16b,#8
12632a58b312SMartin Matuska	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
12642a58b312SMartin Matuska	 .inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
12652a58b312SMartin Matuska	add		v2.2d,v4.2d,v0.2d		// "D + T1"
12662a58b312SMartin Matuska	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
12672a58b312SMartin Matuska	add		v25.2d,v25.2d,v23.2d
12682a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
12692a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
12702a58b312SMartin Matuska	ext		v5.16b,v2.16b,v3.16b,#8
12712a58b312SMartin Matuska	ext		v6.16b,v1.16b,v2.16b,#8
12722a58b312SMartin Matuska	add		v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
12732a58b312SMartin Matuska	 .inst	0xcec08217	//sha512su0 v23.16b,v16.16b
12742a58b312SMartin Matuska	 ext		v7.16b,v19.16b,v20.16b,#8
12752a58b312SMartin Matuska	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
12762a58b312SMartin Matuska	 .inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
12772a58b312SMartin Matuska	add		v4.2d,v1.2d,v3.2d		// "D + T1"
12782a58b312SMartin Matuska	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
12792a58b312SMartin Matuska	add		v24.2d,v24.2d,v16.2d
12802a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
12812a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
12822a58b312SMartin Matuska	ext		v5.16b,v4.16b,v2.16b,#8
12832a58b312SMartin Matuska	ext		v6.16b,v0.16b,v4.16b,#8
12842a58b312SMartin Matuska	add		v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
12852a58b312SMartin Matuska	 .inst	0xcec08230	//sha512su0 v16.16b,v17.16b
12862a58b312SMartin Matuska	 ext		v7.16b,v20.16b,v21.16b,#8
12872a58b312SMartin Matuska	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
12882a58b312SMartin Matuska	 .inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
12892a58b312SMartin Matuska	add		v1.2d,v0.2d,v2.2d		// "D + T1"
12902a58b312SMartin Matuska	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
12912a58b312SMartin Matuska	add		v25.2d,v25.2d,v17.2d
12922a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
12932a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
12942a58b312SMartin Matuska	ext		v5.16b,v1.16b,v4.16b,#8
12952a58b312SMartin Matuska	ext		v6.16b,v3.16b,v1.16b,#8
12962a58b312SMartin Matuska	add		v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
12972a58b312SMartin Matuska	 .inst	0xcec08251	//sha512su0 v17.16b,v18.16b
12982a58b312SMartin Matuska	 ext		v7.16b,v21.16b,v22.16b,#8
12992a58b312SMartin Matuska	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
13002a58b312SMartin Matuska	 .inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
13012a58b312SMartin Matuska	add		v0.2d,v3.2d,v4.2d		// "D + T1"
13022a58b312SMartin Matuska	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
13032a58b312SMartin Matuska	add		v24.2d,v24.2d,v18.2d
13042a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
13052a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
13062a58b312SMartin Matuska	ext		v5.16b,v0.16b,v1.16b,#8
13072a58b312SMartin Matuska	ext		v6.16b,v2.16b,v0.16b,#8
13082a58b312SMartin Matuska	add		v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
13092a58b312SMartin Matuska	 .inst	0xcec08272	//sha512su0 v18.16b,v19.16b
13102a58b312SMartin Matuska	 ext		v7.16b,v22.16b,v23.16b,#8
13112a58b312SMartin Matuska	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
13122a58b312SMartin Matuska	 .inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
13132a58b312SMartin Matuska	add		v3.2d,v2.2d,v1.2d		// "D + T1"
13142a58b312SMartin Matuska	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
13152a58b312SMartin Matuska	add		v25.2d,v25.2d,v19.2d
13162a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
13172a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
13182a58b312SMartin Matuska	ext		v5.16b,v3.16b,v0.16b,#8
13192a58b312SMartin Matuska	ext		v6.16b,v4.16b,v3.16b,#8
13202a58b312SMartin Matuska	add		v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
13212a58b312SMartin Matuska	 .inst	0xcec08293	//sha512su0 v19.16b,v20.16b
13222a58b312SMartin Matuska	 ext		v7.16b,v23.16b,v16.16b,#8
13232a58b312SMartin Matuska	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
13242a58b312SMartin Matuska	 .inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
13252a58b312SMartin Matuska	add		v2.2d,v4.2d,v0.2d		// "D + T1"
13262a58b312SMartin Matuska	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
13272a58b312SMartin Matuska	add		v24.2d,v24.2d,v20.2d
13282a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
13292a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
13302a58b312SMartin Matuska	ext		v5.16b,v2.16b,v3.16b,#8
13312a58b312SMartin Matuska	ext		v6.16b,v1.16b,v2.16b,#8
13322a58b312SMartin Matuska	add		v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
13332a58b312SMartin Matuska	 .inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
13342a58b312SMartin Matuska	 ext		v7.16b,v16.16b,v17.16b,#8
13352a58b312SMartin Matuska	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
13362a58b312SMartin Matuska	 .inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
13372a58b312SMartin Matuska	add		v4.2d,v1.2d,v3.2d		// "D + T1"
13382a58b312SMartin Matuska	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
13392a58b312SMartin Matuska	add		v25.2d,v25.2d,v21.2d
13402a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
13412a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
13422a58b312SMartin Matuska	ext		v5.16b,v4.16b,v2.16b,#8
13432a58b312SMartin Matuska	ext		v6.16b,v0.16b,v4.16b,#8
13442a58b312SMartin Matuska	add		v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
13452a58b312SMartin Matuska	 .inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
13462a58b312SMartin Matuska	 ext		v7.16b,v17.16b,v18.16b,#8
13472a58b312SMartin Matuska	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
13482a58b312SMartin Matuska	 .inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
13492a58b312SMartin Matuska	add		v1.2d,v0.2d,v2.2d		// "D + T1"
13502a58b312SMartin Matuska	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
13512a58b312SMartin Matuska	add		v24.2d,v24.2d,v22.2d
13522a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
13532a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
13542a58b312SMartin Matuska	ext		v5.16b,v1.16b,v4.16b,#8
13552a58b312SMartin Matuska	ext		v6.16b,v3.16b,v1.16b,#8
13562a58b312SMartin Matuska	add		v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
13572a58b312SMartin Matuska	 .inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
13582a58b312SMartin Matuska	 ext		v7.16b,v18.16b,v19.16b,#8
13592a58b312SMartin Matuska	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
13602a58b312SMartin Matuska	 .inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
13612a58b312SMartin Matuska	add		v0.2d,v3.2d,v4.2d		// "D + T1"
13622a58b312SMartin Matuska	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
13632a58b312SMartin Matuska	add		v25.2d,v25.2d,v23.2d
13642a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
13652a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
13662a58b312SMartin Matuska	ext		v5.16b,v0.16b,v1.16b,#8
13672a58b312SMartin Matuska	ext		v6.16b,v2.16b,v0.16b,#8
13682a58b312SMartin Matuska	add		v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
13692a58b312SMartin Matuska	 .inst	0xcec08217	//sha512su0 v23.16b,v16.16b
13702a58b312SMartin Matuska	 ext		v7.16b,v19.16b,v20.16b,#8
13712a58b312SMartin Matuska	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
13722a58b312SMartin Matuska	 .inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
13732a58b312SMartin Matuska	add		v3.2d,v2.2d,v1.2d		// "D + T1"
13742a58b312SMartin Matuska	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
13752a58b312SMartin Matuska	add		v24.2d,v24.2d,v16.2d
13762a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
13772a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
13782a58b312SMartin Matuska	ext		v5.16b,v3.16b,v0.16b,#8
13792a58b312SMartin Matuska	ext		v6.16b,v4.16b,v3.16b,#8
13802a58b312SMartin Matuska	add		v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
13812a58b312SMartin Matuska	 .inst	0xcec08230	//sha512su0 v16.16b,v17.16b
13822a58b312SMartin Matuska	 ext		v7.16b,v20.16b,v21.16b,#8
13832a58b312SMartin Matuska	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
13842a58b312SMartin Matuska	 .inst	0xce678af0	//sha512su1 v16.16b,v23.16b,v7.16b
13852a58b312SMartin Matuska	add		v2.2d,v4.2d,v0.2d		// "D + T1"
13862a58b312SMartin Matuska	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
13872a58b312SMartin Matuska	add		v25.2d,v25.2d,v17.2d
13882a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
13892a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
13902a58b312SMartin Matuska	ext		v5.16b,v2.16b,v3.16b,#8
13912a58b312SMartin Matuska	ext		v6.16b,v1.16b,v2.16b,#8
13922a58b312SMartin Matuska	add		v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
13932a58b312SMartin Matuska	 .inst	0xcec08251	//sha512su0 v17.16b,v18.16b
13942a58b312SMartin Matuska	 ext		v7.16b,v21.16b,v22.16b,#8
13952a58b312SMartin Matuska	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
13962a58b312SMartin Matuska	 .inst	0xce678a11	//sha512su1 v17.16b,v16.16b,v7.16b
13972a58b312SMartin Matuska	add		v4.2d,v1.2d,v3.2d		// "D + T1"
13982a58b312SMartin Matuska	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
13992a58b312SMartin Matuska	add		v24.2d,v24.2d,v18.2d
14002a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
14012a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
14022a58b312SMartin Matuska	ext		v5.16b,v4.16b,v2.16b,#8
14032a58b312SMartin Matuska	ext		v6.16b,v0.16b,v4.16b,#8
14042a58b312SMartin Matuska	add		v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
14052a58b312SMartin Matuska	 .inst	0xcec08272	//sha512su0 v18.16b,v19.16b
14062a58b312SMartin Matuska	 ext		v7.16b,v22.16b,v23.16b,#8
14072a58b312SMartin Matuska	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
14082a58b312SMartin Matuska	 .inst	0xce678a32	//sha512su1 v18.16b,v17.16b,v7.16b
14092a58b312SMartin Matuska	add		v1.2d,v0.2d,v2.2d		// "D + T1"
14102a58b312SMartin Matuska	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
14112a58b312SMartin Matuska	add		v25.2d,v25.2d,v19.2d
14122a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
14132a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
14142a58b312SMartin Matuska	ext		v5.16b,v1.16b,v4.16b,#8
14152a58b312SMartin Matuska	ext		v6.16b,v3.16b,v1.16b,#8
14162a58b312SMartin Matuska	add		v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
14172a58b312SMartin Matuska	 .inst	0xcec08293	//sha512su0 v19.16b,v20.16b
14182a58b312SMartin Matuska	 ext		v7.16b,v23.16b,v16.16b,#8
14192a58b312SMartin Matuska	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
14202a58b312SMartin Matuska	 .inst	0xce678a53	//sha512su1 v19.16b,v18.16b,v7.16b
14212a58b312SMartin Matuska	add		v0.2d,v3.2d,v4.2d		// "D + T1"
14222a58b312SMartin Matuska	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
14232a58b312SMartin Matuska	add		v24.2d,v24.2d,v20.2d
14242a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
14252a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
14262a58b312SMartin Matuska	ext		v5.16b,v0.16b,v1.16b,#8
14272a58b312SMartin Matuska	ext		v6.16b,v2.16b,v0.16b,#8
14282a58b312SMartin Matuska	add		v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
14292a58b312SMartin Matuska	 .inst	0xcec082b4	//sha512su0 v20.16b,v21.16b
14302a58b312SMartin Matuska	 ext		v7.16b,v16.16b,v17.16b,#8
14312a58b312SMartin Matuska	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
14322a58b312SMartin Matuska	 .inst	0xce678a74	//sha512su1 v20.16b,v19.16b,v7.16b
14332a58b312SMartin Matuska	add		v3.2d,v2.2d,v1.2d		// "D + T1"
14342a58b312SMartin Matuska	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
14352a58b312SMartin Matuska	add		v25.2d,v25.2d,v21.2d
14362a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
14372a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
14382a58b312SMartin Matuska	ext		v5.16b,v3.16b,v0.16b,#8
14392a58b312SMartin Matuska	ext		v6.16b,v4.16b,v3.16b,#8
14402a58b312SMartin Matuska	add		v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
14412a58b312SMartin Matuska	 .inst	0xcec082d5	//sha512su0 v21.16b,v22.16b
14422a58b312SMartin Matuska	 ext		v7.16b,v17.16b,v18.16b,#8
14432a58b312SMartin Matuska	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
14442a58b312SMartin Matuska	 .inst	0xce678a95	//sha512su1 v21.16b,v20.16b,v7.16b
14452a58b312SMartin Matuska	add		v2.2d,v4.2d,v0.2d		// "D + T1"
14462a58b312SMartin Matuska	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
14472a58b312SMartin Matuska	add		v24.2d,v24.2d,v22.2d
14482a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
14492a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
14502a58b312SMartin Matuska	ext		v5.16b,v2.16b,v3.16b,#8
14512a58b312SMartin Matuska	ext		v6.16b,v1.16b,v2.16b,#8
14522a58b312SMartin Matuska	add		v3.2d,v3.2d,v24.2d			// "T1 + H + K512[i]"
14532a58b312SMartin Matuska	 .inst	0xcec082f6	//sha512su0 v22.16b,v23.16b
14542a58b312SMartin Matuska	 ext		v7.16b,v18.16b,v19.16b,#8
14552a58b312SMartin Matuska	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
14562a58b312SMartin Matuska	 .inst	0xce678ab6	//sha512su1 v22.16b,v21.16b,v7.16b
14572a58b312SMartin Matuska	add		v4.2d,v1.2d,v3.2d		// "D + T1"
14582a58b312SMartin Matuska	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
14592a58b312SMartin Matuska	add		v25.2d,v25.2d,v23.2d
14602a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
14612a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
14622a58b312SMartin Matuska	ext		v5.16b,v4.16b,v2.16b,#8
14632a58b312SMartin Matuska	ext		v6.16b,v0.16b,v4.16b,#8
14642a58b312SMartin Matuska	add		v2.2d,v2.2d,v25.2d			// "T1 + H + K512[i]"
14652a58b312SMartin Matuska	 .inst	0xcec08217	//sha512su0 v23.16b,v16.16b
14662a58b312SMartin Matuska	 ext		v7.16b,v19.16b,v20.16b,#8
14672a58b312SMartin Matuska	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
14682a58b312SMartin Matuska	 .inst	0xce678ad7	//sha512su1 v23.16b,v22.16b,v7.16b
14692a58b312SMartin Matuska	add		v1.2d,v0.2d,v2.2d		// "D + T1"
14702a58b312SMartin Matuska	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
14712a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
14722a58b312SMartin Matuska	add		v24.2d,v24.2d,v16.2d
14732a58b312SMartin Matuska	 ld1		{v16.16b},[x1],#16		// load next input
14742a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
14752a58b312SMartin Matuska	ext		v5.16b,v1.16b,v4.16b,#8
14762a58b312SMartin Matuska	ext		v6.16b,v3.16b,v1.16b,#8
14772a58b312SMartin Matuska	add		v4.2d,v4.2d,v24.2d			// "T1 + H + K512[i]"
14782a58b312SMartin Matuska	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
14792a58b312SMartin Matuska	 rev64		v16.16b,v16.16b
14802a58b312SMartin Matuska	add		v0.2d,v3.2d,v4.2d		// "D + T1"
14812a58b312SMartin Matuska	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
14822a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
14832a58b312SMartin Matuska	add		v25.2d,v25.2d,v17.2d
14842a58b312SMartin Matuska	 ld1		{v17.16b},[x1],#16		// load next input
14852a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
14862a58b312SMartin Matuska	ext		v5.16b,v0.16b,v1.16b,#8
14872a58b312SMartin Matuska	ext		v6.16b,v2.16b,v0.16b,#8
14882a58b312SMartin Matuska	add		v1.2d,v1.2d,v25.2d			// "T1 + H + K512[i]"
14892a58b312SMartin Matuska	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
14902a58b312SMartin Matuska	 rev64		v17.16b,v17.16b
14912a58b312SMartin Matuska	add		v3.2d,v2.2d,v1.2d		// "D + T1"
14922a58b312SMartin Matuska	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
14932a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
14942a58b312SMartin Matuska	add		v24.2d,v24.2d,v18.2d
14952a58b312SMartin Matuska	 ld1		{v18.16b},[x1],#16		// load next input
14962a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
14972a58b312SMartin Matuska	ext		v5.16b,v3.16b,v0.16b,#8
14982a58b312SMartin Matuska	ext		v6.16b,v4.16b,v3.16b,#8
14992a58b312SMartin Matuska	add		v0.2d,v0.2d,v24.2d			// "T1 + H + K512[i]"
15002a58b312SMartin Matuska	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
15012a58b312SMartin Matuska	 rev64		v18.16b,v18.16b
15022a58b312SMartin Matuska	add		v2.2d,v4.2d,v0.2d		// "D + T1"
15032a58b312SMartin Matuska	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
15042a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
15052a58b312SMartin Matuska	add		v25.2d,v25.2d,v19.2d
15062a58b312SMartin Matuska	 ld1		{v19.16b},[x1],#16		// load next input
15072a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
15082a58b312SMartin Matuska	ext		v5.16b,v2.16b,v3.16b,#8
15092a58b312SMartin Matuska	ext		v6.16b,v1.16b,v2.16b,#8
15102a58b312SMartin Matuska	add		v3.2d,v3.2d,v25.2d			// "T1 + H + K512[i]"
15112a58b312SMartin Matuska	.inst	0xce6680a3	//sha512h v3.16b,v5.16b,v6.16b
15122a58b312SMartin Matuska	 rev64		v19.16b,v19.16b
15132a58b312SMartin Matuska	add		v4.2d,v1.2d,v3.2d		// "D + T1"
15142a58b312SMartin Matuska	.inst	0xce608423	//sha512h2 v3.16b,v1.16b,v0.16b
15152a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
15162a58b312SMartin Matuska	add		v24.2d,v24.2d,v20.2d
15172a58b312SMartin Matuska	 ld1		{v20.16b},[x1],#16		// load next input
15182a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
15192a58b312SMartin Matuska	ext		v5.16b,v4.16b,v2.16b,#8
15202a58b312SMartin Matuska	ext		v6.16b,v0.16b,v4.16b,#8
15212a58b312SMartin Matuska	add		v2.2d,v2.2d,v24.2d			// "T1 + H + K512[i]"
15222a58b312SMartin Matuska	.inst	0xce6680a2	//sha512h v2.16b,v5.16b,v6.16b
15232a58b312SMartin Matuska	 rev64		v20.16b,v20.16b
15242a58b312SMartin Matuska	add		v1.2d,v0.2d,v2.2d		// "D + T1"
15252a58b312SMartin Matuska	.inst	0xce638402	//sha512h2 v2.16b,v0.16b,v3.16b
15262a58b312SMartin Matuska	ld1		{v24.2d},[x3],#16
15272a58b312SMartin Matuska	add		v25.2d,v25.2d,v21.2d
15282a58b312SMartin Matuska	 ld1		{v21.16b},[x1],#16		// load next input
15292a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
15302a58b312SMartin Matuska	ext		v5.16b,v1.16b,v4.16b,#8
15312a58b312SMartin Matuska	ext		v6.16b,v3.16b,v1.16b,#8
15322a58b312SMartin Matuska	add		v4.2d,v4.2d,v25.2d			// "T1 + H + K512[i]"
15332a58b312SMartin Matuska	.inst	0xce6680a4	//sha512h v4.16b,v5.16b,v6.16b
15342a58b312SMartin Matuska	 rev64		v21.16b,v21.16b
15352a58b312SMartin Matuska	add		v0.2d,v3.2d,v4.2d		// "D + T1"
15362a58b312SMartin Matuska	.inst	0xce628464	//sha512h2 v4.16b,v3.16b,v2.16b
15372a58b312SMartin Matuska	ld1		{v25.2d},[x3],#16
15382a58b312SMartin Matuska	add		v24.2d,v24.2d,v22.2d
15392a58b312SMartin Matuska	 ld1		{v22.16b},[x1],#16		// load next input
15402a58b312SMartin Matuska	ext		v24.16b,v24.16b,v24.16b,#8
15412a58b312SMartin Matuska	ext		v5.16b,v0.16b,v1.16b,#8
15422a58b312SMartin Matuska	ext		v6.16b,v2.16b,v0.16b,#8
15432a58b312SMartin Matuska	add		v1.2d,v1.2d,v24.2d			// "T1 + H + K512[i]"
15442a58b312SMartin Matuska	.inst	0xce6680a1	//sha512h v1.16b,v5.16b,v6.16b
15452a58b312SMartin Matuska	 rev64		v22.16b,v22.16b
15462a58b312SMartin Matuska	add		v3.2d,v2.2d,v1.2d		// "D + T1"
15472a58b312SMartin Matuska	.inst	0xce648441	//sha512h2 v1.16b,v2.16b,v4.16b
15482a58b312SMartin Matuska	sub		x3,x3,#80*8	// rewind
15492a58b312SMartin Matuska	add		v25.2d,v25.2d,v23.2d
15502a58b312SMartin Matuska	 ld1		{v23.16b},[x1],#16		// load next input
15512a58b312SMartin Matuska	ext		v25.16b,v25.16b,v25.16b,#8
15522a58b312SMartin Matuska	ext		v5.16b,v3.16b,v0.16b,#8
15532a58b312SMartin Matuska	ext		v6.16b,v4.16b,v3.16b,#8
15542a58b312SMartin Matuska	add		v0.2d,v0.2d,v25.2d			// "T1 + H + K512[i]"
15552a58b312SMartin Matuska	.inst	0xce6680a0	//sha512h v0.16b,v5.16b,v6.16b
15562a58b312SMartin Matuska	 rev64		v23.16b,v23.16b
15572a58b312SMartin Matuska	add		v2.2d,v4.2d,v0.2d		// "D + T1"
15582a58b312SMartin Matuska	.inst	0xce618480	//sha512h2 v0.16b,v4.16b,v1.16b
15592a58b312SMartin Matuska	add		v0.2d,v0.2d,v26.2d			// accumulate
15602a58b312SMartin Matuska	add		v1.2d,v1.2d,v27.2d
15612a58b312SMartin Matuska	add		v2.2d,v2.2d,v28.2d
15622a58b312SMartin Matuska	add		v3.2d,v3.2d,v29.2d
15632a58b312SMartin Matuska
15642a58b312SMartin Matuska	cbnz		x2,.Loop_hw
15652a58b312SMartin Matuska
15662a58b312SMartin Matuska	st1		{v0.2d-v3.2d},[x0]		// store context
15672a58b312SMartin Matuska
15682a58b312SMartin Matuska	ldr		x29,[sp],#16
15692a58b312SMartin Matuska	ret
15702a58b312SMartin Matuska.size	zfs_sha512_block_armv8,.-zfs_sha512_block_armv8
15712a58b312SMartin Matuska#endif
1572