xref: /linux/arch/arm64/crypto/aes-neonbs-core.S (revision 39e4716caa598a07a98598b2e7cd03055ce25fb9)
1d2912cb1SThomas Gleixner/* SPDX-License-Identifier: GPL-2.0-only */
21abee99eSArd Biesheuvel/*
31abee99eSArd Biesheuvel * Bit sliced AES using NEON instructions
41abee99eSArd Biesheuvel *
51abee99eSArd Biesheuvel * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
61abee99eSArd Biesheuvel */
71abee99eSArd Biesheuvel
81abee99eSArd Biesheuvel/*
91abee99eSArd Biesheuvel * The algorithm implemented here is described in detail by the paper
101abee99eSArd Biesheuvel * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
111abee99eSArd Biesheuvel * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
121abee99eSArd Biesheuvel *
131abee99eSArd Biesheuvel * This implementation is based primarily on the OpenSSL implementation
141abee99eSArd Biesheuvel * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
151abee99eSArd Biesheuvel */
161abee99eSArd Biesheuvel
171abee99eSArd Biesheuvel#include <linux/linkage.h>
181abee99eSArd Biesheuvel#include <asm/assembler.h>
191abee99eSArd Biesheuvel
201abee99eSArd Biesheuvel	.text
211abee99eSArd Biesheuvel
221abee99eSArd Biesheuvel	rounds		.req	x11
231abee99eSArd Biesheuvel	bskey		.req	x12
	/*
	 * Register aliases shared by the 8-block core routines and the code
	 * that calls them:
	 *   rounds (x11) - round count; counted down by aesbs_encrypt8 and
	 *                  aesbs_decrypt8 as their loop counter
	 *   bskey  (x12) - pointer into the key schedule; moved by
	 *                  enc_next_rk / dec_next_rk as round keys are read
	 */
241abee99eSArd Biesheuvel
251abee99eSArd Biesheuvel	.macro		in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	/*
	 * First stage of the sbox macro: mixes the eight operands in place
	 * with a fixed network of 13 XORs. No temporaries are used. Going by
	 * the name this is presumably the input basis change of the bit-sliced
	 * S-box; only the XOR network itself is visible here. The operands
	 * are used verbatim, so callers pass complete vector operands (sbox
	 * passes vN.16b).
	 */
261abee99eSArd Biesheuvel	eor		\b2, \b2, \b1
271abee99eSArd Biesheuvel	eor		\b5, \b5, \b6
281abee99eSArd Biesheuvel	eor		\b3, \b3, \b0
291abee99eSArd Biesheuvel	eor		\b6, \b6, \b2
301abee99eSArd Biesheuvel	eor		\b5, \b5, \b0
311abee99eSArd Biesheuvel	eor		\b6, \b6, \b3
321abee99eSArd Biesheuvel	eor		\b3, \b3, \b7
331abee99eSArd Biesheuvel	eor		\b7, \b7, \b5
341abee99eSArd Biesheuvel	eor		\b3, \b3, \b4
351abee99eSArd Biesheuvel	eor		\b4, \b4, \b5
361abee99eSArd Biesheuvel	eor		\b2, \b2, \b7
371abee99eSArd Biesheuvel	eor		\b3, \b3, \b1
381abee99eSArd Biesheuvel	eor		\b1, \b1, \b5
391abee99eSArd Biesheuvel	.endm
401abee99eSArd Biesheuvel
411abee99eSArd Biesheuvel	.macro		out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	/*
	 * Last stage of the sbox macro: mixes the eight operands in place
	 * with a fixed network of 11 XORs, using no temporaries. Presumably
	 * the output basis change of the bit-sliced S-box (going by the name).
	 * sbox invokes it with the registers in a permuted order, so the
	 * formal names here do not correspond to sbox's b0..b7.
	 */
421abee99eSArd Biesheuvel	eor		\b0, \b0, \b6
431abee99eSArd Biesheuvel	eor		\b1, \b1, \b4
441abee99eSArd Biesheuvel	eor		\b4, \b4, \b6
451abee99eSArd Biesheuvel	eor		\b2, \b2, \b0
461abee99eSArd Biesheuvel	eor		\b6, \b6, \b1
471abee99eSArd Biesheuvel	eor		\b1, \b1, \b5
481abee99eSArd Biesheuvel	eor		\b5, \b5, \b3
491abee99eSArd Biesheuvel	eor		\b3, \b3, \b7
501abee99eSArd Biesheuvel	eor		\b7, \b7, \b5
511abee99eSArd Biesheuvel	eor		\b2, \b2, \b5
521abee99eSArd Biesheuvel	eor		\b4, \b4, \b7
531abee99eSArd Biesheuvel	.endm
541abee99eSArd Biesheuvel
551abee99eSArd Biesheuvel	.macro		inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
	/*
	 * First stage of the inv_sbox macro: an in-place network of 12 XORs
	 * over the eight operands, with no temporaries.
	 *
	 * Note that the formal parameters are declared in a permuted order
	 * (b6, b1, b2, b4, b7, b0, b3, b5): the i-th positional argument is
	 * NOT named b<i>. Read the body in terms of the names, not positions.
	 */
561abee99eSArd Biesheuvel	eor		\b1, \b1, \b7
571abee99eSArd Biesheuvel	eor		\b4, \b4, \b7
581abee99eSArd Biesheuvel	eor		\b7, \b7, \b5
591abee99eSArd Biesheuvel	eor		\b1, \b1, \b3
601abee99eSArd Biesheuvel	eor		\b2, \b2, \b5
611abee99eSArd Biesheuvel	eor		\b3, \b3, \b7
621abee99eSArd Biesheuvel	eor		\b6, \b6, \b1
631abee99eSArd Biesheuvel	eor		\b2, \b2, \b0
641abee99eSArd Biesheuvel	eor		\b5, \b5, \b3
651abee99eSArd Biesheuvel	eor		\b4, \b4, \b6
661abee99eSArd Biesheuvel	eor		\b0, \b0, \b6
671abee99eSArd Biesheuvel	eor		\b1, \b1, \b4
681abee99eSArd Biesheuvel	.endm
691abee99eSArd Biesheuvel
701abee99eSArd Biesheuvel	.macro		inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
	/*
	 * Last stage of the inv_sbox macro: an in-place network of 13 XORs
	 * over the eight operands, with no temporaries.
	 *
	 * As with inv_in_bs_ch, the formal parameters are declared in a
	 * permuted order (b6, b5, b0, b3, b7, b1, b4, b2), so positional
	 * argument i is not named b<i>.
	 */
711abee99eSArd Biesheuvel	eor		\b1, \b1, \b5
721abee99eSArd Biesheuvel	eor		\b2, \b2, \b7
731abee99eSArd Biesheuvel	eor		\b3, \b3, \b1
741abee99eSArd Biesheuvel	eor		\b4, \b4, \b5
751abee99eSArd Biesheuvel	eor		\b7, \b7, \b5
761abee99eSArd Biesheuvel	eor		\b3, \b3, \b4
771abee99eSArd Biesheuvel	eor 		\b5, \b5, \b0
781abee99eSArd Biesheuvel	eor		\b3, \b3, \b7
791abee99eSArd Biesheuvel	eor		\b6, \b6, \b2
801abee99eSArd Biesheuvel	eor		\b2, \b2, \b1
811abee99eSArd Biesheuvel	eor		\b6, \b6, \b3
821abee99eSArd Biesheuvel	eor		\b3, \b3, \b0
831abee99eSArd Biesheuvel	eor		\b5, \b5, \b6
841abee99eSArd Biesheuvel	.endm
851abee99eSArd Biesheuvel
861abee99eSArd Biesheuvel	.macro		mul_gf4, x0, x1, y0, y1, t0, t1
	/*
	 * Bitwise two-by-two product network (presumably a GF(2^2) multiply,
	 * going by the name). With X0/X1 denoting the values on entry:
	 *   x1 = (X1 & y0) ^ ((y0 ^ y1) & X0)
	 *   x0 = ((X0 ^ X1) & y1) ^ (X1 & y0)
	 * x0/x1 are overwritten with the result, y0/y1 are only read, and
	 * t0/t1 are clobbered.
	 */
871abee99eSArd Biesheuvel	eor 		\t0, \y0, \y1
881abee99eSArd Biesheuvel	and		\t0, \t0, \x0
891abee99eSArd Biesheuvel	eor		\x0, \x0, \x1
901abee99eSArd Biesheuvel	and		\t1, \x1, \y0
911abee99eSArd Biesheuvel	and		\x0, \x0, \y1
921abee99eSArd Biesheuvel	eor		\x1, \t1, \t0
931abee99eSArd Biesheuvel	eor		\x0, \x0, \t1
941abee99eSArd Biesheuvel	.endm
951abee99eSArd Biesheuvel
961abee99eSArd Biesheuvel	.macro		mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
	/*
	 * Two interleaved and/eor networks, each needing one temporary:
	 *   - (x0, x1) updated from (y0, y1) using t0. With X0/X1 the entry
	 *     values: x1 = (X1 & y0) ^ ((X0 ^ X1) & y1)
	 *             x0 = ((X0 ^ X1) & y1) ^ ((y0 ^ y1) & X0)
	 *   - (x2, x3) updated from (y2, y3) using t1. With X2/X3 the entry
	 *     values: x2 = ((X2 ^ X3) & y3) ^ (X3 & y2)
	 *             x3 = (X3 & y2) ^ ((y2 ^ y3) & X2)
	 *     which are the same expressions mul_gf4 computes.
	 * The y operands are only read; t0 and t1 are clobbered.
	 */
971abee99eSArd Biesheuvel	eor		\t0, \y0, \y1
981abee99eSArd Biesheuvel	eor 		\t1, \y2, \y3
991abee99eSArd Biesheuvel	and		\t0, \t0, \x0
1001abee99eSArd Biesheuvel	and		\t1, \t1, \x2
1011abee99eSArd Biesheuvel	eor		\x0, \x0, \x1
1021abee99eSArd Biesheuvel	eor		\x2, \x2, \x3
1031abee99eSArd Biesheuvel	and		\x1, \x1, \y0
1041abee99eSArd Biesheuvel	and		\x3, \x3, \y2
1051abee99eSArd Biesheuvel	and		\x0, \x0, \y1
1061abee99eSArd Biesheuvel	and		\x2, \x2, \y3
1071abee99eSArd Biesheuvel	eor		\x1, \x1, \x0
1081abee99eSArd Biesheuvel	eor		\x2, \x2, \x3
1091abee99eSArd Biesheuvel	eor		\x0, \x0, \t0
1101abee99eSArd Biesheuvel	eor		\x3, \x3, \t1
1111abee99eSArd Biesheuvel	.endm
1121abee99eSArd Biesheuvel
1131abee99eSArd Biesheuvel	.macro		mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
1141abee99eSArd Biesheuvel				    y0, y1, y2, y3, t0, t1, t2, t3
	/*
	 * Updates two groups of four operands, (x0..x3) and (x4..x7), in
	 * place using the shared operands y0..y3, built from mul_gf4 and
	 * mul_gf4_n_gf4. Presumably two GF(2^4) multiplications by the same
	 * factor (going by the name).
	 *
	 * y0/y1 are XORed with y2/y3 before the first combined multiply and
	 * XORed with them again before the last mul_gf4, which restores
	 * their entry values; y2/y3 are never written. t0..t3 are clobbered.
	 */
1151abee99eSArd Biesheuvel	eor		\t0, \x0, \x2
1161abee99eSArd Biesheuvel	eor		\t1, \x1, \x3
1171abee99eSArd Biesheuvel	mul_gf4  	\x0, \x1, \y0, \y1, \t2, \t3
1181abee99eSArd Biesheuvel	eor		\y0, \y0, \y2
1191abee99eSArd Biesheuvel	eor		\y1, \y1, \y3
1201abee99eSArd Biesheuvel	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
1211abee99eSArd Biesheuvel	eor		\x0, \x0, \t0
1221abee99eSArd Biesheuvel	eor		\x2, \x2, \t0
1231abee99eSArd Biesheuvel	eor		\x1, \x1, \t1
1241abee99eSArd Biesheuvel	eor		\x3, \x3, \t1
	/* second group (x4..x7); y0/y1 still hold y0^y2 / y1^y3 here */
1251abee99eSArd Biesheuvel	eor		\t0, \x4, \x6
1261abee99eSArd Biesheuvel	eor		\t1, \x5, \x7
1271abee99eSArd Biesheuvel	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
1281abee99eSArd Biesheuvel	eor		\y0, \y0, \y2
1291abee99eSArd Biesheuvel	eor		\y1, \y1, \y3
1301abee99eSArd Biesheuvel	mul_gf4  	\x4, \x5, \y0, \y1, \t2, \t3
1311abee99eSArd Biesheuvel	eor		\x4, \x4, \t0
1321abee99eSArd Biesheuvel	eor		\x6, \x6, \t0
1331abee99eSArd Biesheuvel	eor		\x5, \x5, \t1
1341abee99eSArd Biesheuvel	eor		\x7, \x7, \t1
1351abee99eSArd Biesheuvel	.endm
1361abee99eSArd Biesheuvel
1371abee99eSArd Biesheuvel	.macro		inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
1381abee99eSArd Biesheuvel				   t0, t1, t2, t3, s0, s1, s2, s3
	/*
	 * Middle stage of both sbox and inv_sbox. Presumably the GF(2^8)
	 * inversion at the heart of the AES S-box (going by the name); what
	 * is visible is a straight-line network of eor/and/orr/bsl/not.
	 *
	 * x0..x7 are transformed in place. t0..t3 and s0..s3 are scratch and
	 * are all overwritten. The first part derives four intermediate
	 * values from x0..x7 (left in s3, s2, s1 and t1); the final
	 * mul_gf16_2 then combines those with x0..x7, reusing s0, t0, t2 and
	 * t3 as its temporaries.
	 */
1391abee99eSArd Biesheuvel	eor		\t3, \x4, \x6
1401abee99eSArd Biesheuvel	eor		\t0, \x5, \x7
1411abee99eSArd Biesheuvel	eor		\t1, \x1, \x3
1421abee99eSArd Biesheuvel	eor		\s1, \x7, \x6
1431abee99eSArd Biesheuvel	eor		\s0, \x0, \x2
1441abee99eSArd Biesheuvel	eor		\s3, \t3, \t0
1451abee99eSArd Biesheuvel	orr		\t2, \t0, \t1
1461abee99eSArd Biesheuvel	and		\s2, \t3, \s0
1471abee99eSArd Biesheuvel	orr		\t3, \t3, \s0
1481abee99eSArd Biesheuvel	eor		\s0, \s0, \t1
1491abee99eSArd Biesheuvel	and		\t0, \t0, \t1
1501abee99eSArd Biesheuvel	eor		\t1, \x3, \x2
1511abee99eSArd Biesheuvel	and		\s3, \s3, \s0
1521abee99eSArd Biesheuvel	and		\s1, \s1, \t1
1531abee99eSArd Biesheuvel	eor		\t1, \x4, \x5
1541abee99eSArd Biesheuvel	eor		\s0, \x1, \x0
1551abee99eSArd Biesheuvel	eor		\t3, \t3, \s1
1561abee99eSArd Biesheuvel	eor		\t2, \t2, \s1
1571abee99eSArd Biesheuvel	and		\s1, \t1, \s0
1581abee99eSArd Biesheuvel	orr		\t1, \t1, \s0
1591abee99eSArd Biesheuvel	eor		\t3, \t3, \s3
1601abee99eSArd Biesheuvel	eor		\t0, \t0, \s1
1611abee99eSArd Biesheuvel	eor		\t2, \t2, \s2
1621abee99eSArd Biesheuvel	eor		\t1, \t1, \s3
1631abee99eSArd Biesheuvel	eor		\t0, \t0, \s2
1641abee99eSArd Biesheuvel	and		\s0, \x7, \x3
1651abee99eSArd Biesheuvel	eor		\t1, \t1, \s2
1661abee99eSArd Biesheuvel	and		\s1, \x6, \x2
1671abee99eSArd Biesheuvel	and		\s2, \x5, \x1
1681abee99eSArd Biesheuvel	orr		\s3, \x4, \x0
1691abee99eSArd Biesheuvel	eor		\t3, \t3, \s0
1701abee99eSArd Biesheuvel	eor		\t1, \t1, \s2
1711abee99eSArd Biesheuvel	eor		\s0, \t0, \s3
1721abee99eSArd Biesheuvel	eor		\t2, \t2, \s1
1731abee99eSArd Biesheuvel	and		\s2, \t3, \t1
1741abee99eSArd Biesheuvel	eor		\s1, \t2, \s2
1751abee99eSArd Biesheuvel	eor		\s3, \s0, \s2
	/*
	 * bsl is a bitwise select: the destination register supplies the
	 * selector mask and is overwritten with the selected bits, so the
	 * order of the following instructions matters.
	 */
1761abee99eSArd Biesheuvel	bsl		\s1, \t1, \s0
1771abee99eSArd Biesheuvel	not		\t0, \s0
1781abee99eSArd Biesheuvel	bsl		\s0, \s1, \s3
1791abee99eSArd Biesheuvel	bsl		\t0, \s1, \s3
1801abee99eSArd Biesheuvel	bsl		\s3, \t3, \t2
1811abee99eSArd Biesheuvel	eor		\t3, \t3, \t2
1821abee99eSArd Biesheuvel	and		\s2, \s0, \s3
1831abee99eSArd Biesheuvel	eor		\t1, \t1, \t0
1841abee99eSArd Biesheuvel	eor		\s2, \s2, \t3
1851abee99eSArd Biesheuvel	mul_gf16_2	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
1861abee99eSArd Biesheuvel			\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
1871abee99eSArd Biesheuvel	.endm
1881abee99eSArd Biesheuvel
1891abee99eSArd Biesheuvel	.macro		sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
1901abee99eSArd Biesheuvel			      t0, t1, t2, t3, s0, s1, s2, s3
	/*
	 * Forward S-box step of aesbs_encrypt8, applied to the eight
	 * registers b0..b7: in_bs_ch, then inv_gf256, then out_bs_ch.
	 *
	 * Arguments are bare vector register names (e.g. v0); the .16b
	 * arrangement is appended here. t0..t3 and s0..s3 are scratch
	 * registers handed to inv_gf256.
	 *
	 * Each stage is invoked with the registers in a different order, so
	 * the result does not come back in b0..b7 order: when called with
	 * v0..v7, aesbs_encrypt8 addresses the result as
	 * v0, v1, v4, v6, v3, v7, v2, v5.
	 */
1911abee99eSArd Biesheuvel	in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
1921abee99eSArd Biesheuvel			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
1931abee99eSArd Biesheuvel	inv_gf256	\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
1941abee99eSArd Biesheuvel			\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
1951abee99eSArd Biesheuvel			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
1961abee99eSArd Biesheuvel			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
1971abee99eSArd Biesheuvel	out_bs_ch	\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
1981abee99eSArd Biesheuvel			\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b
1991abee99eSArd Biesheuvel	.endm
2001abee99eSArd Biesheuvel
2011abee99eSArd Biesheuvel	.macro		inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
2021abee99eSArd Biesheuvel				  t0, t1, t2, t3, s0, s1, s2, s3
	/*
	 * Inverse S-box step of aesbs_decrypt8, applied to the eight
	 * registers b0..b7: inv_in_bs_ch, then inv_gf256, then
	 * inv_out_bs_ch.
	 *
	 * Arguments are bare vector register names; the .16b arrangement is
	 * appended here. t0..t3 and s0..s3 are scratch registers handed to
	 * inv_gf256.
	 *
	 * The stages use differently permuted register orders, so the result
	 * does not come back in b0..b7 order: when called with v0..v7,
	 * aesbs_decrypt8 addresses the result as
	 * v0, v1, v6, v4, v2, v7, v3, v5.
	 */
2031abee99eSArd Biesheuvel	inv_in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
2041abee99eSArd Biesheuvel			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
2051abee99eSArd Biesheuvel	inv_gf256	\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
2061abee99eSArd Biesheuvel			\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
2071abee99eSArd Biesheuvel			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
2081abee99eSArd Biesheuvel			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
2091abee99eSArd Biesheuvel	inv_out_bs_ch	\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
2101abee99eSArd Biesheuvel			\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
2111abee99eSArd Biesheuvel	.endm
2121abee99eSArd Biesheuvel
2131abee99eSArd Biesheuvel	.macro		enc_next_rk
	/*
	 * Load the next 128-byte round key into q16..q23 and advance bskey
	 * past it. The first ldp post-increments bskey by 128, so the
	 * remaining three pairs are addressed with negative offsets from the
	 * already-advanced pointer.
	 */
2141abee99eSArd Biesheuvel	ldp		q16, q17, [bskey], #128
2151abee99eSArd Biesheuvel	ldp		q18, q19, [bskey, #-96]
2161abee99eSArd Biesheuvel	ldp		q20, q21, [bskey, #-64]
2171abee99eSArd Biesheuvel	ldp		q22, q23, [bskey, #-32]
2181abee99eSArd Biesheuvel	.endm
2191abee99eSArd Biesheuvel
2201abee99eSArd Biesheuvel	.macro		dec_next_rk
	/*
	 * Step bskey back by 128 bytes and load the round key found there
	 * into q16..q23. The first ldp pre-decrements bskey (writeback), so
	 * the other three pairs use positive offsets from the new pointer.
	 * Afterwards bskey points at the start of the key just loaded.
	 */
2211abee99eSArd Biesheuvel	ldp		q16, q17, [bskey, #-128]!
2221abee99eSArd Biesheuvel	ldp		q18, q19, [bskey, #32]
2231abee99eSArd Biesheuvel	ldp		q20, q21, [bskey, #64]
2241abee99eSArd Biesheuvel	ldp		q22, q23, [bskey, #96]
2251abee99eSArd Biesheuvel	.endm
2261abee99eSArd Biesheuvel
2271abee99eSArd Biesheuvel	.macro		add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
	/*
	 * XOR the eight state registers with the round key held in v16..v23
	 * (as loaded by enc_next_rk / dec_next_rk): the i-th argument is
	 * XORed with v(16 + i). Arguments are bare register names; the order
	 * in which the caller lists them decides which state register meets
	 * which key register.
	 */
2281abee99eSArd Biesheuvel	eor		\x0\().16b, \x0\().16b, v16.16b
2291abee99eSArd Biesheuvel	eor		\x1\().16b, \x1\().16b, v17.16b
2301abee99eSArd Biesheuvel	eor		\x2\().16b, \x2\().16b, v18.16b
2311abee99eSArd Biesheuvel	eor		\x3\().16b, \x3\().16b, v19.16b
2321abee99eSArd Biesheuvel	eor		\x4\().16b, \x4\().16b, v20.16b
2331abee99eSArd Biesheuvel	eor		\x5\().16b, \x5\().16b, v21.16b
2341abee99eSArd Biesheuvel	eor		\x6\().16b, \x6\().16b, v22.16b
2351abee99eSArd Biesheuvel	eor		\x7\().16b, \x7\().16b, v23.16b
2361abee99eSArd Biesheuvel	.endm
2371abee99eSArd Biesheuvel
2381abee99eSArd Biesheuvel	.macro		shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
	/*
	 * Apply one and the same byte permutation to each of the eight
	 * registers, in place, via tbl with the index vector in mask. The
	 * callers pass v24, which holds one of the SR / SRM0 (encrypt) or
	 * ISR / ISRM0 (decrypt) tables.
	 */
2391abee99eSArd Biesheuvel	tbl		\x0\().16b, {\x0\().16b}, \mask\().16b
2401abee99eSArd Biesheuvel	tbl		\x1\().16b, {\x1\().16b}, \mask\().16b
2411abee99eSArd Biesheuvel	tbl		\x2\().16b, {\x2\().16b}, \mask\().16b
2421abee99eSArd Biesheuvel	tbl		\x3\().16b, {\x3\().16b}, \mask\().16b
2431abee99eSArd Biesheuvel	tbl		\x4\().16b, {\x4\().16b}, \mask\().16b
2441abee99eSArd Biesheuvel	tbl		\x5\().16b, {\x5\().16b}, \mask\().16b
2451abee99eSArd Biesheuvel	tbl		\x6\().16b, {\x6\().16b}, \mask\().16b
2461abee99eSArd Biesheuvel	tbl		\x7\().16b, {\x7\().16b}, \mask\().16b
2471abee99eSArd Biesheuvel	.endm
2481abee99eSArd Biesheuvel
2491abee99eSArd Biesheuvel	.macro		mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
2501abee99eSArd Biesheuvel				  t0, t1, t2, t3, t4, t5, t6, t7, inv
	/*
	 * Column-mixing step on the eight state registers x0..x7, built from
	 * byte rotations of each register (ext of a register with itself by
	 * 12 and by 8 bytes) combined with XORs. Used directly by
	 * aesbs_encrypt8 and as the tail of inv_mix_cols.
	 *
	 * x0..x7 are overwritten with the result; t0..t7 are scratch and are
	 * all clobbered. Arguments are bare register names.
	 *
	 * inv is optional. When it is omitted (blank) the first variant of
	 * the final stage below is assembled; when any value is supplied
	 * (inv_mix_cols passes 1) the second variant is used, which assigns
	 * the last five results to different registers.
	 *
	 * No instruction in the expansion writes the condition flags, which
	 * aesbs_encrypt8 / aesbs_decrypt8 rely on.
	 */
2511abee99eSArd Biesheuvel	ext		\t0\().16b, \x0\().16b, \x0\().16b, #12
2521abee99eSArd Biesheuvel	ext		\t1\().16b, \x1\().16b, \x1\().16b, #12
2531abee99eSArd Biesheuvel	eor		\x0\().16b, \x0\().16b, \t0\().16b
2541abee99eSArd Biesheuvel	ext		\t2\().16b, \x2\().16b, \x2\().16b, #12
2551abee99eSArd Biesheuvel	eor		\x1\().16b, \x1\().16b, \t1\().16b
2561abee99eSArd Biesheuvel	ext		\t3\().16b, \x3\().16b, \x3\().16b, #12
2571abee99eSArd Biesheuvel	eor		\x2\().16b, \x2\().16b, \t2\().16b
2581abee99eSArd Biesheuvel	ext		\t4\().16b, \x4\().16b, \x4\().16b, #12
2591abee99eSArd Biesheuvel	eor		\x3\().16b, \x3\().16b, \t3\().16b
2601abee99eSArd Biesheuvel	ext		\t5\().16b, \x5\().16b, \x5\().16b, #12
2611abee99eSArd Biesheuvel	eor		\x4\().16b, \x4\().16b, \t4\().16b
2621abee99eSArd Biesheuvel	ext		\t6\().16b, \x6\().16b, \x6\().16b, #12
2631abee99eSArd Biesheuvel	eor		\x5\().16b, \x5\().16b, \t5\().16b
2641abee99eSArd Biesheuvel	ext		\t7\().16b, \x7\().16b, \x7\().16b, #12
2651abee99eSArd Biesheuvel	eor		\x6\().16b, \x6\().16b, \t6\().16b
2661abee99eSArd Biesheuvel	eor		\t1\().16b, \t1\().16b, \x0\().16b
2671abee99eSArd Biesheuvel	eor		\x7\().16b, \x7\().16b, \t7\().16b
2681abee99eSArd Biesheuvel	ext		\x0\().16b, \x0\().16b, \x0\().16b, #8
2691abee99eSArd Biesheuvel	eor		\t2\().16b, \t2\().16b, \x1\().16b
2701abee99eSArd Biesheuvel	eor		\t0\().16b, \t0\().16b, \x7\().16b
2711abee99eSArd Biesheuvel	eor		\t1\().16b, \t1\().16b, \x7\().16b
2721abee99eSArd Biesheuvel	ext		\x1\().16b, \x1\().16b, \x1\().16b, #8
2731abee99eSArd Biesheuvel	eor		\t5\().16b, \t5\().16b, \x4\().16b
2741abee99eSArd Biesheuvel	eor		\x0\().16b, \x0\().16b, \t0\().16b
2751abee99eSArd Biesheuvel	eor		\t6\().16b, \t6\().16b, \x5\().16b
2761abee99eSArd Biesheuvel	eor		\x1\().16b, \x1\().16b, \t1\().16b
2771abee99eSArd Biesheuvel	ext		\t0\().16b, \x4\().16b, \x4\().16b, #8
2781abee99eSArd Biesheuvel	eor		\t4\().16b, \t4\().16b, \x3\().16b
2791abee99eSArd Biesheuvel	ext		\t1\().16b, \x5\().16b, \x5\().16b, #8
2801abee99eSArd Biesheuvel	eor		\t7\().16b, \t7\().16b, \x6\().16b
2811abee99eSArd Biesheuvel	ext		\x4\().16b, \x3\().16b, \x3\().16b, #8
2821abee99eSArd Biesheuvel	eor		\t3\().16b, \t3\().16b, \x2\().16b
2831abee99eSArd Biesheuvel	ext		\x5\().16b, \x7\().16b, \x7\().16b, #8
2841abee99eSArd Biesheuvel	eor		\t4\().16b, \t4\().16b, \x7\().16b
2851abee99eSArd Biesheuvel	ext		\x3\().16b, \x6\().16b, \x6\().16b, #8
2861abee99eSArd Biesheuvel	eor		\t3\().16b, \t3\().16b, \x7\().16b
2871abee99eSArd Biesheuvel	ext		\x6\().16b, \x2\().16b, \x2\().16b, #8
2881abee99eSArd Biesheuvel	eor		\x7\().16b, \t1\().16b, \t5\().16b
	/* final stage: .ifb selects on whether the inv argument is blank */
2891abee99eSArd Biesheuvel	.ifb		\inv
2901abee99eSArd Biesheuvel	eor		\x2\().16b, \t0\().16b, \t4\().16b
2911abee99eSArd Biesheuvel	eor		\x4\().16b, \x4\().16b, \t3\().16b
2921abee99eSArd Biesheuvel	eor		\x5\().16b, \x5\().16b, \t7\().16b
2931abee99eSArd Biesheuvel	eor		\x3\().16b, \x3\().16b, \t6\().16b
2941abee99eSArd Biesheuvel	eor		\x6\().16b, \x6\().16b, \t2\().16b
2951abee99eSArd Biesheuvel	.else
2961abee99eSArd Biesheuvel	eor		\t3\().16b, \t3\().16b, \x4\().16b
2971abee99eSArd Biesheuvel	eor		\x5\().16b, \x5\().16b, \t7\().16b
2981abee99eSArd Biesheuvel	eor		\x2\().16b, \x3\().16b, \t6\().16b
2991abee99eSArd Biesheuvel	eor		\x3\().16b, \t0\().16b, \t4\().16b
3001abee99eSArd Biesheuvel	eor		\x4\().16b, \x6\().16b, \t2\().16b
3011abee99eSArd Biesheuvel	mov		\x6\().16b, \t3\().16b
3021abee99eSArd Biesheuvel	.endif
3031abee99eSArd Biesheuvel	.endm
3041abee99eSArd Biesheuvel
3051abee99eSArd Biesheuvel	.macro		inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
3061abee99eSArd Biesheuvel				      t0, t1, t2, t3, t4, t5, t6, t7
	/*
	 * Inverse column-mixing step used by aesbs_decrypt8. It is computed
	 * as a pre-processing pass followed by mix_cols in its "inv" variant:
	 *   1. t<i> = x<i> ^ (x<i> rotated by 8 bytes), for i = 0..7
	 *   2. selected t values are XORed into x0..x7
	 *   3. mix_cols is run on x0..x7 with inv = 1, reusing t0..t7 as its
	 *      scratch registers
	 * x0..x7 are overwritten with the result; t0..t7 are clobbered.
	 * Like mix_cols, the expansion does not write the condition flags.
	 */
3071abee99eSArd Biesheuvel	ext		\t0\().16b, \x0\().16b, \x0\().16b, #8
3081abee99eSArd Biesheuvel	ext		\t6\().16b, \x6\().16b, \x6\().16b, #8
3091abee99eSArd Biesheuvel	ext		\t7\().16b, \x7\().16b, \x7\().16b, #8
3101abee99eSArd Biesheuvel	eor		\t0\().16b, \t0\().16b, \x0\().16b
3111abee99eSArd Biesheuvel	ext		\t1\().16b, \x1\().16b, \x1\().16b, #8
3121abee99eSArd Biesheuvel	eor		\t6\().16b, \t6\().16b, \x6\().16b
3131abee99eSArd Biesheuvel	ext		\t2\().16b, \x2\().16b, \x2\().16b, #8
3141abee99eSArd Biesheuvel	eor		\t7\().16b, \t7\().16b, \x7\().16b
3151abee99eSArd Biesheuvel	ext		\t3\().16b, \x3\().16b, \x3\().16b, #8
3161abee99eSArd Biesheuvel	eor		\t1\().16b, \t1\().16b, \x1\().16b
3171abee99eSArd Biesheuvel	ext		\t4\().16b, \x4\().16b, \x4\().16b, #8
3181abee99eSArd Biesheuvel	eor		\t2\().16b, \t2\().16b, \x2\().16b
3191abee99eSArd Biesheuvel	ext		\t5\().16b, \x5\().16b, \x5\().16b, #8
3201abee99eSArd Biesheuvel	eor		\t3\().16b, \t3\().16b, \x3\().16b
3211abee99eSArd Biesheuvel	eor		\t4\().16b, \t4\().16b, \x4\().16b
3221abee99eSArd Biesheuvel	eor		\t5\().16b, \t5\().16b, \x5\().16b
3231abee99eSArd Biesheuvel	eor		\x0\().16b, \x0\().16b, \t6\().16b
3241abee99eSArd Biesheuvel	eor		\x1\().16b, \x1\().16b, \t6\().16b
3251abee99eSArd Biesheuvel	eor		\x2\().16b, \x2\().16b, \t0\().16b
3261abee99eSArd Biesheuvel	eor		\x4\().16b, \x4\().16b, \t2\().16b
3271abee99eSArd Biesheuvel	eor		\x3\().16b, \x3\().16b, \t1\().16b
3281abee99eSArd Biesheuvel	eor		\x1\().16b, \x1\().16b, \t7\().16b
3291abee99eSArd Biesheuvel	eor		\x2\().16b, \x2\().16b, \t7\().16b
3301abee99eSArd Biesheuvel	eor		\x4\().16b, \x4\().16b, \t6\().16b
3311abee99eSArd Biesheuvel	eor		\x5\().16b, \x5\().16b, \t3\().16b
3321abee99eSArd Biesheuvel	eor		\x3\().16b, \x3\().16b, \t6\().16b
3331abee99eSArd Biesheuvel	eor		\x6\().16b, \x6\().16b, \t4\().16b
3341abee99eSArd Biesheuvel	eor		\x4\().16b, \x4\().16b, \t7\().16b
3351abee99eSArd Biesheuvel	eor		\x5\().16b, \x5\().16b, \t7\().16b
3361abee99eSArd Biesheuvel	eor		\x7\().16b, \x7\().16b, \t5\().16b
3371abee99eSArd Biesheuvel	mix_cols	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
3381abee99eSArd Biesheuvel			\t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
3391abee99eSArd Biesheuvel	.endm
3401abee99eSArd Biesheuvel
3411abee99eSArd Biesheuvel	.macro		swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
	/*
	 * Two interleaved masked bit swaps, one on the pair (a0, b0) and one
	 * on (a1, b1). For each pair:
	 *   t  = ((b >> n) ^ a) & mask
	 *   a ^= t
	 *   b ^= t << n
	 * i.e. the bits of a selected by mask are exchanged with the bits of
	 * b selected by (mask << n). The shifts operate on 64-bit lanes
	 * (.2d). n is an assembly-time constant; mask is only read; t0 and t1
	 * are clobbered.
	 */
3421abee99eSArd Biesheuvel	ushr		\t0\().2d, \b0\().2d, #\n
3431abee99eSArd Biesheuvel	ushr		\t1\().2d, \b1\().2d, #\n
3441abee99eSArd Biesheuvel	eor		\t0\().16b, \t0\().16b, \a0\().16b
3451abee99eSArd Biesheuvel	eor		\t1\().16b, \t1\().16b, \a1\().16b
3461abee99eSArd Biesheuvel	and		\t0\().16b, \t0\().16b, \mask\().16b
3471abee99eSArd Biesheuvel	and		\t1\().16b, \t1\().16b, \mask\().16b
3481abee99eSArd Biesheuvel	eor		\a0\().16b, \a0\().16b, \t0\().16b
3491abee99eSArd Biesheuvel	shl		\t0\().2d, \t0\().2d, #\n
3501abee99eSArd Biesheuvel	eor		\a1\().16b, \a1\().16b, \t1\().16b
3511abee99eSArd Biesheuvel	shl		\t1\().2d, \t1\().2d, #\n
3521abee99eSArd Biesheuvel	eor		\b0\().16b, \b0\().16b, \t0\().16b
3531abee99eSArd Biesheuvel	eor		\b1\().16b, \b1\().16b, \t1\().16b
3541abee99eSArd Biesheuvel	.endm
3551abee99eSArd Biesheuvel
3561abee99eSArd Biesheuvel	.macro		bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
	/*
	 * Bit transposition across the eight registers, done as three rounds
	 * of swapmove_2x with (shift, mask) = (1, 0x55), (2, 0x33) and
	 * (4, 0x0f). aesbs_encrypt8 / aesbs_decrypt8 apply it once on the way
	 * in and once more on the way out.
	 *
	 * The register parameters are declared in reverse order: the first
	 * argument is x7 and the eighth is x0. t0 and t1 hold the masks (t0
	 * is reloaded with 0x0f once the 0x55 swaps are done); t2 and t3 are
	 * the swapmove temporaries. All four are clobbered.
	 */
3571abee99eSArd Biesheuvel	movi		\t0\().16b, #0x55
3581abee99eSArd Biesheuvel	movi		\t1\().16b, #0x33
3591abee99eSArd Biesheuvel	swapmove_2x	\x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
3601abee99eSArd Biesheuvel	swapmove_2x	\x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
3611abee99eSArd Biesheuvel	movi		\t0\().16b, #0x0f
3621abee99eSArd Biesheuvel	swapmove_2x	\x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
3631abee99eSArd Biesheuvel	swapmove_2x	\x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
3641abee99eSArd Biesheuvel	swapmove_2x	\x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
3651abee99eSArd Biesheuvel	swapmove_2x	\x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
3661abee99eSArd Biesheuvel	.endm
3671abee99eSArd Biesheuvel
3681abee99eSArd Biesheuvel
3691abee99eSArd Biesheuvel	.align		6
	/*
	 * 16-byte index vectors for tbl, loaded with literal ldr by the
	 * routines below:
	 *   M0     - applied to each round key in aesbs_convert_key
	 *   M0SR   - applied to the input blocks in aesbs_encrypt8
	 *   SR     - shift_rows mask for the encryption rounds
	 *   SRM0   - shift_rows mask for the final encryption round
	 *   M0ISR, ISR, ISRM0 - the corresponding tables in aesbs_decrypt8
	 * The names suggest compositions of a byte-order permutation (M0)
	 * with (inverse) ShiftRows -- presumably so; not verified here.
	 */
3701abee99eSArd BiesheuvelM0:	.octa		0x0004080c0105090d02060a0e03070b0f
3711abee99eSArd Biesheuvel
3721abee99eSArd BiesheuvelM0SR:	.octa		0x0004080c05090d010a0e02060f03070b
3731abee99eSArd BiesheuvelSR:	.octa		0x0f0e0d0c0a09080b0504070600030201
3741abee99eSArd BiesheuvelSRM0:	.octa		0x01060b0c0207080d0304090e00050a0f
3751abee99eSArd Biesheuvel
3761abee99eSArd BiesheuvelM0ISR:	.octa		0x0004080c0d0105090a0e0206070b0f03
3771abee99eSArd BiesheuvelISR:	.octa		0x0f0e0d0c080b0a090504070602010003
3781abee99eSArd BiesheuvelISRM0:	.octa		0x0306090c00070a0d01040b0e0205080f
3791abee99eSArd Biesheuvel
3801abee99eSArd Biesheuvel	/*
3811abee99eSArd Biesheuvel	 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
	 *
	 * Converts an AES key schedule into the layout consumed by
	 * aesbs_encrypt8 / aesbs_decrypt8.
	 *
	 *   x0 = out, x1 = rk, x2 = rounds
	 *
	 * Reads rounds + 1 round keys of 16 bytes each from rk and writes
	 * 16 + 128 * (rounds - 1) + 16 bytes to out:
	 *   - the round 0 key, copied unchanged (16 bytes)
	 *   - for each of the next rounds - 1 keys, a 128-byte expansion:
	 *     the key is permuted with M0, then each of its 8 bit positions
	 *     is widened into a full 16-byte mask (0xff where the bit is
	 *     set, 0x00 otherwise) using cmtst against 1 << bit
	 *   - the last round key XORed with 0x63 in every byte (16 bytes)
	 *
	 * The masks for bits 0, 1, 5 and 6 are stored inverted; these are
	 * exactly the bits set in 0x63, the constant that is also XORed into
	 * the last round key.
	 *
	 * Overwrites v0-v17 and advances x0, x1 and x2.
3821abee99eSArd Biesheuvel	 */
3830e89640bSMark BrownSYM_FUNC_START(aesbs_convert_key)
3841abee99eSArd Biesheuvel	ld1		{v7.4s}, [x1], #16		// load round 0 key
3851abee99eSArd Biesheuvel	ld1		{v17.4s}, [x1], #16		// load round 1 key
3861abee99eSArd Biesheuvel
3871abee99eSArd Biesheuvel	movi		v8.16b,  #0x01			// bit masks
3881abee99eSArd Biesheuvel	movi		v9.16b,  #0x02
3891abee99eSArd Biesheuvel	movi		v10.16b, #0x04
3901abee99eSArd Biesheuvel	movi		v11.16b, #0x08
3911abee99eSArd Biesheuvel	movi		v12.16b, #0x10
3921abee99eSArd Biesheuvel	movi		v13.16b, #0x20
3931abee99eSArd Biesheuvel	movi		v14.16b, #0x40
3941abee99eSArd Biesheuvel	movi		v15.16b, #0x80
3951abee99eSArd Biesheuvel	ldr		q16, M0
3961abee99eSArd Biesheuvel
	// x2 = rounds - 1 = number of keys to expand in the loop
3971abee99eSArd Biesheuvel	sub		x2, x2, #1
3981abee99eSArd Biesheuvel	str		q7, [x0], #16		// save round 0 key
3991abee99eSArd Biesheuvel
4001abee99eSArd Biesheuvel.Lkey_loop:
	// v7 = current key permuted by M0; v17 is then free to be
	// refilled with the following key
4011abee99eSArd Biesheuvel	tbl		v7.16b ,{v17.16b}, v16.16b
4021abee99eSArd Biesheuvel	ld1		{v17.4s}, [x1], #16		// load next round key
4031abee99eSArd Biesheuvel
	// v<i> = per-byte mask of bit i; v7 is consumed last because it
	// is also the source operand
4041abee99eSArd Biesheuvel	cmtst		v0.16b, v7.16b, v8.16b
4051abee99eSArd Biesheuvel	cmtst		v1.16b, v7.16b, v9.16b
4061abee99eSArd Biesheuvel	cmtst		v2.16b, v7.16b, v10.16b
4071abee99eSArd Biesheuvel	cmtst		v3.16b, v7.16b, v11.16b
4081abee99eSArd Biesheuvel	cmtst		v4.16b, v7.16b, v12.16b
4091abee99eSArd Biesheuvel	cmtst		v5.16b, v7.16b, v13.16b
4101abee99eSArd Biesheuvel	cmtst		v6.16b, v7.16b, v14.16b
4111abee99eSArd Biesheuvel	cmtst		v7.16b, v7.16b, v15.16b
	// invert the masks for bits 0, 1, 5 and 6 (the bits of 0x63)
4121abee99eSArd Biesheuvel	not		v0.16b, v0.16b
4131abee99eSArd Biesheuvel	not		v1.16b, v1.16b
4141abee99eSArd Biesheuvel	not		v5.16b, v5.16b
4151abee99eSArd Biesheuvel	not		v6.16b, v6.16b
4161abee99eSArd Biesheuvel
	// the stores below do not touch the flags set by subs
4171abee99eSArd Biesheuvel	subs		x2, x2, #1
4181abee99eSArd Biesheuvel	stp		q0, q1, [x0], #128
4191abee99eSArd Biesheuvel	stp		q2, q3, [x0, #-96]
4201abee99eSArd Biesheuvel	stp		q4, q5, [x0, #-64]
4211abee99eSArd Biesheuvel	stp		q6, q7, [x0, #-32]
4221abee99eSArd Biesheuvel	b.ne		.Lkey_loop
4231abee99eSArd Biesheuvel
	// v17 holds the last round key, loaded by the final iteration
4241abee99eSArd Biesheuvel	movi		v7.16b, #0x63			// compose .L63
4251abee99eSArd Biesheuvel	eor		v17.16b, v17.16b, v7.16b
4261abee99eSArd Biesheuvel	str		q17, [x0]
4271abee99eSArd Biesheuvel	ret
4280e89640bSMark BrownSYM_FUNC_END(aesbs_convert_key)
4291abee99eSArd Biesheuvel
4301abee99eSArd Biesheuvel	.align		4
4310e89640bSMark BrownSYM_FUNC_START_LOCAL(aesbs_encrypt8)
	/*
	 * Encrypt eight 16-byte blocks in parallel. File-local routine with
	 * a private register convention:
	 *
	 *   in:   v0-v7  = the eight input blocks
	 *         bskey  = key schedule in the layout written by
	 *                  aesbs_convert_key
	 *         rounds = number of AES rounds
	 *   out:  the eight result blocks, in the register order
	 *         v0, v1, v4, v6, v3, v7, v2, v5
	 *   clobbers: v8-v24, rounds, bskey, condition flags
	 *
	 * Flow: XOR the 16-byte round 0 key into every block and permute
	 * with M0SR, bitslice, then loop
	 *     sbox -> (done?) -> load round key -> mix_cols -> add_round_key
	 *     -> shift_rows
	 * The final pass runs shift_rows and sbox only; after it the state
	 * is passed through bitslice again and XORed with the 16-byte last
	 * round key.
	 */
4321abee99eSArd Biesheuvel	ldr		q9, [bskey], #16		// round 0 key
4331abee99eSArd Biesheuvel	ldr		q8, M0SR
4341abee99eSArd Biesheuvel	ldr		q24, SR
4351abee99eSArd Biesheuvel
4361abee99eSArd Biesheuvel	eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
4371abee99eSArd Biesheuvel	eor		v11.16b, v1.16b, v9.16b
4381abee99eSArd Biesheuvel	tbl		v0.16b, {v10.16b}, v8.16b
4391abee99eSArd Biesheuvel	eor		v12.16b, v2.16b, v9.16b
4401abee99eSArd Biesheuvel	tbl		v1.16b, {v11.16b}, v8.16b
4411abee99eSArd Biesheuvel	eor		v13.16b, v3.16b, v9.16b
4421abee99eSArd Biesheuvel	tbl		v2.16b, {v12.16b}, v8.16b
4431abee99eSArd Biesheuvel	eor		v14.16b, v4.16b, v9.16b
4441abee99eSArd Biesheuvel	tbl		v3.16b, {v13.16b}, v8.16b
4451abee99eSArd Biesheuvel	eor		v15.16b, v5.16b, v9.16b
4461abee99eSArd Biesheuvel	tbl		v4.16b, {v14.16b}, v8.16b
4471abee99eSArd Biesheuvel	eor		v10.16b, v6.16b, v9.16b
4481abee99eSArd Biesheuvel	tbl		v5.16b, {v15.16b}, v8.16b
4491abee99eSArd Biesheuvel	eor		v11.16b, v7.16b, v9.16b
4501abee99eSArd Biesheuvel	tbl		v6.16b, {v10.16b}, v8.16b
4511abee99eSArd Biesheuvel	tbl		v7.16b, {v11.16b}, v8.16b
4521abee99eSArd Biesheuvel
4531abee99eSArd Biesheuvel	bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
4541abee99eSArd Biesheuvel
	// enter the loop at the sbox step: the first pass has no
	// separate shift_rows
4551abee99eSArd Biesheuvel	sub		rounds, rounds, #1
4561abee99eSArd Biesheuvel	b		.Lenc_sbox
4571abee99eSArd Biesheuvel
4581abee99eSArd Biesheuvel.Lenc_loop:
4591abee99eSArd Biesheuvel	shift_rows	v0, v1, v2, v3, v4, v5, v6, v7, v24
4601abee99eSArd Biesheuvel.Lenc_sbox:
4611abee99eSArd Biesheuvel	sbox		v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
4621abee99eSArd Biesheuvel								v13, v14, v15
	// carry clear after subs means the counter went below zero:
	// the final round has just been completed
4631abee99eSArd Biesheuvel	subs		rounds, rounds, #1
4641abee99eSArd Biesheuvel	b.cc		.Lenc_done
4651abee99eSArd Biesheuvel
4661abee99eSArd Biesheuvel	enc_next_rk
4671abee99eSArd Biesheuvel
	// sbox left its result in permuted registers; this argument
	// order feeds mix_cols accordingly
4681abee99eSArd Biesheuvel	mix_cols	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
4691abee99eSArd Biesheuvel								v13, v14, v15
4701abee99eSArd Biesheuvel
4711abee99eSArd Biesheuvel	add_round_key	v0, v1, v2, v3, v4, v5, v6, v7
4721abee99eSArd Biesheuvel
	// still testing the flags from subs above: the three macros in
	// between expand only to loads and SIMD instructions. When the
	// counter has just reached zero, switch the shift_rows mask to
	// SRM0 for the last pass.
4731abee99eSArd Biesheuvel	b.ne		.Lenc_loop
4741abee99eSArd Biesheuvel	ldr		q24, SRM0
4751abee99eSArd Biesheuvel	b		.Lenc_loop
4761abee99eSArd Biesheuvel
4771abee99eSArd Biesheuvel.Lenc_done:
4781abee99eSArd Biesheuvel	ldr		q12, [bskey]			// last round key
4791abee99eSArd Biesheuvel
4801abee99eSArd Biesheuvel	bitslice	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11
4811abee99eSArd Biesheuvel
4821abee99eSArd Biesheuvel	eor		v0.16b, v0.16b, v12.16b
4831abee99eSArd Biesheuvel	eor		v1.16b, v1.16b, v12.16b
4841abee99eSArd Biesheuvel	eor		v4.16b, v4.16b, v12.16b
4851abee99eSArd Biesheuvel	eor		v6.16b, v6.16b, v12.16b
4861abee99eSArd Biesheuvel	eor		v3.16b, v3.16b, v12.16b
4871abee99eSArd Biesheuvel	eor		v7.16b, v7.16b, v12.16b
4881abee99eSArd Biesheuvel	eor		v2.16b, v2.16b, v12.16b
4891abee99eSArd Biesheuvel	eor		v5.16b, v5.16b, v12.16b
4901abee99eSArd Biesheuvel	ret
4910e89640bSMark BrownSYM_FUNC_END(aesbs_encrypt8)
4921abee99eSArd Biesheuvel
4931abee99eSArd Biesheuvel	.align		4
4940e89640bSMark BrownSYM_FUNC_START_LOCAL(aesbs_decrypt8)
	/*
	 * Decrypt eight 16-byte blocks in parallel. File-local routine with
	 * a private register convention:
	 *
	 *   in:   v0-v7  = the eight input blocks
	 *         bskey  = start of the key schedule in the layout written
	 *                  by aesbs_convert_key
	 *         rounds = number of AES rounds
	 *   out:  the eight result blocks, in the register order
	 *         v0, v1, v6, v4, v2, v7, v3, v5
	 *   clobbers: x9, v8-v24, rounds, bskey, condition flags
	 *
	 * The key schedule is walked backwards. The first key used is the
	 * 16 bytes at offset rounds * 128 - 112, which is where
	 * aesbs_convert_key stores its final key
	 * (16 + 128 * (rounds - 1) bytes in). dec_next_rk then steps down in
	 * 128-byte units, and the key applied last is the 16 bytes at the
	 * very start of the schedule.
	 */
4951abee99eSArd Biesheuvel	lsl		x9, rounds, #7
4961abee99eSArd Biesheuvel	add		bskey, bskey, x9
4971abee99eSArd Biesheuvel
4981abee99eSArd Biesheuvel	ldr		q9, [bskey, #-112]!		// round 0 key
4991abee99eSArd Biesheuvel	ldr		q8, M0ISR
5001abee99eSArd Biesheuvel	ldr		q24, ISR
5011abee99eSArd Biesheuvel
5021abee99eSArd Biesheuvel	eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
5031abee99eSArd Biesheuvel	eor		v11.16b, v1.16b, v9.16b
5041abee99eSArd Biesheuvel	tbl		v0.16b, {v10.16b}, v8.16b
5051abee99eSArd Biesheuvel	eor		v12.16b, v2.16b, v9.16b
5061abee99eSArd Biesheuvel	tbl		v1.16b, {v11.16b}, v8.16b
5071abee99eSArd Biesheuvel	eor		v13.16b, v3.16b, v9.16b
5081abee99eSArd Biesheuvel	tbl		v2.16b, {v12.16b}, v8.16b
5091abee99eSArd Biesheuvel	eor		v14.16b, v4.16b, v9.16b
5101abee99eSArd Biesheuvel	tbl		v3.16b, {v13.16b}, v8.16b
5111abee99eSArd Biesheuvel	eor		v15.16b, v5.16b, v9.16b
5121abee99eSArd Biesheuvel	tbl		v4.16b, {v14.16b}, v8.16b
5131abee99eSArd Biesheuvel	eor		v10.16b, v6.16b, v9.16b
5141abee99eSArd Biesheuvel	tbl		v5.16b, {v15.16b}, v8.16b
5151abee99eSArd Biesheuvel	eor		v11.16b, v7.16b, v9.16b
5161abee99eSArd Biesheuvel	tbl		v6.16b, {v10.16b}, v8.16b
5171abee99eSArd Biesheuvel	tbl		v7.16b, {v11.16b}, v8.16b
5181abee99eSArd Biesheuvel
5191abee99eSArd Biesheuvel	bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11
5201abee99eSArd Biesheuvel
	// enter the loop at the inv_sbox step: the first pass has no
	// separate shift_rows
5211abee99eSArd Biesheuvel	sub		rounds, rounds, #1
5221abee99eSArd Biesheuvel	b		.Ldec_sbox
5231abee99eSArd Biesheuvel
5241abee99eSArd Biesheuvel.Ldec_loop:
5251abee99eSArd Biesheuvel	shift_rows	v0, v1, v2, v3, v4, v5, v6, v7, v24
5261abee99eSArd Biesheuvel.Ldec_sbox:
5271abee99eSArd Biesheuvel	inv_sbox	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
5281abee99eSArd Biesheuvel								v13, v14, v15
	// carry clear after subs means the counter went below zero:
	// the final round has just been completed
5291abee99eSArd Biesheuvel	subs		rounds, rounds, #1
5301abee99eSArd Biesheuvel	b.cc		.Ldec_done
5311abee99eSArd Biesheuvel
5321abee99eSArd Biesheuvel	dec_next_rk
5331abee99eSArd Biesheuvel
	// inv_sbox left its result in permuted registers; both macros
	// below are given that order
5341abee99eSArd Biesheuvel	add_round_key	v0, v1, v6, v4, v2, v7, v3, v5
5351abee99eSArd Biesheuvel
5361abee99eSArd Biesheuvel	inv_mix_cols	v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \
5371abee99eSArd Biesheuvel								v13, v14, v15
5381abee99eSArd Biesheuvel
	// still testing the flags from subs above: the macros in between
	// expand only to loads and SIMD instructions. When the counter
	// has just reached zero, switch the shift_rows mask to ISRM0 for
	// the last pass.
5391abee99eSArd Biesheuvel	b.ne		.Ldec_loop
5401abee99eSArd Biesheuvel	ldr		q24, ISRM0
5411abee99eSArd Biesheuvel	b		.Ldec_loop
5421abee99eSArd Biesheuvel.Ldec_done:
5431abee99eSArd Biesheuvel	ldr		q12, [bskey, #-16]		// last round key
5441abee99eSArd Biesheuvel
5451abee99eSArd Biesheuvel	bitslice	v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11
5461abee99eSArd Biesheuvel
5471abee99eSArd Biesheuvel	eor		v0.16b, v0.16b, v12.16b
5481abee99eSArd Biesheuvel	eor		v1.16b, v1.16b, v12.16b
5491abee99eSArd Biesheuvel	eor		v6.16b, v6.16b, v12.16b
5501abee99eSArd Biesheuvel	eor		v4.16b, v4.16b, v12.16b
5511abee99eSArd Biesheuvel	eor		v2.16b, v2.16b, v12.16b
5521abee99eSArd Biesheuvel	eor		v7.16b, v7.16b, v12.16b
5531abee99eSArd Biesheuvel	eor		v3.16b, v3.16b, v12.16b
5541abee99eSArd Biesheuvel	eor		v5.16b, v5.16b, v12.16b
5551abee99eSArd Biesheuvel	ret
5560e89640bSMark BrownSYM_FUNC_END(aesbs_decrypt8)
5571abee99eSArd Biesheuvel
5581abee99eSArd Biesheuvel	/*
5591abee99eSArd Biesheuvel	 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
5601abee99eSArd Biesheuvel	 *		     int blocks)
5611abee99eSArd Biesheuvel	 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
5621abee99eSArd Biesheuvel	 *		     int blocks)
5631abee99eSArd Biesheuvel	 */
5641abee99eSArd Biesheuvel	.macro		__ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
	/*
	 * Shared body of the two ECB entry points above.
	 *
	 *   do8    - 8-block core routine to call (aesbs_encrypt8 or
	 *            aesbs_decrypt8)
	 *   o0..o7 - registers in which do8 returns blocks 0..7, in output
	 *            order
	 *
	 * On entry x0 = out, x1 = in, x2 = rk, x3 = rounds, x4 = blocks.
	 * These are copied to x19..x23 so that they survive the call to do8
	 * and cond_yield_neon. frame_push / frame_pop and cond_yield_neon
	 * come from asm/assembler.h and are not visible in this file.
	 *
	 * Blocks are processed in groups of up to eight. For a short final
	 * group, x5 has the single bit <number of blocks left> set, and the
	 * tbnz chains stop loading/storing after that many blocks. do8
	 * always runs on all eight registers; the surplus ones hold
	 * whatever they held before. x5 must stay intact across do8: neither
	 * core routine in this file writes it.
	 *
	 * NOTE(review): bit 0 of x5 is never tested, so blocks == 0 on entry
	 * would load, process and store a full group of eight. Presumably
	 * callers never pass 0 -- confirm against the C glue code.
	 */
56520ab6332SArd Biesheuvel	frame_push	5
56620ab6332SArd Biesheuvel
56720ab6332SArd Biesheuvel	mov		x19, x0
56820ab6332SArd Biesheuvel	mov		x20, x1
56920ab6332SArd Biesheuvel	mov		x21, x2
57020ab6332SArd Biesheuvel	mov		x22, x3
57120ab6332SArd Biesheuvel	mov		x23, x4
5721abee99eSArd Biesheuvel
	// x5 = 1 << blocks_left; x23 = max(blocks_left - 8, 0).
	// x5 is then kept only when fewer than 8 blocks were left
	// (subs result negative), and cleared otherwise.
5731abee99eSArd Biesheuvel99:	mov		x5, #1
57420ab6332SArd Biesheuvel	lsl		x5, x5, x23
57520ab6332SArd Biesheuvel	subs		w23, w23, #8
57620ab6332SArd Biesheuvel	csel		x23, x23, xzr, pl
5771abee99eSArd Biesheuvel	csel		x5, x5, xzr, mi
5781abee99eSArd Biesheuvel
	// load up to 8 blocks, stopping once the short-group count is hit
57920ab6332SArd Biesheuvel	ld1		{v0.16b}, [x20], #16
5801abee99eSArd Biesheuvel	tbnz		x5, #1, 0f
58120ab6332SArd Biesheuvel	ld1		{v1.16b}, [x20], #16
5821abee99eSArd Biesheuvel	tbnz		x5, #2, 0f
58320ab6332SArd Biesheuvel	ld1		{v2.16b}, [x20], #16
5841abee99eSArd Biesheuvel	tbnz		x5, #3, 0f
58520ab6332SArd Biesheuvel	ld1		{v3.16b}, [x20], #16
5861abee99eSArd Biesheuvel	tbnz		x5, #4, 0f
58720ab6332SArd Biesheuvel	ld1		{v4.16b}, [x20], #16
5881abee99eSArd Biesheuvel	tbnz		x5, #5, 0f
58920ab6332SArd Biesheuvel	ld1		{v5.16b}, [x20], #16
5901abee99eSArd Biesheuvel	tbnz		x5, #6, 0f
59120ab6332SArd Biesheuvel	ld1		{v6.16b}, [x20], #16
5921abee99eSArd Biesheuvel	tbnz		x5, #7, 0f
59320ab6332SArd Biesheuvel	ld1		{v7.16b}, [x20], #16
5941abee99eSArd Biesheuvel
	// the core routines modify bskey and rounds, so reload both for
	// every group
59520ab6332SArd Biesheuvel0:	mov		bskey, x21
59620ab6332SArd Biesheuvel	mov		rounds, x22
5971abee99eSArd Biesheuvel	bl		\do8
5981abee99eSArd Biesheuvel
	// store the results in output order, with the same early exit
59920ab6332SArd Biesheuvel	st1		{\o0\().16b}, [x19], #16
6001abee99eSArd Biesheuvel	tbnz		x5, #1, 1f
60120ab6332SArd Biesheuvel	st1		{\o1\().16b}, [x19], #16
6021abee99eSArd Biesheuvel	tbnz		x5, #2, 1f
60320ab6332SArd Biesheuvel	st1		{\o2\().16b}, [x19], #16
6041abee99eSArd Biesheuvel	tbnz		x5, #3, 1f
60520ab6332SArd Biesheuvel	st1		{\o3\().16b}, [x19], #16
6061abee99eSArd Biesheuvel	tbnz		x5, #4, 1f
60720ab6332SArd Biesheuvel	st1		{\o4\().16b}, [x19], #16
6081abee99eSArd Biesheuvel	tbnz		x5, #5, 1f
60920ab6332SArd Biesheuvel	st1		{\o5\().16b}, [x19], #16
6101abee99eSArd Biesheuvel	tbnz		x5, #6, 1f
61120ab6332SArd Biesheuvel	st1		{\o6\().16b}, [x19], #16
6121abee99eSArd Biesheuvel	tbnz		x5, #7, 1f
61320ab6332SArd Biesheuvel	st1		{\o7\().16b}, [x19], #16
6141abee99eSArd Biesheuvel
	// a full group was written: finish if nothing is left, otherwise
	// go round again for the next group
61520ab6332SArd Biesheuvel	cbz		x23, 1f
61620ab6332SArd Biesheuvel	cond_yield_neon
61720ab6332SArd Biesheuvel	b		99b
6181abee99eSArd Biesheuvel
61920ab6332SArd Biesheuvel1:	frame_pop
6201abee99eSArd Biesheuvel	ret
6211abee99eSArd Biesheuvel	.endm
6221abee99eSArd Biesheuvel
6231abee99eSArd Biesheuvel	.align		4
6240e89640bSMark BrownSYM_FUNC_START(aesbs_ecb_encrypt)
6251abee99eSArd Biesheuvel	__ecb_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
6260e89640bSMark BrownSYM_FUNC_END(aesbs_ecb_encrypt)
6271abee99eSArd Biesheuvel
6281abee99eSArd Biesheuvel	.align		4
6290e89640bSMark BrownSYM_FUNC_START(aesbs_ecb_decrypt)
6301abee99eSArd Biesheuvel	__ecb_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
6310e89640bSMark BrownSYM_FUNC_END(aesbs_ecb_decrypt)
6321abee99eSArd Biesheuvel
6331abee99eSArd Biesheuvel	/*
6341abee99eSArd Biesheuvel	 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
6351abee99eSArd Biesheuvel	 *		     int blocks, u8 iv[])
6361abee99eSArd Biesheuvel	 */
6371abee99eSArd Biesheuvel	.align		4
6380e89640bSMark BrownSYM_FUNC_START(aesbs_cbc_decrypt)
63920ab6332SArd Biesheuvel	frame_push	6
64020ab6332SArd Biesheuvel
64120ab6332SArd Biesheuvel	mov		x19, x0
64220ab6332SArd Biesheuvel	mov		x20, x1
64320ab6332SArd Biesheuvel	mov		x21, x2
64420ab6332SArd Biesheuvel	mov		x22, x3
64520ab6332SArd Biesheuvel	mov		x23, x4
64620ab6332SArd Biesheuvel	mov		x24, x5
6471abee99eSArd Biesheuvel
6481abee99eSArd Biesheuvel99:	mov		x6, #1
64920ab6332SArd Biesheuvel	lsl		x6, x6, x23
65020ab6332SArd Biesheuvel	subs		w23, w23, #8
65120ab6332SArd Biesheuvel	csel		x23, x23, xzr, pl
6521abee99eSArd Biesheuvel	csel		x6, x6, xzr, mi
6531abee99eSArd Biesheuvel
65420ab6332SArd Biesheuvel	ld1		{v0.16b}, [x20], #16
6551abee99eSArd Biesheuvel	mov		v25.16b, v0.16b
6561abee99eSArd Biesheuvel	tbnz		x6, #1, 0f
65720ab6332SArd Biesheuvel	ld1		{v1.16b}, [x20], #16
6581abee99eSArd Biesheuvel	mov		v26.16b, v1.16b
6591abee99eSArd Biesheuvel	tbnz		x6, #2, 0f
66020ab6332SArd Biesheuvel	ld1		{v2.16b}, [x20], #16
6611abee99eSArd Biesheuvel	mov		v27.16b, v2.16b
6621abee99eSArd Biesheuvel	tbnz		x6, #3, 0f
66320ab6332SArd Biesheuvel	ld1		{v3.16b}, [x20], #16
6641abee99eSArd Biesheuvel	mov		v28.16b, v3.16b
6651abee99eSArd Biesheuvel	tbnz		x6, #4, 0f
66620ab6332SArd Biesheuvel	ld1		{v4.16b}, [x20], #16
6671abee99eSArd Biesheuvel	mov		v29.16b, v4.16b
6681abee99eSArd Biesheuvel	tbnz		x6, #5, 0f
66920ab6332SArd Biesheuvel	ld1		{v5.16b}, [x20], #16
6701abee99eSArd Biesheuvel	mov		v30.16b, v5.16b
6711abee99eSArd Biesheuvel	tbnz		x6, #6, 0f
67220ab6332SArd Biesheuvel	ld1		{v6.16b}, [x20], #16
6731abee99eSArd Biesheuvel	mov		v31.16b, v6.16b
6741abee99eSArd Biesheuvel	tbnz		x6, #7, 0f
67520ab6332SArd Biesheuvel	ld1		{v7.16b}, [x20]
6761abee99eSArd Biesheuvel
67720ab6332SArd Biesheuvel0:	mov		bskey, x21
67820ab6332SArd Biesheuvel	mov		rounds, x22
6791abee99eSArd Biesheuvel	bl		aesbs_decrypt8
6801abee99eSArd Biesheuvel
68120ab6332SArd Biesheuvel	ld1		{v24.16b}, [x24]		// load IV
6821abee99eSArd Biesheuvel
6831abee99eSArd Biesheuvel	eor		v1.16b, v1.16b, v25.16b
6841abee99eSArd Biesheuvel	eor		v6.16b, v6.16b, v26.16b
6851abee99eSArd Biesheuvel	eor		v4.16b, v4.16b, v27.16b
6861abee99eSArd Biesheuvel	eor		v2.16b, v2.16b, v28.16b
6871abee99eSArd Biesheuvel	eor		v7.16b, v7.16b, v29.16b
6881abee99eSArd Biesheuvel	eor		v0.16b, v0.16b, v24.16b
6891abee99eSArd Biesheuvel	eor		v3.16b, v3.16b, v30.16b
6901abee99eSArd Biesheuvel	eor		v5.16b, v5.16b, v31.16b
6911abee99eSArd Biesheuvel
69220ab6332SArd Biesheuvel	st1		{v0.16b}, [x19], #16
6931abee99eSArd Biesheuvel	mov		v24.16b, v25.16b
6941abee99eSArd Biesheuvel	tbnz		x6, #1, 1f
69520ab6332SArd Biesheuvel	st1		{v1.16b}, [x19], #16
6961abee99eSArd Biesheuvel	mov		v24.16b, v26.16b
6971abee99eSArd Biesheuvel	tbnz		x6, #2, 1f
69820ab6332SArd Biesheuvel	st1		{v6.16b}, [x19], #16
6991abee99eSArd Biesheuvel	mov		v24.16b, v27.16b
7001abee99eSArd Biesheuvel	tbnz		x6, #3, 1f
70120ab6332SArd Biesheuvel	st1		{v4.16b}, [x19], #16
7021abee99eSArd Biesheuvel	mov		v24.16b, v28.16b
7031abee99eSArd Biesheuvel	tbnz		x6, #4, 1f
70420ab6332SArd Biesheuvel	st1		{v2.16b}, [x19], #16
7051abee99eSArd Biesheuvel	mov		v24.16b, v29.16b
7061abee99eSArd Biesheuvel	tbnz		x6, #5, 1f
70720ab6332SArd Biesheuvel	st1		{v7.16b}, [x19], #16
7081abee99eSArd Biesheuvel	mov		v24.16b, v30.16b
7091abee99eSArd Biesheuvel	tbnz		x6, #6, 1f
71020ab6332SArd Biesheuvel	st1		{v3.16b}, [x19], #16
7111abee99eSArd Biesheuvel	mov		v24.16b, v31.16b
7121abee99eSArd Biesheuvel	tbnz		x6, #7, 1f
71320ab6332SArd Biesheuvel	ld1		{v24.16b}, [x20], #16
71420ab6332SArd Biesheuvel	st1		{v5.16b}, [x19], #16
71520ab6332SArd Biesheuvel1:	st1		{v24.16b}, [x24]		// store IV
7161abee99eSArd Biesheuvel
71720ab6332SArd Biesheuvel	cbz		x23, 2f
71820ab6332SArd Biesheuvel	cond_yield_neon
71920ab6332SArd Biesheuvel	b		99b
7201abee99eSArd Biesheuvel
72120ab6332SArd Biesheuvel2:	frame_pop
7221abee99eSArd Biesheuvel	ret
7230e89640bSMark BrownSYM_FUNC_END(aesbs_cbc_decrypt)
7241abee99eSArd Biesheuvel
7251abee99eSArd Biesheuvel	.macro		next_tweak, out, in, const, tmp
7261abee99eSArd Biesheuvel	sshr		\tmp\().2d,  \in\().2d,   #63
7271abee99eSArd Biesheuvel	and		\tmp\().16b, \tmp\().16b, \const\().16b
7281abee99eSArd Biesheuvel	add		\out\().2d,  \in\().2d,   \in\().2d
7291abee99eSArd Biesheuvel	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8
7301abee99eSArd Biesheuvel	eor		\out\().16b, \out\().16b, \tmp\().16b
7311abee99eSArd Biesheuvel	.endm
7321abee99eSArd Biesheuvel
7331abee99eSArd Biesheuvel	/*
7341abee99eSArd Biesheuvel	 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
7351abee99eSArd Biesheuvel	 *		     int blocks, u8 iv[])
7361abee99eSArd Biesheuvel	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
7371abee99eSArd Biesheuvel	 *		     int blocks, u8 iv[])
7381abee99eSArd Biesheuvel	 */
7390e89640bSMark BrownSYM_FUNC_START_LOCAL(__xts_crypt8)
7401abee99eSArd Biesheuvel	mov		x6, #1
74120ab6332SArd Biesheuvel	lsl		x6, x6, x23
74220ab6332SArd Biesheuvel	subs		w23, w23, #8
74320ab6332SArd Biesheuvel	csel		x23, x23, xzr, pl
7441abee99eSArd Biesheuvel	csel		x6, x6, xzr, mi
7451abee99eSArd Biesheuvel
74620ab6332SArd Biesheuvel	ld1		{v0.16b}, [x20], #16
7471abee99eSArd Biesheuvel	next_tweak	v26, v25, v30, v31
7481abee99eSArd Biesheuvel	eor		v0.16b, v0.16b, v25.16b
7491abee99eSArd Biesheuvel	tbnz		x6, #1, 0f
7501abee99eSArd Biesheuvel
75120ab6332SArd Biesheuvel	ld1		{v1.16b}, [x20], #16
7521abee99eSArd Biesheuvel	next_tweak	v27, v26, v30, v31
7531abee99eSArd Biesheuvel	eor		v1.16b, v1.16b, v26.16b
7541abee99eSArd Biesheuvel	tbnz		x6, #2, 0f
7551abee99eSArd Biesheuvel
75620ab6332SArd Biesheuvel	ld1		{v2.16b}, [x20], #16
7571abee99eSArd Biesheuvel	next_tweak	v28, v27, v30, v31
7581abee99eSArd Biesheuvel	eor		v2.16b, v2.16b, v27.16b
7591abee99eSArd Biesheuvel	tbnz		x6, #3, 0f
7601abee99eSArd Biesheuvel
76120ab6332SArd Biesheuvel	ld1		{v3.16b}, [x20], #16
7621abee99eSArd Biesheuvel	next_tweak	v29, v28, v30, v31
7631abee99eSArd Biesheuvel	eor		v3.16b, v3.16b, v28.16b
7641abee99eSArd Biesheuvel	tbnz		x6, #4, 0f
7651abee99eSArd Biesheuvel
76620ab6332SArd Biesheuvel	ld1		{v4.16b}, [x20], #16
76720ab6332SArd Biesheuvel	str		q29, [sp, #.Lframe_local_offset]
7681abee99eSArd Biesheuvel	eor		v4.16b, v4.16b, v29.16b
7691abee99eSArd Biesheuvel	next_tweak	v29, v29, v30, v31
7701abee99eSArd Biesheuvel	tbnz		x6, #5, 0f
7711abee99eSArd Biesheuvel
77220ab6332SArd Biesheuvel	ld1		{v5.16b}, [x20], #16
77320ab6332SArd Biesheuvel	str		q29, [sp, #.Lframe_local_offset + 16]
7741abee99eSArd Biesheuvel	eor		v5.16b, v5.16b, v29.16b
7751abee99eSArd Biesheuvel	next_tweak	v29, v29, v30, v31
7761abee99eSArd Biesheuvel	tbnz		x6, #6, 0f
7771abee99eSArd Biesheuvel
77820ab6332SArd Biesheuvel	ld1		{v6.16b}, [x20], #16
77920ab6332SArd Biesheuvel	str		q29, [sp, #.Lframe_local_offset + 32]
7801abee99eSArd Biesheuvel	eor		v6.16b, v6.16b, v29.16b
7811abee99eSArd Biesheuvel	next_tweak	v29, v29, v30, v31
7821abee99eSArd Biesheuvel	tbnz		x6, #7, 0f
7831abee99eSArd Biesheuvel
78420ab6332SArd Biesheuvel	ld1		{v7.16b}, [x20], #16
78520ab6332SArd Biesheuvel	str		q29, [sp, #.Lframe_local_offset + 48]
7861abee99eSArd Biesheuvel	eor		v7.16b, v7.16b, v29.16b
7871abee99eSArd Biesheuvel	next_tweak	v29, v29, v30, v31
7881abee99eSArd Biesheuvel
78920ab6332SArd Biesheuvel0:	mov		bskey, x21
79020ab6332SArd Biesheuvel	mov		rounds, x22
791*39e4716cSJeremy Linton	br		x16
7920e89640bSMark BrownSYM_FUNC_END(__xts_crypt8)
7931abee99eSArd Biesheuvel
7941abee99eSArd Biesheuvel	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
79520ab6332SArd Biesheuvel	frame_push	6, 64
7961abee99eSArd Biesheuvel
79720ab6332SArd Biesheuvel	mov		x19, x0
79820ab6332SArd Biesheuvel	mov		x20, x1
79920ab6332SArd Biesheuvel	mov		x21, x2
80020ab6332SArd Biesheuvel	mov		x22, x3
80120ab6332SArd Biesheuvel	mov		x23, x4
80220ab6332SArd Biesheuvel	mov		x24, x5
80320ab6332SArd Biesheuvel
8047a3b1c6eSArd Biesheuvel0:	movi		v30.2s, #0x1
8057a3b1c6eSArd Biesheuvel	movi		v25.2s, #0x87
8067a3b1c6eSArd Biesheuvel	uzp1		v30.4s, v30.4s, v25.4s
80720ab6332SArd Biesheuvel	ld1		{v25.16b}, [x24]
8081abee99eSArd Biesheuvel
809*39e4716cSJeremy Linton99:	adr		x16, \do8
8101abee99eSArd Biesheuvel	bl		__xts_crypt8
8111abee99eSArd Biesheuvel
81220ab6332SArd Biesheuvel	ldp		q16, q17, [sp, #.Lframe_local_offset]
81320ab6332SArd Biesheuvel	ldp		q18, q19, [sp, #.Lframe_local_offset + 32]
8141abee99eSArd Biesheuvel
8151abee99eSArd Biesheuvel	eor		\o0\().16b, \o0\().16b, v25.16b
8161abee99eSArd Biesheuvel	eor		\o1\().16b, \o1\().16b, v26.16b
8171abee99eSArd Biesheuvel	eor		\o2\().16b, \o2\().16b, v27.16b
8181abee99eSArd Biesheuvel	eor		\o3\().16b, \o3\().16b, v28.16b
8191abee99eSArd Biesheuvel
82020ab6332SArd Biesheuvel	st1		{\o0\().16b}, [x19], #16
8211abee99eSArd Biesheuvel	mov		v25.16b, v26.16b
8221abee99eSArd Biesheuvel	tbnz		x6, #1, 1f
82320ab6332SArd Biesheuvel	st1		{\o1\().16b}, [x19], #16
8241abee99eSArd Biesheuvel	mov		v25.16b, v27.16b
8251abee99eSArd Biesheuvel	tbnz		x6, #2, 1f
82620ab6332SArd Biesheuvel	st1		{\o2\().16b}, [x19], #16
8271abee99eSArd Biesheuvel	mov		v25.16b, v28.16b
8281abee99eSArd Biesheuvel	tbnz		x6, #3, 1f
82920ab6332SArd Biesheuvel	st1		{\o3\().16b}, [x19], #16
8301abee99eSArd Biesheuvel	mov		v25.16b, v29.16b
8311abee99eSArd Biesheuvel	tbnz		x6, #4, 1f
8321abee99eSArd Biesheuvel
8331abee99eSArd Biesheuvel	eor		\o4\().16b, \o4\().16b, v16.16b
8341abee99eSArd Biesheuvel	eor		\o5\().16b, \o5\().16b, v17.16b
8351abee99eSArd Biesheuvel	eor		\o6\().16b, \o6\().16b, v18.16b
8361abee99eSArd Biesheuvel	eor		\o7\().16b, \o7\().16b, v19.16b
8371abee99eSArd Biesheuvel
83820ab6332SArd Biesheuvel	st1		{\o4\().16b}, [x19], #16
8391abee99eSArd Biesheuvel	tbnz		x6, #5, 1f
84020ab6332SArd Biesheuvel	st1		{\o5\().16b}, [x19], #16
8411abee99eSArd Biesheuvel	tbnz		x6, #6, 1f
84220ab6332SArd Biesheuvel	st1		{\o6\().16b}, [x19], #16
8431abee99eSArd Biesheuvel	tbnz		x6, #7, 1f
84420ab6332SArd Biesheuvel	st1		{\o7\().16b}, [x19], #16
8451abee99eSArd Biesheuvel
84620ab6332SArd Biesheuvel	cbz		x23, 1f
84720ab6332SArd Biesheuvel	st1		{v25.16b}, [x24]
8481abee99eSArd Biesheuvel
84920ab6332SArd Biesheuvel	cond_yield_neon	0b
85020ab6332SArd Biesheuvel	b		99b
85120ab6332SArd Biesheuvel
85220ab6332SArd Biesheuvel1:	st1		{v25.16b}, [x24]
85320ab6332SArd Biesheuvel	frame_pop
8541abee99eSArd Biesheuvel	ret
8551abee99eSArd Biesheuvel	.endm
8561abee99eSArd Biesheuvel
8570e89640bSMark BrownSYM_FUNC_START(aesbs_xts_encrypt)
8581abee99eSArd Biesheuvel	__xts_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
8590e89640bSMark BrownSYM_FUNC_END(aesbs_xts_encrypt)
8601abee99eSArd Biesheuvel
8610e89640bSMark BrownSYM_FUNC_START(aesbs_xts_decrypt)
8621abee99eSArd Biesheuvel	__xts_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
8630e89640bSMark BrownSYM_FUNC_END(aesbs_xts_decrypt)
8641abee99eSArd Biesheuvel
8651abee99eSArd Biesheuvel	.macro		next_ctr, v
8661abee99eSArd Biesheuvel	mov		\v\().d[1], x8
8671abee99eSArd Biesheuvel	adds		x8, x8, #1
8681abee99eSArd Biesheuvel	mov		\v\().d[0], x7
8691abee99eSArd Biesheuvel	adc		x7, x7, xzr
8701abee99eSArd Biesheuvel	rev64		\v\().16b, \v\().16b
8711abee99eSArd Biesheuvel	.endm
8721abee99eSArd Biesheuvel
8731abee99eSArd Biesheuvel	/*
8741abee99eSArd Biesheuvel	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
87588a3f582SArd Biesheuvel	 *		     int rounds, int blocks, u8 iv[], u8 final[])
8761abee99eSArd Biesheuvel	 */
8770e89640bSMark BrownSYM_FUNC_START(aesbs_ctr_encrypt)
87820ab6332SArd Biesheuvel	frame_push	8
8791abee99eSArd Biesheuvel
88020ab6332SArd Biesheuvel	mov		x19, x0
88120ab6332SArd Biesheuvel	mov		x20, x1
88220ab6332SArd Biesheuvel	mov		x21, x2
88320ab6332SArd Biesheuvel	mov		x22, x3
88420ab6332SArd Biesheuvel	mov		x23, x4
88520ab6332SArd Biesheuvel	mov		x24, x5
88620ab6332SArd Biesheuvel	mov		x25, x6
8871abee99eSArd Biesheuvel
88820ab6332SArd Biesheuvel	cmp		x25, #0
88920ab6332SArd Biesheuvel	cset		x26, ne
89020ab6332SArd Biesheuvel	add		x23, x23, x26		// do one extra block if final
89120ab6332SArd Biesheuvel
89220ab6332SArd Biesheuvel98:	ldp		x7, x8, [x24]
89320ab6332SArd Biesheuvel	ld1		{v0.16b}, [x24]
8941abee99eSArd BiesheuvelCPU_LE(	rev		x7, x7		)
8951abee99eSArd BiesheuvelCPU_LE(	rev		x8, x8		)
8961abee99eSArd Biesheuvel	adds		x8, x8, #1
8971abee99eSArd Biesheuvel	adc		x7, x7, xzr
8981abee99eSArd Biesheuvel
8991abee99eSArd Biesheuvel99:	mov		x9, #1
90020ab6332SArd Biesheuvel	lsl		x9, x9, x23
90120ab6332SArd Biesheuvel	subs		w23, w23, #8
90220ab6332SArd Biesheuvel	csel		x23, x23, xzr, pl
9031abee99eSArd Biesheuvel	csel		x9, x9, xzr, le
9041abee99eSArd Biesheuvel
90588a3f582SArd Biesheuvel	tbnz		x9, #1, 0f
9061abee99eSArd Biesheuvel	next_ctr	v1
90788a3f582SArd Biesheuvel	tbnz		x9, #2, 0f
9081abee99eSArd Biesheuvel	next_ctr	v2
90988a3f582SArd Biesheuvel	tbnz		x9, #3, 0f
9101abee99eSArd Biesheuvel	next_ctr	v3
91188a3f582SArd Biesheuvel	tbnz		x9, #4, 0f
9121abee99eSArd Biesheuvel	next_ctr	v4
91388a3f582SArd Biesheuvel	tbnz		x9, #5, 0f
9141abee99eSArd Biesheuvel	next_ctr	v5
91588a3f582SArd Biesheuvel	tbnz		x9, #6, 0f
9161abee99eSArd Biesheuvel	next_ctr	v6
91788a3f582SArd Biesheuvel	tbnz		x9, #7, 0f
9181abee99eSArd Biesheuvel	next_ctr	v7
9191abee99eSArd Biesheuvel
92020ab6332SArd Biesheuvel0:	mov		bskey, x21
92120ab6332SArd Biesheuvel	mov		rounds, x22
9221abee99eSArd Biesheuvel	bl		aesbs_encrypt8
9231abee99eSArd Biesheuvel
92420ab6332SArd Biesheuvel	lsr		x9, x9, x26		// disregard the extra block
9251abee99eSArd Biesheuvel	tbnz		x9, #0, 0f
9261abee99eSArd Biesheuvel
92720ab6332SArd Biesheuvel	ld1		{v8.16b}, [x20], #16
9281abee99eSArd Biesheuvel	eor		v0.16b, v0.16b, v8.16b
92920ab6332SArd Biesheuvel	st1		{v0.16b}, [x19], #16
9301abee99eSArd Biesheuvel	tbnz		x9, #1, 1f
9311abee99eSArd Biesheuvel
93220ab6332SArd Biesheuvel	ld1		{v9.16b}, [x20], #16
9331abee99eSArd Biesheuvel	eor		v1.16b, v1.16b, v9.16b
93420ab6332SArd Biesheuvel	st1		{v1.16b}, [x19], #16
9351abee99eSArd Biesheuvel	tbnz		x9, #2, 2f
9361abee99eSArd Biesheuvel
93720ab6332SArd Biesheuvel	ld1		{v10.16b}, [x20], #16
9381abee99eSArd Biesheuvel	eor		v4.16b, v4.16b, v10.16b
93920ab6332SArd Biesheuvel	st1		{v4.16b}, [x19], #16
9401abee99eSArd Biesheuvel	tbnz		x9, #3, 3f
9411abee99eSArd Biesheuvel
94220ab6332SArd Biesheuvel	ld1		{v11.16b}, [x20], #16
9431abee99eSArd Biesheuvel	eor		v6.16b, v6.16b, v11.16b
94420ab6332SArd Biesheuvel	st1		{v6.16b}, [x19], #16
9451abee99eSArd Biesheuvel	tbnz		x9, #4, 4f
9461abee99eSArd Biesheuvel
94720ab6332SArd Biesheuvel	ld1		{v12.16b}, [x20], #16
9481abee99eSArd Biesheuvel	eor		v3.16b, v3.16b, v12.16b
94920ab6332SArd Biesheuvel	st1		{v3.16b}, [x19], #16
9501abee99eSArd Biesheuvel	tbnz		x9, #5, 5f
9511abee99eSArd Biesheuvel
95220ab6332SArd Biesheuvel	ld1		{v13.16b}, [x20], #16
9531abee99eSArd Biesheuvel	eor		v7.16b, v7.16b, v13.16b
95420ab6332SArd Biesheuvel	st1		{v7.16b}, [x19], #16
9551abee99eSArd Biesheuvel	tbnz		x9, #6, 6f
9561abee99eSArd Biesheuvel
95720ab6332SArd Biesheuvel	ld1		{v14.16b}, [x20], #16
9581abee99eSArd Biesheuvel	eor		v2.16b, v2.16b, v14.16b
95920ab6332SArd Biesheuvel	st1		{v2.16b}, [x19], #16
9601abee99eSArd Biesheuvel	tbnz		x9, #7, 7f
9611abee99eSArd Biesheuvel
96220ab6332SArd Biesheuvel	ld1		{v15.16b}, [x20], #16
9631abee99eSArd Biesheuvel	eor		v5.16b, v5.16b, v15.16b
96420ab6332SArd Biesheuvel	st1		{v5.16b}, [x19], #16
9651abee99eSArd Biesheuvel
96688a3f582SArd Biesheuvel8:	next_ctr	v0
96720ab6332SArd Biesheuvel	st1		{v0.16b}, [x24]
96812455e32SEric Biggers	cbz		x23, .Lctr_done
9691abee99eSArd Biesheuvel
97020ab6332SArd Biesheuvel	cond_yield_neon	98b
97120ab6332SArd Biesheuvel	b		99b
97220ab6332SArd Biesheuvel
97312455e32SEric Biggers.Lctr_done:
97412455e32SEric Biggers	frame_pop
9751abee99eSArd Biesheuvel	ret
9761abee99eSArd Biesheuvel
9771abee99eSArd Biesheuvel	/*
97888a3f582SArd Biesheuvel	 * If we are handling the tail of the input (x6 != NULL), return the
97988a3f582SArd Biesheuvel	 * final keystream block back to the caller.
9801abee99eSArd Biesheuvel	 */
98112455e32SEric Biggers0:	cbz		x25, 8b
98212455e32SEric Biggers	st1		{v0.16b}, [x25]
98312455e32SEric Biggers	b		8b
98420ab6332SArd Biesheuvel1:	cbz		x25, 8b
98520ab6332SArd Biesheuvel	st1		{v1.16b}, [x25]
9861abee99eSArd Biesheuvel	b		8b
98720ab6332SArd Biesheuvel2:	cbz		x25, 8b
98820ab6332SArd Biesheuvel	st1		{v4.16b}, [x25]
9891abee99eSArd Biesheuvel	b		8b
99020ab6332SArd Biesheuvel3:	cbz		x25, 8b
99120ab6332SArd Biesheuvel	st1		{v6.16b}, [x25]
9921abee99eSArd Biesheuvel	b		8b
99320ab6332SArd Biesheuvel4:	cbz		x25, 8b
99420ab6332SArd Biesheuvel	st1		{v3.16b}, [x25]
9951abee99eSArd Biesheuvel	b		8b
99620ab6332SArd Biesheuvel5:	cbz		x25, 8b
99720ab6332SArd Biesheuvel	st1		{v7.16b}, [x25]
9981abee99eSArd Biesheuvel	b		8b
99920ab6332SArd Biesheuvel6:	cbz		x25, 8b
100020ab6332SArd Biesheuvel	st1		{v2.16b}, [x25]
10011abee99eSArd Biesheuvel	b		8b
100220ab6332SArd Biesheuvel7:	cbz		x25, 8b
100320ab6332SArd Biesheuvel	st1		{v5.16b}, [x25]
10041abee99eSArd Biesheuvel	b		8b
10050e89640bSMark BrownSYM_FUNC_END(aesbs_ctr_encrypt)
1006