/* xref: /linux/arch/arm64/crypto/aes-neonbs-core.S (revision 1ac731c529cd4d6adbce134754b51ff7d822b145) */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Bit sliced AES using NEON instructions
 *
 * Copyright (C) 2016 Linaro Ltd <ard.biesheuvel@linaro.org>
 */
71abee99eSArd Biesheuvel
/*
 * The algorithm implemented here is described in detail by the paper
 * 'Faster and Timing-Attack Resistant AES-GCM' by Emilia Kaesper and
 * Peter Schwabe (https://eprint.iacr.org/2009/129.pdf)
 *
 * This implementation is based primarily on the OpenSSL implementation
 * for 32-bit ARM written by Andy Polyakov <appro@openssl.org>
 */
161abee99eSArd Biesheuvel
#include <linux/linkage.h>
#include <linux/cfi_types.h>
#include <asm/assembler.h>

	.text

	/*
	 * Register aliases used by aesbs_encrypt8/aesbs_decrypt8 and by the
	 * code that calls them. Both helpers modify these registers, so the
	 * callers reload them before every call.
	 */
	rounds		.req	x11		// remaining round counter
	bskey		.req	x12		// cursor into the converted key schedule
251abee99eSArd Biesheuvel
/*
 * in_bs_ch - XOR-only mixing of eight vector operands.
 *
 * Expanded by the sbox macro immediately before inv_gf256 (the name
 * presumably abbreviates "input basis change").
 *
 * Operands are full vector operands including the arrangement suffix
 * (sbox passes vN.16b). b1..b7 are updated in place; b0 is only read.
 * The statements form a dependency chain: later XORs consume values
 * produced by earlier ones, so the order must be preserved.
 */
	.macro		in_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	eor		\b2, \b2, \b1
	eor		\b5, \b5, \b6
	eor		\b3, \b3, \b0
	eor		\b6, \b6, \b2
	eor		\b5, \b5, \b0
	eor		\b6, \b6, \b3
	eor		\b3, \b3, \b7
	eor		\b7, \b7, \b5
	eor		\b3, \b3, \b4
	eor		\b4, \b4, \b5
	eor		\b2, \b2, \b7
	eor		\b3, \b3, \b1
	eor		\b1, \b1, \b5
	.endm
411abee99eSArd Biesheuvel
/*
 * out_bs_ch - XOR-only mixing of eight vector operands.
 *
 * Expanded by the sbox macro immediately after inv_gf256 (the name
 * presumably abbreviates "output basis change").
 *
 * All eight operands are updated in place. The statements form a
 * dependency chain, so the order must be preserved.
 */
	.macro		out_bs_ch, b0, b1, b2, b3, b4, b5, b6, b7
	eor		\b0, \b0, \b6
	eor		\b1, \b1, \b4
	eor		\b4, \b4, \b6
	eor		\b2, \b2, \b0
	eor		\b6, \b6, \b1
	eor		\b1, \b1, \b5
	eor		\b5, \b5, \b3
	eor		\b3, \b3, \b7
	eor		\b7, \b7, \b5
	eor		\b2, \b2, \b5
	eor		\b4, \b4, \b7
	.endm
551abee99eSArd Biesheuvel
/*
 * inv_in_bs_ch - XOR-only mixing of eight vector operands, expanded by
 * inv_sbox immediately before inv_gf256.
 *
 * Note the parameter list: the formal names are deliberately declared
 * in the order b6, b1, b2, b4, b7, b0, b3, b5, so the first positional
 * argument is referred to as b6 in the body, the second as b1, and so
 * on. All eight operands are updated in place and the statement order
 * must be preserved.
 */
	.macro		inv_in_bs_ch, b6, b1, b2, b4, b7, b0, b3, b5
	eor		\b1, \b1, \b7
	eor		\b4, \b4, \b7
	eor		\b7, \b7, \b5
	eor		\b1, \b1, \b3
	eor		\b2, \b2, \b5
	eor		\b3, \b3, \b7
	eor		\b6, \b6, \b1
	eor		\b2, \b2, \b0
	eor		\b5, \b5, \b3
	eor		\b4, \b4, \b6
	eor		\b0, \b0, \b6
	eor		\b1, \b1, \b4
	.endm
701abee99eSArd Biesheuvel
/*
 * inv_out_bs_ch - XOR-only mixing of eight vector operands, expanded by
 * inv_sbox immediately after inv_gf256.
 *
 * The formal names are declared in the order b6, b5, b0, b3, b7, b1,
 * b4, b2, so positional arguments are bound to those names in that
 * order. The operand named b0 is only read; the other seven are
 * updated in place. The statement order must be preserved.
 */
	.macro		inv_out_bs_ch, b6, b5, b0, b3, b7, b1, b4, b2
	eor		\b1, \b1, \b5
	eor		\b2, \b2, \b7
	eor		\b3, \b3, \b1
	eor		\b4, \b4, \b5
	eor		\b7, \b7, \b5
	eor		\b3, \b3, \b4
	eor		\b5, \b5, \b0
	eor		\b3, \b3, \b7
	eor		\b6, \b6, \b2
	eor		\b2, \b2, \b1
	eor		\b6, \b6, \b3
	eor		\b3, \b3, \b0
	eor		\b5, \b5, \b6
	.endm
861abee99eSArd Biesheuvel
/*
 * mul_gf4 - two-plane bitwise product (by its name, presumably a
 * multiplication in GF(2^2)).
 *
 * With x0, x1, y0, y1 denoting the values on entry, the macro leaves:
 *
 *	x0 = ((x0 ^ x1) & y1) ^ (x1 & y0)
 *	x1 = (x1 & y0) ^ ((y0 ^ y1) & x0)
 *
 * y0 and y1 are not modified. t0 and t1 are scratch and are clobbered.
 */
	.macro		mul_gf4, x0, x1, y0, y1, t0, t1
	eor		\t0, \y0, \y1
	and		\t0, \t0, \x0
	eor		\x0, \x0, \x1
	and		\t1, \x1, \y0
	and		\x0, \x0, \y1
	eor		\x1, \t1, \t0
	eor		\x0, \x0, \t1
	.endm
961abee99eSArd Biesheuvel
/*
 * mul_gf4_n_gf4 - two independent two-plane products, interleaved
 * instruction by instruction.
 *
 * With all names denoting the values on entry, the macro leaves:
 *
 *	x0 = ((x0 ^ x1) & y1) ^ ((y0 ^ y1) & x0)
 *	x1 = (x1 & y0) ^ ((x0 ^ x1) & y1)
 *
 *	x2 = ((x2 ^ x3) & y3) ^ (x3 & y2)
 *	x3 = (x3 & y2) ^ ((y2 ^ y3) & x2)
 *
 * The (x2, x3) half is the same formula as mul_gf4; the (x0, x1) half
 * is a different combination of the same three partial products.
 * y0..y3 are not modified. t0 and t1 are scratch and are clobbered.
 */
	.macro		mul_gf4_n_gf4, x0, x1, y0, y1, t0, x2, x3, y2, y3, t1
	eor		\t0, \y0, \y1
	eor		\t1, \y2, \y3
	and		\t0, \t0, \x0
	and		\t1, \t1, \x2
	eor		\x0, \x0, \x1
	eor		\x2, \x2, \x3
	and		\x1, \x1, \y0
	and		\x3, \x3, \y2
	and		\x0, \x0, \y1
	and		\x2, \x2, \y3
	eor		\x1, \x1, \x0
	eor		\x2, \x2, \x3
	eor		\x0, \x0, \t0
	eor		\x3, \x3, \t1
	.endm
1131abee99eSArd Biesheuvel
/*
 * mul_gf16_2 - combine two four-plane groups, (x0..x3) and (x4..x7),
 * with the same four-plane operand (y0..y3), built from mul_gf4 and
 * mul_gf4_n_gf4 (by its name, presumably two GF(2^4) multiplications).
 *
 * x0..x7 are updated in place. y0 and y1 are temporarily XORed with
 * y2 and y3 and XORed again later, so y0..y3 hold their entry values
 * when the macro ends. t0..t3 are scratch and are clobbered.
 */
	.macro		mul_gf16_2, x0, x1, x2, x3, x4, x5, x6, x7, \
				    y0, y1, y2, y3, t0, t1, t2, t3
	eor		\t0, \x0, \x2
	eor		\t1, \x1, \x3
	mul_gf4		\x0, \x1, \y0, \y1, \t2, \t3
	eor		\y0, \y0, \y2
	eor		\y1, \y1, \y3
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x2, \x3, \y2, \y3, \t2
	eor		\x0, \x0, \t0
	eor		\x2, \x2, \t0
	eor		\x1, \x1, \t1
	eor		\x3, \x3, \t1
	eor		\t0, \x4, \x6
	eor		\t1, \x5, \x7
	mul_gf4_n_gf4	\t0, \t1, \y0, \y1, \t3, \x6, \x7, \y2, \y3, \t2
	eor		\y0, \y0, \y2
	eor		\y1, \y1, \y3
	mul_gf4		\x4, \x5, \y0, \y1, \t2, \t3
	eor		\x4, \x4, \t0
	eor		\x6, \x6, \t0
	eor		\x5, \x5, \t1
	eor		\x7, \x7, \t1
	.endm
1371abee99eSArd Biesheuvel
/*
 * inv_gf256 - non-linear core shared by sbox and inv_sbox (by its name,
 * presumably the inversion in GF(2^8) on bit-sliced data).
 *
 * x0..x7 are the eight vector operands and are updated in place.
 * t0..t3 and s0..s3 are scratch and are clobbered.
 *
 * The first part derives four intermediate planes from x0..x7 using
 * AND/ORR/EOR plus BSL (bitwise select: the destination picks, bit by
 * bit, between the two sources). Those four planes (s3, s2, s1, t1)
 * are then passed as the y operand to mul_gf16_2, which produces the
 * final x0..x7.
 *
 * The sequence is a single dependency chain with heavy register reuse;
 * do not reorder it.
 */
	.macro		inv_gf256, x0, x1, x2, x3, x4, x5, x6, x7, \
				   t0, t1, t2, t3, s0, s1, s2, s3
	eor		\t3, \x4, \x6
	eor		\t0, \x5, \x7
	eor		\t1, \x1, \x3
	eor		\s1, \x7, \x6
	eor		\s0, \x0, \x2
	eor		\s3, \t3, \t0
	orr		\t2, \t0, \t1
	and		\s2, \t3, \s0
	orr		\t3, \t3, \s0
	eor		\s0, \s0, \t1
	and		\t0, \t0, \t1
	eor		\t1, \x3, \x2
	and		\s3, \s3, \s0
	and		\s1, \s1, \t1
	eor		\t1, \x4, \x5
	eor		\s0, \x1, \x0
	eor		\t3, \t3, \s1
	eor		\t2, \t2, \s1
	and		\s1, \t1, \s0
	orr		\t1, \t1, \s0
	eor		\t3, \t3, \s3
	eor		\t0, \t0, \s1
	eor		\t2, \t2, \s2
	eor		\t1, \t1, \s3
	eor		\t0, \t0, \s2
	and		\s0, \x7, \x3
	eor		\t1, \t1, \s2
	and		\s1, \x6, \x2
	and		\s2, \x5, \x1
	orr		\s3, \x4, \x0
	eor		\t3, \t3, \s0
	eor		\t1, \t1, \s2
	eor		\s0, \t0, \s3
	eor		\t2, \t2, \s1
	and		\s2, \t3, \t1
	eor		\s1, \t2, \s2
	eor		\s3, \s0, \s2
	bsl		\s1, \t1, \s0
	not		\t0, \s0
	bsl		\s0, \s1, \s3
	bsl		\t0, \s1, \s3
	bsl		\s3, \t3, \t2
	eor		\t3, \t3, \t2
	and		\s2, \s0, \s3
	eor		\t1, \t1, \t0
	eor		\s2, \s2, \t3
	mul_gf16_2	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\s3, \s2, \s1, \t1, \s0, \t0, \t2, \t3
	.endm
1891abee99eSArd Biesheuvel
/*
 * sbox - forward S-box step on eight bit-sliced registers.
 *
 * b0..b7, t0..t3 and s0..s3 are bare vector register names (e.g. v0);
 * the macro appends the .16b arrangement itself.
 *
 * Expands to in_bs_ch, inv_gf256 and out_bs_ch. Each stage receives the
 * registers in a different order, so the results do not end up in the
 * natural b0..b7 order: the code following sbox in aesbs_encrypt8 uses
 * the registers in the order b0, b1, b4, b6, b3, b7, b2, b5.
 *
 * t0..t3 and s0..s3 are scratch and are clobbered.
 */
	.macro		sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
			      t0, t1, t2, t3, s0, s1, s2, s3
	in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
	inv_gf256	\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b, \
			\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
	out_bs_ch	\b7\().16b, \b1\().16b, \b4\().16b, \b2\().16b, \
			\b6\().16b, \b5\().16b, \b0\().16b, \b3\().16b
	.endm
2011abee99eSArd Biesheuvel
/*
 * inv_sbox - inverse S-box step on eight bit-sliced registers.
 *
 * b0..b7, t0..t3 and s0..s3 are bare vector register names (e.g. v0);
 * the macro appends the .16b arrangement itself.
 *
 * Expands to inv_in_bs_ch, inv_gf256 and inv_out_bs_ch. As with sbox,
 * each stage receives the registers in a different order: the code
 * following inv_sbox in aesbs_decrypt8 uses the registers in the order
 * b0, b1, b6, b4, b2, b7, b3, b5.
 *
 * t0..t3 and s0..s3 are scratch and are clobbered.
 */
	.macro		inv_sbox, b0, b1, b2, b3, b4, b5, b6, b7, \
				  t0, t1, t2, t3, s0, s1, s2, s3
	inv_in_bs_ch	\b0\().16b, \b1\().16b, \b2\().16b, \b3\().16b, \
			\b4\().16b, \b5\().16b, \b6\().16b, \b7\().16b
	inv_gf256	\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b, \
			\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
			\t0\().16b, \t1\().16b, \t2\().16b, \t3\().16b, \
			\s0\().16b, \s1\().16b, \s2\().16b, \s3\().16b
	inv_out_bs_ch	\b3\().16b, \b7\().16b, \b0\().16b, \b4\().16b, \
			\b5\().16b, \b1\().16b, \b2\().16b, \b6\().16b
	.endm
2131abee99eSArd Biesheuvel
/*
 * enc_next_rk - load the next 128-byte bit-sliced round key into
 * q16..q23 and advance bskey past it.
 *
 * The first load post-increments bskey by 128; the remaining loads use
 * negative offsets relative to the already advanced pointer.
 */
	.macro		enc_next_rk
	ldp		q16, q17, [bskey], #128
	ldp		q18, q19, [bskey, #-96]
	ldp		q20, q21, [bskey, #-64]
	ldp		q22, q23, [bskey, #-32]
	.endm
2201abee99eSArd Biesheuvel
/*
 * dec_next_rk - step bskey back by 128 bytes and load the bit-sliced
 * round key found there into q16..q23.
 *
 * The first load pre-decrements bskey (writeback); the remaining loads
 * use positive offsets from the new pointer.
 */
	.macro		dec_next_rk
	ldp		q16, q17, [bskey, #-128]!
	ldp		q18, q19, [bskey, #32]
	ldp		q20, q21, [bskey, #64]
	ldp		q22, q23, [bskey, #96]
	.endm
2271abee99eSArd Biesheuvel
/*
 * add_round_key - XOR the eight registers x0..x7 (bare names, e.g. v0)
 * with v16..v23 respectively, i.e. with the round key most recently
 * loaded by enc_next_rk or dec_next_rk.
 */
	.macro		add_round_key, x0, x1, x2, x3, x4, x5, x6, x7
	eor		\x0\().16b, \x0\().16b, v16.16b
	eor		\x1\().16b, \x1\().16b, v17.16b
	eor		\x2\().16b, \x2\().16b, v18.16b
	eor		\x3\().16b, \x3\().16b, v19.16b
	eor		\x4\().16b, \x4\().16b, v20.16b
	eor		\x5\().16b, \x5\().16b, v21.16b
	eor		\x6\().16b, \x6\().16b, v22.16b
	eor		\x7\().16b, \x7\().16b, v23.16b
	.endm
2381abee99eSArd Biesheuvel
/*
 * shift_rows - apply the same byte permutation to each of the eight
 * registers x0..x7 (bare names, e.g. v0).
 *
 * mask is the register holding the 16 TBL byte indices; every register
 * is permuted in place with a single-register table lookup.
 */
	.macro		shift_rows, x0, x1, x2, x3, x4, x5, x6, x7, mask
	tbl		\x0\().16b, {\x0\().16b}, \mask\().16b
	tbl		\x1\().16b, {\x1\().16b}, \mask\().16b
	tbl		\x2\().16b, {\x2\().16b}, \mask\().16b
	tbl		\x3\().16b, {\x3\().16b}, \mask\().16b
	tbl		\x4\().16b, {\x4\().16b}, \mask\().16b
	tbl		\x5\().16b, {\x5\().16b}, \mask\().16b
	tbl		\x6\().16b, {\x6\().16b}, \mask\().16b
	tbl		\x7\().16b, {\x7\().16b}, \mask\().16b
	.endm
2491abee99eSArd Biesheuvel
/*
 * mix_cols - column mixing step on eight bit-sliced registers.
 *
 * x0..x7 are the data registers and t0..t7 are scratch (all bare names,
 * e.g. v0); t0..t7 are clobbered.
 *
 * Each register is combined with copies of itself rotated by 12 bytes
 * and by 8 bytes (EXT with both sources equal is a byte rotation),
 * with XOR terms crossing between neighbouring registers.
 *
 * inv: optional. When left blank, the forward variant is assembled.
 * When any value is supplied (inv_mix_cols passes 1), the final five
 * results are assigned to x2..x6 in a different arrangement.
 *
 * The instructions are interleaved and reuse registers as soon as
 * their old value is dead; do not reorder them.
 */
	.macro		mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
				  t0, t1, t2, t3, t4, t5, t6, t7, inv
	ext		\t0\().16b, \x0\().16b, \x0\().16b, #12
	ext		\t1\().16b, \x1\().16b, \x1\().16b, #12
	eor		\x0\().16b, \x0\().16b, \t0\().16b
	ext		\t2\().16b, \x2\().16b, \x2\().16b, #12
	eor		\x1\().16b, \x1\().16b, \t1\().16b
	ext		\t3\().16b, \x3\().16b, \x3\().16b, #12
	eor		\x2\().16b, \x2\().16b, \t2\().16b
	ext		\t4\().16b, \x4\().16b, \x4\().16b, #12
	eor		\x3\().16b, \x3\().16b, \t3\().16b
	ext		\t5\().16b, \x5\().16b, \x5\().16b, #12
	eor		\x4\().16b, \x4\().16b, \t4\().16b
	ext		\t6\().16b, \x6\().16b, \x6\().16b, #12
	eor		\x5\().16b, \x5\().16b, \t5\().16b
	ext		\t7\().16b, \x7\().16b, \x7\().16b, #12
	eor		\x6\().16b, \x6\().16b, \t6\().16b
	eor		\t1\().16b, \t1\().16b, \x0\().16b
	eor		\x7\().16b, \x7\().16b, \t7\().16b
	ext		\x0\().16b, \x0\().16b, \x0\().16b, #8
	eor		\t2\().16b, \t2\().16b, \x1\().16b
	eor		\t0\().16b, \t0\().16b, \x7\().16b
	eor		\t1\().16b, \t1\().16b, \x7\().16b
	ext		\x1\().16b, \x1\().16b, \x1\().16b, #8
	eor		\t5\().16b, \t5\().16b, \x4\().16b
	eor		\x0\().16b, \x0\().16b, \t0\().16b
	eor		\t6\().16b, \t6\().16b, \x5\().16b
	eor		\x1\().16b, \x1\().16b, \t1\().16b
	ext		\t0\().16b, \x4\().16b, \x4\().16b, #8
	eor		\t4\().16b, \t4\().16b, \x3\().16b
	ext		\t1\().16b, \x5\().16b, \x5\().16b, #8
	eor		\t7\().16b, \t7\().16b, \x6\().16b
	ext		\x4\().16b, \x3\().16b, \x3\().16b, #8
	eor		\t3\().16b, \t3\().16b, \x2\().16b
	ext		\x5\().16b, \x7\().16b, \x7\().16b, #8
	eor		\t4\().16b, \t4\().16b, \x7\().16b
	ext		\x3\().16b, \x6\().16b, \x6\().16b, #8
	eor		\t3\().16b, \t3\().16b, \x7\().16b
	ext		\x6\().16b, \x2\().16b, \x2\().16b, #8
	eor		\x7\().16b, \t1\().16b, \t5\().16b
	.ifb		\inv
	eor		\x2\().16b, \t0\().16b, \t4\().16b
	eor		\x4\().16b, \x4\().16b, \t3\().16b
	eor		\x5\().16b, \x5\().16b, \t7\().16b
	eor		\x3\().16b, \x3\().16b, \t6\().16b
	eor		\x6\().16b, \x6\().16b, \t2\().16b
	.else
	eor		\t3\().16b, \t3\().16b, \x4\().16b
	eor		\x5\().16b, \x5\().16b, \t7\().16b
	eor		\x2\().16b, \x3\().16b, \t6\().16b
	eor		\x3\().16b, \t0\().16b, \t4\().16b
	eor		\x4\().16b, \x6\().16b, \t2\().16b
	mov		\x6\().16b, \t3\().16b
	.endif
	.endm
3051abee99eSArd Biesheuvel
/*
 * inv_mix_cols - inverse column mixing step on eight bit-sliced
 * registers.
 *
 * x0..x7 are the data registers and t0..t7 are scratch (all bare names,
 * e.g. v0); t0..t7 are clobbered.
 *
 * Implemented as a pre-processing pass followed by mix_cols with its
 * inv argument set: each tN first becomes xN XOR (xN rotated by 8
 * bytes), selected tN values are folded into x0..x7, and the result is
 * handed to mix_cols.
 */
	.macro		inv_mix_cols, x0, x1, x2, x3, x4, x5, x6, x7, \
				      t0, t1, t2, t3, t4, t5, t6, t7
	ext		\t0\().16b, \x0\().16b, \x0\().16b, #8
	ext		\t6\().16b, \x6\().16b, \x6\().16b, #8
	ext		\t7\().16b, \x7\().16b, \x7\().16b, #8
	eor		\t0\().16b, \t0\().16b, \x0\().16b
	ext		\t1\().16b, \x1\().16b, \x1\().16b, #8
	eor		\t6\().16b, \t6\().16b, \x6\().16b
	ext		\t2\().16b, \x2\().16b, \x2\().16b, #8
	eor		\t7\().16b, \t7\().16b, \x7\().16b
	ext		\t3\().16b, \x3\().16b, \x3\().16b, #8
	eor		\t1\().16b, \t1\().16b, \x1\().16b
	ext		\t4\().16b, \x4\().16b, \x4\().16b, #8
	eor		\t2\().16b, \t2\().16b, \x2\().16b
	ext		\t5\().16b, \x5\().16b, \x5\().16b, #8
	eor		\t3\().16b, \t3\().16b, \x3\().16b
	eor		\t4\().16b, \t4\().16b, \x4\().16b
	eor		\t5\().16b, \t5\().16b, \x5\().16b
	eor		\x0\().16b, \x0\().16b, \t6\().16b
	eor		\x1\().16b, \x1\().16b, \t6\().16b
	eor		\x2\().16b, \x2\().16b, \t0\().16b
	eor		\x4\().16b, \x4\().16b, \t2\().16b
	eor		\x3\().16b, \x3\().16b, \t1\().16b
	eor		\x1\().16b, \x1\().16b, \t7\().16b
	eor		\x2\().16b, \x2\().16b, \t7\().16b
	eor		\x4\().16b, \x4\().16b, \t6\().16b
	eor		\x5\().16b, \x5\().16b, \t3\().16b
	eor		\x3\().16b, \x3\().16b, \t6\().16b
	eor		\x6\().16b, \x6\().16b, \t4\().16b
	eor		\x4\().16b, \x4\().16b, \t7\().16b
	eor		\x5\().16b, \x5\().16b, \t7\().16b
	eor		\x7\().16b, \x7\().16b, \t5\().16b
	mix_cols	\x0, \x1, \x2, \x3, \x4, \x5, \x6, \x7, \
			\t0, \t1, \t2, \t3, \t4, \t5, \t6, \t7, 1
	.endm
3411abee99eSArd Biesheuvel
/*
 * swapmove_2x - masked bit exchange between two register pairs,
 * (a0, b0) and (a1, b1), interleaved for throughput.
 *
 * For each pair, with shifts applied per 64-bit lane:
 *
 *	t  = ((b >> n) ^ a) & mask
 *	a ^= t
 *	b ^= t << n
 *
 * This swaps the bits of a selected by mask with the bits of b located
 * n positions higher. n is an immediate shift count, mask is the
 * register holding the bit mask, and t0/t1 are scratch registers that
 * are clobbered. All register arguments are bare names (e.g. v0).
 */
	.macro		swapmove_2x, a0, b0, a1, b1, n, mask, t0, t1
	ushr		\t0\().2d, \b0\().2d, #\n
	ushr		\t1\().2d, \b1\().2d, #\n
	eor		\t0\().16b, \t0\().16b, \a0\().16b
	eor		\t1\().16b, \t1\().16b, \a1\().16b
	and		\t0\().16b, \t0\().16b, \mask\().16b
	and		\t1\().16b, \t1\().16b, \mask\().16b
	eor		\a0\().16b, \a0\().16b, \t0\().16b
	shl		\t0\().2d, \t0\().2d, #\n
	eor		\a1\().16b, \a1\().16b, \t1\().16b
	shl		\t1\().2d, \t1\().2d, #\n
	eor		\b0\().16b, \b0\().16b, \t0\().16b
	eor		\b1\().16b, \b1\().16b, \t1\().16b
	.endm
3561abee99eSArd Biesheuvel
/*
 * bitslice - transpose bits across eight registers using three rounds
 * of swapmove_2x with masks 0x55, 0x33 and 0x0f and shift distances 1,
 * 2 and 4.
 *
 * Note that the formal parameters are declared in descending order
 * (x7 first, x0 last), so the first positional argument is referred to
 * as x7 in the body.
 *
 * t0..t3 are scratch registers and are clobbered: t0 holds the 0x55
 * mask and is later reloaded with 0x0f, t1 holds 0x33, and t2/t3 are
 * the swapmove temporaries. The same macro is used by the callers in
 * this file both before the first round and after the last one.
 */
	.macro		bitslice, x7, x6, x5, x4, x3, x2, x1, x0, t0, t1, t2, t3
	movi		\t0\().16b, #0x55
	movi		\t1\().16b, #0x33
	swapmove_2x	\x0, \x1, \x2, \x3, 1, \t0, \t2, \t3
	swapmove_2x	\x4, \x5, \x6, \x7, 1, \t0, \t2, \t3
	movi		\t0\().16b, #0x0f
	swapmove_2x	\x0, \x2, \x1, \x3, 2, \t1, \t2, \t3
	swapmove_2x	\x4, \x6, \x5, \x7, 2, \t1, \t2, \t3
	swapmove_2x	\x0, \x4, \x1, \x5, 4, \t0, \t2, \t3
	swapmove_2x	\x2, \x6, \x3, \x7, 4, \t0, \t2, \t3
	.endm
3681abee99eSArd Biesheuvel
3691abee99eSArd Biesheuvel
/*
 * Byte permutation tables, each a 16-entry TBL index vector. They live
 * in .text so that they can be fetched with PC-relative literal loads
 * (ldr qN, <label>). The group is aligned to 64 bytes (.align takes a
 * power of two on this target).
 *
 *   M0     - applied to each round key in aesbs_convert_key
 *   M0SR   - applied to the input blocks in aesbs_encrypt8
 *   SR     - per-round shift_rows mask in aesbs_encrypt8
 *   SRM0   - shift_rows mask for the final iteration in aesbs_encrypt8
 *   M0ISR, ISR, ISRM0 - the corresponding tables for aesbs_decrypt8
 */
	.align		6
M0:	.octa		0x0004080c0105090d02060a0e03070b0f

M0SR:	.octa		0x0004080c05090d010a0e02060f03070b
SR:	.octa		0x0f0e0d0c0a09080b0504070600030201
SRM0:	.octa		0x01060b0c0207080d0304090e00050a0f

M0ISR:	.octa		0x0004080c0d0105090a0e0206070b0f03
ISR:	.octa		0x0f0e0d0c080b0a090504070602010003
ISRM0:	.octa		0x0306090c00070a0d01040b0e0205080f
3801abee99eSArd Biesheuvel
/*
 * void aesbs_convert_key(u8 out[], u32 const rk[], int rounds)
 *
 * Convert an expanded AES key schedule into the layout consumed by
 * aesbs_encrypt8 and aesbs_decrypt8.
 *
 * In:	x0 = out, x1 = rk, x2 = rounds
 *
 * Layout written to out:
 *   - the round 0 key, copied unmodified (16 bytes)
 *   - for each of the round keys 1 .. rounds - 1, eight 16-byte planes
 *     (128 bytes): the key is permuted with M0, then plane i holds 0xff
 *     in every byte whose bit i is set and 0x00 elsewhere; planes 0, 1,
 *     5 and 6 are stored inverted
 *   - the last round key with 0x63 XORed into every byte (16 bytes)
 *
 * Reads rounds + 1 round keys from rk. Clobbers x0-x2, v0-v17 and the
 * condition flags.
 *
 * NOTE(review): v8-v15 are overwritten without being saved; callers are
 * presumably inside a kernel-mode NEON section where that is permitted.
 * Confirm against the C glue code.
 */
SYM_FUNC_START(aesbs_convert_key)
	ld1		{v7.4s}, [x1], #16		// load round 0 key
	ld1		{v17.4s}, [x1], #16		// load round 1 key

	movi		v8.16b,  #0x01			// bit masks
	movi		v9.16b,  #0x02
	movi		v10.16b, #0x04
	movi		v11.16b, #0x08
	movi		v12.16b, #0x10
	movi		v13.16b, #0x20
	movi		v14.16b, #0x40
	movi		v15.16b, #0x80
	ldr		q16, M0

	sub		x2, x2, #1			// loop runs rounds - 1 times
	str		q7, [x0], #16			// save round 0 key

.Lkey_loop:
	tbl		v7.16b, {v17.16b}, v16.16b	// permute current key with M0
	ld1		{v17.4s}, [x1], #16		// load next round key

	/* expand bit i of every key byte into a full 0x00/0xff byte mask */
	cmtst		v0.16b, v7.16b, v8.16b
	cmtst		v1.16b, v7.16b, v9.16b
	cmtst		v2.16b, v7.16b, v10.16b
	cmtst		v3.16b, v7.16b, v11.16b
	cmtst		v4.16b, v7.16b, v12.16b
	cmtst		v5.16b, v7.16b, v13.16b
	cmtst		v6.16b, v7.16b, v14.16b
	cmtst		v7.16b, v7.16b, v15.16b
	/*
	 * Inverting planes 0, 1, 5 and 6 is the bit-sliced equivalent of
	 * XORing every key byte with 0x63 (bits 0, 1, 5 and 6 set).
	 */
	not		v0.16b, v0.16b
	not		v1.16b, v1.16b
	not		v5.16b, v5.16b
	not		v6.16b, v6.16b

	subs		x2, x2, #1
	stp		q0, q1, [x0], #128		// advance out by one 128-byte key
	stp		q2, q3, [x0, #-96]
	stp		q4, q5, [x0, #-64]
	stp		q6, q7, [x0, #-32]
	b.ne		.Lkey_loop

	movi		v7.16b, #0x63			// compose .L63
	eor		v17.16b, v17.16b, v7.16b
	str		q17, [x0]			// last round key, not bit-sliced
	ret
SYM_FUNC_END(aesbs_convert_key)
4301abee99eSArd Biesheuvel
/*
 * aesbs_encrypt8 - encrypt eight blocks held in registers (file-local
 * helper with a private register convention).
 *
 * In:	v0-v7	= the eight input blocks
 *	bskey	= key schedule as produced by aesbs_convert_key
 *	rounds	= round count
 * Out:	the results are left in v0, v1, v4, v6, v3, v7, v2, v5, which is
 *	the order in which aesbs_ecb_encrypt stores them
 *
 * Clobbers v8-v24, bskey, rounds and the condition flags.
 */
	.align		4
SYM_FUNC_START_LOCAL(aesbs_encrypt8)
	ldr		q9, [bskey], #16		// round 0 key
	ldr		q8, M0SR
	ldr		q24, SR

	eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
	eor		v11.16b, v1.16b, v9.16b
	tbl		v0.16b, {v10.16b}, v8.16b	// and permute with M0SR
	eor		v12.16b, v2.16b, v9.16b
	tbl		v1.16b, {v11.16b}, v8.16b
	eor		v13.16b, v3.16b, v9.16b
	tbl		v2.16b, {v12.16b}, v8.16b
	eor		v14.16b, v4.16b, v9.16b
	tbl		v3.16b, {v13.16b}, v8.16b
	eor		v15.16b, v5.16b, v9.16b
	tbl		v4.16b, {v14.16b}, v8.16b
	eor		v10.16b, v6.16b, v9.16b
	tbl		v5.16b, {v15.16b}, v8.16b
	eor		v11.16b, v7.16b, v9.16b
	tbl		v6.16b, {v10.16b}, v8.16b
	tbl		v7.16b, {v11.16b}, v8.16b

	bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

	sub		rounds, rounds, #1
	b		.Lenc_sbox			// first pass skips shift_rows

.Lenc_loop:
	shift_rows	v0, v1, v2, v3, v4, v5, v6, v7, v24
.Lenc_sbox:
	sbox		v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
								v13, v14, v15
	subs		rounds, rounds, #1
	b.cc		.Lenc_done			// counter was already zero

	enc_next_rk

	mix_cols	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11, v12, \
								v13, v14, v15

	add_round_key	v0, v1, v2, v3, v4, v5, v6, v7

	/*
	 * The flags still come from the subs above. When the counter has
	 * just reached zero, switch to the SRM0 mask for the last pass.
	 */
	b.ne		.Lenc_loop
	ldr		q24, SRM0
	b		.Lenc_loop

.Lenc_done:
	ldr		q12, [bskey]			// last round key

	bitslice	v0, v1, v4, v6, v3, v7, v2, v5, v8, v9, v10, v11

	eor		v0.16b, v0.16b, v12.16b
	eor		v1.16b, v1.16b, v12.16b
	eor		v4.16b, v4.16b, v12.16b
	eor		v6.16b, v6.16b, v12.16b
	eor		v3.16b, v3.16b, v12.16b
	eor		v7.16b, v7.16b, v12.16b
	eor		v2.16b, v2.16b, v12.16b
	eor		v5.16b, v5.16b, v12.16b
	ret
SYM_FUNC_END(aesbs_encrypt8)
4931abee99eSArd Biesheuvel
/*
 * aesbs_decrypt8 - decrypt eight blocks held in registers (file-local
 * helper with a private register convention).
 *
 * In:	v0-v7	= the eight input blocks
 *	bskey	= start of the key schedule produced by aesbs_convert_key
 *	rounds	= round count
 * Out:	the results are left in v0, v1, v6, v4, v2, v7, v3, v5, which is
 *	the order in which aesbs_ecb_decrypt stores them
 *
 * The key schedule is walked backwards: bskey is first moved to the
 * final 16-byte key (base + rounds * 128 - 112), dec_next_rk then steps
 * down 128 bytes per round, and the 16-byte key at the start of the
 * schedule is applied last.
 *
 * Clobbers x9, v8-v24, bskey, rounds and the condition flags.
 */
	.align		4
SYM_FUNC_START_LOCAL(aesbs_decrypt8)
	lsl		x9, rounds, #7			// rounds * 128
	add		bskey, bskey, x9

	ldr		q9, [bskey, #-112]!		// round 0 key
	ldr		q8, M0ISR
	ldr		q24, ISR

	eor		v10.16b, v0.16b, v9.16b		// xor with round0 key
	eor		v11.16b, v1.16b, v9.16b
	tbl		v0.16b, {v10.16b}, v8.16b	// and permute with M0ISR
	eor		v12.16b, v2.16b, v9.16b
	tbl		v1.16b, {v11.16b}, v8.16b
	eor		v13.16b, v3.16b, v9.16b
	tbl		v2.16b, {v12.16b}, v8.16b
	eor		v14.16b, v4.16b, v9.16b
	tbl		v3.16b, {v13.16b}, v8.16b
	eor		v15.16b, v5.16b, v9.16b
	tbl		v4.16b, {v14.16b}, v8.16b
	eor		v10.16b, v6.16b, v9.16b
	tbl		v5.16b, {v15.16b}, v8.16b
	eor		v11.16b, v7.16b, v9.16b
	tbl		v6.16b, {v10.16b}, v8.16b
	tbl		v7.16b, {v11.16b}, v8.16b

	bitslice	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11

	sub		rounds, rounds, #1
	b		.Ldec_sbox			// first pass skips shift_rows

.Ldec_loop:
	shift_rows	v0, v1, v2, v3, v4, v5, v6, v7, v24
.Ldec_sbox:
	inv_sbox	v0, v1, v2, v3, v4, v5, v6, v7, v8, v9, v10, v11, v12, \
								v13, v14, v15
	subs		rounds, rounds, #1
	b.cc		.Ldec_done			// counter was already zero

	dec_next_rk

	add_round_key	v0, v1, v6, v4, v2, v7, v3, v5

	inv_mix_cols	v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11, v12, \
								v13, v14, v15

	/*
	 * The flags still come from the subs above. When the counter has
	 * just reached zero, switch to the ISRM0 mask for the last pass.
	 */
	b.ne		.Ldec_loop
	ldr		q24, ISRM0
	b		.Ldec_loop
.Ldec_done:
	ldr		q12, [bskey, #-16]		// last round key

	bitslice	v0, v1, v6, v4, v2, v7, v3, v5, v8, v9, v10, v11

	eor		v0.16b, v0.16b, v12.16b
	eor		v1.16b, v1.16b, v12.16b
	eor		v6.16b, v6.16b, v12.16b
	eor		v4.16b, v4.16b, v12.16b
	eor		v2.16b, v2.16b, v12.16b
	eor		v7.16b, v7.16b, v12.16b
	eor		v3.16b, v3.16b, v12.16b
	eor		v5.16b, v5.16b, v12.16b
	ret
SYM_FUNC_END(aesbs_decrypt8)
5581abee99eSArd Biesheuvel
/*
 * aesbs_ecb_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		     int blocks)
 * aesbs_ecb_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
 *		     int blocks)
 *
 * __ecb_crypt is the shared body of both functions.
 *
 * do8:    the 8-block helper to call (aesbs_encrypt8 or aesbs_decrypt8)
 * o0..o7: the registers holding the helper's results, in output order
 *
 * Arguments are copied to x19-x23 so that they survive the call:
 * x19 = out, x20 = in, x21 = rk, x22 = rounds, x23 = blocks remaining.
 * frame_push/frame_pop come from <asm/assembler.h>; frame_push 5
 * presumably sets up a stack frame and preserves x19-x23.
 *
 * Blocks are handled in batches of up to eight. For a final short
 * batch of k blocks, x5 holds the single bit (1 << k); the tbnz chains
 * test bits 1..7 of x5 to stop loading and storing after k blocks. For
 * a full batch x5 is zero and all eight are processed. x5 must stay
 * intact across the call to do8; neither 8-block helper in this file
 * writes it.
 *
 * The first block is loaded and stored unconditionally, so callers
 * must pass blocks >= 1. In a short batch the unused input registers
 * keep whatever they held before; they are still run through do8 but
 * their results are never stored.
 */
	.macro		__ecb_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
	frame_push	5

	mov		x19, x0
	mov		x20, x1
	mov		x21, x2
	mov		x22, x3
	mov		x23, x4

99:	mov		x5, #1
	lsl		x5, x5, x23			// x5 = 1 << blocks
	subs		w23, w23, #8
	csel		x23, x23, xzr, pl		// fewer than 8 left: no more batches
	csel		x5, x5, xzr, mi			// full batch: clear the stop mask

	ld1		{v0.16b}, [x20], #16
	tbnz		x5, #1, 0f
	ld1		{v1.16b}, [x20], #16
	tbnz		x5, #2, 0f
	ld1		{v2.16b}, [x20], #16
	tbnz		x5, #3, 0f
	ld1		{v3.16b}, [x20], #16
	tbnz		x5, #4, 0f
	ld1		{v4.16b}, [x20], #16
	tbnz		x5, #5, 0f
	ld1		{v5.16b}, [x20], #16
	tbnz		x5, #6, 0f
	ld1		{v6.16b}, [x20], #16
	tbnz		x5, #7, 0f
	ld1		{v7.16b}, [x20], #16

0:	mov		bskey, x21			// the helper modifies both,
	mov		rounds, x22			// so reload them every batch
	bl		\do8

	st1		{\o0\().16b}, [x19], #16
	tbnz		x5, #1, 1f
	st1		{\o1\().16b}, [x19], #16
	tbnz		x5, #2, 1f
	st1		{\o2\().16b}, [x19], #16
	tbnz		x5, #3, 1f
	st1		{\o3\().16b}, [x19], #16
	tbnz		x5, #4, 1f
	st1		{\o4\().16b}, [x19], #16
	tbnz		x5, #5, 1f
	st1		{\o5\().16b}, [x19], #16
	tbnz		x5, #6, 1f
	st1		{\o6\().16b}, [x19], #16
	tbnz		x5, #7, 1f
	st1		{\o7\().16b}, [x19], #16

	cbz		x23, 1f
	b		99b

1:	frame_pop
	ret
	.endm
6221abee99eSArd Biesheuvel
6231abee99eSArd Biesheuvel	.align		4
	// ECB encryption entry point: expands __ecb_crypt around aesbs_encrypt8.
	// The register list is the order in which __ecb_crypt stores the
	// results to consecutive output blocks.
624*47446d7cSEric BiggersSYM_TYPED_FUNC_START(aesbs_ecb_encrypt)
6251abee99eSArd Biesheuvel	__ecb_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
6260e89640bSMark BrownSYM_FUNC_END(aesbs_ecb_encrypt)
6271abee99eSArd Biesheuvel
6281abee99eSArd Biesheuvel	.align		4
	// ECB decryption entry point: expands __ecb_crypt around aesbs_decrypt8.
	// The register list is the order in which __ecb_crypt stores the
	// results to consecutive output blocks; it differs from the list used
	// for encryption.
629*47446d7cSEric BiggersSYM_TYPED_FUNC_START(aesbs_ecb_decrypt)
6301abee99eSArd Biesheuvel	__ecb_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
6310e89640bSMark BrownSYM_FUNC_END(aesbs_ecb_decrypt)
6321abee99eSArd Biesheuvel
6331abee99eSArd Biesheuvel	/*
6341abee99eSArd Biesheuvel	 * aesbs_cbc_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
6351abee99eSArd Biesheuvel	 *		     int blocks, u8 iv[])
6361abee99eSArd Biesheuvel	 */
6371abee99eSArd Biesheuvel	.align		4
	// CBC decryption, up to eight blocks per pass of the 99: loop.
	// Register use: x19 = out, x20 = in, x21 = rk, x22 = rounds,
	// x23 = blocks still to process, x24 = iv, x6 = partial-group stop
	// mask, v25-v31 = copies of the first seven ciphertext blocks of the
	// group, v24 = chaining value.
6380e89640bSMark BrownSYM_FUNC_START(aesbs_cbc_decrypt)
63920ab6332SArd Biesheuvel	frame_push	6		// NOTE(review): presumably preserves x19-x24, which are overwritten below - confirm in asm/assembler.h
64020ab6332SArd Biesheuvel
64120ab6332SArd Biesheuvel	mov		x19, x0
64220ab6332SArd Biesheuvel	mov		x20, x1
64320ab6332SArd Biesheuvel	mov		x21, x2
64420ab6332SArd Biesheuvel	mov		x22, x3
64520ab6332SArd Biesheuvel	mov		x23, x4
64620ab6332SArd Biesheuvel	mov		x24, x5
6471abee99eSArd Biesheuvel
	// x6: bit N set only when exactly N (< 8) blocks remain, else zero.
6481abee99eSArd Biesheuvel99:	mov		x6, #1
64920ab6332SArd Biesheuvel	lsl		x6, x6, x23
65020ab6332SArd Biesheuvel	subs		w23, w23, #8
65120ab6332SArd Biesheuvel	csel		x23, x23, xzr, pl		// clamp the remaining count at zero
6521abee99eSArd Biesheuvel	csel		x6, x6, xzr, mi		// keep the stop bit only if fewer than 8 were left
6531abee99eSArd Biesheuvel
	// Load ciphertext and keep a copy of each block: it is the value
	// XORed into the following block after decryption.
65420ab6332SArd Biesheuvel	ld1		{v0.16b}, [x20], #16
6551abee99eSArd Biesheuvel	mov		v25.16b, v0.16b
6561abee99eSArd Biesheuvel	tbnz		x6, #1, 0f
65720ab6332SArd Biesheuvel	ld1		{v1.16b}, [x20], #16
6581abee99eSArd Biesheuvel	mov		v26.16b, v1.16b
6591abee99eSArd Biesheuvel	tbnz		x6, #2, 0f
66020ab6332SArd Biesheuvel	ld1		{v2.16b}, [x20], #16
6611abee99eSArd Biesheuvel	mov		v27.16b, v2.16b
6621abee99eSArd Biesheuvel	tbnz		x6, #3, 0f
66320ab6332SArd Biesheuvel	ld1		{v3.16b}, [x20], #16
6641abee99eSArd Biesheuvel	mov		v28.16b, v3.16b
6651abee99eSArd Biesheuvel	tbnz		x6, #4, 0f
66620ab6332SArd Biesheuvel	ld1		{v4.16b}, [x20], #16
6671abee99eSArd Biesheuvel	mov		v29.16b, v4.16b
6681abee99eSArd Biesheuvel	tbnz		x6, #5, 0f
66920ab6332SArd Biesheuvel	ld1		{v5.16b}, [x20], #16
6701abee99eSArd Biesheuvel	mov		v30.16b, v5.16b
6711abee99eSArd Biesheuvel	tbnz		x6, #6, 0f
67220ab6332SArd Biesheuvel	ld1		{v6.16b}, [x20], #16
6731abee99eSArd Biesheuvel	mov		v31.16b, v6.16b
6741abee99eSArd Biesheuvel	tbnz		x6, #7, 0f
67520ab6332SArd Biesheuvel	ld1		{v7.16b}, [x20]		// no post-increment: the same block is re-read below as the next IV
6761abee99eSArd Biesheuvel
67720ab6332SArd Biesheuvel0:	mov		bskey, x21
67820ab6332SArd Biesheuvel	mov		rounds, x22
6791abee99eSArd Biesheuvel	bl		aesbs_decrypt8		// x6 and v25-v31 are used again below, so they must survive the call
6801abee99eSArd Biesheuvel
68120ab6332SArd Biesheuvel	ld1		{v24.16b}, [x24]		// load IV
6821abee99eSArd Biesheuvel
	// Undo the chaining. Block order after decryption is v0, v1, v6, v4,
	// v2, v7, v3, v5 (the order of the stores below): the first block is
	// XORed with the IV, each later one with the preceding ciphertext.
	// For a partial group the surplus XORs produce values that are never
	// stored.
6831abee99eSArd Biesheuvel	eor		v1.16b, v1.16b, v25.16b
6841abee99eSArd Biesheuvel	eor		v6.16b, v6.16b, v26.16b
6851abee99eSArd Biesheuvel	eor		v4.16b, v4.16b, v27.16b
6861abee99eSArd Biesheuvel	eor		v2.16b, v2.16b, v28.16b
6871abee99eSArd Biesheuvel	eor		v7.16b, v7.16b, v29.16b
6881abee99eSArd Biesheuvel	eor		v0.16b, v0.16b, v24.16b
6891abee99eSArd Biesheuvel	eor		v3.16b, v3.16b, v30.16b
6901abee99eSArd Biesheuvel	eor		v5.16b, v5.16b, v31.16b
6911abee99eSArd Biesheuvel
	// Store plaintext; after each store v24 is set to the ciphertext of
	// the block just written, so at label 1 it holds the last ciphertext
	// block consumed.
69220ab6332SArd Biesheuvel	st1		{v0.16b}, [x19], #16
6931abee99eSArd Biesheuvel	mov		v24.16b, v25.16b
6941abee99eSArd Biesheuvel	tbnz		x6, #1, 1f
69520ab6332SArd Biesheuvel	st1		{v1.16b}, [x19], #16
6961abee99eSArd Biesheuvel	mov		v24.16b, v26.16b
6971abee99eSArd Biesheuvel	tbnz		x6, #2, 1f
69820ab6332SArd Biesheuvel	st1		{v6.16b}, [x19], #16
6991abee99eSArd Biesheuvel	mov		v24.16b, v27.16b
7001abee99eSArd Biesheuvel	tbnz		x6, #3, 1f
70120ab6332SArd Biesheuvel	st1		{v4.16b}, [x19], #16
7021abee99eSArd Biesheuvel	mov		v24.16b, v28.16b
7031abee99eSArd Biesheuvel	tbnz		x6, #4, 1f
70420ab6332SArd Biesheuvel	st1		{v2.16b}, [x19], #16
7051abee99eSArd Biesheuvel	mov		v24.16b, v29.16b
7061abee99eSArd Biesheuvel	tbnz		x6, #5, 1f
70720ab6332SArd Biesheuvel	st1		{v7.16b}, [x19], #16
7081abee99eSArd Biesheuvel	mov		v24.16b, v30.16b
7091abee99eSArd Biesheuvel	tbnz		x6, #6, 1f
71020ab6332SArd Biesheuvel	st1		{v3.16b}, [x19], #16
7111abee99eSArd Biesheuvel	mov		v24.16b, v31.16b
7121abee99eSArd Biesheuvel	tbnz		x6, #7, 1f
71320ab6332SArd Biesheuvel	ld1		{v24.16b}, [x20], #16		// re-read the eighth ciphertext block and step past it
71420ab6332SArd Biesheuvel	st1		{v5.16b}, [x19], #16
71520ab6332SArd Biesheuvel1:	st1		{v24.16b}, [x24]		// store IV
7161abee99eSArd Biesheuvel
71720ab6332SArd Biesheuvel	cbz		x23, 2f		// nothing left
71820ab6332SArd Biesheuvel	b		99b
7191abee99eSArd Biesheuvel
72020ab6332SArd Biesheuvel2:	frame_pop
7211abee99eSArd Biesheuvel	ret
7220e89640bSMark BrownSYM_FUNC_END(aesbs_cbc_decrypt)
7231abee99eSArd Biesheuvel
7241abee99eSArd Biesheuvel	.macro		next_tweak, out, in, const, tmp
	// Derive the next XTS tweak: out = in shifted left by one bit as a
	// 128-bit quantity, with the bit shifted out of the top folded back
	// into the low lane through const.
	//   out, in, const, tmp - vector register names without arrangement
	//                         suffix; tmp is clobbered
	// __xts_crypt8 passes const = { 1, 0x87 } (64-bit lanes 0 and 1).
7251abee99eSArd Biesheuvel	sshr		\tmp\().2d,  \in\().2d,   #63		// each 64-bit lane: all ones if its top bit is set, else zero
7261abee99eSArd Biesheuvel	and		\tmp\().16b, \tmp\().16b, \const\().16b		// pick the per-lane correction value
7271abee99eSArd Biesheuvel	add		\out\().2d,  \in\().2d,   \in\().2d		// shift both 64-bit lanes left by one
7281abee99eSArd Biesheuvel	ext		\tmp\().16b, \tmp\().16b, \tmp\().16b, #8		// swap lanes: low carry moves up, high carry wraps to the low lane
7291abee99eSArd Biesheuvel	eor		\out\().16b, \out\().16b, \tmp\().16b		// apply the carry and the wrap-around correction
7301abee99eSArd Biesheuvel	.endm
7311abee99eSArd Biesheuvel
7321abee99eSArd Biesheuvel	/*
7331abee99eSArd Biesheuvel	 * aesbs_xts_encrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
7341abee99eSArd Biesheuvel	 *		     int blocks, u8 iv[])
7351abee99eSArd Biesheuvel	 * aesbs_xts_decrypt(u8 out[], u8 const in[], u8 const rk[], int rounds,
7361abee99eSArd Biesheuvel	 *		     int blocks, u8 iv[])
7371abee99eSArd Biesheuvel	 */
7380e89640bSMark BrownSYM_FUNC_START_LOCAL(__xts_crypt8)
	// Helper shared by both expansions of the __xts_crypt macro.
	// In:  x1  = input pointer (advanced by 128 bytes)
	//      x2  = rk, x3 = rounds
	//      x6  = 32-byte scratch buffer
	//      x16 = address of the 8-block routine to continue in
	//      v25 = tweak for the first block
	// Out: v0-v7 = input blocks XORed with their tweaks, v26-v31 = tweaks
	//      for blocks 1-6, [x6] = tweak for block 7 followed by the tweak
	//      that comes after it; control is passed on to x16.
	// Eight blocks are always loaded; there is no partial-group handling.
739dfc6031eSArd Biesheuvel	movi		v18.2s, #0x1
740dfc6031eSArd Biesheuvel	movi		v19.2s, #0x87
741dfc6031eSArd Biesheuvel	uzp1		v18.4s, v18.4s, v19.4s		// v18 = { 1, 0x87 } as two 64-bit lanes
7421abee99eSArd Biesheuvel
743dfc6031eSArd Biesheuvel	ld1		{v0.16b-v3.16b}, [x1], #64
744dfc6031eSArd Biesheuvel	ld1		{v4.16b-v7.16b}, [x1], #64
745dfc6031eSArd Biesheuvel
	// Tweak chain: v25 -> v26 -> ... -> v31 -> v16 -> v17 (v19 is scratch).
746dfc6031eSArd Biesheuvel	next_tweak	v26, v25, v18, v19
747dfc6031eSArd Biesheuvel	next_tweak	v27, v26, v18, v19
748dfc6031eSArd Biesheuvel	next_tweak	v28, v27, v18, v19
749dfc6031eSArd Biesheuvel	next_tweak	v29, v28, v18, v19
750dfc6031eSArd Biesheuvel	next_tweak	v30, v29, v18, v19
751dfc6031eSArd Biesheuvel	next_tweak	v31, v30, v18, v19
752dfc6031eSArd Biesheuvel	next_tweak	v16, v31, v18, v19
753dfc6031eSArd Biesheuvel	next_tweak	v17, v16, v18, v19
754dfc6031eSArd Biesheuvel
7551abee99eSArd Biesheuvel	eor		v0.16b, v0.16b, v25.16b
7561abee99eSArd Biesheuvel	eor		v1.16b, v1.16b, v26.16b
7571abee99eSArd Biesheuvel	eor		v2.16b, v2.16b, v27.16b
7581abee99eSArd Biesheuvel	eor		v3.16b, v3.16b, v28.16b
7591abee99eSArd Biesheuvel	eor		v4.16b, v4.16b, v29.16b
760dfc6031eSArd Biesheuvel	eor		v5.16b, v5.16b, v30.16b
761dfc6031eSArd Biesheuvel	eor		v6.16b, v6.16b, v31.16b
762dfc6031eSArd Biesheuvel	eor		v7.16b, v7.16b, v16.16b
7631abee99eSArd Biesheuvel
76467ab02dcSArd Biesheuvel	stp		q16, q17, [x6]		// park the last tweak and its successor; __xts_crypt reloads them
7651abee99eSArd Biesheuvel
766dfc6031eSArd Biesheuvel	mov		bskey, x2
767dfc6031eSArd Biesheuvel	mov		rounds, x3
76839e4716cSJeremy Linton	br		x16		// tail call: x30 is untouched here, so the target returns to our caller
7690e89640bSMark BrownSYM_FUNC_END(__xts_crypt8)
7701abee99eSArd Biesheuvel
7711abee99eSArd Biesheuvel	.macro		__xts_crypt, do8, o0, o1, o2, o3, o4, o5, o6, o7
	// Common body of aesbs_xts_encrypt and aesbs_xts_decrypt.
	//   do8   - 8-block routine; its address is handed to __xts_crypt8 in x16
	//   o0-o7 - registers holding the results of do8, in output block order
	// Arguments stay in their incoming registers: x0 = out, x1 = in,
	// x2 = rk, x3 = rounds, w4 = blocks, x5 = iv.
	// Every pass handles exactly eight blocks; the loop ends once the block
	// count drops to zero or below.
	// NOTE(review): presumably callers only pass multiples of eight - confirm.
77267ab02dcSArd Biesheuvel	frame_push	0, 32		// NOTE(review): presumably reserves 32 bytes of locals at .Lframe_local_offset - confirm in asm/assembler.h
77367ab02dcSArd Biesheuvel	add		x6, sp, #.Lframe_local_offset		// x6 = scratch used by __xts_crypt8 for two tweaks
7741abee99eSArd Biesheuvel
775dfc6031eSArd Biesheuvel	ld1		{v25.16b}, [x5]		// v25 = tweak for the first block
77620ab6332SArd Biesheuvel
777dfc6031eSArd Biesheuvel0:	adr		x16, \do8
7781abee99eSArd Biesheuvel	bl		__xts_crypt8
7791abee99eSArd Biesheuvel
	// XOR the results with the per-block tweaks. v25 is consumed here
	// before the ldp below replaces it.
780dfc6031eSArd Biesheuvel	eor		v16.16b, \o0\().16b, v25.16b
781dfc6031eSArd Biesheuvel	eor		v17.16b, \o1\().16b, v26.16b
782dfc6031eSArd Biesheuvel	eor		v18.16b, \o2\().16b, v27.16b
783dfc6031eSArd Biesheuvel	eor		v19.16b, \o3\().16b, v28.16b
7841abee99eSArd Biesheuvel
78567ab02dcSArd Biesheuvel	ldp		q24, q25, [x6]		// v24 = tweak for block 7, v25 = first tweak of the next group
7861abee99eSArd Biesheuvel
787dfc6031eSArd Biesheuvel	eor		v20.16b, \o4\().16b, v29.16b
788dfc6031eSArd Biesheuvel	eor		v21.16b, \o5\().16b, v30.16b
789dfc6031eSArd Biesheuvel	eor		v22.16b, \o6\().16b, v31.16b
790dfc6031eSArd Biesheuvel	eor		v23.16b, \o7\().16b, v24.16b
7911abee99eSArd Biesheuvel
792dfc6031eSArd Biesheuvel	st1		{v16.16b-v19.16b}, [x0], #64
793dfc6031eSArd Biesheuvel	st1		{v20.16b-v23.16b}, [x0], #64
7941abee99eSArd Biesheuvel
	// blocks is declared int, so only w4 is defined by the procedure call
	// standard; count in the 32-bit view, as the ECB and CBC loops do.
795dfc6031eSArd Biesheuvel	subs		w4, w4, #8
796dfc6031eSArd Biesheuvel	b.gt		0b
7971abee99eSArd Biesheuvel
798dfc6031eSArd Biesheuvel	st1		{v25.16b}, [x5]		// hand the next tweak back through iv
79967ab02dcSArd Biesheuvel	frame_pop
8001abee99eSArd Biesheuvel	ret
8011abee99eSArd Biesheuvel	.endm
8021abee99eSArd Biesheuvel
803*47446d7cSEric BiggersSYM_TYPED_FUNC_START(aesbs_xts_encrypt)
	// XTS encryption entry point: expands __xts_crypt around
	// aesbs_encrypt8; the register list gives the output block order.
8041abee99eSArd Biesheuvel	__xts_crypt	aesbs_encrypt8, v0, v1, v4, v6, v3, v7, v2, v5
8050e89640bSMark BrownSYM_FUNC_END(aesbs_xts_encrypt)
8061abee99eSArd Biesheuvel
807*47446d7cSEric BiggersSYM_TYPED_FUNC_START(aesbs_xts_decrypt)
	// XTS decryption entry point: expands __xts_crypt around
	// aesbs_decrypt8; the register list gives the output block order.
8081abee99eSArd Biesheuvel	__xts_crypt	aesbs_decrypt8, v0, v1, v6, v4, v2, v7, v3, v5
8090e89640bSMark BrownSYM_FUNC_END(aesbs_xts_decrypt)
8101abee99eSArd Biesheuvel
8111abee99eSArd Biesheuvel	.macro		next_ctr, v
	// Write the current counter value into vector register v and advance
	// the 128-bit counter kept in x7 (upper 64 bits) and x8 (lower 64 bits).
	//   v - vector register name without arrangement suffix
	// Clobbers the condition flags.
8121abee99eSArd Biesheuvel	mov		\v\().d[1], x8		// lower counter half goes into lane 1
8131abee99eSArd Biesheuvel	adds		x8, x8, #1		// bump the lower half, carry out in C
8141abee99eSArd Biesheuvel	mov		\v\().d[0], x7		// upper counter half goes into lane 0 (value before the carry is added)
8151abee99eSArd Biesheuvel	adc		x7, x7, xzr		// propagate the carry into the upper half
8161abee99eSArd Biesheuvel	rev64		\v\().16b, \v\().16b		// byte-reverse each 64-bit lane
8171abee99eSArd Biesheuvel	.endm
8181abee99eSArd Biesheuvel
8191abee99eSArd Biesheuvel	/*
8201abee99eSArd Biesheuvel	 * aesbs_ctr_encrypt(u8 out[], u8 const in[], u8 const rk[],
821fc074e13SArd Biesheuvel	 *		     int rounds, int blocks, u8 iv[])
8221abee99eSArd Biesheuvel	 */
8230e89640bSMark BrownSYM_FUNC_START(aesbs_ctr_encrypt)
	// CTR mode, exactly eight blocks per pass of the 0: loop.
	// Arguments stay in their incoming registers: x0 = out, x1 = in,
	// x2 = rk, x3 = rounds, w4 = blocks, x5 = iv.
	// x7:x8 hold the running 128-bit counter, v0-v7 the counter blocks and
	// v8-v15 the input data.
	// NOTE(review): presumably callers only pass multiples of eight - confirm.
	// NOTE(review): v8-v15 are overwritten without being saved; presumably
	// acceptable because callers own the whole NEON register file - confirm.
82467ab02dcSArd Biesheuvel	frame_push	0
825fc074e13SArd Biesheuvel	ldp		x7, x8, [x5]		// counter as two 64-bit halves
826fc074e13SArd Biesheuvel	ld1		{v0.16b}, [x5]		// v0 = first counter block, exactly as stored in iv
	// The two rev instructions are wrapped in a little-endian-only helper macro.
8271abee99eSArd BiesheuvelCPU_LE(	rev		x7, x7		)
8281abee99eSArd BiesheuvelCPU_LE(	rev		x8, x8		)
8291abee99eSArd Biesheuvel	adds		x8, x8, #1		// x7:x8 = iv + 1, the counter for block 1
8301abee99eSArd Biesheuvel	adc		x7, x7, xzr
8311abee99eSArd Biesheuvel
832fc074e13SArd Biesheuvel0:	next_ctr	v1
8331abee99eSArd Biesheuvel	next_ctr	v2
8341abee99eSArd Biesheuvel	next_ctr	v3
8351abee99eSArd Biesheuvel	next_ctr	v4
8361abee99eSArd Biesheuvel	next_ctr	v5
8371abee99eSArd Biesheuvel	next_ctr	v6
8381abee99eSArd Biesheuvel	next_ctr	v7
8391abee99eSArd Biesheuvel
840fc074e13SArd Biesheuvel	mov		bskey, x2
841fc074e13SArd Biesheuvel	mov		rounds, x3
8421abee99eSArd Biesheuvel	bl		aesbs_encrypt8		// x0-x8 are used again below, so the callee must leave them intact
8431abee99eSArd Biesheuvel
844fc074e13SArd Biesheuvel	ld1		{ v8.16b-v11.16b}, [x1], #64
845fc074e13SArd Biesheuvel	ld1		{v12.16b-v15.16b}, [x1], #64
8461abee99eSArd Biesheuvel
	// Keystream blocks come back in the order v0, v1, v4, v6, v3, v7, v2, v5.
847fc074e13SArd Biesheuvel	eor		v8.16b, v0.16b, v8.16b
848fc074e13SArd Biesheuvel	eor		v9.16b, v1.16b, v9.16b
849fc074e13SArd Biesheuvel	eor		v10.16b, v4.16b, v10.16b
850fc074e13SArd Biesheuvel	eor		v11.16b, v6.16b, v11.16b
851fc074e13SArd Biesheuvel	eor		v12.16b, v3.16b, v12.16b
852fc074e13SArd Biesheuvel	eor		v13.16b, v7.16b, v13.16b
853fc074e13SArd Biesheuvel	eor		v14.16b, v2.16b, v14.16b
854fc074e13SArd Biesheuvel	eor		v15.16b, v5.16b, v15.16b
8551abee99eSArd Biesheuvel
856fc074e13SArd Biesheuvel	st1		{ v8.16b-v11.16b}, [x0], #64
857fc074e13SArd Biesheuvel	st1		{v12.16b-v15.16b}, [x0], #64
8581abee99eSArd Biesheuvel
859fc074e13SArd Biesheuvel	next_ctr	v0		// first counter block of the next group (or the value returned in iv)
	// blocks is declared int, so only w4 is defined by the procedure call
	// standard; count in the 32-bit view, as the ECB and CBC loops do.
	// next_ctr clobbers the flags, so the subs has to come after it.
860fc074e13SArd Biesheuvel	subs		w4, w4, #8
861fc074e13SArd Biesheuvel	b.gt		0b
8621abee99eSArd Biesheuvel
863fc074e13SArd Biesheuvel	st1		{v0.16b}, [x5]		// write the next counter value back to iv
86467ab02dcSArd Biesheuvel	frame_pop
8651abee99eSArd Biesheuvel	ret
8660e89640bSMark BrownSYM_FUNC_END(aesbs_ctr_encrypt)
867