xref: /linux/arch/arm64/crypto/aes-neon.S (revision 621cde16e49b3ecf7d59a8106a20aaebfb4a59a9)
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * linux/arch/arm64/crypto/aes-neon.S - AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */
749788fe2SArd Biesheuvel
849788fe2SArd Biesheuvel#include <linux/linkage.h>
9a2c435ccSArd Biesheuvel#include <asm/assembler.h>
1049788fe2SArd Biesheuvel
/*
 * Entry-point wrappers: the name given to either macro has "neon_"
 * pasted in front of it before it is handed on to SYM_FUNC_START() or
 * SYM_FUNC_END().
 */
#define AES_FUNC_START(func)		SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func)		SYM_FUNC_END(neon_ ## func)
1349788fe2SArd Biesheuvel
/*
 * Symbolic names for NEON registers. None of the macros visible in this
 * file use them; presumably they are consumed by the included
 * aes-modes.S - confirm there. Note that xtsmask and cbciv both name v7.
 */
	xtsmask		.req	v7
	cbciv		.req	v7
	vctr		.req	v4
172e5d2f33SArd Biesheuvel
/*
 * xts_reload_mask - forward to xts_load_mask
 *   tmp: passed through unchanged as the only argument
 *
 * xts_load_mask is not defined in this file (presumably it comes from
 * aes-modes.S - confirm).
 */
	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm
212e5d2f33SArd Biesheuvel
/*
 * xts_cts_skip_tw - special case for the neon-bs driver calling into
 * this one for CTS
 *   reg: register whose bit 1 is tested
 *   lbl: label branched to when that bit is set
 */
	.macro		xts_cts_skip_tw, reg, lbl
	tbnz		\reg, #1, \lbl
	.endm
2667cfa5d3SArd Biesheuvel
/*
 * mul_by_x - multiply by polynomial 'x' in GF(2^8)
 *   out:   destination vector
 *   in:    source vector
 *   temp:  scratch vector, clobbered
 *   const: vector holding the reduction constant (mix_columns passes
 *          v12, which prepare fills with 0x1b bytes)
 *
 * All operands are substituted verbatim, so the caller supplies the
 * arrangement specifier (.16b everywhere in this file).
 */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7		/* byte lanes: 0xff if top bit set, else 0x00 */
	shl		\out, \in, #1		/* the doubling; the top bit is shifted out */
	and		\temp, \temp, \const	/* constant only where the top bit was set */
	eor		\out, \out, \temp	/* fold the reduction term back in */
	.endm
3449788fe2SArd Biesheuvel
/*
 * mul_by_x2 - multiply by polynomial 'x^2' in GF(2^8)
 *   out:   destination vector
 *   in:    source vector
 *   temp:  scratch vector, clobbered
 *   const: vector holding the reduction constant (mix_columns passes
 *          v12, which prepare fills with 0x1b bytes)
 *
 * All operands are substituted verbatim, so the caller supplies the
 * arrangement specifier (.16b everywhere in this file).
 */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6		/* the two bits that shl #2 is about to drop */
	shl		\out, \in, #2		/* the multiplication by four itself */
	pmul		\temp, \temp, \const	/* polynomial product of those bits and the constant */
	eor		\out, \out, \temp	/* fold the reduction term back in */
	.endm
424edd7d01SArd Biesheuvel
/*
 * prepare - preload the entire Sbox and the round constants
 *   sbox:      symbol of the 256-byte substitution table
 *   shiftrows: symbol of the 16-byte permutation vector wanted in v13
 *   temp:      general-purpose register, clobbered (left pointing at
 *              the last 64 bytes of the table)
 *
 * Vector registers written:
 *   v13      contents of shiftrows
 *   v14      contents of .Lror32by8
 *   v16-v31  the table, 64 bytes per group of four registers
 *   v12      0x1b in every byte
 */
	.macro		prepare, sbox, shiftrows, temp
	ldr_l		q13, \shiftrows, \temp
	ldr_l		q14, .Lror32by8, \temp
	adr_l		\temp, \sbox
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	movi		v12.16b, #0x1b
	.endm
5449788fe2SArd Biesheuvel
/*
 * enc_prepare - do preload for encryption
 *   ignore0, ignore1: accepted but never referenced
 *   temp:             general-purpose register, clobbered by prepare
 *
 * Selects crypto_aes_sbox and the forward ShiftRows permutation.
 */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_sbox, .LForward_ShiftRows, \temp
	.endm
5949788fe2SArd Biesheuvel
/*
 * enc_switch_key - expands to nothing in this implementation
 *   ignore0, ignore1, temp: accepted but never referenced
 */
	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm
6349788fe2SArd Biesheuvel
/*
 * dec_prepare - do preload for decryption
 *   ignore0, ignore1: accepted but never referenced
 *   temp:             general-purpose register, clobbered by prepare
 *
 * Selects crypto_aes_inv_sbox and the reverse ShiftRows permutation.
 */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
	.endm
6849788fe2SArd Biesheuvel
/*
 * sub_bytes - apply SubBytes transformation using the preloaded Sbox
 *   in: bare vector register name (no arrangement); replaced by its
 *       substituted value
 *
 * Expects the table in v16-v31 (see prepare) and a per-byte bias in v15
 * (do_block sets it to 0x40, the size of one four-register group).
 * Clobbers v9, v10 and v11.
 *
 * One lookup is done per 64-byte quarter of the table. The index is
 * lowered by the bias before each later quarter; tbl yields zero and
 * tbx leaves the destination alone for indexes outside its quarter, so
 * exactly one of the four lookups supplies each result byte.
 */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v15.16b		/* index for quarter 1 */
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b		/* index for quarter 2 */
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b		/* index for quarter 3 */
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm
7949788fe2SArd Biesheuvel
/*
 * mix_columns - apply MixColumns transformation
 *   in:  bare vector register name holding the state; updated in place
 *   enc: 0 adds the inverse-transform pre-multiplication, any other
 *        value (callers pass 1) gives the plain forward transform
 *
 * Reads v12 (0x1b bytes) and v14 (.Lror32by8), both set up by prepare.
 * Clobbers v8 and v9.
 */
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	rev32		v8.8h, v8.8h			/* swap the halves of each 32-bit word */
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	rev32		v8.8h, \in\().8h		/* swap the halves of each 32-bit word */
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	tbl		\in\().16b, {\in\().16b}, v14.16b	/* permute bytes through .Lror32by8 */
	eor		\in\().16b, \in\().16b, v8.16b
	.endm
9749788fe2SArd Biesheuvel
/*
 * do_block - run one AES state through the full cipher
 *   enc:    handed to mix_columns (1 = forward, 0 = inverse)
 *   in:     bare vector register name holding the state; updated in place
 *   rounds: loop count; rounds + 1 round keys are read in total
 *   rk:     address of the first round key (16 bytes per key)
 *   rkp:    general-purpose register, clobbered (running key pointer)
 *   i:      general-purpose register, clobbered (remaining rounds)
 *
 * Relies on the register state left by prepare. Clobbers v15, plus
 * v8-v11 through sub_bytes and mix_columns.
 *
 * v15 is shared: it carries the round key into the eor, is then
 * overwritten with the 0x40 bias sub_bytes expects, and is reloaded
 * with the next round key afterwards. The last pass skips mix_columns.
 */
	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
.Lneon_round\@:
	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	sub		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	cbz		\i, .Lneon_final\@
	mix_columns	\in, \enc
	b		.Lneon_round\@
.Lneon_final\@:
	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm
11349788fe2SArd Biesheuvel
/*
 * encrypt_block - do_block with enc fixed to 1
 *   in, rounds, rk, rkp, i: passed through to do_block unchanged
 */
	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm
11749788fe2SArd Biesheuvel
/*
 * decrypt_block - do_block with enc fixed to 0
 *   in, rounds, rk, rkp, i: passed through to do_block unchanged
 */
	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm
12149788fe2SArd Biesheuvel
	/*
	 * Interleaved versions: functionally equivalent to the
	 * ones above, but applied to several AES states in parallel.
	 */
12649788fe2SArd Biesheuvel
/*
 * sub_bytes_4x - SubBytes on four states at once
 *   in0-in3: bare vector register names; each is replaced by its
 *            substituted value
 *
 * Same table walk as sub_bytes, with the work for the four states
 * interleaved. Expects the table in v16-v31 and the per-byte bias in
 * v15. Clobbers v8-v11, which hold the running index for in0-in3
 * respectively.
 */
	.macro		sub_bytes_4x, in0, in1, in2, in3
	/* first table quarter, and the indexes for the second */
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v15.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v15.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	/* second quarter; each index is lowered again once it has been used */
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	/* third quarter */
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	/* fourth quarter */
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm
15749788fe2SArd Biesheuvel
/*
 * mul_by_x_2x - mul_by_x on two vectors, with the two instruction
 * streams interleaved
 *   out0, out1: destination vectors
 *   in0, in1:   source vectors
 *   tmp0, tmp1: scratch vectors, clobbered
 *   const:      vector holding the reduction constant
 *
 * All arguments are bare register names; the .16b arrangement is
 * appended here.
 */
	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7		/* 0xff where the top bit is set */
	shl		\out0\().16b, \in0\().16b, #1
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b	/* conditional reduction term */
	shl		\out1\().16b, \in1\().16b, #1
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm
16849788fe2SArd Biesheuvel
/*
 * mul_by_x2_2x - mul_by_x2 on two vectors, with the two instruction
 * streams interleaved
 *   out0, out1: destination vectors
 *   in0, in1:   source vectors
 *   tmp0, tmp1: scratch vectors, clobbered
 *   const:      vector holding the reduction constant
 *
 * All arguments are bare register names; the .16b arrangement is
 * appended here.
 */
	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	ushr		\tmp0\().16b, \in0\().16b, #6		/* the two bits shl #2 drops */
	shl		\out0\().16b, \in0\().16b, #2
	ushr		\tmp1\().16b, \in1\().16b, #6
	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b	/* reduction term */
	shl		\out1\().16b, \in1\().16b, #2
	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm
1794edd7d01SArd Biesheuvel
/*
 * mix_columns_2x - MixColumns on two states at once
 *   in0, in1: bare vector register names; updated in place
 *   enc:      0 adds the inverse-transform pre-multiplication, any
 *             other value (callers pass 1) gives the forward transform
 *
 * Same sequence as mix_columns, interleaved over both states. Reads
 * v12 (0x1b bytes) and v14 (.Lror32by8). Clobbers v8-v11.
 */
	.macro		mix_columns_2x, in0, in1, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor		\in0\().16b, \in0\().16b, v8.16b
	rev32		v8.8h, v8.8h			/* swap the halves of each 32-bit word */
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	.endif

	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	rev32		v10.8h, \in0\().8h		/* swap the halves of each 32-bit word */
	rev32		v11.8h, \in1\().8h
	eor		v10.16b, v10.16b, v8.16b
	eor		v11.16b, v11.16b, v9.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	tbl		\in0\().16b, {\in0\().16b}, v14.16b	/* permute bytes through .Lror32by8 */
	tbl		\in1\().16b, {\in1\().16b}, v14.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	.endm
20449788fe2SArd Biesheuvel
/*
 * do_block_4x - run four AES states through the full cipher together
 *   enc:     handed to mix_columns_2x (1 = forward, 0 = inverse)
 *   in0-in3: bare vector register names holding the states; updated in
 *            place
 *   rounds:  loop count; rounds + 1 round keys are read in total
 *   rk:      address of the first round key (16 bytes per key)
 *   rkp:     general-purpose register, clobbered (running key pointer)
 *   i:       general-purpose register, clobbered (remaining rounds)
 *
 * Same loop as do_block with every step applied to all four states.
 * Relies on the register state left by prepare. Clobbers v15, plus
 * v8-v11 through sub_bytes_4x and mix_columns_2x.
 */
	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
.Lneon_round4x\@:
	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40				/* bias for sub_bytes_4x */
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	sub		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16			/* next round key */
	cbz		\i, .Lneon_final4x\@			/* last pass skips MixColumns */
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		.Lneon_round4x\@
.Lneon_final4x\@:
	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	.endm
23049788fe2SArd Biesheuvel
/*
 * encrypt_block4x - do_block_4x with enc fixed to 1
 *   in0-in3, rounds, rk, rkp, i: passed through to do_block_4x unchanged
 */
	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm
23449788fe2SArd Biesheuvel
/*
 * decrypt_block4x - do_block_4x with enc fixed to 0
 *   in0-in3, rounds, rk, rkp, i: passed through to do_block_4x unchanged
 */
	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm
23849788fe2SArd Biesheuvel
23949788fe2SArd Biesheuvel#include "aes-modes.S"
24049788fe2SArd Biesheuvel
/*
 * 16-byte index vectors for tbl, aligned to 16 bytes. enc_prepare and
 * dec_prepare pick one of the two ShiftRows vectors for v13; prepare
 * always loads .Lror32by8 into v14.
 *
 * Each vector is a single 128-bit value fetched with a q-register load,
 * so the rightmost pair of hex digits is the index for byte 0.
 */
	.section	".rodata", "a"
	.align		4
.LForward_ShiftRows:
	.octa		0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
	.octa		0x0306090c0f0205080b0e0104070a0d00

.Lror32by8:
	.octa		0x0c0f0e0d080b0a090407060500030201
251