/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * AES cipher for ARMv8 NEON
 *
 * Copyright (C) 2013 - 2017 Linaro Ltd. <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>

#define AES_FUNC_START(func)		SYM_FUNC_START(neon_ ## func)
#define AES_FUNC_END(func)		SYM_FUNC_END(neon_ ## func)
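
	/*
	 * The routines in aes-modes.S (included at the end of this file)
	 * are defined via AES_FUNC_START/AES_FUNC_END, so each of them gets
	 * a neon_ symbol prefix here and can coexist with other AES
	 * implementations of the same functions.
	 */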

	xtsmask		.req	v7
	cbciv		.req	v7
	vctr		.req	v4
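
	/*
	 * Register aliases consumed by the shared mode code: v7 doubles as
	 * the XTS tweak mask and the CBC IV (the two are never live in the
	 * same call, which is what makes the aliasing safe), and v4 carries
	 * the CTR counter block.
	 */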

	.macro		xts_reload_mask, tmp
	xts_load_mask	\tmp
	.endm

	/* special case for the neon-bs driver calling into this one for CTS */
	.macro		xts_cts_skip_tw, reg, lbl
	tbnz		\reg, #1, \lbl
	.endm

	/* multiply by polynomial 'x' in GF(2^8) */
	.macro		mul_by_x, out, in, temp, const
	sshr		\temp, \in, #7
	shl		\out, \in, #1
	and		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm
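
	/*
	 * Multiplying by x is a left shift by one, reduced modulo the AES
	 * polynomial x^8 + x^4 + x^3 + x + 1 when the shifted-out bit was
	 * set: sshr #7 broadcasts each byte's top bit to 0x00 or 0xff, so
	 * and'ing with \const (0x1b) applies the reduction exactly where it
	 * is needed, e.g. 0x80 * x = (0x80 << 1) ^ 0x1b = 0x1b.
	 */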

	/* multiply by polynomial 'x^2' in GF(2^8) */
	.macro		mul_by_x2, out, in, temp, const
	ushr		\temp, \in, #6
	shl		\out, \in, #2
	pmul		\temp, \temp, \const
	eor		\out, \out, \temp
	.endm
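
	/*
	 * Here two bits are shifted out, so ushr #6 keeps them and a
	 * carryless multiply (pmul) by 0x1b folds both reductions back in:
	 * x^8 mod P(x) = 0x1b and x^9 mod P(x) = pmul(0x02, 0x1b) = 0x36.
	 */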

	/* preload the entire Sbox */
	.macro		prepare, sbox, shiftrows, temp
	movi		v12.16b, #0x1b
	ldr_l		q13, \shiftrows, \temp
	ldr_l		q14, .Lror32by8, \temp
	adr_l		\temp, \sbox
	ld1		{v16.16b-v19.16b}, [\temp], #64
	ld1		{v20.16b-v23.16b}, [\temp], #64
	ld1		{v24.16b-v27.16b}, [\temp], #64
	ld1		{v28.16b-v31.16b}, [\temp]
	.endm
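
	/*
	 * Fixed register map set up above and relied on by the macros below:
	 * v16-v31 hold the 256-byte (inverse) Sbox for tbl/tbx lookups, v12
	 * the 0x1b reduction constant, v13 the ShiftRows permutation and
	 * v14 the rotate-by-8 permutation used by mix_columns.
	 */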

	/* do preload for encryption */
	.macro		enc_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_sbox, .LForward_ShiftRows, \temp
	.endm

	.macro		enc_switch_key, ignore0, ignore1, temp
	/* do nothing */
	.endm

	/* do preload for decryption */
	.macro		dec_prepare, ignore0, ignore1, temp
	prepare		crypto_aes_inv_sbox, .LReverse_ShiftRows, \temp
	.endm

	/* apply SubBytes transformation using the preloaded Sbox */
	.macro		sub_bytes, in
	sub		v9.16b, \in\().16b, v15.16b
	tbl		\in\().16b, {v16.16b-v19.16b}, \in\().16b
	sub		v10.16b, v9.16b, v15.16b
	tbx		\in\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v11.16b, v10.16b, v15.16b
	tbx		\in\().16b, {v24.16b-v27.16b}, v10.16b
	tbx		\in\().16b, {v28.16b-v31.16b}, v11.16b
	.endm
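
	/*
	 * tbl/tbx index at most 64 table bytes at once, so the lookup is
	 * done in four windows: tbl writes zero for indices >= 0x40, each
	 * tbx leaves out-of-range destination bytes untouched, and 0x40
	 * (kept in v15 by do_block at this point) is subtracted off the
	 * index before each successive window.
	 */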

	/* apply MixColumns transformation */
	.macro		mix_columns, in, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2	v8.16b, \in\().16b, v9.16b, v12.16b
	eor		\in\().16b, \in\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in\().16b, \in\().16b, v8.16b
	.endif

	mul_by_x	v9.16b, \in\().16b, v8.16b, v12.16b
	rev32		v8.8h, \in\().8h
	eor		v8.16b, v8.16b, v9.16b
	eor		\in\().16b, \in\().16b, v8.16b
	tbl		\in\().16b, {\in\().16b}, v14.16b
	eor		\in\().16b, \in\().16b, v8.16b
	.endm
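
	/*
	 * The forward path computes, per 32-bit column, with t = x * in,
	 * rot16 = rev32 on .8h lanes and rot8 = the byte rotation done by
	 * the tbl with v14:
	 *
	 *	MixColumns(in) = rot8(in ^ t ^ rot16(in)) ^ t ^ rot16(in)
	 *
	 * For decryption, pre-multiplying each column by the circulant
	 * { 5, 0, 4, 0 } (out_i = 5*in_i ^ 4*in_(i+2), i.e. in ^ x^2*in ^
	 * rot16(x^2*in)) turns the forward transform into the inverse one,
	 * since { 2, 3, 1, 1 } x { 5, 0, 4, 0 } = { 14, 11, 13, 9 }.
	 */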

	.macro		do_block, enc, in, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
.La\@:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in\().16b, {\in\().16b}, v13.16b	/* ShiftRows */
	sub_bytes	\in
	sub		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	cbz		\i, .Lb\@
	mix_columns	\in, \enc
	b		.La\@
.Lb\@:	eor		\in\().16b, \in\().16b, v15.16b		/* ^round key */
	.endm
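
	/*
	 * v15 does double duty in the round loop above: it holds the
	 * current round key for the AddRoundKey eor, then the 0x40 constant
	 * that sub_bytes needs, before the next key is loaded.  The last
	 * round branches to .Lb and so, as the spec requires, skips
	 * MixColumns.
	 */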

	.macro		encrypt_block, in, rounds, rk, rkp, i
	do_block	1, \in, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block, in, rounds, rk, rkp, i
	do_block	0, \in, \rounds, \rk, \rkp, \i
	.endm

	/*
	 * Interleaved versions: functionally equivalent to the
	 * ones above, but applied to two or four AES states in parallel.
	 */

	.macro		sub_bytes_4x, in0, in1, in2, in3
	sub		v8.16b, \in0\().16b, v15.16b
	tbl		\in0\().16b, {v16.16b-v19.16b}, \in0\().16b
	sub		v9.16b, \in1\().16b, v15.16b
	tbl		\in1\().16b, {v16.16b-v19.16b}, \in1\().16b
	sub		v10.16b, \in2\().16b, v15.16b
	tbl		\in2\().16b, {v16.16b-v19.16b}, \in2\().16b
	sub		v11.16b, \in3\().16b, v15.16b
	tbl		\in3\().16b, {v16.16b-v19.16b}, \in3\().16b
	tbx		\in0\().16b, {v20.16b-v23.16b}, v8.16b
	tbx		\in1\().16b, {v20.16b-v23.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v20.16b-v23.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v20.16b-v23.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v24.16b-v27.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v24.16b-v27.16b}, v9.16b
	sub		v8.16b, v8.16b, v15.16b
	tbx		\in2\().16b, {v24.16b-v27.16b}, v10.16b
	sub		v9.16b, v9.16b, v15.16b
	tbx		\in3\().16b, {v24.16b-v27.16b}, v11.16b
	sub		v10.16b, v10.16b, v15.16b
	tbx		\in0\().16b, {v28.16b-v31.16b}, v8.16b
	sub		v11.16b, v11.16b, v15.16b
	tbx		\in1\().16b, {v28.16b-v31.16b}, v9.16b
	tbx		\in2\().16b, {v28.16b-v31.16b}, v10.16b
	tbx		\in3\().16b, {v28.16b-v31.16b}, v11.16b
	.endm
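
	/*
	 * Same four-window Sbox lookup as sub_bytes, with the tbl/tbx and
	 * index-adjustment instructions of the four states interleaved,
	 * presumably to hide the latency of the wide table lookups.
	 */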

	.macro		mul_by_x_2x, out0, out1, in0, in1, tmp0, tmp1, const
	sshr		\tmp0\().16b, \in0\().16b, #7
	shl		\out0\().16b, \in0\().16b, #1
	sshr		\tmp1\().16b, \in1\().16b, #7
	and		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #1
	and		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	.macro		mul_by_x2_2x, out0, out1, in0, in1, tmp0, tmp1, const
	ushr		\tmp0\().16b, \in0\().16b, #6
	shl		\out0\().16b, \in0\().16b, #2
	ushr		\tmp1\().16b, \in1\().16b, #6
	pmul		\tmp0\().16b, \tmp0\().16b, \const\().16b
	shl		\out1\().16b, \in1\().16b, #2
	pmul		\tmp1\().16b, \tmp1\().16b, \const\().16b
	eor		\out0\().16b, \out0\().16b, \tmp0\().16b
	eor		\out1\().16b, \out1\().16b, \tmp1\().16b
	.endm

	.macro		mix_columns_2x, in0, in1, enc
	.if		\enc == 0
	/* Inverse MixColumns: pre-multiply by { 5, 0, 4, 0 } */
	mul_by_x2_2x	v8, v9, \in0, \in1, v10, v11, v12
	eor		\in0\().16b, \in0\().16b, v8.16b
	rev32		v8.8h, v8.8h
	eor		\in1\().16b, \in1\().16b, v9.16b
	rev32		v9.8h, v9.8h
	eor		\in0\().16b, \in0\().16b, v8.16b
	eor		\in1\().16b, \in1\().16b, v9.16b
	.endif

	mul_by_x_2x	v8, v9, \in0, \in1, v10, v11, v12
	rev32		v10.8h, \in0\().8h
	rev32		v11.8h, \in1\().8h
	eor		v10.16b, v10.16b, v8.16b
	eor		v11.16b, v11.16b, v9.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	tbl		\in0\().16b, {\in0\().16b}, v14.16b
	tbl		\in1\().16b, {\in1\().16b}, v14.16b
	eor		\in0\().16b, \in0\().16b, v10.16b
	eor		\in1\().16b, \in1\().16b, v11.16b
	.endm

	.macro		do_block_4x, enc, in0, in1, in2, in3, rounds, rk, rkp, i
	ld1		{v15.4s}, [\rk]
	add		\rkp, \rk, #16
	mov		\i, \rounds
.La\@:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	movi		v15.16b, #0x40
	tbl		\in0\().16b, {\in0\().16b}, v13.16b	/* ShiftRows */
	tbl		\in1\().16b, {\in1\().16b}, v13.16b	/* ShiftRows */
	tbl		\in2\().16b, {\in2\().16b}, v13.16b	/* ShiftRows */
	tbl		\in3\().16b, {\in3\().16b}, v13.16b	/* ShiftRows */
	sub_bytes_4x	\in0, \in1, \in2, \in3
	sub		\i, \i, #1
	ld1		{v15.4s}, [\rkp], #16
	cbz		\i, .Lb\@
	mix_columns_2x	\in0, \in1, \enc
	mix_columns_2x	\in2, \in3, \enc
	b		.La\@
.Lb\@:	eor		\in0\().16b, \in0\().16b, v15.16b	/* ^round key */
	eor		\in1\().16b, \in1\().16b, v15.16b	/* ^round key */
	eor		\in2\().16b, \in2\().16b, v15.16b	/* ^round key */
	eor		\in3\().16b, \in3\().16b, v15.16b	/* ^round key */
	.endm
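
	/*
	 * Same round structure as do_block, but on four states at once;
	 * mix_columns_2x runs twice per round to cover all four.
	 */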

	.macro		encrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	1, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

	.macro		decrypt_block4x, in0, in1, in2, in3, rounds, rk, rkp, i
	do_block_4x	0, \in0, \in1, \in2, \in3, \rounds, \rk, \rkp, \i
	.endm

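/*
 * aes-modes.S builds the actual mode routines (ECB, CBC, CTR, XTS, ...)
 * on top of the encrypt/decrypt macros defined above.
 */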
#include "aes-modes.S"

	.section	".rodata", "a"
	.align		4
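
/*
 * 16-byte tbl permutations: the ShiftRows tables rotate row i of the
 * column-major AES state by i byte positions (forward and reverse), and
 * .Lror32by8 rotates each 32-bit word of the state by 8 bits.
 */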
.LForward_ShiftRows:
	.octa		0x0b06010c07020d08030e09040f0a0500

.LReverse_ShiftRows:
	.octa		0x0306090c0f0205080b0e0104070a0d00

.Lror32by8:
	.octa		0x0c0f0e0d080b0a090407060500030201