/* xref: /linux/lib/crypto/arm/aes-cipher-core.S (revision c17ee635fd3a482b2ad2bf5e269755c2eae5f25e) */
/* SPDX-License-Identifier: GPL-2.0-only */
/*
 * Scalar AES core transform
 *
 * Copyright (C) 2017 Linaro Ltd.
 * Author: Ard Biesheuvel <ard.biesheuvel@linaro.org>
 */

#include <linux/linkage.h>
#include <asm/assembler.h>
#include <asm/cache.h>

13*fa229775SEric Biggers	.text
14*fa229775SEric Biggers	.align		5
15*fa229775SEric Biggers
16*fa229775SEric Biggers	rk		.req	r0
17*fa229775SEric Biggers	rounds		.req	r1
18*fa229775SEric Biggers	in		.req	r2
19*fa229775SEric Biggers	out		.req	r3
20*fa229775SEric Biggers	ttab		.req	ip
21*fa229775SEric Biggers
22*fa229775SEric Biggers	t0		.req	lr
23*fa229775SEric Biggers	t1		.req	r2
24*fa229775SEric Biggers	t2		.req	r3
25*fa229775SEric Biggers
26*fa229775SEric Biggers	.macro		__select, out, in, idx
27*fa229775SEric Biggers	.if		__LINUX_ARM_ARCH__ < 7
28*fa229775SEric Biggers	and		\out, \in, #0xff << (8 * \idx)
29*fa229775SEric Biggers	.else
30*fa229775SEric Biggers	ubfx		\out, \in, #(8 * \idx), #8
31*fa229775SEric Biggers	.endif
32*fa229775SEric Biggers	.endm
33*fa229775SEric Biggers
34*fa229775SEric Biggers	.macro		__load, out, in, idx, sz, op
35*fa229775SEric Biggers	.if		__LINUX_ARM_ARCH__ < 7 && \idx > 0
36*fa229775SEric Biggers	ldr\op		\out, [ttab, \in, lsr #(8 * \idx) - \sz]
37*fa229775SEric Biggers	.else
38*fa229775SEric Biggers	ldr\op		\out, [ttab, \in, lsl #\sz]
39*fa229775SEric Biggers	.endif
40*fa229775SEric Biggers	.endm
41*fa229775SEric Biggers
42*fa229775SEric Biggers	.macro		__hround, out0, out1, in0, in1, in2, in3, t3, t4, enc, sz, op, oldcpsr
43*fa229775SEric Biggers	__select	\out0, \in0, 0
44*fa229775SEric Biggers	__select	t0, \in1, 1
45*fa229775SEric Biggers	__load		\out0, \out0, 0, \sz, \op
46*fa229775SEric Biggers	__load		t0, t0, 1, \sz, \op
47*fa229775SEric Biggers
48*fa229775SEric Biggers	.if		\enc
49*fa229775SEric Biggers	__select	\out1, \in1, 0
50*fa229775SEric Biggers	__select	t1, \in2, 1
51*fa229775SEric Biggers	.else
52*fa229775SEric Biggers	__select	\out1, \in3, 0
53*fa229775SEric Biggers	__select	t1, \in0, 1
54*fa229775SEric Biggers	.endif
55*fa229775SEric Biggers	__load		\out1, \out1, 0, \sz, \op
56*fa229775SEric Biggers	__select	t2, \in2, 2
57*fa229775SEric Biggers	__load		t1, t1, 1, \sz, \op
58*fa229775SEric Biggers	__load		t2, t2, 2, \sz, \op
59*fa229775SEric Biggers
60*fa229775SEric Biggers	eor		\out0, \out0, t0, ror #24
61*fa229775SEric Biggers
62*fa229775SEric Biggers	__select	t0, \in3, 3
63*fa229775SEric Biggers	.if		\enc
64*fa229775SEric Biggers	__select	\t3, \in3, 2
65*fa229775SEric Biggers	__select	\t4, \in0, 3
66*fa229775SEric Biggers	.else
67*fa229775SEric Biggers	__select	\t3, \in1, 2
68*fa229775SEric Biggers	__select	\t4, \in2, 3
69*fa229775SEric Biggers	.endif
70*fa229775SEric Biggers	__load		\t3, \t3, 2, \sz, \op
71*fa229775SEric Biggers	__load		t0, t0, 3, \sz, \op
72*fa229775SEric Biggers	__load		\t4, \t4, 3, \sz, \op
73*fa229775SEric Biggers
74*fa229775SEric Biggers	.ifnb		\oldcpsr
75*fa229775SEric Biggers	/*
76*fa229775SEric Biggers	 * This is the final round and we're done with all data-dependent table
77*fa229775SEric Biggers	 * lookups, so we can safely re-enable interrupts.
78*fa229775SEric Biggers	 */
79*fa229775SEric Biggers	restore_irqs	\oldcpsr
80*fa229775SEric Biggers	.endif
81*fa229775SEric Biggers
82*fa229775SEric Biggers	eor		\out1, \out1, t1, ror #24
83*fa229775SEric Biggers	eor		\out0, \out0, t2, ror #16
84*fa229775SEric Biggers	ldm		rk!, {t1, t2}
85*fa229775SEric Biggers	eor		\out1, \out1, \t3, ror #16
86*fa229775SEric Biggers	eor		\out0, \out0, t0, ror #8
87*fa229775SEric Biggers	eor		\out1, \out1, \t4, ror #8
88*fa229775SEric Biggers	eor		\out0, \out0, t1
89*fa229775SEric Biggers	eor		\out1, \out1, t2
90*fa229775SEric Biggers	.endm
91*fa229775SEric Biggers
92*fa229775SEric Biggers	.macro		fround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
93*fa229775SEric Biggers	__hround	\out0, \out1, \in0, \in1, \in2, \in3, \out2, \out3, 1, \sz, \op
94*fa229775SEric Biggers	__hround	\out2, \out3, \in2, \in3, \in0, \in1, \in1, \in2, 1, \sz, \op, \oldcpsr
95*fa229775SEric Biggers	.endm
96*fa229775SEric Biggers
97*fa229775SEric Biggers	.macro		iround, out0, out1, out2, out3, in0, in1, in2, in3, sz=2, op, oldcpsr
98*fa229775SEric Biggers	__hround	\out0, \out1, \in0, \in3, \in2, \in1, \out2, \out3, 0, \sz, \op
99*fa229775SEric Biggers	__hround	\out2, \out3, \in2, \in1, \in0, \in3, \in1, \in0, 0, \sz, \op, \oldcpsr
100*fa229775SEric Biggers	.endm
101*fa229775SEric Biggers
102*fa229775SEric Biggers	.macro		do_crypt, round, ttab, ltab, bsz
103*fa229775SEric Biggers	push		{r3-r11, lr}
104*fa229775SEric Biggers
105*fa229775SEric Biggers	// Load keys first, to reduce latency in case they're not cached yet.
106*fa229775SEric Biggers	ldm		rk!, {r8-r11}
107*fa229775SEric Biggers
108*fa229775SEric Biggers	ldr		r4, [in]
109*fa229775SEric Biggers	ldr		r5, [in, #4]
110*fa229775SEric Biggers	ldr		r6, [in, #8]
111*fa229775SEric Biggers	ldr		r7, [in, #12]
112*fa229775SEric Biggers
113*fa229775SEric Biggers#ifdef CONFIG_CPU_BIG_ENDIAN
114*fa229775SEric Biggers	rev_l		r4, t0
115*fa229775SEric Biggers	rev_l		r5, t0
116*fa229775SEric Biggers	rev_l		r6, t0
117*fa229775SEric Biggers	rev_l		r7, t0
118*fa229775SEric Biggers#endif
119*fa229775SEric Biggers
120*fa229775SEric Biggers	eor		r4, r4, r8
121*fa229775SEric Biggers	eor		r5, r5, r9
122*fa229775SEric Biggers	eor		r6, r6, r10
123*fa229775SEric Biggers	eor		r7, r7, r11
124*fa229775SEric Biggers
125*fa229775SEric Biggers	mov_l		ttab, \ttab
126*fa229775SEric Biggers	/*
127*fa229775SEric Biggers	 * Disable interrupts and prefetch the 1024-byte 'ft' or 'it' table into
128*fa229775SEric Biggers	 * L1 cache, assuming cacheline size >= 32.  This is a hardening measure
129*fa229775SEric Biggers	 * intended to make cache-timing attacks more difficult.  They may not
130*fa229775SEric Biggers	 * be fully prevented, however; see the paper
131*fa229775SEric Biggers	 * https://cr.yp.to/antiforgery/cachetiming-20050414.pdf
132*fa229775SEric Biggers	 * ("Cache-timing attacks on AES") for a discussion of the many
133*fa229775SEric Biggers	 * difficulties involved in writing truly constant-time AES software.
134*fa229775SEric Biggers	 */
135*fa229775SEric Biggers	 save_and_disable_irqs	t0
136*fa229775SEric Biggers	.set		i, 0
137*fa229775SEric Biggers	.rept		1024 / 128
138*fa229775SEric Biggers	ldr		r8, [ttab, #i + 0]
139*fa229775SEric Biggers	ldr		r9, [ttab, #i + 32]
140*fa229775SEric Biggers	ldr		r10, [ttab, #i + 64]
141*fa229775SEric Biggers	ldr		r11, [ttab, #i + 96]
142*fa229775SEric Biggers	.set		i, i + 128
143*fa229775SEric Biggers	.endr
144*fa229775SEric Biggers	push		{t0}		// oldcpsr
145*fa229775SEric Biggers
146*fa229775SEric Biggers	tst		rounds, #2
147*fa229775SEric Biggers	bne		1f
148*fa229775SEric Biggers
149*fa229775SEric Biggers0:	\round		r8, r9, r10, r11, r4, r5, r6, r7
150*fa229775SEric Biggers	\round		r4, r5, r6, r7, r8, r9, r10, r11
151*fa229775SEric Biggers
152*fa229775SEric Biggers1:	subs		rounds, rounds, #4
153*fa229775SEric Biggers	\round		r8, r9, r10, r11, r4, r5, r6, r7
154*fa229775SEric Biggers	bls		2f
155*fa229775SEric Biggers	\round		r4, r5, r6, r7, r8, r9, r10, r11
156*fa229775SEric Biggers	b		0b
157*fa229775SEric Biggers
158*fa229775SEric Biggers2:	.ifb		\ltab
159*fa229775SEric Biggers	add		ttab, ttab, #1
160*fa229775SEric Biggers	.else
161*fa229775SEric Biggers	mov_l		ttab, \ltab
162*fa229775SEric Biggers	// Prefetch inverse S-box for final round; see explanation above
163*fa229775SEric Biggers	.set		i, 0
164*fa229775SEric Biggers	.rept		256 / 64
165*fa229775SEric Biggers	ldr		t0, [ttab, #i + 0]
166*fa229775SEric Biggers	ldr		t1, [ttab, #i + 32]
167*fa229775SEric Biggers	.set		i, i + 64
168*fa229775SEric Biggers	.endr
169*fa229775SEric Biggers	.endif
170*fa229775SEric Biggers
171*fa229775SEric Biggers	pop		{rounds}	// oldcpsr
172*fa229775SEric Biggers	\round		r4, r5, r6, r7, r8, r9, r10, r11, \bsz, b, rounds
173*fa229775SEric Biggers
174*fa229775SEric Biggers#ifdef CONFIG_CPU_BIG_ENDIAN
175*fa229775SEric Biggers	rev_l		r4, t0
176*fa229775SEric Biggers	rev_l		r5, t0
177*fa229775SEric Biggers	rev_l		r6, t0
178*fa229775SEric Biggers	rev_l		r7, t0
179*fa229775SEric Biggers#endif
180*fa229775SEric Biggers
181*fa229775SEric Biggers	ldr		out, [sp]
182*fa229775SEric Biggers
183*fa229775SEric Biggers	str		r4, [out]
184*fa229775SEric Biggers	str		r5, [out, #4]
185*fa229775SEric Biggers	str		r6, [out, #8]
186*fa229775SEric Biggers	str		r7, [out, #12]
187*fa229775SEric Biggers
188*fa229775SEric Biggers	pop		{r3-r11, pc}
189*fa229775SEric Biggers
190*fa229775SEric Biggers	.align		3
191*fa229775SEric Biggers	.ltorg
192*fa229775SEric Biggers	.endm
193*fa229775SEric Biggers
194*fa229775SEric BiggersENTRY(__aes_arm_encrypt)
195*fa229775SEric Biggers	do_crypt	fround, aes_enc_tab,, 2
196*fa229775SEric BiggersENDPROC(__aes_arm_encrypt)
197*fa229775SEric Biggers
198*fa229775SEric Biggers	.align		5
199*fa229775SEric BiggersENTRY(__aes_arm_decrypt)
200*fa229775SEric Biggers	do_crypt	iround, aes_dec_tab, crypto_aes_inv_sbox, 0
201*fa229775SEric BiggersENDPROC(__aes_arm_decrypt)
202