xref: /linux/arch/riscv/crypto/aes-riscv64-zvkned.S (revision 3ba84ac69b53e6ee07c31d54554e00793d7b144f)
1/* SPDX-License-Identifier: Apache-2.0 OR BSD-2-Clause */
2//
3// This file is dual-licensed, meaning that you can use it under your
4// choice of either of the following two licenses:
5//
6// Copyright 2023 The OpenSSL Project Authors. All Rights Reserved.
7//
8// Licensed under the Apache License 2.0 (the "License"). You can obtain
9// a copy in the file LICENSE in the source distribution or at
10// https://www.openssl.org/source/license.html
11//
12// or
13//
14// Copyright (c) 2023, Christoph Müllner <christoph.muellner@vrull.eu>
15// Copyright (c) 2023, Phoebe Chen <phoebe.chen@sifive.com>
16// Copyright (c) 2023, Jerry Shih <jerry.shih@sifive.com>
17// Copyright 2024 Google LLC
18// All rights reserved.
19//
20// Redistribution and use in source and binary forms, with or without
21// modification, are permitted provided that the following conditions
22// are met:
23// 1. Redistributions of source code must retain the above copyright
24//    notice, this list of conditions and the following disclaimer.
25// 2. Redistributions in binary form must reproduce the above copyright
26//    notice, this list of conditions and the following disclaimer in the
27//    documentation and/or other materials provided with the distribution.
28//
29// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
30// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
31// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
32// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
33// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
34// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
35// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
36// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
37// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
38// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
39// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
40
41// The generated code of this file depends on the following RISC-V extensions:
42// - RV64I
43// - RISC-V Vector ('V') with VLEN >= 128
44// - RISC-V Vector AES block cipher extension ('Zvkned')
45
46#include <linux/linkage.h>
47
48.text
49.option arch, +zvkned
50
51#include "aes-macros.S"
52
// Names for the argument registers, following the parameter order of the
// C prototypes documented on the entry points in this file.
#define KEYP		a0	// 1st argument: key
#define INP		a1	// 2nd argument: input pointer
#define OUTP		a2	// 3rd argument: output pointer
#define LEN		a3	// 4th argument: length in bytes (multi-block functions)
#define IVP		a4	// 5th argument: IV pointer (CBC and CBC-CTS functions)
58
// Single-block body for one key length: load from INP into v16, run aes_crypt
// on it in place, store to OUTP, and return to the caller.
//
//   \enc     forwarded to aes_crypt; the callers in this file pass 1 from the
//            encryption entry point and 0 from the decryption entry point
//   \keylen  128, 192, or 256; forwarded to aes_crypt
//
// NOTE(review): no vsetvli is issued here, so the load/store width depends on
// the vl/vtype left by aes_begin (aes-macros.S) -- presumably 4 x e32, i.e. one
// 16-byte block; confirm there.
.macro	__aes_crypt_zvkned	enc, keylen
	vle32.v		v16, (INP)		// v16 = input block
	aes_crypt	v16, \enc, \keylen	// transform v16 in place
	vse32.v		v16, (OUTP)		// write the result block
	ret
.endm
65
// Complete body of a single-block function, with one copy of the block code
// per key length.  Every copy ends in its own 'ret', so control never falls
// from one copy into the next.
//
// aes_begin comes from aes-macros.S.  It is handed the key pointer and the two
// forward labels below; presumably it branches to 128f or 192f for those key
// sizes and falls through for AES-256 -- confirm against aes-macros.S.
//
//   \enc  forwarded unchanged to __aes_crypt_zvkned
.macro	aes_crypt_zvkned	enc
	aes_begin	KEYP, 128f, 192f

	__aes_crypt_zvkned	\enc, 256	// fall-through path

128:
	__aes_crypt_zvkned	\enc, 128

192:
	__aes_crypt_zvkned	\enc, 192
.endm
74
// void aes_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			   const u8 in[16], u8 out[16]);
//
// Single-block entry point.  Instantiates the shared single-block body with
// enc=1.
SYM_FUNC_START(aes_encrypt_zvkned)
	aes_crypt_zvkned	1		// enc = 1
SYM_FUNC_END(aes_encrypt_zvkned)
80
// Single-block counterpart of aes_encrypt_zvkned: identical prototype and
// calling convention, but the shared body is instantiated with enc=0.
SYM_FUNC_START(aes_decrypt_zvkned)
	aes_crypt_zvkned	0		// enc = 0
SYM_FUNC_END(aes_decrypt_zvkned)
85
// ECB body for one key length.  The message is processed in batches of as
// many 32-bit words as vsetvli grants at LMUL=8; each batch is loaded into the
// register group starting at v16, run through aes_crypt, and stored.
//
//   \enc, \keylen  forwarded to aes_crypt
//
// Scratch registers: t0 (words left), t1 (words, then bytes, in this batch).
// The loop only checks the remaining count after a batch, hence the
// requirement on the callers that LEN be nonzero.
.macro	__aes_ecb_crypt	enc, keylen
	srli		t0, LEN, 2		// t0 = LEN / 4 = words left; a multiple of 4
.Lecb_batch\@:
	vsetvli		t1, t0, e32, m8, ta, ma	// t1 = words taken by this batch
	vle32.v		v16, (INP)
	aes_crypt	v16, \enc, \keylen
	vse32.v		v16, (OUTP)

	// Bookkeeping for the batch that was just stored.
	sub		t0, t0, t1		// Fewer words left
	slli		t1, t1, 2		// Batch size: words -> bytes
	add		INP, INP, t1
	add		OUTP, OUTP, t1
	bnez		t0, .Lecb_batch\@

	ret
.endm
102
// Complete body of an ECB function: aes_begin (aes-macros.S) followed by one
// copy of the ECB loop per key length.  Each copy ends in 'ret'.
//
// The two forward labels are handed to aes_begin; presumably it jumps to them
// for AES-128 / AES-192 keys and falls through for AES-256 -- confirm against
// aes-macros.S.
//
//   \enc  forwarded unchanged to __aes_ecb_crypt
.macro	aes_ecb_crypt	enc
	aes_begin	KEYP, 128f, 192f

	__aes_ecb_crypt	\enc, 256		// fall-through path

128:
	__aes_ecb_crypt	\enc, 128

192:
	__aes_ecb_crypt	\enc, 192
.endm
111
// void aes_ecb_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len);
//
// ECB entry point, instantiating the shared ECB body with enc=1.
//
// Requirements on |len|: nonzero, and a multiple of 16 (AES_BLOCK_SIZE).
SYM_FUNC_START(aes_ecb_encrypt_zvkned)
	aes_ecb_crypt	1			// enc = 1
SYM_FUNC_END(aes_ecb_encrypt_zvkned)
119
// ECB counterpart of aes_ecb_encrypt_zvkned: identical prototype and calling
// convention, but the shared body is instantiated with enc=0.
SYM_FUNC_START(aes_ecb_decrypt_zvkned)
	aes_ecb_crypt	0			// enc = 0
SYM_FUNC_END(aes_ecb_decrypt_zvkned)
124
// CBC encryption body for one key length.  CBC encryption is inherently
// serial, so this handles one 16-byte block per iteration.
//
// v16 carries the chaining value: the IV at first, then each ciphertext block
// as it is produced.  After the last block it is written back through IVP so
// the caller receives the next IV.
//
// The remaining length is only tested after a block has been processed, hence
// the callers' requirement that LEN be a nonzero multiple of 16.
//
// NOTE(review): no vsetvli is issued here; the 16-byte block width relies on
// the vl/vtype left by aes_begin (aes-macros.S) -- confirm there.
.macro	aes_cbc_encrypt	keylen
	vle32.v		v16, (IVP)		// Chaining value starts as the IV
.Lcbc_encrypt_block\@:
	vle32.v		v17, (INP)		// v17 = next plaintext block
	addi		INP, INP, 16
	vxor.vv		v16, v16, v17		// Mix in the chaining value
	aes_encrypt	v16, \keylen		// v16 = new ciphertext block
	vse32.v		v16, (OUTP)
	addi		OUTP, OUTP, 16
	addi		LEN, LEN, -16
	bnez		LEN, .Lcbc_encrypt_block\@

	vse32.v		v16, (IVP)		// Last ciphertext block is the next IV
	ret
.endm
140
// CBC decryption body for one key length.  Unlike encryption, the blocks can
// be decrypted independently, so each iteration handles as many blocks as
// vsetvli grants at LMUL=4.
//
// Register groups used per batch:
//   v20  ciphertext blocks of this batch, decrypted in place
//   v16  "previous ciphertext" for every block of the batch: the carried-in
//        block in elements 0..3, followed by the batch shifted up by one block
//   v24  last ciphertext block of the batch, saved before v20 is overwritten
//
// Scratch registers: t0 (words in this batch), t1 (slide amount, then bytes).
// The last ciphertext block is written back through IVP as the next IV.
.macro	aes_cbc_decrypt	keylen
	vle32.v		v16, (IVP)		// v16 = IV
	srli		LEN, LEN, 2		// LEN is counted in 32-bit words from here on
.Lcbc_decrypt_batch\@:
	vsetvli		t0, LEN, e32, m4, ta, ma	// t0 = words in this batch
	addi		t1, t0, -4		// Word index of the batch's last block
	vle32.v		v20, (INP)		// Ciphertext blocks
	vslideup.vi	v16, v20, 4		// v16 = prev block || batch minus its last block
	vslidedown.vx	v24, v20, t1		// Keep the last ciphertext block
	aes_decrypt	v20, \keylen		// Raw AES decryption of the batch
	vxor.vv		v20, v20, v16		// Undo the CBC chaining
	vse32.v		v20, (OUTP)		// Plaintext blocks
	vmv.v.v		v16, v24		// Carry the last ciphertext block forward
	sub		LEN, LEN, t0
	slli		t1, t0, 2		// Batch size: words -> bytes
	add		INP, INP, t1
	add		OUTP, OUTP, t1
	bnez		LEN, .Lcbc_decrypt_batch\@

	vsetivli	zero, 4, e32, m1, ta, ma	// Back to a single 16-byte block
	vse32.v		v16, (IVP)		// Hand back the next IV
	ret
.endm
164
// void aes_cbc_encrypt_zvkned(const struct crypto_aes_ctx *key,
//			       const u8 *in, u8 *out, size_t len, u8 iv[16]);
//
// CBC encryption entry point.  |iv| is both an input and an output: the
// aes_cbc_encrypt body stores the final ciphertext block back to it.
//
// Requirements on |len|: nonzero, and a multiple of 16 (AES_BLOCK_SIZE).
//
// aes_begin (aes-macros.S) is given the key pointer and two forward labels;
// presumably it dispatches on the key size -- confirm there.  Each
// aes_cbc_encrypt expansion ends in 'ret'.
SYM_FUNC_START(aes_cbc_encrypt_zvkned)
	aes_begin	KEYP, 128f, 192f

	aes_cbc_encrypt	256			// fall-through path

128:
	aes_cbc_encrypt	128

192:
	aes_cbc_encrypt	192
SYM_FUNC_END(aes_cbc_encrypt_zvkned)
177
// CBC decryption entry point.  Prototype and calling convention are the same
// as for aes_cbc_encrypt_zvkned; the aes_cbc_decrypt body likewise writes the
// next IV (the last ciphertext block) back through the iv pointer.
//
// Each aes_cbc_decrypt expansion ends in 'ret'.
SYM_FUNC_START(aes_cbc_decrypt_zvkned)
	aes_begin	KEYP, 128f, 192f

	aes_cbc_decrypt	256			// fall-through path

128:
	aes_cbc_decrypt	128

192:
	aes_cbc_decrypt	192
SYM_FUNC_END(aes_cbc_decrypt_zvkned)
187
// CBC-CTS encryption body for one key length.
//
// Expects v16 to hold the IV on entry (aes_cbc_cts_crypt loads it).  Uses t0
// as scratch and consumes INP, OUTP and LEN.
.macro	aes_cbc_cts_encrypt	keylen

	// Phase 1: plain CBC over every block but the last.  The store of each
	// ciphertext block is deferred to the top of the *next* iteration, so
	// the second-to-last block is computed but not yet written -- the
	// ciphertext stealing step below decides where it goes.  A
	// single-block message still has its only block encrypted here.
	li		t0, 16
	j		.Lcts_encrypt_block\@
.Lcts_encrypt_store\@:
	vse32.v		v16, (OUTP)	// Write the block encrypted last time round
	addi		OUTP, OUTP, 16
.Lcts_encrypt_block\@:
	vle32.v		v17, (INP)	// Next plaintext block
	vxor.vv		v16, v16, v17	// Chain with the IV / previous ciphertext
	aes_encrypt	v16, \keylen
	addi		INP, INP, 16
	addi		LEN, LEN, -16
	blt		t0, LEN, .Lcts_encrypt_store\@	// Loop while LEN > 16

	// Nothing left: the message was exactly one block, so plain CBC is
	// the whole answer.
	beqz		LEN, .Lcts_encrypt_done\@

	// Phase 2: ciphertext stealing over the final two blocks.
	//
	//	C[n-1] = Encrypt(Encrypt(P[n-1] ^ C[n-2]) ^ P[n])
	//	C[n] = Encrypt(P[n-1] ^ C[n-2])[0..LEN]
	//
	// P[i] / C[i] are the i'th plaintext / ciphertext blocks.  The last
	// block, n, has length 1 <= LEN <= 16, i.e. it may be partial.  For a
	// two-block message, "C[n-2]" is the IV.
	//
	// State here: v16 = Encrypt(P[n-1] ^ C[n-2]); INP -> P[n];
	// OUTP -> destination of C[n-1].
	//
	// P[n] must be read before C[n] is written so that in-place operation
	// (input and output in the same buffer) works.
	addi		t0, OUTP, 16	// t0 -> destination of C[n]
	vsetvli		zero, LEN, e8, m1, tu, ma	// LEN bytes; tail undisturbed
	vle8.v		v17, (INP)	// P[n]
	vse8.v		v16, (t0)	// C[n] = first LEN bytes of v16
	vxor.vv		v16, v16, v17	// v16 = Encrypt(P[n-1] ^ C[n-2]) ^ P[n]
	vsetivli	zero, 4, e32, m1, ta, ma	// Back to one full block
	aes_encrypt	v16, \keylen
.Lcts_encrypt_done\@:
	vse32.v		v16, (OUTP)	// C[n-1], or the only block if single-block
	ret
.endm
232
#define LEN32		t4	// Full blocks still to decrypt, in 32-bit words
#define LEN_MOD16	t5	// Message length in bytes, modulo 16

// CBC-CTS decryption body for one key length.
//
// Expects v16 to hold the IV on entry (aes_cbc_cts_crypt loads it).  Uses t0,
// t1, LEN32 and LEN_MOD16 as scratch and consumes INP and OUTP.
.macro	aes_cbc_cts_decrypt	keylen
	andi		LEN_MOD16, LEN, 15
	andi		LEN32, LEN, ~15		// Bytes in full blocks...
	srli		LEN32, LEN32, 2		// ...converted to words

	// Put C[n-2] aside in v28 for the ciphertext stealing step.  With
	// fewer than three blocks (LEN <= 32), "C[n-2]" is the IV; otherwise
	// it is the third-to-last ciphertext block, which starts at byte
	// offset (LEN - 33) rounded down to a multiple of 16.
	vmv.v.v		v28, v16		// Default: the IV
	addi		t0, LEN, -33
	bltz		t0, .Lcts_decrypt_loop\@
	andi		t0, t0, ~15
	add		t0, t0, INP
	vle32.v		v28, (t0)

	// Run ordinary CBC decryption over all full blocks.  Except for a
	// single-block message, the output this produces for the last full
	// block (or for the last two, when the message is block-aligned) is
	// not correct yet: the raw AES outputs get XOR'ed with the wrong
	// values.  The code after the loop repairs that without repeating any
	// AES decryption, which keeps more of the decryptions parallel.
.Lcts_decrypt_loop\@:
	vsetvli		t0, LEN32, e32, m4, ta, ma	// t0 = words in this set
	addi		t1, t0, -4		// Word index of the set's last block
	vle32.v		v20, (INP)		// Ciphertext blocks of this set
	vmv.v.v		v24, v16		// IV, or last ciphertext block of the prev set
	vslideup.vi	v24, v20, 4		// v24 = "previous ciphertext" per block
	vslidedown.vx	v16, v20, t1		// Remember this set's last ciphertext block
	aes_decrypt	v20, \keylen		// Raw AES decryption of the set
	vxor.vv		v24, v24, v20		// Undo the CBC chaining
	vse32.v		v24, (OUTP)		// Plaintext blocks (some provisional)
	sub		LEN32, LEN32, t0
	slli		t0, t0, 2		// Set size: words -> bytes
	add		INP, INP, t0
	add		OUTP, OUTP, t0
	bnez		LEN32, .Lcts_decrypt_loop\@

	vsetivli	zero, 4, e32, m4, ta, ma
	vslidedown.vx	v20, v20, t1		// v20 = raw AES output of the last full block
	addi		t0, OUTP, -16		// t0 -> last full plaintext block
	bnez		LEN_MOD16, .Lcts_decrypt_non_block_aligned\@

	// Exactly one block: plain CBC already produced the right output.
	li		t1, 16
	beq		LEN, t1, .Lcts_decrypt_done\@

	// Block-aligned message: only the last two blocks need repair.  The
	// targets are
	//
	//	P[n-1] = Decrypt(C[n]) ^ C[n-2]
	//	P[n] = Decrypt(C[n-1]) ^ C[n]
	//
	// Available: C[n] in v16, Decrypt(C[n]) in v20, C[n-2] in v28, and
	// Decrypt(C[n-1]) ^ C[n-2] sitting in the output buffer.  That is
	// enough to fix both blocks with XORs alone.
	addi		t1, OUTP, -32		// t1 -> destination of P[n-1]
	vxor.vv		v20, v20, v28		// P[n-1] = Decrypt(C[n]) ^ C[n-2]
	vle32.v		v24, (t1)		// v24 = Decrypt(C[n-1]) ^ C[n-2]
	vse32.v		v20, (t1)		// Write P[n-1]
	vxor.vv		v20, v24, v16		// v20 = P[n] ^ C[n-2]; C[n-2] is removed at 'finish'
	j		.Lcts_decrypt_finish\@

.Lcts_decrypt_non_block_aligned\@:
	// Partial final block: ciphertext stealing over the last two blocks.
	//
	//	P[n-1] = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16]) ^ C[n-2]
	//	P[n] = (Decrypt(C[n-1]) ^ C[n])[0..LEN_MOD16]
	//
	// Available: Decrypt(C[n-1]) in v20 and C[n-2] in v28.
	vmv.v.v		v16, v20		// v16 = Decrypt(C[n-1])
	vsetvli		zero, LEN_MOD16, e8, m1, tu, ma	// LEN_MOD16 bytes; tail undisturbed
	vle8.v		v20, (INP)		// v20 = C[n] || Decrypt(C[n-1])[LEN_MOD16..16]
	vxor.vv		v16, v16, v20		// v16 = Decrypt(C[n-1]) ^ C[n]
	vse8.v		v16, (OUTP)		// Write P[n]
	vsetivli	zero, 4, e32, m1, ta, ma	// Back to one full block
	aes_decrypt	v20, \keylen		// v20 = Decrypt(C[n] || Decrypt(C[n-1])[LEN_MOD16..16])
.Lcts_decrypt_finish\@:
	vxor.vv		v20, v20, v28		// Apply C[n-2]
	vse32.v		v20, (t0)		// Write the last full plaintext block
.Lcts_decrypt_done\@:
	ret
.endm
317
// CBC-CTS body for one key length: load the IV into v16, then run either the
// encryption or the decryption macro depending on a5, which carries the 'enc'
// flag (sixth argument of aes_cbc_cts_crypt_zvkned).
//
// Both aes_cbc_cts_encrypt and aes_cbc_cts_decrypt end in 'ret', so the
// encryption path never falls through into the decryption path.
.macro	aes_cbc_cts_crypt	keylen
	vle32.v		v16, (IVP)		// v16 = IV for either direction
	beqz		a5, .Lcts_crypt_decrypt\@	// enc == false -> decrypt

	aes_cbc_cts_encrypt \keylen

.Lcts_crypt_decrypt\@:
	aes_cbc_cts_decrypt \keylen
.endm
325
// void aes_cbc_cts_crypt_zvkned(const struct crypto_aes_ctx *key,
//			         const u8 *in, u8 *out, size_t len,
//				 const u8 iv[16], bool enc);
//
// AES-CBC-CTS entry point for both directions: encrypts when |enc| is true,
// decrypts otherwise.  The CTS variant implemented is CS3, i.e. the one in
// which the last two blocks are always swapped.
//
// aes_begin (aes-macros.S) is given the key pointer and two forward labels;
// presumably it dispatches on the key size -- confirm there.  Each
// aes_cbc_cts_crypt expansion returns on its own.
SYM_FUNC_START(aes_cbc_cts_crypt_zvkned)
	aes_begin	KEYP, 128f, 192f

	aes_cbc_cts_crypt 256			// fall-through path

128:
	aes_cbc_cts_crypt 128

192:
	aes_cbc_cts_crypt 192
SYM_FUNC_END(aes_cbc_cts_crypt_zvkned)
340