xref: /freebsd/sys/crypto/armv8/armv8_crypto_wrap.c (revision bc5304a006238115291e7568583632889dffbab9)
/*-
 * Copyright (c) 2016 The FreeBSD Foundation
 * Copyright (c) 2020 Ampere Computing
 * All rights reserved.
 *
 * This software was developed by Andrew Turner under
 * sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * This file is derived from aesni_wrap.c:
 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
 * Copyright (c) 2014 The FreeBSD Foundation
 */

/*
 * This code is built with floating-point enabled.  Callers must have
 * entered a floating-point context before calling any of these functions.
 */

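/*
 * Minimal usage sketch (illustrative only, not part of this file): callers
 * are expected to bracket these routines with a kernel FPU/SIMD section.
 * The exact fpu_kern_enter() flags below are an assumption; see the
 * armv8_crypto driver for the canonical call sequence.
 *
 *	fpu_kern_enter(curthread, NULL, FPU_KERN_NORMAL | FPU_KERN_NOCTX);
 *	armv8_aes_encrypt_cbc(key, len, from, to, iv);
 *	fpu_kern_leave(curthread, NULL);
 */
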
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/queue.h>

#include <opencrypto/cryptodev.h>
#include <opencrypto/gmac.h>
#include <crypto/rijndael/rijndael.h>
#include <crypto/armv8/armv8_crypto.h>

#include <arm_neon.h>

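/*
 * One full AES encryption of a single 16-byte block using the ARMv8
 * Cryptography Extensions.  vaeseq_u8() performs AddRoundKey, SubBytes and
 * ShiftRows; vaesmcq_u8() performs MixColumns.  Callers pass
 * key->aes_rounds - 1 as 'rounds', so the loop plus the tail below execute
 * all Nr AESE steps (keysched[0 .. rounds]) followed by the final
 * AddRoundKey with keysched[rounds + 1].
 */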
static uint8x16_t
armv8_aes_enc(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t tmp;
	int i;

	tmp = from;
	for (i = 0; i < rounds - 1; i += 2) {
		tmp = vaeseq_u8(tmp, keysched[i]);
		tmp = vaesmcq_u8(tmp);
		tmp = vaeseq_u8(tmp, keysched[i + 1]);
		tmp = vaesmcq_u8(tmp);
	}

	tmp = vaeseq_u8(tmp, keysched[rounds - 1]);
	tmp = vaesmcq_u8(tmp);
	tmp = vaeseq_u8(tmp, keysched[rounds]);
	tmp = veorq_u8(tmp, keysched[rounds + 1]);

	return (tmp);
}

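/*
 * The matching single-block decryption.  vaesdq_u8() performs AddRoundKey,
 * InvSubBytes and InvShiftRows; vaesimcq_u8() performs InvMixColumns.  This
 * is the equivalent inverse cipher, so 'keysched' must be a decryption key
 * schedule (round keys in reverse order with InvMixColumns applied to the
 * inner keys), not the encryption schedule.
 */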
static uint8x16_t
armv8_aes_dec(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t tmp;
	int i;

	tmp = from;
	for (i = 0; i < rounds - 1; i += 2) {
		tmp = vaesdq_u8(tmp, keysched[i]);
		tmp = vaesimcq_u8(tmp);
		tmp = vaesdq_u8(tmp, keysched[i + 1]);
		tmp = vaesimcq_u8(tmp);
	}

	tmp = vaesdq_u8(tmp, keysched[rounds - 1]);
	tmp = vaesimcq_u8(tmp);
	tmp = vaesdq_u8(tmp, keysched[rounds]);
	tmp = veorq_u8(tmp, keysched[rounds + 1]);

	return (tmp);
}

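/*
 * CBC encryption: each plaintext block is XORed with the previous ciphertext
 * block (the IV for the first block) before being encrypted.  len is expected
 * to be a multiple of AES_BLOCK_LEN; any partial trailing block is ignored.
 */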
void
armv8_aes_encrypt_cbc(const AES_key_t *key, size_t len,
    const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
{
	uint8x16_t tot, ivreg, tmp;
	size_t i;

	len /= AES_BLOCK_LEN;
	ivreg = vld1q_u8(iv);
	for (i = 0; i < len; i++) {
		tmp = vld1q_u8(from);
		tot = armv8_aes_enc(key->aes_rounds - 1,
		    (const void *)key->aes_key, veorq_u8(tmp, ivreg));
		ivreg = tot;
		vst1q_u8(to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

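/*
 * CBC decryption, performed in place on 'buf'.  The next ciphertext block is
 * saved before it is overwritten so it can serve as the chaining value for
 * the following block.
 */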
void
armv8_aes_decrypt_cbc(const AES_key_t *key, size_t len,
    uint8_t *buf, const uint8_t iv[static AES_BLOCK_LEN])
{
	uint8x16_t ivreg, nextiv, tmp;
	size_t i;

	len /= AES_BLOCK_LEN;
	ivreg = vld1q_u8(iv);
	for (i = 0; i < len; i++) {
		nextiv = vld1q_u8(buf);
		tmp = armv8_aes_dec(key->aes_rounds - 1,
		    (const void *)key->aes_key, nextiv);
		vst1q_u8(buf, veorq_u8(tmp, ivreg));
		ivreg = nextiv;
		buf += AES_BLOCK_LEN;
	}
}

#define	AES_XTS_BLOCKSIZE	16
#define	AES_XTS_IVSIZE		8
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */

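/*
 * Multiply the 128-bit XTS tweak by alpha (the polynomial x) in GF(2^128),
 * operating on four 32-bit lanes: each lane is shifted left by one, the top
 * bit of each lane is carried into the next lane, and the top bit of the
 * whole tweak is folded back into the low byte as AES_XTS_ALPHA (0x87).
 */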
static inline int32x4_t
xts_crank_lfsr(int32x4_t inp)
{
	const int32x4_t alphamask = {AES_XTS_ALPHA, 1, 1, 1};
	int32x4_t xtweak, ret;

	/* set up xor mask */
	xtweak = vextq_s32(inp, inp, 3);
	xtweak = vshrq_n_s32(xtweak, 31);
	xtweak &= alphamask;

	/* next term */
	ret = vshlq_n_s32(inp, 1);
	ret ^= xtweak;

	return (ret);
}

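/*
 * Encrypt or decrypt one XTS block: XOR the input with the current tweak,
 * run the block cipher, XOR with the tweak again, then advance the tweak
 * for the next block.
 */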
static void
armv8_aes_crypt_xts_block(int rounds, const uint8x16_t *key_schedule,
    uint8x16_t *tweak, const uint8_t *from, uint8_t *to, int do_encrypt)
{
	uint8x16_t block;

	block = vld1q_u8(from) ^ *tweak;

	if (do_encrypt)
		block = armv8_aes_enc(rounds - 1, key_schedule, block);
	else
		block = armv8_aes_dec(rounds - 1, key_schedule, block);

	vst1q_u8(to, block ^ *tweak);

	*tweak = vreinterpretq_u8_s32(xts_crank_lfsr(vreinterpretq_s32_u8(*tweak)));
}

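/*
 * Shared XTS worker.  The initial tweak is computed as E_k2(IV) and then
 * advanced per block.  Only whole blocks are processed; ciphertext stealing
 * for a partial final block is not implemented, so any trailing partial
 * block is ignored.
 */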
static void
armv8_aes_crypt_xts(int rounds, const uint8x16_t *data_schedule,
    const uint8x16_t *tweak_schedule, size_t len, const uint8_t *from,
    uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
{
	uint8x16_t tweakreg;
	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
	size_t i, cnt;

	/*
	 * Prepare the tweak as E_k2(IV).  The IV is specified as the LE
	 * representation of a 64-bit block number, which we allow to be
	 * passed in directly.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	bcopy(iv, tweak, AES_XTS_IVSIZE);
	/* Last 64 bits of IV are always zero. */
	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
	tweakreg = vld1q_u8(tweak);
	tweakreg = armv8_aes_enc(rounds - 1, tweak_schedule, tweakreg);

	cnt = len / AES_XTS_BLOCKSIZE;
	for (i = 0; i < cnt; i++) {
		armv8_aes_crypt_xts_block(rounds, data_schedule, &tweakreg,
		    from, to, do_encrypt);
		from += AES_XTS_BLOCKSIZE;
		to += AES_XTS_BLOCKSIZE;
	}
}

void
armv8_aes_encrypt_xts(AES_key_t *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[static AES_BLOCK_LEN])
{

	armv8_aes_crypt_xts(data_schedule->aes_rounds,
	    (const void *)&data_schedule->aes_key, tweak_schedule, len, from,
	    to, iv, 1);
}

void
armv8_aes_decrypt_xts(AES_key_t *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[static AES_BLOCK_LEN])
{

	armv8_aes_crypt_xts(data_schedule->aes_rounds,
	    (const void *)&data_schedule->aes_key, tweak_schedule, len, from,
	    to, iv, 0);
}

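/*
 * Increment the 128-bit counter block in place, treating it as a big-endian
 * integer (the rightmost byte is least significant, as GCM requires).
 */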
#define	AES_INC_COUNTER(counter)				\
	do {							\
		for (int pos = AES_BLOCK_LEN - 1;		\
		     pos >= 0; pos--)				\
			if (++(counter)[pos])			\
				break;				\
	} while (0)

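/*
 * Per-call GCM working state: EK0 is E_K(J0) for the final tag XOR, EKi is
 * the current keystream block, Xi is the running GHASH value, lenblock holds
 * the AAD/ciphertext bit lengths, and aes_counter is the CTR block.  The
 * whole structure is wiped with explicit_bzero() before returning.
 */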
struct armv8_gcm_state {
	__uint128_val_t EK0;
	__uint128_val_t EKi;
	__uint128_val_t Xi;
	__uint128_val_t lenblock;
	uint8_t aes_counter[AES_BLOCK_LEN];
};

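/*
 * AES-GCM encryption: GHASH the AAD, CTR-encrypt the payload while GHASHing
 * the produced ciphertext, GHASH the lengths block, and finally XOR the
 * GHASH result with EK0 to form the tag.
 */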
void
armv8_aes_encrypt_gcm(AES_key_t *aes_key, size_t len,
    const uint8_t *from, uint8_t *to,
    size_t authdatalen, const uint8_t *authdata,
    uint8_t tag[static GMAC_DIGEST_LEN],
    const uint8_t iv[static AES_GCM_IV_LEN],
    const __uint128_val_t *Htable)
{
	struct armv8_gcm_state s;
	const uint64_t *from64;
	uint64_t *to64;
	uint8_t block[AES_BLOCK_LEN];
	size_t i, trailer;

	bzero(&s.aes_counter, AES_BLOCK_LEN);
	memcpy(s.aes_counter, iv, AES_GCM_IV_LEN);

	/* Set up the counter */
	s.aes_counter[AES_BLOCK_LEN - 1] = 1;

	/* EK0 for the final GMAC round */
	aes_v8_encrypt(s.aes_counter, s.EK0.c, aes_key);

	/* GCM starts with 2 as counter, 1 is used for final xor of tag. */
	s.aes_counter[AES_BLOCK_LEN - 1] = 2;

	memset(s.Xi.c, 0, sizeof(s.Xi.c));
	trailer = authdatalen % AES_BLOCK_LEN;
	if (authdatalen - trailer > 0) {
		gcm_ghash_v8(s.Xi.u, Htable, authdata, authdatalen - trailer);
		authdata += authdatalen - trailer;
	}
	if (trailer > 0 || authdatalen == 0) {
		memset(block, 0, sizeof(block));
		memcpy(block, authdata, trailer);
		gcm_ghash_v8(s.Xi.u, Htable, block, AES_BLOCK_LEN);
	}

	from64 = (const uint64_t *)from;
	to64 = (uint64_t *)to;
	trailer = len % AES_BLOCK_LEN;

	for (i = 0; i < (len - trailer); i += AES_BLOCK_LEN) {
		aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
		AES_INC_COUNTER(s.aes_counter);
		to64[0] = from64[0] ^ s.EKi.u[0];
		to64[1] = from64[1] ^ s.EKi.u[1];
		gcm_ghash_v8(s.Xi.u, Htable, (uint8_t *)to64, AES_BLOCK_LEN);

		to64 += 2;
		from64 += 2;
	}

	to += (len - trailer);
	from += (len - trailer);

	if (trailer) {
		aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
		AES_INC_COUNTER(s.aes_counter);
		memset(block, 0, sizeof(block));
		for (i = 0; i < trailer; i++) {
			block[i] = to[i] = from[i] ^ s.EKi.c[i];
		}

		gcm_ghash_v8(s.Xi.u, Htable, block, AES_BLOCK_LEN);
	}

	/* Lengths block */
	s.lenblock.u[0] = s.lenblock.u[1] = 0;
	s.lenblock.d[1] = htobe32(authdatalen * 8);
	s.lenblock.d[3] = htobe32(len * 8);
	gcm_ghash_v8(s.Xi.u, Htable, s.lenblock.c, AES_BLOCK_LEN);

	s.Xi.u[0] ^= s.EK0.u[0];
	s.Xi.u[1] ^= s.EK0.u[1];
	memcpy(tag, s.Xi.c, GMAC_DIGEST_LEN);

	explicit_bzero(&s, sizeof(s));
}

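/*
 * AES-GCM decryption: the tag is verified over the AAD and ciphertext first;
 * only if it matches is the payload CTR-decrypted into 'to'.  Returns 0 on
 * success or EBADMSG if authentication fails.
 */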
int
armv8_aes_decrypt_gcm(AES_key_t *aes_key, size_t len,
    const uint8_t *from, uint8_t *to,
    size_t authdatalen, const uint8_t *authdata,
    const uint8_t tag[static GMAC_DIGEST_LEN],
    const uint8_t iv[static AES_GCM_IV_LEN],
    const __uint128_val_t *Htable)
{
	struct armv8_gcm_state s;
	const uint64_t *from64;
	uint64_t *to64;
	uint8_t block[AES_BLOCK_LEN];
	size_t i, trailer;
	int error;

	error = 0;
	bzero(&s.aes_counter, AES_BLOCK_LEN);
	memcpy(s.aes_counter, iv, AES_GCM_IV_LEN);

	/* Set up the counter */
	s.aes_counter[AES_BLOCK_LEN - 1] = 1;

	/* EK0 for the final GMAC round */
	aes_v8_encrypt(s.aes_counter, s.EK0.c, aes_key);

	memset(s.Xi.c, 0, sizeof(s.Xi.c));
	trailer = authdatalen % AES_BLOCK_LEN;
	if (authdatalen - trailer > 0) {
		gcm_ghash_v8(s.Xi.u, Htable, authdata, authdatalen - trailer);
		authdata += authdatalen - trailer;
	}
	if (trailer > 0 || authdatalen == 0) {
		memset(block, 0, sizeof(block));
		memcpy(block, authdata, trailer);
		gcm_ghash_v8(s.Xi.u, Htable, block, AES_BLOCK_LEN);
	}

	trailer = len % AES_BLOCK_LEN;
	if (len - trailer > 0)
		gcm_ghash_v8(s.Xi.u, Htable, from, len - trailer);
	if (trailer > 0) {
		memset(block, 0, sizeof(block));
		memcpy(block, from + len - trailer, trailer);
		gcm_ghash_v8(s.Xi.u, Htable, block, AES_BLOCK_LEN);
	}

	/* Lengths block */
	s.lenblock.u[0] = s.lenblock.u[1] = 0;
	s.lenblock.d[1] = htobe32(authdatalen * 8);
	s.lenblock.d[3] = htobe32(len * 8);
	gcm_ghash_v8(s.Xi.u, Htable, s.lenblock.c, AES_BLOCK_LEN);

	s.Xi.u[0] ^= s.EK0.u[0];
	s.Xi.u[1] ^= s.EK0.u[1];
	if (timingsafe_bcmp(tag, s.Xi.c, GMAC_DIGEST_LEN) != 0) {
		error = EBADMSG;
		goto out;
	}

	/* GCM starts with 2 as counter, 1 is used for final xor of tag. */
	s.aes_counter[AES_BLOCK_LEN - 1] = 2;

	from64 = (const uint64_t *)from;
	to64 = (uint64_t *)to;

	for (i = 0; i < (len - trailer); i += AES_BLOCK_LEN) {
		aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
		AES_INC_COUNTER(s.aes_counter);
		to64[0] = from64[0] ^ s.EKi.u[0];
		to64[1] = from64[1] ^ s.EKi.u[1];
		to64 += 2;
		from64 += 2;
	}

	to += (len - trailer);
	from += (len - trailer);

	if (trailer) {
		aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
		AES_INC_COUNTER(s.aes_counter);
		for (i = 0; i < trailer; i++)
			to[i] = from[i] ^ s.EKi.c[i];
	}

out:
	explicit_bzero(&s, sizeof(s));
	return (error);
}