xref: /freebsd/sys/crypto/aesni/aesni_wrap.c (revision 0572ccaa4543b0abef8ef81e384c1d04de9f3da1)
1 /*-
2  * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
3  * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
4  * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
5  * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
6  * All rights reserved.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions
10  * are met:
11  * 1. Redistributions of source code must retain the above copyright
12  *    notice, this list of conditions and the following disclaimer.
13  * 2. Redistributions in binary form must reproduce the above copyright
14  *    notice, this list of conditions and the following disclaimer in the
15  *    documentation and/or other materials provided with the distribution.
16  *
17  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
18  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
21  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
22  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
23  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
24  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
25  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
26  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
27  * SUCH DAMAGE.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/libkern.h>
35 #include <sys/malloc.h>
36 #include <sys/proc.h>
37 #include <sys/systm.h>
38 #include <crypto/aesni/aesni.h>
39 
40 #include "aesencdec.h"
41 
42 MALLOC_DECLARE(M_AESNI);
43 
/*
 * Overlay for eight consecutive 128-bit blocks.  The bulk CBC/ECB paths in
 * this file cast caller-supplied uint8_t buffers to this type so that eight
 * blocks can be handed to aesni_enc8()/aesni_dec8() in one call.
 * NOTE(review): __packed is presumably what makes member access legal on
 * buffers that are not 16-byte aligned -- confirm against the compiler's
 * handling of packed vector members.
 */
struct blocks8 {
	__m128i	blk[8];		/* eight AES blocks, in buffer order */
} __packed;
47 
48 void
49 aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
50     const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
51 {
52 	__m128i tot, ivreg;
53 	size_t i;
54 
55 	len /= AES_BLOCK_LEN;
56 	ivreg = _mm_loadu_si128((const __m128i *)iv);
57 	for (i = 0; i < len; i++) {
58 		tot = aesni_enc(rounds - 1, key_schedule,
59 		    _mm_loadu_si128((const __m128i *)from) ^ ivreg);
60 		ivreg = tot;
61 		_mm_storeu_si128((__m128i *)to, tot);
62 		from += AES_BLOCK_LEN;
63 		to += AES_BLOCK_LEN;
64 	}
65 }
66 
67 void
68 aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
69     uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN])
70 {
71 	__m128i blocks[8];
72 	struct blocks8 *blks;
73 	__m128i ivreg, nextiv;
74 	size_t i, j, cnt;
75 
76 	ivreg = _mm_loadu_si128((const __m128i *)iv);
77 	cnt = len / AES_BLOCK_LEN / 8;
78 	for (i = 0; i < cnt; i++) {
79 		blks = (struct blocks8 *)buf;
80 		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
81 		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
82 		    blks->blk[6], blks->blk[7], &blocks[0]);
83 		for (j = 0; j < 8; j++) {
84 			nextiv = blks->blk[j];
85 			blks->blk[j] = blocks[j] ^ ivreg;
86 			ivreg = nextiv;
87 		}
88 		buf += AES_BLOCK_LEN * 8;
89 	}
90 	i *= 8;
91 	cnt = len / AES_BLOCK_LEN;
92 	for (; i < cnt; i++) {
93 		nextiv = _mm_loadu_si128((void *)buf);
94 		_mm_storeu_si128((void *)buf,
95 		    aesni_dec(rounds - 1, key_schedule, nextiv) ^ ivreg);
96 		ivreg = nextiv;
97 		buf += AES_BLOCK_LEN;
98 	}
99 }
100 
101 void
102 aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
103     const uint8_t *from, uint8_t *to)
104 {
105 	__m128i tot;
106 	__m128i tout[8];
107 	struct blocks8 *top;
108 	const struct blocks8 *blks;
109 	size_t i, cnt;
110 
111 	cnt = len / AES_BLOCK_LEN / 8;
112 	for (i = 0; i < cnt; i++) {
113 		blks = (const struct blocks8 *)from;
114 		top = (struct blocks8 *)to;
115 		aesni_enc8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
116 		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
117 		    blks->blk[6], blks->blk[7], tout);
118 		top->blk[0] = tout[0];
119 		top->blk[1] = tout[1];
120 		top->blk[2] = tout[2];
121 		top->blk[3] = tout[3];
122 		top->blk[4] = tout[4];
123 		top->blk[5] = tout[5];
124 		top->blk[6] = tout[6];
125 		top->blk[7] = tout[7];
126 		from += AES_BLOCK_LEN * 8;
127 		to += AES_BLOCK_LEN * 8;
128 	}
129 	i *= 8;
130 	cnt = len / AES_BLOCK_LEN;
131 	for (; i < cnt; i++) {
132 		tot = aesni_enc(rounds - 1, key_schedule,
133 		    _mm_loadu_si128((const __m128i *)from));
134 		_mm_storeu_si128((__m128i *)to, tot);
135 		from += AES_BLOCK_LEN;
136 		to += AES_BLOCK_LEN;
137 	}
138 }
139 
140 void
141 aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
142     const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
143 {
144 	__m128i tot;
145 	__m128i tout[8];
146 	const struct blocks8 *blks;
147 	struct blocks8 *top;
148 	size_t i, cnt;
149 
150 	cnt = len / AES_BLOCK_LEN / 8;
151 	for (i = 0; i < cnt; i++) {
152 		blks = (const struct blocks8 *)from;
153 		top = (struct blocks8 *)to;
154 		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
155 		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
156 		    blks->blk[6], blks->blk[7], tout);
157 		top->blk[0] = tout[0];
158 		top->blk[1] = tout[1];
159 		top->blk[2] = tout[2];
160 		top->blk[3] = tout[3];
161 		top->blk[4] = tout[4];
162 		top->blk[5] = tout[5];
163 		top->blk[6] = tout[6];
164 		top->blk[7] = tout[7];
165 		from += AES_BLOCK_LEN * 8;
166 		to += AES_BLOCK_LEN * 8;
167 	}
168 	i *= 8;
169 	cnt = len / AES_BLOCK_LEN;
170 	for (; i < cnt; i++) {
171 		tot = aesni_dec(rounds - 1, key_schedule,
172 		    _mm_loadu_si128((const __m128i *)from));
173 		_mm_storeu_si128((__m128i *)to, tot);
174 		from += AES_BLOCK_LEN;
175 		to += AES_BLOCK_LEN;
176 	}
177 }
178 
#define	AES_XTS_BLOCKSIZE	16
#define	AES_XTS_IVSIZE		8
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */

/*
 * Step the XTS tweak: shift the 128-bit value left by one bit and, when a
 * bit falls off the top, fold AES_XTS_ALPHA into the lowest 32-bit lane.
 *
 * SSE2 has no 128-bit bit shift, so the value is shifted as four
 * independent 32-bit lanes and the bit lost from each lane is re-inserted
 * by hand:
 *   - the 0x93 shuffle rotates the lanes up by one, so every lane sees the
 *     lane below it and lane 0 sees lane 3;
 *   - the arithmetic shift by 31 turns each lane into all-ones or zero
 *     according to its top bit;
 *   - masking leaves 1 (the carry) in lanes 1-3 and AES_XTS_ALPHA in
 *     lane 0.
 */
static inline __m128i
xts_crank_lfsr(__m128i inp)
{
	const __m128i feedback = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA);
	__m128i carries;

	/* Per-lane carry-in / reduction mask. */
	carries = _mm_shuffle_epi32(inp, 0x93);
	carries = _mm_srai_epi32(carries, 31);
	carries = _mm_and_si128(carries, feedback);

	/* Lane-wise shift, then patch in what crossed lane boundaries. */
	return (_mm_xor_si128(_mm_slli_epi32(inp, 1), carries));
}
200 
201 static void
202 aesni_crypt_xts_block(int rounds, const __m128i *key_schedule, __m128i *tweak,
203     const uint8_t *from, uint8_t *to, int do_encrypt)
204 {
205 	__m128i block;
206 
207 	block = _mm_loadu_si128((const __m128i *)from) ^ *tweak;
208 
209 	if (do_encrypt)
210 		block = aesni_enc(rounds - 1, key_schedule, block);
211 	else
212 		block = aesni_dec(rounds - 1, key_schedule, block);
213 
214 	_mm_storeu_si128((__m128i *)to, block ^ *tweak);
215 
216 	*tweak = xts_crank_lfsr(*tweak);
217 }
218 
219 static void
220 aesni_crypt_xts_block8(int rounds, const __m128i *key_schedule, __m128i *tweak,
221     const uint8_t *from, uint8_t *to, int do_encrypt)
222 {
223 	__m128i tmptweak;
224 	__m128i a, b, c, d, e, f, g, h;
225 	__m128i tweaks[8];
226 	__m128i tmp[8];
227 	__m128i *top;
228 	const __m128i *fromp;
229 
230 	tmptweak = *tweak;
231 
232 	/*
233 	 * unroll the loop.  This lets gcc put values directly in the
234 	 * register and saves memory accesses.
235 	 */
236 	fromp = (const __m128i *)from;
237 #define PREPINP(v, pos) 					\
238 		do {						\
239 			tweaks[(pos)] = tmptweak;		\
240 			(v) = _mm_loadu_si128(&fromp[pos]) ^	\
241 			    tmptweak;				\
242 			tmptweak = xts_crank_lfsr(tmptweak);	\
243 		} while (0)
244 	PREPINP(a, 0);
245 	PREPINP(b, 1);
246 	PREPINP(c, 2);
247 	PREPINP(d, 3);
248 	PREPINP(e, 4);
249 	PREPINP(f, 5);
250 	PREPINP(g, 6);
251 	PREPINP(h, 7);
252 	*tweak = tmptweak;
253 
254 	if (do_encrypt)
255 		aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
256 		    tmp);
257 	else
258 		aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
259 		    tmp);
260 
261 	top = (__m128i *)to;
262 	_mm_storeu_si128(&top[0], tmp[0] ^ tweaks[0]);
263 	_mm_storeu_si128(&top[1], tmp[1] ^ tweaks[1]);
264 	_mm_storeu_si128(&top[2], tmp[2] ^ tweaks[2]);
265 	_mm_storeu_si128(&top[3], tmp[3] ^ tweaks[3]);
266 	_mm_storeu_si128(&top[4], tmp[4] ^ tweaks[4]);
267 	_mm_storeu_si128(&top[5], tmp[5] ^ tweaks[5]);
268 	_mm_storeu_si128(&top[6], tmp[6] ^ tweaks[6]);
269 	_mm_storeu_si128(&top[7], tmp[7] ^ tweaks[7]);
270 }
271 
272 static void
273 aesni_crypt_xts(int rounds, const __m128i *data_schedule,
274     const __m128i *tweak_schedule, size_t len, const uint8_t *from,
275     uint8_t *to, const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
276 {
277 	__m128i tweakreg;
278 	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
279 	size_t i, cnt;
280 
281 	/*
282 	 * Prepare tweak as E_k2(IV). IV is specified as LE representation
283 	 * of a 64-bit block number which we allow to be passed in directly.
284 	 */
285 #if BYTE_ORDER == LITTLE_ENDIAN
286 	bcopy(iv, tweak, AES_XTS_IVSIZE);
287 	/* Last 64 bits of IV are always zero. */
288 	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
289 #else
290 #error Only LITTLE_ENDIAN architectures are supported.
291 #endif
292 	tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
293 	tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);
294 
295 	cnt = len / AES_XTS_BLOCKSIZE / 8;
296 	for (i = 0; i < cnt; i++) {
297 		aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
298 		    from, to, do_encrypt);
299 		from += AES_XTS_BLOCKSIZE * 8;
300 		to += AES_XTS_BLOCKSIZE * 8;
301 	}
302 	i *= 8;
303 	cnt = len / AES_XTS_BLOCKSIZE;
304 	for (; i < cnt; i++) {
305 		aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
306 		    from, to, do_encrypt);
307 		from += AES_XTS_BLOCKSIZE;
308 		to += AES_XTS_BLOCKSIZE;
309 	}
310 }
311 
312 void
313 aesni_encrypt_xts(int rounds, const void *data_schedule,
314     const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
315     const uint8_t iv[AES_BLOCK_LEN])
316 {
317 
318 	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
319 	    iv, 1);
320 }
321 
322 void
323 aesni_decrypt_xts(int rounds, const void *data_schedule,
324     const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
325     const uint8_t iv[AES_BLOCK_LEN])
326 {
327 
328 	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
329 	    iv, 0);
330 }
331 
332 static int
333 aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
334     int keylen)
335 {
336 
337 	switch (ses->algo) {
338 	case CRYPTO_AES_CBC:
339 		switch (keylen) {
340 		case 128:
341 			ses->rounds = AES128_ROUNDS;
342 			break;
343 		case 192:
344 			ses->rounds = AES192_ROUNDS;
345 			break;
346 		case 256:
347 			ses->rounds = AES256_ROUNDS;
348 			break;
349 		default:
350 			return (EINVAL);
351 		}
352 		break;
353 	case CRYPTO_AES_XTS:
354 		switch (keylen) {
355 		case 256:
356 			ses->rounds = AES128_ROUNDS;
357 			break;
358 		case 512:
359 			ses->rounds = AES256_ROUNDS;
360 			break;
361 		default:
362 			return (EINVAL);
363 		}
364 		break;
365 	default:
366 		return (EINVAL);
367 	}
368 
369 	aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
370 	aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds);
371 	if (ses->algo == CRYPTO_AES_CBC)
372 		arc4rand(ses->iv, sizeof(ses->iv), 0);
373 	else /* if (ses->algo == CRYPTO_AES_XTS) */ {
374 		aesni_set_enckey(key + keylen / 16, ses->xts_schedule,
375 		    ses->rounds);
376 	}
377 
378 	return (0);
379 }
380 
381 int
382 aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini)
383 {
384 	struct thread *td;
385 	int error, saved_ctx;
386 
387 	td = curthread;
388 	if (!is_fpu_kern_thread(0)) {
389 		error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL);
390 		saved_ctx = 1;
391 	} else {
392 		error = 0;
393 		saved_ctx = 0;
394 	}
395 	if (error == 0) {
396 		error = aesni_cipher_setup_common(ses, encini->cri_key,
397 		    encini->cri_klen);
398 		if (saved_ctx)
399 			fpu_kern_leave(td, ses->fpu_ctx);
400 	}
401 	return (error);
402 }
403 
404 int
405 aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd,
406     struct cryptop *crp)
407 {
408 	struct thread *td;
409 	uint8_t *buf;
410 	int error, allocated, saved_ctx;
411 
412 	buf = aesni_cipher_alloc(enccrd, crp, &allocated);
413 	if (buf == NULL)
414 		return (ENOMEM);
415 
416 	td = curthread;
417 	if (!is_fpu_kern_thread(0)) {
418 		error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL);
419 		if (error != 0)
420 			goto out;
421 		saved_ctx = 1;
422 	} else {
423 		saved_ctx = 0;
424 		error = 0;
425 	}
426 
427 	if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) {
428 		error = aesni_cipher_setup_common(ses, enccrd->crd_key,
429 		    enccrd->crd_klen);
430 		if (error != 0)
431 			goto out;
432 	}
433 
434 	if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) {
435 		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
436 			bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
437 		if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0)
438 			crypto_copyback(crp->crp_flags, crp->crp_buf,
439 			    enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
440 		if (ses->algo == CRYPTO_AES_CBC) {
441 			aesni_encrypt_cbc(ses->rounds, ses->enc_schedule,
442 			    enccrd->crd_len, buf, buf, ses->iv);
443 		} else /* if (ses->algo == CRYPTO_AES_XTS) */ {
444 			aesni_encrypt_xts(ses->rounds, ses->enc_schedule,
445 			    ses->xts_schedule, enccrd->crd_len, buf, buf,
446 			    ses->iv);
447 		}
448 	} else {
449 		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
450 			bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
451 		else
452 			crypto_copydata(crp->crp_flags, crp->crp_buf,
453 			    enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
454 		if (ses->algo == CRYPTO_AES_CBC) {
455 			aesni_decrypt_cbc(ses->rounds, ses->dec_schedule,
456 			    enccrd->crd_len, buf, ses->iv);
457 		} else /* if (ses->algo == CRYPTO_AES_XTS) */ {
458 			aesni_decrypt_xts(ses->rounds, ses->dec_schedule,
459 			    ses->xts_schedule, enccrd->crd_len, buf, buf,
460 			    ses->iv);
461 		}
462 	}
463 	if (saved_ctx)
464 		fpu_kern_leave(td, ses->fpu_ctx);
465 	if (allocated)
466 		crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip,
467 		    enccrd->crd_len, buf);
468 	if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0)
469 		crypto_copydata(crp->crp_flags, crp->crp_buf,
470 		    enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN,
471 		    AES_BLOCK_LEN, ses->iv);
472  out:
473 	if (allocated) {
474 		bzero(buf, enccrd->crd_len);
475 		free(buf, M_AESNI);
476 	}
477 	return (error);
478 }
479