xref: /freebsd/sys/crypto/aesni/aesni_wrap.c (revision ce3adf4362fcca6a43e500b2531f0038adbfbd21)
/*-
 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <crypto/aesni/aesni.h>

#include "aesencdec.h"

MALLOC_DECLARE(M_AESNI);

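/*
 * Encrypt len bytes in CBC mode, reading from "from" and writing to "to".
 * Each plaintext block is XORed with the previous ciphertext block (the IV
 * for the first block) before encryption, so the blocks are necessarily
 * processed one at a time.  Any partial trailing block is ignored.
 */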
void
aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
{
	__m128i tot, ivreg;
	size_t i;

	len /= AES_BLOCK_LEN;
	ivreg = _mm_loadu_si128((const __m128i *)iv);
	for (i = 0; i < len; i++) {
		tot = aesni_enc(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from) ^ ivreg);
		ivreg = tot;
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

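/*
 * Decrypt len bytes in place in "buf" in CBC mode.  CBC decryption
 * parallelizes, so eight blocks are decrypted per aesni_dec8() call and
 * the remainder one block at a time.  Each ciphertext block is saved
 * before it is overwritten, since it is the chaining value for the
 * following block.
 */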
void
aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
    uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN])
{
	__m128i blocks[8];
	__m128i *bufs;
	__m128i ivreg, nextiv;
	size_t i, j, cnt;

	ivreg = _mm_loadu_si128((const __m128i *)iv);
	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		bufs = (__m128i *)buf;
		aesni_dec8(rounds - 1, key_schedule, bufs[0], bufs[1],
		    bufs[2], bufs[3], bufs[4], bufs[5], bufs[6],
		    bufs[7], &blocks[0]);
		for (j = 0; j < 8; j++) {
			nextiv = bufs[j];
			bufs[j] = blocks[j] ^ ivreg;
			ivreg = nextiv;
		}
		buf += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		bufs = (__m128i *)buf;
		nextiv = bufs[0];
		bufs[0] = aesni_dec(rounds - 1, key_schedule, bufs[0]) ^ ivreg;
		ivreg = nextiv;
		buf += AES_BLOCK_LEN;
	}
}

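/*
 * Encrypt len bytes in ECB mode, reading from "from" and writing to "to".
 * The blocks are independent, so eight are processed per aesni_enc8()
 * call and the remainder one block at a time.
 */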
void
aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to)
{
	__m128i tot;
	const __m128i *blocks;
	size_t i, cnt;

	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blocks = (const __m128i *)from;
		aesni_enc8(rounds - 1, key_schedule, blocks[0], blocks[1],
		    blocks[2], blocks[3], blocks[4], blocks[5], blocks[6],
		    blocks[7], (__m128i *)to);
		from += AES_BLOCK_LEN * 8;
		to += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		tot = aesni_enc(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from));
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

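/*
 * Decrypt len bytes in ECB mode, reading from "from" and writing to "to";
 * the mirror image of aesni_encrypt_ecb() above.
 */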
void
aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
{
	__m128i tot;
	const __m128i *blocks;
	size_t i, cnt;

	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blocks = (const __m128i *)from;
		aesni_dec8(rounds - 1, key_schedule, blocks[0], blocks[1],
		    blocks[2], blocks[3], blocks[4], blocks[5], blocks[6],
		    blocks[7], (__m128i *)to);
		from += AES_BLOCK_LEN * 8;
		to += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		tot = aesni_dec(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from));
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

#define	AES_XTS_BLOCKSIZE	16
#define	AES_XTS_IVSIZE		8
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */

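/*
 * Advance the XTS tweak: multiply the 128-bit tweak by alpha (x) in
 * GF(2^128).  There is no 128-bit shift instruction, so each 32-bit lane
 * is shifted left by one and the bit that falls off a lane is re-injected
 * into the next lane through the rotated sign masks; when the top bit of
 * the whole tweak falls off, 0x87 is folded into the low lane to reduce
 * the result.
 */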
static inline __m128i
xts_crank_lfsr(__m128i inp)
{
	const __m128i alphamask = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA);
	__m128i xtweak, ret;

	/* set up xor mask */
	xtweak = _mm_shuffle_epi32(inp, 0x93);
	xtweak = _mm_srai_epi32(xtweak, 31);
	xtweak &= alphamask;

	/* next term */
	ret = _mm_slli_epi32(inp, 1);
	ret ^= xtweak;

	return ret;
}

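/*
 * Encrypt or decrypt a single XTS block: XOR with the tweak, run the
 * block cipher, XOR with the tweak again, then advance the tweak for the
 * next block.
 */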
static void
aesni_crypt_xts_block(int rounds, const void *key_schedule, __m128i *tweak,
    const __m128i *from, __m128i *to, int do_encrypt)
{
	__m128i block;

	block = *from ^ *tweak;

	if (do_encrypt)
		block = aesni_enc(rounds - 1, key_schedule, block);
	else
		block = aesni_dec(rounds - 1, key_schedule, block);

	*to = block ^ *tweak;

	*tweak = xts_crank_lfsr(*tweak);
}

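/*
 * Same as aesni_crypt_xts_block(), but for eight consecutive blocks.  The
 * per-block tweaks are computed and saved up front so that the eight AES
 * operations can be issued together and the tweaks re-applied to the
 * cipher output afterwards.
 */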
static void
aesni_crypt_xts_block8(int rounds, const void *key_schedule, __m128i *tweak,
    const __m128i *from, __m128i *to, int do_encrypt)
{
	__m128i tmptweak;
	__m128i a, b, c, d, e, f, g, h;
	__m128i tweaks[8];
	__m128i tmp[8];

	tmptweak = *tweak;

	/*
	 * Unroll the loop.  This lets gcc keep the values in registers
	 * and saves memory accesses.
	 */
#define PREPINP(v, pos) 					\
		do {						\
			tweaks[(pos)] = tmptweak;		\
			(v) = from[(pos)] ^ tmptweak;		\
			tmptweak = xts_crank_lfsr(tmptweak);	\
		} while (0)
	PREPINP(a, 0);
	PREPINP(b, 1);
	PREPINP(c, 2);
	PREPINP(d, 3);
	PREPINP(e, 4);
	PREPINP(f, 5);
	PREPINP(g, 6);
	PREPINP(h, 7);
	*tweak = tmptweak;

	if (do_encrypt)
		aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
		    tmp);
	else
		aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
		    tmp);

	to[0] = tmp[0] ^ tweaks[0];
	to[1] = tmp[1] ^ tweaks[1];
	to[2] = tmp[2] ^ tweaks[2];
	to[3] = tmp[3] ^ tweaks[3];
	to[4] = tmp[4] ^ tweaks[4];
	to[5] = tmp[5] ^ tweaks[5];
	to[6] = tmp[6] ^ tweaks[6];
	to[7] = tmp[7] ^ tweaks[7];
}

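/*
 * Common XTS worker for encryption and decryption.  The initial tweak is
 * E_k2(IV), computed with the tweak key schedule; the data is then
 * processed eight blocks at a time, with any remainder handled one block
 * at a time.
 */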
static void
aesni_crypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
{
	__m128i tweakreg;
	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
	size_t i, cnt;

	/*
	 * Prepare the tweak as E_k2(IV).  The IV is the little-endian
	 * representation of a 64-bit block number, which we allow to be
	 * passed in directly.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	bcopy(iv, tweak, AES_XTS_IVSIZE);
	/* Last 64 bits of IV are always zero. */
	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
	tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
	tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);

	cnt = len / AES_XTS_BLOCKSIZE / 8;
	for (i = 0; i < cnt; i++) {
		aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
		    (const __m128i *)from, (__m128i *)to, do_encrypt);
		from += AES_XTS_BLOCKSIZE * 8;
		to += AES_XTS_BLOCKSIZE * 8;
	}
	i *= 8;
	cnt = len / AES_XTS_BLOCKSIZE;
	for (; i < cnt; i++) {
		aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
		    (const __m128i *)from, (__m128i *)to, do_encrypt);
		from += AES_XTS_BLOCKSIZE;
		to += AES_XTS_BLOCKSIZE;
	}
}

void
aesni_encrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[AES_BLOCK_LEN])
{

	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
	    iv, 1);
}

void
aesni_decrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[AES_BLOCK_LEN])
{

	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
	    iv, 0);
}

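/*
 * Derive the session key schedules.  keylen is given in bits; for XTS the
 * supplied key is twice the AES key size and is split into the data key
 * (first half) and the tweak key (second half, at key + keylen / 16
 * bytes).  CBC sessions also get a randomly generated initial IV.
 */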
static int
aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
    int keylen)
{

	switch (ses->algo) {
	case CRYPTO_AES_CBC:
		switch (keylen) {
		case 128:
			ses->rounds = AES128_ROUNDS;
			break;
		case 192:
			ses->rounds = AES192_ROUNDS;
			break;
		case 256:
			ses->rounds = AES256_ROUNDS;
			break;
		default:
			return (EINVAL);
		}
		break;
	case CRYPTO_AES_XTS:
		switch (keylen) {
		case 256:
			ses->rounds = AES128_ROUNDS;
			break;
		case 512:
			ses->rounds = AES256_ROUNDS;
			break;
		default:
			return (EINVAL);
		}
		break;
	default:
		return (EINVAL);
	}

	aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
	aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds);
	if (ses->algo == CRYPTO_AES_CBC)
		arc4rand(ses->iv, sizeof(ses->iv), 0);
	else /* if (ses->algo == CRYPTO_AES_XTS) */ {
		aesni_set_enckey(key + keylen / 16, ses->xts_schedule,
		    ses->rounds);
	}

	return (0);
}

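/*
 * Expand the session keys from the opencrypto session parameters.  Key
 * expansion uses the FPU, so it runs between fpu_kern_enter() and
 * fpu_kern_leave() unless the calling thread is already an FPU kernel
 * thread.
 */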
int
aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini)
{
	struct thread *td;
	int error, saved_ctx;

	td = curthread;
	if (!is_fpu_kern_thread(0)) {
		error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL);
		saved_ctx = 1;
	} else {
		error = 0;
		saved_ctx = 0;
	}
	if (error == 0) {
		error = aesni_cipher_setup_common(ses, encini->cri_key,
		    encini->cri_klen);
		if (saved_ctx)
			fpu_kern_leave(td, ses->fpu_ctx);
	}
	return (error);
}

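/*
 * Process one opencrypto request: re-key if CRD_F_KEY_EXPLICIT is set,
 * pick up the IV (from the descriptor, the session, or the request
 * buffer), and run the CBC or XTS transform in place on the buffer
 * returned by aesni_cipher_alloc(), copying the result back (and zeroing
 * and freeing the buffer) when one was allocated.  On encryption the last
 * ciphertext block is saved as the session IV.
 */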
int
aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd,
    struct cryptop *crp)
{
	struct thread *td;
	uint8_t *buf;
	int error, allocated, saved_ctx;

	buf = aesni_cipher_alloc(enccrd, crp, &allocated);
	if (buf == NULL)
		return (ENOMEM);

	td = curthread;
	if (!is_fpu_kern_thread(0)) {
		error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL);
		if (error != 0)
			goto out;
		saved_ctx = 1;
	} else {
		saved_ctx = 0;
		error = 0;
	}

	if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) {
		error = aesni_cipher_setup_common(ses, enccrd->crd_key,
		    enccrd->crd_klen);
		if (error != 0)
			goto out;
	}

	if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) {
		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
			bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
		if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0)
			crypto_copyback(crp->crp_flags, crp->crp_buf,
			    enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
		if (ses->algo == CRYPTO_AES_CBC) {
			aesni_encrypt_cbc(ses->rounds, ses->enc_schedule,
			    enccrd->crd_len, buf, buf, ses->iv);
		} else /* if (ses->algo == CRYPTO_AES_XTS) */ {
			aesni_encrypt_xts(ses->rounds, ses->enc_schedule,
			    ses->xts_schedule, enccrd->crd_len, buf, buf,
			    ses->iv);
		}
	} else {
		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
			bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
		else
			crypto_copydata(crp->crp_flags, crp->crp_buf,
			    enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
		if (ses->algo == CRYPTO_AES_CBC) {
			aesni_decrypt_cbc(ses->rounds, ses->dec_schedule,
			    enccrd->crd_len, buf, ses->iv);
		} else /* if (ses->algo == CRYPTO_AES_XTS) */ {
			aesni_decrypt_xts(ses->rounds, ses->dec_schedule,
			    ses->xts_schedule, enccrd->crd_len, buf, buf,
			    ses->iv);
		}
	}
	if (saved_ctx)
		fpu_kern_leave(td, ses->fpu_ctx);
	if (allocated)
		crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip,
		    enccrd->crd_len, buf);
	if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0)
		crypto_copydata(crp->crp_flags, crp->crp_buf,
		    enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN,
		    AES_BLOCK_LEN, ses->iv);
 out:
	if (allocated) {
		bzero(buf, enccrd->crd_len);
		free(buf, M_AESNI);
	}
	return (error);
}
448