xref: /freebsd/sys/crypto/aesni/aesni_wrap.c (revision fdafd315ad0d0f28a11b9fb4476a9ab059c62b92)
15f270659SKonstantin Belousov /*-
245b56a6bSPawel Jakub Dawidek  * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
35f270659SKonstantin Belousov  * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
414a0d246SPawel Jakub Dawidek  * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
5ff6c7bf5SJohn-Mark Gurney  * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
608fca7a5SJohn-Mark Gurney  * Copyright (c) 2014 The FreeBSD Foundation
75f270659SKonstantin Belousov  * All rights reserved.
85f270659SKonstantin Belousov  *
908fca7a5SJohn-Mark Gurney  * Portions of this software were developed by John-Mark Gurney
1008fca7a5SJohn-Mark Gurney  * under sponsorship of the FreeBSD Foundation and
1108fca7a5SJohn-Mark Gurney  * Rubicon Communications, LLC (Netgate).
1208fca7a5SJohn-Mark Gurney  *
135f270659SKonstantin Belousov  * Redistribution and use in source and binary forms, with or without
145f270659SKonstantin Belousov  * modification, are permitted provided that the following conditions
155f270659SKonstantin Belousov  * are met:
165f270659SKonstantin Belousov  * 1. Redistributions of source code must retain the above copyright
175f270659SKonstantin Belousov  *    notice, this list of conditions and the following disclaimer.
185f270659SKonstantin Belousov  * 2. Redistributions in binary form must reproduce the above copyright
195f270659SKonstantin Belousov  *    notice, this list of conditions and the following disclaimer in the
205f270659SKonstantin Belousov  *    documentation and/or other materials provided with the distribution.
215f270659SKonstantin Belousov  *
225f270659SKonstantin Belousov  * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
235f270659SKonstantin Belousov  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
245f270659SKonstantin Belousov  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
255f270659SKonstantin Belousov  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
265f270659SKonstantin Belousov  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
275f270659SKonstantin Belousov  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
285f270659SKonstantin Belousov  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
295f270659SKonstantin Belousov  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
305f270659SKonstantin Belousov  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
315f270659SKonstantin Belousov  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
325f270659SKonstantin Belousov  * SUCH DAMAGE.
335f270659SKonstantin Belousov  */
345f270659SKonstantin Belousov 
355f270659SKonstantin Belousov #include <sys/param.h>
365f270659SKonstantin Belousov #include <sys/libkern.h>
375f270659SKonstantin Belousov #include <sys/malloc.h>
385f270659SKonstantin Belousov #include <sys/proc.h>
395f270659SKonstantin Belousov #include <sys/systm.h>
405f270659SKonstantin Belousov #include <crypto/aesni/aesni.h>
415f270659SKonstantin Belousov 
4208fca7a5SJohn-Mark Gurney #include <opencrypto/gmac.h>
4308fca7a5SJohn-Mark Gurney 
44ff6c7bf5SJohn-Mark Gurney #include "aesencdec.h"
4508fca7a5SJohn-Mark Gurney #include <smmintrin.h>
46ff6c7bf5SJohn-Mark Gurney 
475f270659SKonstantin Belousov MALLOC_DECLARE(M_AESNI);
485f270659SKonstantin Belousov 
/*
 * A run of eight consecutive 128-bit blocks.  The bulk loops in this file
 * cast their byte buffers to this type and access blk[0]..blk[7] directly.
 * NOTE(review): presumably declared __packed so that buffers without 16-byte
 * alignment can be overlaid with it -- confirm against the __packed
 * definition in use.
 */
49038ffd3eSJohn-Mark Gurney struct blocks8 {
50038ffd3eSJohn-Mark Gurney 	__m128i	blk[8];
51038ffd3eSJohn-Mark Gurney } __packed;
52038ffd3eSJohn-Mark Gurney 
535f270659SKonstantin Belousov void
aesni_encrypt_cbc(int rounds,const void * key_schedule,size_t len,const uint8_t * from,uint8_t * to,const uint8_t iv[static AES_BLOCK_LEN])545f270659SKonstantin Belousov aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
55571ebf76SConrad Meyer     const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
565f270659SKonstantin Belousov {
57ff6c7bf5SJohn-Mark Gurney 	__m128i tot, ivreg;
585f270659SKonstantin Belousov 	size_t i;
595f270659SKonstantin Belousov 
605f270659SKonstantin Belousov 	len /= AES_BLOCK_LEN;
61ff6c7bf5SJohn-Mark Gurney 	ivreg = _mm_loadu_si128((const __m128i *)iv);
625f270659SKonstantin Belousov 	for (i = 0; i < len; i++) {
63ff6c7bf5SJohn-Mark Gurney 		tot = aesni_enc(rounds - 1, key_schedule,
64ff6c7bf5SJohn-Mark Gurney 		    _mm_loadu_si128((const __m128i *)from) ^ ivreg);
65ff6c7bf5SJohn-Mark Gurney 		ivreg = tot;
66ff6c7bf5SJohn-Mark Gurney 		_mm_storeu_si128((__m128i *)to, tot);
675f270659SKonstantin Belousov 		from += AES_BLOCK_LEN;
685f270659SKonstantin Belousov 		to += AES_BLOCK_LEN;
695f270659SKonstantin Belousov 	}
705f270659SKonstantin Belousov }
715f270659SKonstantin Belousov 
725f270659SKonstantin Belousov void
aesni_decrypt_cbc(int rounds,const void * key_schedule,size_t len,uint8_t * buf,const uint8_t iv[static AES_BLOCK_LEN])73ff6c7bf5SJohn-Mark Gurney aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
74571ebf76SConrad Meyer     uint8_t *buf, const uint8_t iv[static AES_BLOCK_LEN])
755f270659SKonstantin Belousov {
76ff6c7bf5SJohn-Mark Gurney 	__m128i blocks[8];
77038ffd3eSJohn-Mark Gurney 	struct blocks8 *blks;
78ff6c7bf5SJohn-Mark Gurney 	__m128i ivreg, nextiv;
79ff6c7bf5SJohn-Mark Gurney 	size_t i, j, cnt;
805f270659SKonstantin Belousov 
81ff6c7bf5SJohn-Mark Gurney 	ivreg = _mm_loadu_si128((const __m128i *)iv);
82ff6c7bf5SJohn-Mark Gurney 	cnt = len / AES_BLOCK_LEN / 8;
83ff6c7bf5SJohn-Mark Gurney 	for (i = 0; i < cnt; i++) {
84038ffd3eSJohn-Mark Gurney 		blks = (struct blocks8 *)buf;
85038ffd3eSJohn-Mark Gurney 		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
86038ffd3eSJohn-Mark Gurney 		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
87038ffd3eSJohn-Mark Gurney 		    blks->blk[6], blks->blk[7], &blocks[0]);
88ff6c7bf5SJohn-Mark Gurney 		for (j = 0; j < 8; j++) {
89038ffd3eSJohn-Mark Gurney 			nextiv = blks->blk[j];
90038ffd3eSJohn-Mark Gurney 			blks->blk[j] = blocks[j] ^ ivreg;
91ff6c7bf5SJohn-Mark Gurney 			ivreg = nextiv;
92ff6c7bf5SJohn-Mark Gurney 		}
93ff6c7bf5SJohn-Mark Gurney 		buf += AES_BLOCK_LEN * 8;
94ff6c7bf5SJohn-Mark Gurney 	}
95ff6c7bf5SJohn-Mark Gurney 	i *= 8;
96ff6c7bf5SJohn-Mark Gurney 	cnt = len / AES_BLOCK_LEN;
97ff6c7bf5SJohn-Mark Gurney 	for (; i < cnt; i++) {
98038ffd3eSJohn-Mark Gurney 		nextiv = _mm_loadu_si128((void *)buf);
99038ffd3eSJohn-Mark Gurney 		_mm_storeu_si128((void *)buf,
100038ffd3eSJohn-Mark Gurney 		    aesni_dec(rounds - 1, key_schedule, nextiv) ^ ivreg);
101ff6c7bf5SJohn-Mark Gurney 		ivreg = nextiv;
102ff6c7bf5SJohn-Mark Gurney 		buf += AES_BLOCK_LEN;
103ff6c7bf5SJohn-Mark Gurney 	}
104ff6c7bf5SJohn-Mark Gurney }
105ff6c7bf5SJohn-Mark Gurney 
106ff6c7bf5SJohn-Mark Gurney void
aesni_encrypt_ecb(int rounds,const void * key_schedule,size_t len,const uint8_t * from,uint8_t * to)107ff6c7bf5SJohn-Mark Gurney aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
108ff6c7bf5SJohn-Mark Gurney     const uint8_t *from, uint8_t *to)
109ff6c7bf5SJohn-Mark Gurney {
110ff6c7bf5SJohn-Mark Gurney 	__m128i tot;
111038ffd3eSJohn-Mark Gurney 	__m128i tout[8];
112038ffd3eSJohn-Mark Gurney 	struct blocks8 *top;
113038ffd3eSJohn-Mark Gurney 	const struct blocks8 *blks;
114ff6c7bf5SJohn-Mark Gurney 	size_t i, cnt;
115ff6c7bf5SJohn-Mark Gurney 
116ff6c7bf5SJohn-Mark Gurney 	cnt = len / AES_BLOCK_LEN / 8;
117ff6c7bf5SJohn-Mark Gurney 	for (i = 0; i < cnt; i++) {
118038ffd3eSJohn-Mark Gurney 		blks = (const struct blocks8 *)from;
119038ffd3eSJohn-Mark Gurney 		top = (struct blocks8 *)to;
120038ffd3eSJohn-Mark Gurney 		aesni_enc8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
121038ffd3eSJohn-Mark Gurney 		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
122038ffd3eSJohn-Mark Gurney 		    blks->blk[6], blks->blk[7], tout);
123038ffd3eSJohn-Mark Gurney 		top->blk[0] = tout[0];
124038ffd3eSJohn-Mark Gurney 		top->blk[1] = tout[1];
125038ffd3eSJohn-Mark Gurney 		top->blk[2] = tout[2];
126038ffd3eSJohn-Mark Gurney 		top->blk[3] = tout[3];
127038ffd3eSJohn-Mark Gurney 		top->blk[4] = tout[4];
128038ffd3eSJohn-Mark Gurney 		top->blk[5] = tout[5];
129038ffd3eSJohn-Mark Gurney 		top->blk[6] = tout[6];
130038ffd3eSJohn-Mark Gurney 		top->blk[7] = tout[7];
131ff6c7bf5SJohn-Mark Gurney 		from += AES_BLOCK_LEN * 8;
132ff6c7bf5SJohn-Mark Gurney 		to += AES_BLOCK_LEN * 8;
133ff6c7bf5SJohn-Mark Gurney 	}
134ff6c7bf5SJohn-Mark Gurney 	i *= 8;
135ff6c7bf5SJohn-Mark Gurney 	cnt = len / AES_BLOCK_LEN;
136ff6c7bf5SJohn-Mark Gurney 	for (; i < cnt; i++) {
137ff6c7bf5SJohn-Mark Gurney 		tot = aesni_enc(rounds - 1, key_schedule,
138ff6c7bf5SJohn-Mark Gurney 		    _mm_loadu_si128((const __m128i *)from));
139ff6c7bf5SJohn-Mark Gurney 		_mm_storeu_si128((__m128i *)to, tot);
1405f270659SKonstantin Belousov 		from += AES_BLOCK_LEN;
1415f270659SKonstantin Belousov 		to += AES_BLOCK_LEN;
1425f270659SKonstantin Belousov 	}
1435f270659SKonstantin Belousov }
1445f270659SKonstantin Belousov 
1455f270659SKonstantin Belousov void
aesni_decrypt_ecb(int rounds,const void * key_schedule,size_t len,const uint8_t * from,uint8_t * to)1465f270659SKonstantin Belousov aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
147*d256a06fSJohn Baldwin     const uint8_t *from, uint8_t *to)
1485f270659SKonstantin Belousov {
149ff6c7bf5SJohn-Mark Gurney 	__m128i tot;
150038ffd3eSJohn-Mark Gurney 	__m128i tout[8];
151038ffd3eSJohn-Mark Gurney 	const struct blocks8 *blks;
152038ffd3eSJohn-Mark Gurney 	struct blocks8 *top;
153ff6c7bf5SJohn-Mark Gurney 	size_t i, cnt;
1545f270659SKonstantin Belousov 
155ff6c7bf5SJohn-Mark Gurney 	cnt = len / AES_BLOCK_LEN / 8;
156ff6c7bf5SJohn-Mark Gurney 	for (i = 0; i < cnt; i++) {
157038ffd3eSJohn-Mark Gurney 		blks = (const struct blocks8 *)from;
158038ffd3eSJohn-Mark Gurney 		top = (struct blocks8 *)to;
159038ffd3eSJohn-Mark Gurney 		aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1],
160038ffd3eSJohn-Mark Gurney 		    blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5],
161038ffd3eSJohn-Mark Gurney 		    blks->blk[6], blks->blk[7], tout);
162038ffd3eSJohn-Mark Gurney 		top->blk[0] = tout[0];
163038ffd3eSJohn-Mark Gurney 		top->blk[1] = tout[1];
164038ffd3eSJohn-Mark Gurney 		top->blk[2] = tout[2];
165038ffd3eSJohn-Mark Gurney 		top->blk[3] = tout[3];
166038ffd3eSJohn-Mark Gurney 		top->blk[4] = tout[4];
167038ffd3eSJohn-Mark Gurney 		top->blk[5] = tout[5];
168038ffd3eSJohn-Mark Gurney 		top->blk[6] = tout[6];
169038ffd3eSJohn-Mark Gurney 		top->blk[7] = tout[7];
170ff6c7bf5SJohn-Mark Gurney 		from += AES_BLOCK_LEN * 8;
171ff6c7bf5SJohn-Mark Gurney 		to += AES_BLOCK_LEN * 8;
172ff6c7bf5SJohn-Mark Gurney 	}
173ff6c7bf5SJohn-Mark Gurney 	i *= 8;
174ff6c7bf5SJohn-Mark Gurney 	cnt = len / AES_BLOCK_LEN;
175ff6c7bf5SJohn-Mark Gurney 	for (; i < cnt; i++) {
176ff6c7bf5SJohn-Mark Gurney 		tot = aesni_dec(rounds - 1, key_schedule,
177ff6c7bf5SJohn-Mark Gurney 		    _mm_loadu_si128((const __m128i *)from));
178ff6c7bf5SJohn-Mark Gurney 		_mm_storeu_si128((__m128i *)to, tot);
1795f270659SKonstantin Belousov 		from += AES_BLOCK_LEN;
1805f270659SKonstantin Belousov 		to += AES_BLOCK_LEN;
1815f270659SKonstantin Belousov 	}
1825f270659SKonstantin Belousov }
1835f270659SKonstantin Belousov 
/*
 * Increment a counter held in the mixed-endian layout used by _icm: the
 * low 64 bits of the counter sit in the upper 64-bit lane (to match _icm's
 * BSWAP shuffle) and the high 64 bits sit in the lower lane.
 */
static inline __m128i
nextc(__m128i x)
{
	const __m128i zero = _mm_setzero_si128();
	__m128i bumped, wrapped, carry;

	/* Add one to the upper lane only. */
	bumped = _mm_add_epi64(x, _mm_setr_epi32(0, 0, 1, 0));

	/*
	 * A lane that equals zero compares as all-ones (-1).  Move the upper
	 * lane's compare result into the lower lane and clear the upper one,
	 * so the subtraction below adds one to the lower lane exactly when
	 * the upper lane wrapped around to zero.
	 */
	wrapped = _mm_cmpeq_epi64(bumped, zero);
	carry = _mm_unpackhi_epi64(wrapped, zero);

	return _mm_sub_epi64(bumped, carry);
}
20108fca7a5SJohn-Mark Gurney 
20208fca7a5SJohn-Mark Gurney void
aesni_encrypt_icm(int rounds,const void * key_schedule,size_t len,const uint8_t * from,uint8_t * to,const uint8_t iv[static AES_BLOCK_LEN])20308fca7a5SJohn-Mark Gurney aesni_encrypt_icm(int rounds, const void *key_schedule, size_t len,
204571ebf76SConrad Meyer     const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
20508fca7a5SJohn-Mark Gurney {
20608fca7a5SJohn-Mark Gurney 	__m128i tot;
20708fca7a5SJohn-Mark Gurney 	__m128i tmp1, tmp2, tmp3, tmp4;
20808fca7a5SJohn-Mark Gurney 	__m128i tmp5, tmp6, tmp7, tmp8;
20908fca7a5SJohn-Mark Gurney 	__m128i ctr1, ctr2, ctr3, ctr4;
21008fca7a5SJohn-Mark Gurney 	__m128i ctr5, ctr6, ctr7, ctr8;
21108fca7a5SJohn-Mark Gurney 	__m128i BSWAP_EPI64;
21208fca7a5SJohn-Mark Gurney 	__m128i tout[8];
213564b6aa7SMark Johnston 	__m128i block;
21408fca7a5SJohn-Mark Gurney 	struct blocks8 *top;
21508fca7a5SJohn-Mark Gurney 	const struct blocks8 *blks;
216564b6aa7SMark Johnston 	size_t i, cnt, resid;
21708fca7a5SJohn-Mark Gurney 
21808fca7a5SJohn-Mark Gurney 	BSWAP_EPI64 = _mm_set_epi8(8,9,10,11,12,13,14,15,0,1,2,3,4,5,6,7);
21908fca7a5SJohn-Mark Gurney 
220d395fd0dSRyan Libby 	ctr1 = _mm_loadu_si128((const __m128i *)iv);
22108fca7a5SJohn-Mark Gurney 	ctr1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
22208fca7a5SJohn-Mark Gurney 
22308fca7a5SJohn-Mark Gurney 	cnt = len / AES_BLOCK_LEN / 8;
22408fca7a5SJohn-Mark Gurney 	for (i = 0; i < cnt; i++) {
22508fca7a5SJohn-Mark Gurney 		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
22608fca7a5SJohn-Mark Gurney 		ctr2 = nextc(ctr1);
22708fca7a5SJohn-Mark Gurney 		tmp2 = _mm_shuffle_epi8(ctr2, BSWAP_EPI64);
22808fca7a5SJohn-Mark Gurney 		ctr3 = nextc(ctr2);
22908fca7a5SJohn-Mark Gurney 		tmp3 = _mm_shuffle_epi8(ctr3, BSWAP_EPI64);
23008fca7a5SJohn-Mark Gurney 		ctr4 = nextc(ctr3);
23108fca7a5SJohn-Mark Gurney 		tmp4 = _mm_shuffle_epi8(ctr4, BSWAP_EPI64);
23208fca7a5SJohn-Mark Gurney 		ctr5 = nextc(ctr4);
23308fca7a5SJohn-Mark Gurney 		tmp5 = _mm_shuffle_epi8(ctr5, BSWAP_EPI64);
23408fca7a5SJohn-Mark Gurney 		ctr6 = nextc(ctr5);
23508fca7a5SJohn-Mark Gurney 		tmp6 = _mm_shuffle_epi8(ctr6, BSWAP_EPI64);
23608fca7a5SJohn-Mark Gurney 		ctr7 = nextc(ctr6);
23708fca7a5SJohn-Mark Gurney 		tmp7 = _mm_shuffle_epi8(ctr7, BSWAP_EPI64);
23808fca7a5SJohn-Mark Gurney 		ctr8 = nextc(ctr7);
23908fca7a5SJohn-Mark Gurney 		tmp8 = _mm_shuffle_epi8(ctr8, BSWAP_EPI64);
24008fca7a5SJohn-Mark Gurney 		ctr1 = nextc(ctr8);
24108fca7a5SJohn-Mark Gurney 
24208fca7a5SJohn-Mark Gurney 		blks = (const struct blocks8 *)from;
24308fca7a5SJohn-Mark Gurney 		top = (struct blocks8 *)to;
24408fca7a5SJohn-Mark Gurney 		aesni_enc8(rounds - 1, key_schedule, tmp1, tmp2, tmp3, tmp4,
24508fca7a5SJohn-Mark Gurney 		    tmp5, tmp6, tmp7, tmp8, tout);
24608fca7a5SJohn-Mark Gurney 
24708fca7a5SJohn-Mark Gurney 		top->blk[0] = blks->blk[0] ^ tout[0];
24808fca7a5SJohn-Mark Gurney 		top->blk[1] = blks->blk[1] ^ tout[1];
24908fca7a5SJohn-Mark Gurney 		top->blk[2] = blks->blk[2] ^ tout[2];
25008fca7a5SJohn-Mark Gurney 		top->blk[3] = blks->blk[3] ^ tout[3];
25108fca7a5SJohn-Mark Gurney 		top->blk[4] = blks->blk[4] ^ tout[4];
25208fca7a5SJohn-Mark Gurney 		top->blk[5] = blks->blk[5] ^ tout[5];
25308fca7a5SJohn-Mark Gurney 		top->blk[6] = blks->blk[6] ^ tout[6];
25408fca7a5SJohn-Mark Gurney 		top->blk[7] = blks->blk[7] ^ tout[7];
25508fca7a5SJohn-Mark Gurney 
25608fca7a5SJohn-Mark Gurney 		from += AES_BLOCK_LEN * 8;
25708fca7a5SJohn-Mark Gurney 		to += AES_BLOCK_LEN * 8;
25808fca7a5SJohn-Mark Gurney 	}
25908fca7a5SJohn-Mark Gurney 	i *= 8;
26008fca7a5SJohn-Mark Gurney 	cnt = len / AES_BLOCK_LEN;
26108fca7a5SJohn-Mark Gurney 	for (; i < cnt; i++) {
26208fca7a5SJohn-Mark Gurney 		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
26308fca7a5SJohn-Mark Gurney 		ctr1 = nextc(ctr1);
26408fca7a5SJohn-Mark Gurney 
26508fca7a5SJohn-Mark Gurney 		tot = aesni_enc(rounds - 1, key_schedule, tmp1);
26608fca7a5SJohn-Mark Gurney 
26708fca7a5SJohn-Mark Gurney 		tot = tot ^ _mm_loadu_si128((const __m128i *)from);
26808fca7a5SJohn-Mark Gurney 		_mm_storeu_si128((__m128i *)to, tot);
26908fca7a5SJohn-Mark Gurney 
27008fca7a5SJohn-Mark Gurney 		from += AES_BLOCK_LEN;
27108fca7a5SJohn-Mark Gurney 		to += AES_BLOCK_LEN;
27208fca7a5SJohn-Mark Gurney 	}
27308fca7a5SJohn-Mark Gurney 
274564b6aa7SMark Johnston 	/*
275564b6aa7SMark Johnston 	 * Handle remaining partial round.  Copy the remaining payload onto the
276564b6aa7SMark Johnston 	 * stack to ensure that the full block can be loaded safely.
277564b6aa7SMark Johnston 	 */
278564b6aa7SMark Johnston 	resid = len % AES_BLOCK_LEN;
279564b6aa7SMark Johnston 	if (resid != 0) {
28008fca7a5SJohn-Mark Gurney 		tmp1 = _mm_shuffle_epi8(ctr1, BSWAP_EPI64);
28108fca7a5SJohn-Mark Gurney 		tot = aesni_enc(rounds - 1, key_schedule, tmp1);
282564b6aa7SMark Johnston 		block = _mm_setzero_si128();
283564b6aa7SMark Johnston 		memcpy(&block, from, resid);
284564b6aa7SMark Johnston 		tot = tot ^ _mm_loadu_si128(&block);
285564b6aa7SMark Johnston 		memcpy(to, &tot, resid);
286564b6aa7SMark Johnston 		explicit_bzero(&block, sizeof(block));
28708fca7a5SJohn-Mark Gurney 	}
28808fca7a5SJohn-Mark Gurney }
28908fca7a5SJohn-Mark Gurney 
#define	AES_XTS_BLOCKSIZE	16	/* bytes per XTS data block */
#define	AES_XTS_IVSIZE		8	/* bytes of the IV copied into the tweak */
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */

/*
 * Advance the XTS tweak by one step: shift the 128-bit value left by one
 * bit and, when the top bit falls off, fold AES_XTS_ALPHA into the lowest
 * 32-bit word.
 */
static inline __m128i
xts_crank_lfsr(__m128i inp)
{
	const __m128i feedback = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA);
	__m128i carry;

	/*
	 * Rotate the 32-bit words up by one position (0x93) and smear each
	 * word's sign bit across the word.  After masking, every word holds
	 * the bit shifted out of the word below it, and word 0 holds
	 * AES_XTS_ALPHA if the top bit of the whole value was set.
	 */
	carry = _mm_srai_epi32(_mm_shuffle_epi32(inp, 0x93), 31);
	carry = _mm_and_si128(carry, feedback);

	/* Shift every word left by one and merge the carries in. */
	return _mm_xor_si128(_mm_slli_epi32(inp, 1), carry);
}
311ff6c7bf5SJohn-Mark Gurney 
312ff6c7bf5SJohn-Mark Gurney static void
aesni_crypt_xts_block(int rounds,const __m128i * key_schedule,__m128i * tweak,const uint8_t * from,uint8_t * to,int do_encrypt)313038ffd3eSJohn-Mark Gurney aesni_crypt_xts_block(int rounds, const __m128i *key_schedule, __m128i *tweak,
314038ffd3eSJohn-Mark Gurney     const uint8_t *from, uint8_t *to, int do_encrypt)
315ff6c7bf5SJohn-Mark Gurney {
316ff6c7bf5SJohn-Mark Gurney 	__m128i block;
317ff6c7bf5SJohn-Mark Gurney 
318038ffd3eSJohn-Mark Gurney 	block = _mm_loadu_si128((const __m128i *)from) ^ *tweak;
319ac970319SPawel Jakub Dawidek 
320ac970319SPawel Jakub Dawidek 	if (do_encrypt)
321ff6c7bf5SJohn-Mark Gurney 		block = aesni_enc(rounds - 1, key_schedule, block);
322ac970319SPawel Jakub Dawidek 	else
323ff6c7bf5SJohn-Mark Gurney 		block = aesni_dec(rounds - 1, key_schedule, block);
324ac970319SPawel Jakub Dawidek 
325038ffd3eSJohn-Mark Gurney 	_mm_storeu_si128((__m128i *)to, block ^ *tweak);
326ac970319SPawel Jakub Dawidek 
327ff6c7bf5SJohn-Mark Gurney 	*tweak = xts_crank_lfsr(*tweak);
328ac970319SPawel Jakub Dawidek }
329ff6c7bf5SJohn-Mark Gurney 
330ff6c7bf5SJohn-Mark Gurney static void
aesni_crypt_xts_block8(int rounds,const __m128i * key_schedule,__m128i * tweak,const uint8_t * from,uint8_t * to,int do_encrypt)331038ffd3eSJohn-Mark Gurney aesni_crypt_xts_block8(int rounds, const __m128i *key_schedule, __m128i *tweak,
332038ffd3eSJohn-Mark Gurney     const uint8_t *from, uint8_t *to, int do_encrypt)
333ff6c7bf5SJohn-Mark Gurney {
334ff6c7bf5SJohn-Mark Gurney 	__m128i tmptweak;
335ff6c7bf5SJohn-Mark Gurney 	__m128i a, b, c, d, e, f, g, h;
336ff6c7bf5SJohn-Mark Gurney 	__m128i tweaks[8];
337ff6c7bf5SJohn-Mark Gurney 	__m128i tmp[8];
338038ffd3eSJohn-Mark Gurney 	__m128i *top;
339038ffd3eSJohn-Mark Gurney 	const __m128i *fromp;
340ff6c7bf5SJohn-Mark Gurney 
341ff6c7bf5SJohn-Mark Gurney 	tmptweak = *tweak;
342ff6c7bf5SJohn-Mark Gurney 
343ff6c7bf5SJohn-Mark Gurney 	/*
344ff6c7bf5SJohn-Mark Gurney 	 * unroll the loop.  This lets gcc put values directly in the
345ff6c7bf5SJohn-Mark Gurney 	 * register and saves memory accesses.
346ff6c7bf5SJohn-Mark Gurney 	 */
347038ffd3eSJohn-Mark Gurney 	fromp = (const __m128i *)from;
348ff6c7bf5SJohn-Mark Gurney #define PREPINP(v, pos) 					\
349ff6c7bf5SJohn-Mark Gurney 		do {						\
350ff6c7bf5SJohn-Mark Gurney 			tweaks[(pos)] = tmptweak;		\
351038ffd3eSJohn-Mark Gurney 			(v) = _mm_loadu_si128(&fromp[pos]) ^	\
352038ffd3eSJohn-Mark Gurney 			    tmptweak;				\
353ff6c7bf5SJohn-Mark Gurney 			tmptweak = xts_crank_lfsr(tmptweak);	\
354ff6c7bf5SJohn-Mark Gurney 		} while (0)
355ff6c7bf5SJohn-Mark Gurney 	PREPINP(a, 0);
356ff6c7bf5SJohn-Mark Gurney 	PREPINP(b, 1);
357ff6c7bf5SJohn-Mark Gurney 	PREPINP(c, 2);
358ff6c7bf5SJohn-Mark Gurney 	PREPINP(d, 3);
359ff6c7bf5SJohn-Mark Gurney 	PREPINP(e, 4);
360ff6c7bf5SJohn-Mark Gurney 	PREPINP(f, 5);
361ff6c7bf5SJohn-Mark Gurney 	PREPINP(g, 6);
362ff6c7bf5SJohn-Mark Gurney 	PREPINP(h, 7);
363ff6c7bf5SJohn-Mark Gurney 	*tweak = tmptweak;
364ff6c7bf5SJohn-Mark Gurney 
365ff6c7bf5SJohn-Mark Gurney 	if (do_encrypt)
366ff6c7bf5SJohn-Mark Gurney 		aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
367ff6c7bf5SJohn-Mark Gurney 		    tmp);
368ff6c7bf5SJohn-Mark Gurney 	else
369ff6c7bf5SJohn-Mark Gurney 		aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
370ff6c7bf5SJohn-Mark Gurney 		    tmp);
371ff6c7bf5SJohn-Mark Gurney 
372038ffd3eSJohn-Mark Gurney 	top = (__m128i *)to;
373038ffd3eSJohn-Mark Gurney 	_mm_storeu_si128(&top[0], tmp[0] ^ tweaks[0]);
374038ffd3eSJohn-Mark Gurney 	_mm_storeu_si128(&top[1], tmp[1] ^ tweaks[1]);
375038ffd3eSJohn-Mark Gurney 	_mm_storeu_si128(&top[2], tmp[2] ^ tweaks[2]);
376038ffd3eSJohn-Mark Gurney 	_mm_storeu_si128(&top[3], tmp[3] ^ tweaks[3]);
377038ffd3eSJohn-Mark Gurney 	_mm_storeu_si128(&top[4], tmp[4] ^ tweaks[4]);
378038ffd3eSJohn-Mark Gurney 	_mm_storeu_si128(&top[5], tmp[5] ^ tweaks[5]);
379038ffd3eSJohn-Mark Gurney 	_mm_storeu_si128(&top[6], tmp[6] ^ tweaks[6]);
380038ffd3eSJohn-Mark Gurney 	_mm_storeu_si128(&top[7], tmp[7] ^ tweaks[7]);
381ac970319SPawel Jakub Dawidek }
382ac970319SPawel Jakub Dawidek 
383ac970319SPawel Jakub Dawidek static void
aesni_crypt_xts(int rounds,const __m128i * data_schedule,const __m128i * tweak_schedule,size_t len,const uint8_t * from,uint8_t * to,const uint8_t iv[static AES_BLOCK_LEN],int do_encrypt)384038ffd3eSJohn-Mark Gurney aesni_crypt_xts(int rounds, const __m128i *data_schedule,
385038ffd3eSJohn-Mark Gurney     const __m128i *tweak_schedule, size_t len, const uint8_t *from,
386571ebf76SConrad Meyer     uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
387ac970319SPawel Jakub Dawidek {
388ff6c7bf5SJohn-Mark Gurney 	__m128i tweakreg;
389ff6c7bf5SJohn-Mark Gurney 	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
390ff6c7bf5SJohn-Mark Gurney 	size_t i, cnt;
391ac970319SPawel Jakub Dawidek 
392ac970319SPawel Jakub Dawidek 	/*
393ac970319SPawel Jakub Dawidek 	 * Prepare tweak as E_k2(IV). IV is specified as LE representation
394ac970319SPawel Jakub Dawidek 	 * of a 64-bit block number which we allow to be passed in directly.
395ac970319SPawel Jakub Dawidek 	 */
3965fa1b350SPawel Jakub Dawidek #if BYTE_ORDER == LITTLE_ENDIAN
3975fa1b350SPawel Jakub Dawidek 	bcopy(iv, tweak, AES_XTS_IVSIZE);
398ac970319SPawel Jakub Dawidek 	/* Last 64 bits of IV are always zero. */
399ac970319SPawel Jakub Dawidek 	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
4005fa1b350SPawel Jakub Dawidek #else
4015fa1b350SPawel Jakub Dawidek #error Only LITTLE_ENDIAN architectures are supported.
4025fa1b350SPawel Jakub Dawidek #endif
403ff6c7bf5SJohn-Mark Gurney 	tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
404ff6c7bf5SJohn-Mark Gurney 	tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);
405ac970319SPawel Jakub Dawidek 
406ff6c7bf5SJohn-Mark Gurney 	cnt = len / AES_XTS_BLOCKSIZE / 8;
407ff6c7bf5SJohn-Mark Gurney 	for (i = 0; i < cnt; i++) {
408ff6c7bf5SJohn-Mark Gurney 		aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
409038ffd3eSJohn-Mark Gurney 		    from, to, do_encrypt);
410ff6c7bf5SJohn-Mark Gurney 		from += AES_XTS_BLOCKSIZE * 8;
411ff6c7bf5SJohn-Mark Gurney 		to += AES_XTS_BLOCKSIZE * 8;
412ff6c7bf5SJohn-Mark Gurney 	}
413ff6c7bf5SJohn-Mark Gurney 	i *= 8;
414ff6c7bf5SJohn-Mark Gurney 	cnt = len / AES_XTS_BLOCKSIZE;
415ff6c7bf5SJohn-Mark Gurney 	for (; i < cnt; i++) {
416ff6c7bf5SJohn-Mark Gurney 		aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
417038ffd3eSJohn-Mark Gurney 		    from, to, do_encrypt);
418ac970319SPawel Jakub Dawidek 		from += AES_XTS_BLOCKSIZE;
419ac970319SPawel Jakub Dawidek 		to += AES_XTS_BLOCKSIZE;
420ac970319SPawel Jakub Dawidek 	}
421ac970319SPawel Jakub Dawidek }
422ac970319SPawel Jakub Dawidek 
423ff6c7bf5SJohn-Mark Gurney void
aesni_encrypt_xts(int rounds,const void * data_schedule,const void * tweak_schedule,size_t len,const uint8_t * from,uint8_t * to,const uint8_t iv[static AES_BLOCK_LEN])424ac970319SPawel Jakub Dawidek aesni_encrypt_xts(int rounds, const void *data_schedule,
425ac970319SPawel Jakub Dawidek     const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
426571ebf76SConrad Meyer     const uint8_t iv[static AES_BLOCK_LEN])
427ac970319SPawel Jakub Dawidek {
428ac970319SPawel Jakub Dawidek 
429ac970319SPawel Jakub Dawidek 	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
430ac970319SPawel Jakub Dawidek 	    iv, 1);
431ac970319SPawel Jakub Dawidek }
432ac970319SPawel Jakub Dawidek 
433ff6c7bf5SJohn-Mark Gurney void
aesni_decrypt_xts(int rounds,const void * data_schedule,const void * tweak_schedule,size_t len,const uint8_t * from,uint8_t * to,const uint8_t iv[static AES_BLOCK_LEN])434ac970319SPawel Jakub Dawidek aesni_decrypt_xts(int rounds, const void *data_schedule,
435ac970319SPawel Jakub Dawidek     const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
436571ebf76SConrad Meyer     const uint8_t iv[static AES_BLOCK_LEN])
437ac970319SPawel Jakub Dawidek {
438ac970319SPawel Jakub Dawidek 
439ac970319SPawel Jakub Dawidek 	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
440ac970319SPawel Jakub Dawidek 	    iv, 0);
441ac970319SPawel Jakub Dawidek }
442ac970319SPawel Jakub Dawidek 
443c0341432SJohn Baldwin void
aesni_cipher_setup_common(struct aesni_session * ses,const struct crypto_session_params * csp,const uint8_t * key,int keylen)444c0341432SJohn Baldwin aesni_cipher_setup_common(struct aesni_session *ses,
445c0341432SJohn Baldwin     const struct crypto_session_params *csp, const uint8_t *key, int keylen)
4465f270659SKonstantin Belousov {
44708fca7a5SJohn-Mark Gurney 	int decsched;
44808fca7a5SJohn-Mark Gurney 
44908fca7a5SJohn-Mark Gurney 	decsched = 1;
4505f270659SKonstantin Belousov 
451c0341432SJohn Baldwin 	switch (csp->csp_cipher_alg) {
45208fca7a5SJohn-Mark Gurney 	case CRYPTO_AES_ICM:
45308fca7a5SJohn-Mark Gurney 	case CRYPTO_AES_NIST_GCM_16:
4547cff9f37SSean Eric Fagan 	case CRYPTO_AES_CCM_16:
45508fca7a5SJohn-Mark Gurney 		decsched = 0;
456c0341432SJohn Baldwin 		break;
457c0341432SJohn Baldwin 	}
458c0341432SJohn Baldwin 
459c0341432SJohn Baldwin 	if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
460c0341432SJohn Baldwin 		keylen /= 2;
461c0341432SJohn Baldwin 
462c0341432SJohn Baldwin 	switch (keylen * 8) {
4635f270659SKonstantin Belousov 	case 128:
4645f270659SKonstantin Belousov 		ses->rounds = AES128_ROUNDS;
4655f270659SKonstantin Belousov 		break;
4665f270659SKonstantin Belousov 	case 192:
4675f270659SKonstantin Belousov 		ses->rounds = AES192_ROUNDS;
4685f270659SKonstantin Belousov 		break;
4695f270659SKonstantin Belousov 	case 256:
4705f270659SKonstantin Belousov 		ses->rounds = AES256_ROUNDS;
4715f270659SKonstantin Belousov 		break;
4725f270659SKonstantin Belousov 	default:
473c0341432SJohn Baldwin 		panic("shouldn't happen");
474ac970319SPawel Jakub Dawidek 	}
4755f270659SKonstantin Belousov 
47630bd3bb0SPawel Jakub Dawidek 	aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
47708fca7a5SJohn-Mark Gurney 	if (decsched)
47808fca7a5SJohn-Mark Gurney 		aesni_set_deckey(ses->enc_schedule, ses->dec_schedule,
47908fca7a5SJohn-Mark Gurney 		    ses->rounds);
48008fca7a5SJohn-Mark Gurney 
481c0341432SJohn Baldwin 	if (csp->csp_cipher_alg == CRYPTO_AES_XTS)
482c0341432SJohn Baldwin 		aesni_set_enckey(key + keylen, ses->xts_schedule,
483ac970319SPawel Jakub Dawidek 		    ses->rounds);
48430bd3bb0SPawel Jakub Dawidek }
485