xref: /freebsd/sys/crypto/armv8/armv8_crypto_wrap.c (revision c66ec88fed842fbaad62c30d510644ceb7bd2d71)
/*-
 * Copyright (c) 2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Andrew Turner under
 * sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This code is built with floating-point enabled. The caller must enter a
 * floating-point context before calling any of these functions.
 */

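/*
 * Illustrative sketch only (not part of the original file): a caller is
 * expected to bracket these routines with the kernel FP context API,
 * roughly along these lines (exact flags and context handling may differ):
 *
 *	fpu_kern_enter(curthread, fpu_ctx, FPU_KERN_NORMAL);
 *	armv8_aes_encrypt_cbc(rounds, key_schedule, len, from, to, iv);
 *	fpu_kern_leave(curthread, fpu_ctx);
 */
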
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/queue.h>

#include <opencrypto/cryptodev.h>
#include <crypto/armv8/armv8_crypto.h>

#include <arm_neon.h>

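/*
 * Run the AES encryption rounds on a single 16-byte block using the
 * AESE/AESMC instructions.  Callers pass the AES round count minus one;
 * keysched holds the expanded encryption round keys.
 */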
static uint8x16_t
armv8_aes_enc(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t tmp;
	int i;

	tmp = from;
	for (i = 0; i < rounds - 1; i += 2) {
		tmp = vaeseq_u8(tmp, keysched[i]);
		tmp = vaesmcq_u8(tmp);
		tmp = vaeseq_u8(tmp, keysched[i + 1]);
		tmp = vaesmcq_u8(tmp);
	}

	tmp = vaeseq_u8(tmp, keysched[rounds - 1]);
	tmp = vaesmcq_u8(tmp);
	tmp = vaeseq_u8(tmp, keysched[rounds]);
	tmp = veorq_u8(tmp, keysched[rounds + 1]);

	return (tmp);
}

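/*
 * Inverse of armv8_aes_enc: run the AES decryption rounds on one block
 * using the AESD/AESIMC instructions and the decryption key schedule
 * prepared by the caller.
 */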
static uint8x16_t
armv8_aes_dec(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t tmp;
	int i;

	tmp = from;
	for (i = 0; i < rounds - 1; i += 2) {
		tmp = vaesdq_u8(tmp, keysched[i]);
		tmp = vaesimcq_u8(tmp);
		tmp = vaesdq_u8(tmp, keysched[i+1]);
		tmp = vaesimcq_u8(tmp);
	}

	tmp = vaesdq_u8(tmp, keysched[rounds - 1]);
	tmp = vaesimcq_u8(tmp);
	tmp = vaesdq_u8(tmp, keysched[rounds]);
	tmp = veorq_u8(tmp, keysched[rounds + 1]);

	return (tmp);
}

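/*
 * CBC encryption: XOR each plaintext block with the previous ciphertext
 * block (the IV for the first block) before encrypting it.  len must be a
 * multiple of AES_BLOCK_LEN.
 */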
void
armv8_aes_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
{
	uint8x16_t tot, ivreg, tmp;
	size_t i;

	len /= AES_BLOCK_LEN;
	ivreg = vld1q_u8(iv);
	for (i = 0; i < len; i++) {
		tmp = vld1q_u8(from);
		tot = armv8_aes_enc(rounds - 1, key_schedule,
		    veorq_u8(tmp, ivreg));
		ivreg = tot;
		vst1q_u8(to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

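/*
 * CBC decryption, in place: decrypt each block and XOR it with the
 * previous ciphertext block (the IV for the first block), keeping the
 * saved ciphertext as the next chaining value.
 */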
void
armv8_aes_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
    uint8_t *buf, const uint8_t iv[static AES_BLOCK_LEN])
{
	uint8x16_t ivreg, nextiv, tmp;
	size_t i;

	len /= AES_BLOCK_LEN;
	ivreg = vld1q_u8(iv);
	for (i = 0; i < len; i++) {
		nextiv = vld1q_u8(buf);
		tmp = armv8_aes_dec(rounds - 1, key_schedule, nextiv);
		vst1q_u8(buf, veorq_u8(tmp, ivreg));
		ivreg = nextiv;
		buf += AES_BLOCK_LEN;
	}
}

#define	AES_XTS_BLOCKSIZE	16
#define	AES_XTS_IVSIZE		8
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */

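/*
 * Advance the XTS tweak: multiply the 128-bit tweak by x (alpha) in
 * GF(2^128).  Each 32-bit lane is shifted left by one bit, the carry out
 * of each lane is propagated into the next, and the carry out of the top
 * bit is folded back into the low byte with the 0x87 reduction polynomial.
 */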
static inline int32x4_t
xts_crank_lfsr(int32x4_t inp)
{
	const int32x4_t alphamask = {AES_XTS_ALPHA, 1, 1, 1};
	int32x4_t xtweak, ret;

	/* set up xor mask */
	xtweak = vextq_s32(inp, inp, 3);
	xtweak = vshrq_n_s32(xtweak, 31);
	xtweak &= alphamask;

	/* next term */
	ret = vshlq_n_s32(inp, 1);
	ret ^= xtweak;

	return (ret);
}

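/*
 * Process one XTS block: XOR the input with the current tweak, run AES in
 * the requested direction, XOR with the tweak again, then advance the
 * tweak for the next block.
 */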
static void
armv8_aes_crypt_xts_block(int rounds, const uint8x16_t *key_schedule,
    uint8x16_t *tweak, const uint8_t *from, uint8_t *to, int do_encrypt)
{
	uint8x16_t block;

	block = vld1q_u8(from) ^ *tweak;

	if (do_encrypt)
		block = armv8_aes_enc(rounds - 1, key_schedule, block);
	else
		block = armv8_aes_dec(rounds - 1, key_schedule, block);

	vst1q_u8(to, block ^ *tweak);

	*tweak = vreinterpretq_u8_s32(xts_crank_lfsr(vreinterpretq_s32_u8(*tweak)));
}

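/*
 * Shared XTS-AES path: derive the initial tweak by encrypting the 64-bit
 * little-endian sector number (zero-padded to a full block) under the
 * tweak key, then process the data block by block.  len must be a
 * multiple of AES_XTS_BLOCKSIZE; partial blocks (ciphertext stealing)
 * are not handled here.
 */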
static void
armv8_aes_crypt_xts(int rounds, const uint8x16_t *data_schedule,
    const uint8x16_t *tweak_schedule, size_t len, const uint8_t *from,
    uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
{
	uint8x16_t tweakreg;
	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
	size_t i, cnt;

	/*
	 * Prepare tweak as E_k2(IV). IV is specified as LE representation
	 * of a 64-bit block number which we allow to be passed in directly.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	bcopy(iv, tweak, AES_XTS_IVSIZE);
	/* Last 64 bits of IV are always zero. */
	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
	tweakreg = vld1q_u8(tweak);
	tweakreg = armv8_aes_enc(rounds - 1, tweak_schedule, tweakreg);

	cnt = len / AES_XTS_BLOCKSIZE;
	for (i = 0; i < cnt; i++) {
		armv8_aes_crypt_xts_block(rounds, data_schedule, &tweakreg,
		    from, to, do_encrypt);
		from += AES_XTS_BLOCKSIZE;
		to += AES_XTS_BLOCKSIZE;
	}
}

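/*
 * Exported XTS entry points; these simply select the encrypt or decrypt
 * direction for the shared implementation above.
 */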
void
armv8_aes_encrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[static AES_BLOCK_LEN])
{

	armv8_aes_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
	    iv, 1);
}

void
armv8_aes_decrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[static AES_BLOCK_LEN])
{

	armv8_aes_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
	    iv, 0);
}