xref: /freebsd/sys/crypto/aesni/aesni_ccm.c (revision 95ee2897e98f5d444f26ed2334cc7c439f9c16c6)
/*-
 * Copyright (c) 2014-2021 The FreeBSD Foundation
 * Copyright (c) 2018 iXsystems, Inc
 * All rights reserved.
 *
 * Portions of this software were developed by John-Mark Gurney
 * under the sponsorship of the FreeBSD Foundation and
 * Rubicon Communications, LLC (Netgate).
 *
 * Portions of this software were developed by Ararat River
 * Consulting, LLC under sponsorship of the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1.  Redistributions of source code must retain the above copyright
 *     notice, this list of conditions and the following disclaimer.
 * 2.  Redistributions in binary form must reproduce the above copyright
 *     notice, this list of conditions and the following disclaimer in the
 *     documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 *
 * This file implements AES-CCM+CBC-MAC, as described
 * at https://tools.ietf.org/html/rfc3610, using Intel's
 * AES-NI instructions.
 *
 */

#include <sys/types.h>
#include <sys/endian.h>
#include <sys/param.h>

#include <sys/systm.h>
#include <crypto/aesni/aesni.h>
#include <crypto/aesni/aesni_os.h>
#include <crypto/aesni/aesencdec.h>
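/*
 * Thin wrapper around aesni_enc() from aesencdec.h: callers in this
 * file pass (data, key schedule, total round count), and the macro
 * adjusts for aesni_enc()'s round-count convention (hence the nr - 1).
 */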
#define AESNI_ENC(d, k, nr)	aesni_enc(nr-1, (const __m128i*)k, d)

#include <wmmintrin.h>
#include <emmintrin.h>
#include <smmintrin.h>

/*
 * Encrypt a single 128-bit block after
 * doing an xor.  This is also used to
 * decrypt (yay symmetric encryption).
 */
static inline __m128i
xor_and_encrypt(__m128i a, __m128i b, const unsigned char *k, int nr)
{
	__m128i retval = _mm_xor_si128(a, b);

	retval = AESNI_ENC(retval, k, nr);
	return (retval);
}

/*
 * Store value big-endian into the last (offset - 1) bytes of block,
 * working backwards from the final byte.  E.g., with offset == 4,
 * the last three bytes of *block are filled in.
 */
static void
append_int(size_t value, __m128i *block, size_t offset)
{
	int indx = sizeof(*block) - 1;
	uint8_t *bp = (uint8_t*)block;

	while (indx > (sizeof(*block) - offset)) {
		bp[indx] = value & 0xff;
		indx--;
		value >>= 8;
	}
}

/*
 * Start the CBC-MAC process: compute the running CBC-MAC over the
 * B_0 block and the (length-prefixed, zero-padded) auth data blocks,
 * and return the resulting MAC state.
 */
static __m128i
cbc_mac_start(const unsigned char *auth_data, size_t auth_len,
	     const unsigned char *nonce, size_t nonce_len,
	     const unsigned char *key, int nr,
	     size_t data_len, size_t tag_len)
{
	__m128i cbc_block, staging_block;
	uint8_t *byte_ptr;
	/* This defines where the message length goes */
	int L = sizeof(__m128i) - 1 - nonce_len;

	/*
	 * Set up B0 here.  This has the flags byte,
	 * followed by the nonce, followed by the
	 * length of the message.
	 */
	cbc_block = _mm_setzero_si128();
	byte_ptr = (uint8_t*)&cbc_block;
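	/*
	 * The B_0 flags byte, per RFC 3610 section 2.2:
	 * bit 6 is the Adata flag (set when there is auth data),
	 * bits 5..3 hold M' = (tag_len - 2) / 2, and
	 * bits 2..0 hold L' = L - 1.
	 */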
	byte_ptr[0] = ((auth_len > 0) ? 1 : 0) * 64 |
		(((tag_len - 2) / 2) * 8) |
		(L - 1);
	bcopy(nonce, byte_ptr + 1, nonce_len);
	append_int(data_len, &cbc_block, L+1);
	cbc_block = AESNI_ENC(cbc_block, key, nr);

	if (auth_len != 0) {
		/*
		 * We need to start by appending the length descriptor.
		 */
		uint32_t auth_amt;
		size_t copy_amt;
		const uint8_t *auth_ptr = auth_data;

		staging_block = _mm_setzero_si128();

		/*
		 * The current OCF calling convention means that
		 * there can never be more than 4g of authentication
		 * data, so we don't handle the 0xffff case.
		 */
		KASSERT(auth_len < (1ULL << 32),
		    ("%s: auth_len (%zu) larger than 4GB",
			__FUNCTION__, auth_len));

		if (auth_len < ((1 << 16) - (1 << 8))) {
			/*
			 * If the auth data length is less than
			 * 0xff00, we don't need to encode a length
			 * specifier, just the length of the auth
			 * data.
			 */
			be16enc(&staging_block, auth_len);
			auth_amt = 2;
		} else if (auth_len < (1ULL << 32)) {
			/*
			 * Two bytes for the length prefix, and then
			 * four bytes for the length.  This makes a total
			 * of 6 bytes to describe the auth data length.
			 */
			be16enc(&staging_block, 0xfffe);
			be32enc((char*)&staging_block + 2, auth_len);
			auth_amt = 6;
		} else
			panic("%s: auth len too large", __FUNCTION__);
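		/*
		 * For example, auth_len 0x20 is encoded as the two
		 * bytes 00 20, while auth_len 0x12345 is encoded as
		 * the six bytes ff fe 00 01 23 45.
		 */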

		/*
		 * Now copy the auth data itself into blocks.  The first
		 * block is already partially filled with the auth_amt
		 * bytes of length encoding, so account for that; the
		 * last block must be zero padded.
		 */
		copy_amt = MIN(auth_len,
		    sizeof(staging_block) - auth_amt);
		byte_ptr = (uint8_t*)&staging_block;
		bcopy(auth_ptr, &byte_ptr[auth_amt], copy_amt);
		auth_ptr += copy_amt;

		cbc_block = xor_and_encrypt(cbc_block, staging_block, key, nr);

		while (auth_ptr < auth_data + auth_len) {
			copy_amt = MIN((auth_data + auth_len) - auth_ptr,
			    sizeof(staging_block));
			if (copy_amt < sizeof(staging_block))
				bzero(&staging_block, sizeof(staging_block));
			bcopy(auth_ptr, &staging_block, copy_amt);
			cbc_block = xor_and_encrypt(cbc_block, staging_block,
			    key, nr);
			auth_ptr += copy_amt;
		}
	}
	return (cbc_block);
}

/*
 * Implement AES CCM+CBC-MAC encryption and authentication.
 *
 * A couple of notes:
 * Since abytes is limited to a 32 bit value here, the AAD is
 * limited to 4 gigabytes or less.
 */
void
AES_CCM_encrypt(const unsigned char *in, unsigned char *out,
		const unsigned char *addt, const unsigned char *nonce,
		unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen,
		int tag_length, const unsigned char *key, int nr)
{
	int L;
	int counter = 1;	/* S0 has 0, S1 has 1 */
	size_t copy_amt, total = 0;
	uint8_t *byte_ptr;
	__m128i s0, rolling_mac, s_x, staging_block;

	/* NIST 800-38c section A.1 says n is [7, 13]. */
	if (nlen < 7 || nlen > 13)
		panic("%s: bad nonce length %d", __FUNCTION__, nlen);

	/*
	 * We need to know how many bytes to use to describe
	 * the length of the data.  Normally, nlen should be 12,
	 * which leaves 3 bytes for that -- up to 16 MB of data
	 * to encrypt.  But nlen can be longer or shorter, which
	 * changes the maximum message length accordingly.
	 */
	L = sizeof(__m128i) - 1 - nlen;
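	/* E.g. nlen == 12 gives L == 3, allowing nbytes up to 2^24 - 1. */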

	/*
	 * Clear out the blocks
	 */
	s0 = _mm_setzero_si128();

	rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen,
	    key, nr, nbytes, tag_length);

	/* s0 has flags, nonce, and then 0 */
	byte_ptr = (uint8_t*)&s0;
	byte_ptr[0] = L - 1;	/* but the flags byte only has L' */
	bcopy(nonce, &byte_ptr[1], nlen);

	/*
	 * Now to cycle through the rest of the data.
	 */
	bcopy(&s0, &s_x, sizeof(s0));

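	/*
	 * Counter mode: for block i the keystream block is
	 * S_i = E(key, s_x with counter i in its last L bytes); the
	 * plain text is XORed with S_i, while the CBC-MAC is computed
	 * over the plain text.  S_0 (counter 0) is reserved for
	 * encrypting the tag below.
	 */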
	while (total < nbytes) {
		/*
		 * Copy the plain-text data into staging_block.
		 * This may need to be zero-padded.
		 */
		copy_amt = MIN(nbytes - total, sizeof(staging_block));
		bcopy(in+total, &staging_block, copy_amt);
		if (copy_amt < sizeof(staging_block)) {
			byte_ptr = (uint8_t*)&staging_block;
			bzero(&byte_ptr[copy_amt],
			    sizeof(staging_block) - copy_amt);
		}
		rolling_mac = xor_and_encrypt(rolling_mac, staging_block,
		    key, nr);
		/* Put the counter into the s_x block */
		append_int(counter++, &s_x, L+1);
		/* Encrypt that */
		__m128i X = AESNI_ENC(s_x, key, nr);
		/* XOR the plain-text with the encrypted counter block */
		staging_block = _mm_xor_si128(staging_block, X);
		/* And copy it out */
		bcopy(&staging_block, out+total, copy_amt);
		total += copy_amt;
	}
	/*
	 * Allegedly done with it!  Except for the tag: encrypt the
	 * S_0 counter block, XOR it with the CBC-MAC, and copy out
	 * the first tag_length bytes.
	 */
	s0 = AESNI_ENC(s0, key, nr);
	staging_block = _mm_xor_si128(s0, rolling_mac);
	bcopy(&staging_block, tag, tag_length);
	explicit_bzero(&s0, sizeof(s0));
	explicit_bzero(&staging_block, sizeof(staging_block));
	explicit_bzero(&s_x, sizeof(s_x));
	explicit_bzero(&rolling_mac, sizeof(rolling_mac));
}

/*
 * Implement AES CCM+CBC-MAC decryption and authentication.
 * (The exposed routine, AES_CCM_decrypt below, returns 0 on
 * failure and 1 on success.)
 *
 * The primary difference here is that each encrypted block
 * needs to be hashed & encrypted after it is decrypted (since
 * the CBC-MAC is based on the plain text).  This means that
 * we do the decryption twice -- first to verify the tag,
 * and second to decrypt and copy it out.
 *
 * To avoid annoying code copying, we implement the main
 * loop as a separate function.
 *
 * Call with out as NULL to not store the decrypted results;
 * call with macp as NULL to not run the authentication.
 * Calling with neither as NULL does the decryption and
 * authentication as a single pass (which is not allowed
 * per the specification, really).
 *
 * If macp is non-NULL, it points to the post-AAD computed
 * checksum.
 */
static void
decrypt_loop(const unsigned char *in, unsigned char *out, size_t nbytes,
    __m128i s0, size_t nonce_length, __m128i *macp,
    const unsigned char *key, int nr)
{
	size_t total = 0;
	__m128i s_x = s0, mac_block;
	int counter = 1;
	const size_t L = sizeof(__m128i) - 1 - nonce_length;
	__m128i pad_block, staging_block;

	/*
	 * The starting mac (post AAD, if any).
	 */
	if (macp != NULL)
		mac_block = *macp;

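	/*
	 * Same counter-mode loop as encryption: each block of input is
	 * XORed with the encrypted counter block to recover the plain
	 * text, and (when macp is set) the plain text is folded into
	 * the running CBC-MAC.
	 */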
	while (total < nbytes) {
		size_t copy_amt = MIN(nbytes - total, sizeof(staging_block));

		if (copy_amt < sizeof(staging_block)) {
			staging_block = _mm_setzero_si128();
		}
		bcopy(in+total, &staging_block, copy_amt);

		/*
		 * staging_block has the current block of input data,
		 * zero-padded if necessary.  This is used in computing
		 * both the decrypted data, and the authentication tag.
		 */
		append_int(counter++, &s_x, L+1);
		/*
		 * Generate the keystream block for this counter; the
		 * tag itself is computed over the decrypted data below.
		 */
		pad_block = AESNI_ENC(s_x, key, nr);
		if (copy_amt < sizeof(staging_block)) {
			/*
			 * Need to pad out pad_block with 0.
			 * (staging_block was set to 0's above.)
			 */
			uint8_t *end_of_buffer = (uint8_t*)&pad_block;
			bzero(end_of_buffer + copy_amt,
			    sizeof(pad_block) - copy_amt);
		}
		staging_block = _mm_xor_si128(staging_block, pad_block);

		if (out)
			bcopy(&staging_block, out+total, copy_amt);

		if (macp)
			mac_block = xor_and_encrypt(mac_block, staging_block,
			    key, nr);
		total += copy_amt;
	}

	if (macp)
		*macp = mac_block;

	explicit_bzero(&pad_block, sizeof(pad_block));
	explicit_bzero(&staging_block, sizeof(staging_block));
	explicit_bzero(&mac_block, sizeof(mac_block));
}

/*
 * The exposed decryption routine.  This is practically a
 * copy of the encryption routine, except that the order
 * in which the tag is created is changed.
 * XXX combine the two functions at some point!
 */
int
AES_CCM_decrypt(const unsigned char *in, unsigned char *out,
		const unsigned char *addt, const unsigned char *nonce,
		const unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen,
		int tag_length, const unsigned char *key, int nr)
{
	int L;
	__m128i s0, rolling_mac, staging_block;
	uint8_t *byte_ptr;

	if (nlen < 0 || nlen > 15)
		panic("%s: bad nonce length %d", __FUNCTION__, nlen);

	/*
	 * We need to know how many bytes to use to describe
	 * the length of the data.  Normally, nlen should be 12,
	 * which leaves 3 bytes for that -- up to 16 MB of data.
	 * But nlen can be longer or shorter.
	 */
	L = sizeof(__m128i) - 1 - nlen;

	/*
	 * Clear out the blocks
	 */
	s0 = _mm_setzero_si128();

	rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen,
	    key, nr, nbytes, tag_length);
	/* s0 has flags, nonce, and then 0 */
	byte_ptr = (uint8_t*)&s0;
	byte_ptr[0] = L-1;	/* but the flags byte only has L' */
	bcopy(nonce, &byte_ptr[1], nlen);

	/*
	 * Now to cycle through the rest of the data.
	 */
	decrypt_loop(in, NULL, nbytes, s0, nlen, &rolling_mac, key, nr);

	/*
	 * Compute the expected tag and compare it (in constant time)
	 * against the caller's; bail out before producing any plain
	 * text if they do not match.
	 */
	staging_block = _mm_xor_si128(AESNI_ENC(s0, key, nr), rolling_mac);
	if (timingsafe_bcmp(&staging_block, tag, tag_length) != 0) {
		return (0);
	}

	/*
	 * Push out the decryption results this time.
	 */
	decrypt_loop(in, out, nbytes, s0, nlen, NULL, key, nr);
	return (1);
}
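
/*
 * Illustrative sketch only (hypothetical caller, not part of this
 * file): with an expanded encryption key schedule and its round
 * count in hand, the routines above might be driven roughly like
 * this -- the variable names are made up, and the real callers
 * live in the aesni(4) driver glue:
 *
 *	uint8_t computed_tag[16];
 *
 *	AES_CCM_encrypt(plaintext, ciphertext, aad, nonce, computed_tag,
 *	    payload_len, aad_len, 12, sizeof(computed_tag),
 *	    key_schedule, rounds);
 *
 *	if (AES_CCM_decrypt(ciphertext, plaintext, aad, nonce,
 *	    received_tag, payload_len, aad_len, 12, 16,
 *	    key_schedule, rounds) == 0)
 *		handle_auth_failure();
 *
 * handle_auth_failure() is likewise hypothetical; the point is that
 * a zero return means the tag did not verify and no plain text
 * should be used.
 */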