xref: /freebsd/sys/crypto/aesni/aesni_ccm.c (revision b4af4f93c682e445bf159f0d1ec90b636296c946)
1 /*-
2  * Copyright (c) 2014 The FreeBSD Foundation
3  * Copyright (c) 2018 iXsystems, Inc
4  * All rights reserved.
5  *
6  * This software was developed by John-Mark Gurney under
7  * the sponsorship of the FreeBSD Foundation and
8  * Rubicon Communications, LLC (Netgate).
9  * Redistribution and use in source and binary forms, with or without
10  * modification, are permitted provided that the following conditions
11  * are met:
12  * 1.  Redistributions of source code must retain the above copyright
13  *     notice, this list of conditions and the following disclaimer.
14  * 2.  Redistributions in binary form must reproduce the above copyright
15  *     notice, this list of conditions and the following disclaimer in the
16  *     documentation and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
19  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
22  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
23  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
24  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
25  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
26  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
27  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
28  * SUCH DAMAGE.
29  *
30  *
31  *	$FreeBSD$
32  *
33  * This file implements AES-CCM+CBC-MAC, as described
34  * at https://tools.ietf.org/html/rfc3610, using Intel's
35  * AES-NI instructions.
36  *
37  */
38 
39 #include <sys/types.h>
40 #include <sys/endian.h>
41 #include <sys/param.h>
42 
43 #include <sys/systm.h>
44 #include <crypto/aesni/aesni.h>
45 #include <crypto/aesni/aesni_os.h>
46 #include <crypto/aesni/aesencdec.h>
/*
 * Convenience wrapper around aesni_enc(): encrypts block d with key
 * schedule k, passing nr - 1 as the first argument.  Every macro
 * argument is parenthesized so that expression arguments expand safely.
 */
#define AESNI_ENC(d, k, nr)	aesni_enc((nr) - 1, (const __m128i *)(k), (d))
48 
49 #include <wmmintrin.h>
50 #include <emmintrin.h>
51 #include <smmintrin.h>
52 
53 /*
54  * Encrypt a single 128-bit block after
55  * doing an xor.  This is also used to
56  * decrypt (yay symmetric encryption).
57  */
58 static inline __m128i
59 xor_and_encrypt(__m128i a, __m128i b, const unsigned char *k, int nr)
60 {
61 	__m128i retval = _mm_xor_si128(a, b);
62 
63 	retval = AESNI_ENC(retval, k, nr);
64 	return (retval);
65 }
66 
/*
 * Store value big-endian at the tail of block.  Bytes are written
 * from the last byte of the block backwards, least significant byte
 * first, and writing stops once the index reaches
 * sizeof(*block) - offset; the byte at that index is left untouched.
 * Consequently at most offset - 1 bytes are written, and nothing is
 * written when offset is 0 or larger than the block.
 */
static void
append_int(size_t value, __m128i *block, size_t offset)
{
	uint8_t *bytes = (uint8_t *)block;
	const size_t floor = sizeof(*block) - offset;
	size_t pos;

	for (pos = sizeof(*block) - 1; pos > floor; pos--, value >>= 8)
		bytes[pos] = value & 0xff;
}
84 
85 /*
86  * Start the CBC-MAC process.  This handles the auth data.
87  */
88 static __m128i
89 cbc_mac_start(const unsigned char *auth_data, size_t auth_len,
90 	     const unsigned char *nonce, size_t nonce_len,
91 	     const unsigned char *key, int nr,
92 	     size_t data_len, size_t tag_len)
93 {
94 	__m128i cbc_block, staging_block;
95 	uint8_t *byte_ptr;
96 	/* This defines where the message length goes */
97 	int L = sizeof(__m128i) - 1 - nonce_len;
98 
99 	/*
100 	 * Set up B0 here.  This has the flags byte,
101 	 * followed by the nonce, followed by the
102 	 * length of the message.
103 	 */
104 	cbc_block = _mm_setzero_si128();
105 	byte_ptr = (uint8_t*)&cbc_block;
106 	byte_ptr[0] = ((auth_len > 0) ? 1 : 0) * 64 |
107 		(((tag_len - 2) / 2) * 8) |
108 		(L - 1);
109 	bcopy(nonce, byte_ptr + 1, nonce_len);
110 	append_int(data_len, &cbc_block, L+1);
111 	cbc_block = AESNI_ENC(cbc_block, key, nr);
112 
113 	if (auth_len != 0) {
114 		/*
115 		 * We need to start by appending the length descriptor.
116 		 */
117 		uint32_t auth_amt;
118 		size_t copy_amt;
119 		const uint8_t *auth_ptr = auth_data;
120 
121 		staging_block = _mm_setzero_si128();
122 
123 		/*
124 		 * The current OCF calling convention means that
125 		 * there can never be more than 4g of authentication
126 		 * data, so we don't handle the 0xffff case.
127 		 */
128 		KASSERT(auth_len < (1ULL << 32),
129 		    ("%s: auth_len (%zu) larger than 4GB",
130 			__FUNCTION__, auth_len));
131 
132 		if (auth_len < ((1 << 16) - (1 << 8))) {
133 			/*
134 			 * If the auth data length is less than
135 			 * 0xff00, we don't need to encode a length
136 			 * specifier, just the length of the auth
137 			 * data.
138 			 */
139 			be16enc(&staging_block, auth_len);
140 			auth_amt = 2;
141 		} else if (auth_len < (1ULL << 32)) {
142 			/*
143 			 * Two bytes for the length prefix, and then
144 			 * four bytes for the length.  This makes a total
145 			 * of 6 bytes to describe the auth data length.
146 			 */
147 			be16enc(&staging_block, 0xfffe);
148 			be32enc((char*)&staging_block + 2, auth_len);
149 			auth_amt = 6;
150 		} else
151 			panic("%s: auth len too large", __FUNCTION__);
152 
153 		/*
154 		 * Need to copy abytes into blocks.  The first block is
155 		 * already partially filled, by auth_amt, so we need
156 		 * to handle that.  The last block needs to be zero padded.
157 		 */
158 		copy_amt = MIN(auth_len,
159 		    sizeof(staging_block) - auth_amt);
160 		byte_ptr = (uint8_t*)&staging_block;
161 		bcopy(auth_ptr, &byte_ptr[auth_amt], copy_amt);
162 		auth_ptr += copy_amt;
163 
164 		cbc_block = xor_and_encrypt(cbc_block, staging_block, key, nr);
165 
166 		while (auth_ptr < auth_data + auth_len) {
167 			copy_amt = MIN((auth_data + auth_len) - auth_ptr,
168 			    sizeof(staging_block));
169 			if (copy_amt < sizeof(staging_block))
170 				bzero(&staging_block, sizeof(staging_block));
171 			bcopy(auth_ptr, &staging_block, copy_amt);
172 			cbc_block = xor_and_encrypt(cbc_block, staging_block,
173 			    key, nr);
174 			auth_ptr += copy_amt;
175 		}
176 	}
177 	return (cbc_block);
178 }
179 
180 /*
181  * Implement AES CCM+CBC-MAC encryption and authentication.
182  *
183  * A couple of notes:
184  * The specification allows for a different number of tag lengths;
185  * however, they're always truncated from 16 bytes, and the tag
186  * length isn't passed in.  (This could be fixed by changing the
187  * code in aesni.c:aesni_cipher_crypt().)
188  * Similarly, although the nonce length is passed in, the
189  * OpenCrypto API that calls us doesn't have a way to set the nonce
190  * other than by having different crypto algorithm types.  As a result,
191  * this is currently always called with nlen=12; this means that we
192  * also have a maximum message length of 16 megabytes.  And similarly,
193  * since abytes is limited to a 32 bit value here, the AAD is
194  * limited to 4 gigabytes or less.
195  */
196 void
197 AES_CCM_encrypt(const unsigned char *in, unsigned char *out,
198 		const unsigned char *addt, const unsigned char *nonce,
199 		unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen,
200 		const unsigned char *key, int nr)
201 {
202 	static const int tag_length = 16;	/* 128 bits */
203 	int L;
204 	int counter = 1;	/* S0 has 0, S1 has 1 */
205 	size_t copy_amt, total = 0;
206 	uint8_t *byte_ptr;
207 	__m128i s0, rolling_mac, s_x, staging_block;
208 
209 	if (nbytes == 0 && abytes == 0)
210 		return;
211 
212 	/* NIST 800-38c section A.1 says n is [7, 13]. */
213 	if (nlen < 7 || nlen > 13)
214 		panic("%s: bad nonce length %d", __FUNCTION__, nlen);
215 
216 	/*
217 	 * We need to know how many bytes to use to describe
218 	 * the length of the data.  Normally, nlen should be
219 	 * 12, which leaves us 3 bytes to do that -- 16mbytes of
220 	 * data to encrypt.  But it can be longer or shorter;
221 	 * this impacts the length of the message.
222 	 */
223 	L = sizeof(__m128i) - 1 - nlen;
224 
225 	/*
226 	 * Now, this shouldn't happen, but let's make sure that
227 	 * the data length isn't too big.
228 	 */
229 	KASSERT(nbytes <= ((1 << (8 * L)) - 1),
230 	    ("%s: nbytes is %u, but length field is %d bytes",
231 		__FUNCTION__, nbytes, L));
232 
233 	/*
234 	 * Clear out the blocks
235 	 */
236 	s0 = _mm_setzero_si128();
237 
238 	rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen,
239 	    key, nr, nbytes, tag_length);
240 
241 	/* s0 has flags, nonce, and then 0 */
242 	byte_ptr = (uint8_t*)&s0;
243 	byte_ptr[0] = L - 1;	/* but the flags byte only has L' */
244 	bcopy(nonce, &byte_ptr[1], nlen);
245 
246 	/*
247 	 * Now to cycle through the rest of the data.
248 	 */
249 	bcopy(&s0, &s_x, sizeof(s0));
250 
251 	while (total < nbytes) {
252 		/*
253 		 * Copy the plain-text data into staging_block.
254 		 * This may need to be zero-padded.
255 		 */
256 		copy_amt = MIN(nbytes - total, sizeof(staging_block));
257 		bcopy(in+total, &staging_block, copy_amt);
258 		if (copy_amt < sizeof(staging_block)) {
259 			byte_ptr = (uint8_t*)&staging_block;
260 			bzero(&byte_ptr[copy_amt],
261 			    sizeof(staging_block) - copy_amt);
262 		}
263 		rolling_mac = xor_and_encrypt(rolling_mac, staging_block,
264 		    key, nr);
265 		/* Put the counter into the s_x block */
266 		append_int(counter++, &s_x, L+1);
267 		/* Encrypt that */
268 		__m128i X = AESNI_ENC(s_x, key, nr);
269 		/* XOR the plain-text with the encrypted counter block */
270 		staging_block = _mm_xor_si128(staging_block, X);
271 		/* And copy it out */
272 		bcopy(&staging_block, out+total, copy_amt);
273 		total += copy_amt;
274 	}
275 	/*
276 	 * Allegedly done with it!  Except for the tag.
277 	 */
278 	s0 = AESNI_ENC(s0, key, nr);
279 	staging_block = _mm_xor_si128(s0, rolling_mac);
280 	bcopy(&staging_block, tag, tag_length);
281 	explicit_bzero(&s0, sizeof(s0));
282 	explicit_bzero(&staging_block, sizeof(staging_block));
283 	explicit_bzero(&s_x, sizeof(s_x));
284 	explicit_bzero(&rolling_mac, sizeof(rolling_mac));
285 }
286 
287 /*
288  * Implement AES CCM+CBC-MAC decryption and authentication.
289  * Returns 0 on failure, 1 on success.
290  *
291  * The primary difference here is that each encrypted block
292  * needs to be hashed&encrypted after it is decrypted (since
293  * the CBC-MAC is based on the plain text).  This means that
294  * we do the decryption twice -- first to verify the tag,
295  * and second to decrypt and copy it out.
296  *
297  * To avoid annoying code copying, we implement the main
298  * loop as a separate function.
299  *
300  * Call with out as NULL to not store the decrypted results;
301  * call with hashp as NULL to not run the authentication.
302  * Calling with neither as NULL does the decryption and
303  * authentication as a single pass (which is not allowed
304  * per the specification, really).
305  *
306  * If hashp is non-NULL, it points to the post-AAD computed
307  * checksum.
308  */
309 static void
310 decrypt_loop(const unsigned char *in, unsigned char *out, size_t nbytes,
311     __m128i s0, size_t nonce_length, __m128i *macp,
312     const unsigned char *key, int nr)
313 {
314 	size_t total = 0;
315 	__m128i s_x = s0, mac_block;
316 	int counter = 1;
317 	const size_t L = sizeof(__m128i) - 1 - nonce_length;
318 	__m128i pad_block, staging_block;
319 
320 	/*
321 	 * The starting mac (post AAD, if any).
322 	 */
323 	if (macp != NULL)
324 		mac_block = *macp;
325 
326 	while (total < nbytes) {
327 		size_t copy_amt = MIN(nbytes - total, sizeof(staging_block));
328 
329 		if (copy_amt < sizeof(staging_block)) {
330 			staging_block = _mm_setzero_si128();
331 		}
332 		bcopy(in+total, &staging_block, copy_amt);
333 
334 		/*
335 		 * staging_block has the current block of input data,
336 		 * zero-padded if necessary.  This is used in computing
337 		 * both the decrypted data, and the authentication tag.
338 		 */
339 		append_int(counter++, &s_x, L+1);
340 		/*
341 		 * The tag is computed based on the decrypted data.
342 		 */
343 		pad_block = AESNI_ENC(s_x, key, nr);
344 		if (copy_amt < sizeof(staging_block)) {
345 			/*
346 			 * Need to pad out pad_block with 0.
347 			 * (staging_block was set to 0's above.)
348 			 */
349 			uint8_t *end_of_buffer = (uint8_t*)&pad_block;
350 			bzero(end_of_buffer + copy_amt,
351 			    sizeof(pad_block) - copy_amt);
352 		}
353 		staging_block = _mm_xor_si128(staging_block, pad_block);
354 
355 		if (out)
356 			bcopy(&staging_block, out+total, copy_amt);
357 
358 		if (macp)
359 			mac_block = xor_and_encrypt(mac_block, staging_block,
360 			    key, nr);
361 		total += copy_amt;
362 	}
363 
364 	if (macp)
365 		*macp = mac_block;
366 
367 	explicit_bzero(&pad_block, sizeof(pad_block));
368 	explicit_bzero(&staging_block, sizeof(staging_block));
369 	explicit_bzero(&mac_block, sizeof(mac_block));
370 }
371 
372 /*
373  * The exposed decryption routine.  This is practically a
374  * copy of the encryption routine, except that the order
375  * in which the tag is created is changed.
376  * XXX combine the two functions at some point!
377  */
378 int
379 AES_CCM_decrypt(const unsigned char *in, unsigned char *out,
380 		const unsigned char *addt, const unsigned char *nonce,
381 		const unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen,
382 		const unsigned char *key, int nr)
383 {
384 	static const int tag_length = 16;	/* 128 bits */
385 	int L;
386 	__m128i s0, rolling_mac, staging_block;
387 	uint8_t *byte_ptr;
388 
389 	if (nbytes == 0 && abytes == 0)
390 		return (1);	// No message means no decryption!
391 	if (nlen < 0 || nlen > 15)
392 		panic("%s: bad nonce length %d", __FUNCTION__, nlen);
393 
394 	/*
395 	 * We need to know how many bytes to use to describe
396 	 * the length of the data.  Normally, nlen should be
397 	 * 12, which leaves us 3 bytes to do that -- 16mbytes of
398 	 * data to encrypt.  But it can be longer or shorter.
399 	 */
400 	L = sizeof(__m128i) - 1 - nlen;
401 
402 	/*
403 	 * Now, this shouldn't happen, but let's make sure that
404 	 * the data length isn't too big.
405 	 */
406 	if (nbytes > ((1 << (8 * L)) - 1))
407 		panic("%s: nbytes is %u, but length field is %d bytes",
408 		      __FUNCTION__, nbytes, L);
409 	/*
410 	 * Clear out the blocks
411 	 */
412 	s0 = _mm_setzero_si128();
413 
414 	rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen,
415 	    key, nr, nbytes, tag_length);
416 	/* s0 has flags, nonce, and then 0 */
417 	byte_ptr = (uint8_t*)&s0;
418 	byte_ptr[0] = L-1;	/* but the flags byte only has L' */
419 	bcopy(nonce, &byte_ptr[1], nlen);
420 
421 	/*
422 	 * Now to cycle through the rest of the data.
423 	 */
424 	decrypt_loop(in, NULL, nbytes, s0, nlen, &rolling_mac, key, nr);
425 
426 	/*
427 	 * Compare the tag.
428 	 */
429 	staging_block = _mm_xor_si128(AESNI_ENC(s0, key, nr), rolling_mac);
430 	if (timingsafe_bcmp(&staging_block, tag, tag_length) != 0) {
431 		return (0);
432 	}
433 
434 	/*
435 	 * Push out the decryption results this time.
436 	 */
437 	decrypt_loop(in, out, nbytes, s0, nlen, NULL, key, nr);
438 	return (1);
439 }
440