xref: /freebsd/sys/crypto/aesni/aesni_ccm.c (revision b4a58fbf640409a1e507d9f7b411c83a3f83a2f3)
1 /*-
2  * Copyright (c) 2014-2021 The FreeBSD Foundation
3  * Copyright (c) 2018 iXsystems, Inc
4  * All rights reserved.
5  *
6  * Portions of this software were developed by John-Mark Gurney
7  * under the sponsorship of the FreeBSD Foundation and
8  * Rubicon Communications, LLC (Netgate).
9  *
10  * Portions of this software were developed by Ararat River
11  * Consulting, LLC under sponsorship of the FreeBSD Foundation.
12  *
13  * Redistribution and use in source and binary forms, with or without
14  * modification, are permitted provided that the following conditions
15  * are met:
16  * 1.  Redistributions of source code must retain the above copyright
17  *     notice, this list of conditions and the following disclaimer.
18  * 2.  Redistributions in binary form must reproduce the above copyright
19  *     notice, this list of conditions and the following disclaimer in the
20  *     documentation and/or other materials provided with the distribution.
21  *
22  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
23  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
24  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
25  * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
26  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
27  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
28  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
29  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
30  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
31  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
32  * SUCH DAMAGE.
33  *
34  *
35  *	$FreeBSD$
36  *
37  * This file implements AES-CCM+CBC-MAC, as described
38  * at https://tools.ietf.org/html/rfc3610, using Intel's
39  * AES-NI instructions.
40  *
41  */
42 
43 #include <sys/types.h>
44 #include <sys/endian.h>
45 #include <sys/param.h>
46 
47 #include <sys/systm.h>
48 #include <crypto/aesni/aesni.h>
49 #include <crypto/aesni/aesni_os.h>
50 #include <crypto/aesni/aesencdec.h>
/*
 * Encrypt block d via aesni_enc(), handing it the key pointer k and
 * nr - 1.  Every argument is parenthesized so that expression
 * arguments (e.g. a conditional for nr) expand as intended.
 */
#define AESNI_ENC(d, k, nr)	aesni_enc((nr) - 1, (const __m128i *)(k), (d))
52 
53 #include <wmmintrin.h>
54 #include <emmintrin.h>
55 #include <smmintrin.h>
56 
57 /*
58  * Encrypt a single 128-bit block after
59  * doing an xor.  This is also used to
60  * decrypt (yay symmetric encryption).
61  */
62 static inline __m128i
63 xor_and_encrypt(__m128i a, __m128i b, const unsigned char *k, int nr)
64 {
65 	__m128i retval = _mm_xor_si128(a, b);
66 
67 	retval = AESNI_ENC(retval, k, nr);
68 	return (retval);
69 }
70 
71 /*
72  * Put value at the end of block, starting at offset.
73  * (This goes backwards, putting bytes in *until* it
74  * reaches offset.)
75  */
76 static void
77 append_int(size_t value, __m128i *block, size_t offset)
78 {
79 	int indx = sizeof(*block) - 1;
80 	uint8_t *bp = (uint8_t*)block;
81 
82 	while (indx > (sizeof(*block) - offset)) {
83 		bp[indx] = value & 0xff;
84 		indx--;
85 		value >>= 8;
86 	}
87 }
88 
89 /*
90  * Start the CBC-MAC process.  This handles the auth data.
91  */
92 static __m128i
93 cbc_mac_start(const unsigned char *auth_data, size_t auth_len,
94 	     const unsigned char *nonce, size_t nonce_len,
95 	     const unsigned char *key, int nr,
96 	     size_t data_len, size_t tag_len)
97 {
98 	__m128i cbc_block, staging_block;
99 	uint8_t *byte_ptr;
100 	/* This defines where the message length goes */
101 	int L = sizeof(__m128i) - 1 - nonce_len;
102 
103 	/*
104 	 * Set up B0 here.  This has the flags byte,
105 	 * followed by the nonce, followed by the
106 	 * length of the message.
107 	 */
108 	cbc_block = _mm_setzero_si128();
109 	byte_ptr = (uint8_t*)&cbc_block;
110 	byte_ptr[0] = ((auth_len > 0) ? 1 : 0) * 64 |
111 		(((tag_len - 2) / 2) * 8) |
112 		(L - 1);
113 	bcopy(nonce, byte_ptr + 1, nonce_len);
114 	append_int(data_len, &cbc_block, L+1);
115 	cbc_block = AESNI_ENC(cbc_block, key, nr);
116 
117 	if (auth_len != 0) {
118 		/*
119 		 * We need to start by appending the length descriptor.
120 		 */
121 		uint32_t auth_amt;
122 		size_t copy_amt;
123 		const uint8_t *auth_ptr = auth_data;
124 
125 		staging_block = _mm_setzero_si128();
126 
127 		/*
128 		 * The current OCF calling convention means that
129 		 * there can never be more than 4g of authentication
130 		 * data, so we don't handle the 0xffff case.
131 		 */
132 		KASSERT(auth_len < (1ULL << 32),
133 		    ("%s: auth_len (%zu) larger than 4GB",
134 			__FUNCTION__, auth_len));
135 
136 		if (auth_len < ((1 << 16) - (1 << 8))) {
137 			/*
138 			 * If the auth data length is less than
139 			 * 0xff00, we don't need to encode a length
140 			 * specifier, just the length of the auth
141 			 * data.
142 			 */
143 			be16enc(&staging_block, auth_len);
144 			auth_amt = 2;
145 		} else if (auth_len < (1ULL << 32)) {
146 			/*
147 			 * Two bytes for the length prefix, and then
148 			 * four bytes for the length.  This makes a total
149 			 * of 6 bytes to describe the auth data length.
150 			 */
151 			be16enc(&staging_block, 0xfffe);
152 			be32enc((char*)&staging_block + 2, auth_len);
153 			auth_amt = 6;
154 		} else
155 			panic("%s: auth len too large", __FUNCTION__);
156 
157 		/*
158 		 * Need to copy abytes into blocks.  The first block is
159 		 * already partially filled, by auth_amt, so we need
160 		 * to handle that.  The last block needs to be zero padded.
161 		 */
162 		copy_amt = MIN(auth_len,
163 		    sizeof(staging_block) - auth_amt);
164 		byte_ptr = (uint8_t*)&staging_block;
165 		bcopy(auth_ptr, &byte_ptr[auth_amt], copy_amt);
166 		auth_ptr += copy_amt;
167 
168 		cbc_block = xor_and_encrypt(cbc_block, staging_block, key, nr);
169 
170 		while (auth_ptr < auth_data + auth_len) {
171 			copy_amt = MIN((auth_data + auth_len) - auth_ptr,
172 			    sizeof(staging_block));
173 			if (copy_amt < sizeof(staging_block))
174 				bzero(&staging_block, sizeof(staging_block));
175 			bcopy(auth_ptr, &staging_block, copy_amt);
176 			cbc_block = xor_and_encrypt(cbc_block, staging_block,
177 			    key, nr);
178 			auth_ptr += copy_amt;
179 		}
180 	}
181 	return (cbc_block);
182 }
183 
184 /*
185  * Implement AES CCM+CBC-MAC encryption and authentication.
186  *
187  * A couple of notes:
188  * Since abytes is limited to a 32 bit value here, the AAD is
189  * limited to 4 gigabytes or less.
190  */
191 void
192 AES_CCM_encrypt(const unsigned char *in, unsigned char *out,
193 		const unsigned char *addt, const unsigned char *nonce,
194 		unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen,
195 		int tag_length, const unsigned char *key, int nr)
196 {
197 	int L;
198 	int counter = 1;	/* S0 has 0, S1 has 1 */
199 	size_t copy_amt, total = 0;
200 	uint8_t *byte_ptr;
201 	__m128i s0, rolling_mac, s_x, staging_block;
202 
203 	/* NIST 800-38c section A.1 says n is [7, 13]. */
204 	if (nlen < 7 || nlen > 13)
205 		panic("%s: bad nonce length %d", __FUNCTION__, nlen);
206 
207 	/*
208 	 * We need to know how many bytes to use to describe
209 	 * the length of the data.  Normally, nlen should be
210 	 * 12, which leaves us 3 bytes to do that -- 16mbytes of
211 	 * data to encrypt.  But it can be longer or shorter;
212 	 * this impacts the length of the message.
213 	 */
214 	L = sizeof(__m128i) - 1 - nlen;
215 
216 	/*
217 	 * Clear out the blocks
218 	 */
219 	s0 = _mm_setzero_si128();
220 
221 	rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen,
222 	    key, nr, nbytes, tag_length);
223 
224 	/* s0 has flags, nonce, and then 0 */
225 	byte_ptr = (uint8_t*)&s0;
226 	byte_ptr[0] = L - 1;	/* but the flags byte only has L' */
227 	bcopy(nonce, &byte_ptr[1], nlen);
228 
229 	/*
230 	 * Now to cycle through the rest of the data.
231 	 */
232 	bcopy(&s0, &s_x, sizeof(s0));
233 
234 	while (total < nbytes) {
235 		/*
236 		 * Copy the plain-text data into staging_block.
237 		 * This may need to be zero-padded.
238 		 */
239 		copy_amt = MIN(nbytes - total, sizeof(staging_block));
240 		bcopy(in+total, &staging_block, copy_amt);
241 		if (copy_amt < sizeof(staging_block)) {
242 			byte_ptr = (uint8_t*)&staging_block;
243 			bzero(&byte_ptr[copy_amt],
244 			    sizeof(staging_block) - copy_amt);
245 		}
246 		rolling_mac = xor_and_encrypt(rolling_mac, staging_block,
247 		    key, nr);
248 		/* Put the counter into the s_x block */
249 		append_int(counter++, &s_x, L+1);
250 		/* Encrypt that */
251 		__m128i X = AESNI_ENC(s_x, key, nr);
252 		/* XOR the plain-text with the encrypted counter block */
253 		staging_block = _mm_xor_si128(staging_block, X);
254 		/* And copy it out */
255 		bcopy(&staging_block, out+total, copy_amt);
256 		total += copy_amt;
257 	}
258 	/*
259 	 * Allegedly done with it!  Except for the tag.
260 	 */
261 	s0 = AESNI_ENC(s0, key, nr);
262 	staging_block = _mm_xor_si128(s0, rolling_mac);
263 	bcopy(&staging_block, tag, tag_length);
264 	explicit_bzero(&s0, sizeof(s0));
265 	explicit_bzero(&staging_block, sizeof(staging_block));
266 	explicit_bzero(&s_x, sizeof(s_x));
267 	explicit_bzero(&rolling_mac, sizeof(rolling_mac));
268 }
269 
270 /*
271  * Implement AES CCM+CBC-MAC decryption and authentication.
272  * Returns 0 on failure, 1 on success.
273  *
274  * The primary difference here is that each encrypted block
275  * needs to be hashed&encrypted after it is decrypted (since
276  * the CBC-MAC is based on the plain text).  This means that
277  * we do the decryption twice -- first to verify the tag,
278  * and second to decrypt and copy it out.
279  *
280  * To avoid annoying code copying, we implement the main
281  * loop as a separate function.
282  *
283  * Call with out as NULL to not store the decrypted results;
284  * call with hashp as NULL to not run the authentication.
285  * Calling with neither as NULL does the decryption and
286  * authentication as a single pass (which is not allowed
287  * per the specification, really).
288  *
289  * If hashp is non-NULL, it points to the post-AAD computed
290  * checksum.
291  */
292 static void
293 decrypt_loop(const unsigned char *in, unsigned char *out, size_t nbytes,
294     __m128i s0, size_t nonce_length, __m128i *macp,
295     const unsigned char *key, int nr)
296 {
297 	size_t total = 0;
298 	__m128i s_x = s0, mac_block;
299 	int counter = 1;
300 	const size_t L = sizeof(__m128i) - 1 - nonce_length;
301 	__m128i pad_block, staging_block;
302 
303 	/*
304 	 * The starting mac (post AAD, if any).
305 	 */
306 	if (macp != NULL)
307 		mac_block = *macp;
308 
309 	while (total < nbytes) {
310 		size_t copy_amt = MIN(nbytes - total, sizeof(staging_block));
311 
312 		if (copy_amt < sizeof(staging_block)) {
313 			staging_block = _mm_setzero_si128();
314 		}
315 		bcopy(in+total, &staging_block, copy_amt);
316 
317 		/*
318 		 * staging_block has the current block of input data,
319 		 * zero-padded if necessary.  This is used in computing
320 		 * both the decrypted data, and the authentication tag.
321 		 */
322 		append_int(counter++, &s_x, L+1);
323 		/*
324 		 * The tag is computed based on the decrypted data.
325 		 */
326 		pad_block = AESNI_ENC(s_x, key, nr);
327 		if (copy_amt < sizeof(staging_block)) {
328 			/*
329 			 * Need to pad out pad_block with 0.
330 			 * (staging_block was set to 0's above.)
331 			 */
332 			uint8_t *end_of_buffer = (uint8_t*)&pad_block;
333 			bzero(end_of_buffer + copy_amt,
334 			    sizeof(pad_block) - copy_amt);
335 		}
336 		staging_block = _mm_xor_si128(staging_block, pad_block);
337 
338 		if (out)
339 			bcopy(&staging_block, out+total, copy_amt);
340 
341 		if (macp)
342 			mac_block = xor_and_encrypt(mac_block, staging_block,
343 			    key, nr);
344 		total += copy_amt;
345 	}
346 
347 	if (macp)
348 		*macp = mac_block;
349 
350 	explicit_bzero(&pad_block, sizeof(pad_block));
351 	explicit_bzero(&staging_block, sizeof(staging_block));
352 	explicit_bzero(&mac_block, sizeof(mac_block));
353 }
354 
355 /*
356  * The exposed decryption routine.  This is practically a
357  * copy of the encryption routine, except that the order
358  * in which the tag is created is changed.
359  * XXX combine the two functions at some point!
360  */
361 int
362 AES_CCM_decrypt(const unsigned char *in, unsigned char *out,
363 		const unsigned char *addt, const unsigned char *nonce,
364 		const unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen,
365 		int tag_length, const unsigned char *key, int nr)
366 {
367 	int L;
368 	__m128i s0, rolling_mac, staging_block;
369 	uint8_t *byte_ptr;
370 
371 	if (nlen < 0 || nlen > 15)
372 		panic("%s: bad nonce length %d", __FUNCTION__, nlen);
373 
374 	/*
375 	 * We need to know how many bytes to use to describe
376 	 * the length of the data.  Normally, nlen should be
377 	 * 12, which leaves us 3 bytes to do that -- 16mbytes of
378 	 * data to encrypt.  But it can be longer or shorter.
379 	 */
380 	L = sizeof(__m128i) - 1 - nlen;
381 
382 	/*
383 	 * Clear out the blocks
384 	 */
385 	s0 = _mm_setzero_si128();
386 
387 	rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen,
388 	    key, nr, nbytes, tag_length);
389 	/* s0 has flags, nonce, and then 0 */
390 	byte_ptr = (uint8_t*)&s0;
391 	byte_ptr[0] = L-1;	/* but the flags byte only has L' */
392 	bcopy(nonce, &byte_ptr[1], nlen);
393 
394 	/*
395 	 * Now to cycle through the rest of the data.
396 	 */
397 	decrypt_loop(in, NULL, nbytes, s0, nlen, &rolling_mac, key, nr);
398 
399 	/*
400 	 * Compare the tag.
401 	 */
402 	staging_block = _mm_xor_si128(AESNI_ENC(s0, key, nr), rolling_mac);
403 	if (timingsafe_bcmp(&staging_block, tag, tag_length) != 0) {
404 		return (0);
405 	}
406 
407 	/*
408 	 * Push out the decryption results this time.
409 	 */
410 	decrypt_loop(in, out, nbytes, s0, nlen, NULL, key, nr);
411 	return (1);
412 }
413