1 /*- 2 * Copyright (c) 2014 The FreeBSD Foundation 3 * Copyright (c) 2018 iXsystems, Inc 4 * All rights reserved. 5 * 6 * This software was developed by John-Mark Gurney under 7 * the sponsorship of the FreeBSD Foundation and 8 * Rubicon Communications, LLC (Netgate). 9 * Redistribution and use in source and binary forms, with or without 10 * modification, are permitted provided that the following conditions 11 * are met: 12 * 1. Redistributions of source code must retain the above copyright 13 * notice, this list of conditions and the following disclaimer. 14 * 2. Redistributions in binary form must reproduce the above copyright 15 * notice, this list of conditions and the following disclaimer in the 16 * documentation and/or other materials provided with the distribution. 17 * 18 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 19 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 20 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 21 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 22 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 23 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 24 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 25 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 26 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 27 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 28 * SUCH DAMAGE. 29 * 30 * 31 * $FreeBSD$ 32 * 33 * This file implements AES-CCM+CBC-MAC, as described 34 * at https://tools.ietf.org/html/rfc3610, using Intel's 35 * AES-NI instructions. 36 * 37 */ 38 39 #include <sys/types.h> 40 #include <sys/endian.h> 41 #include <sys/param.h> 42 43 #include <sys/systm.h> 44 #include <crypto/aesni/aesni.h> 45 #include <crypto/aesni/aesni_os.h> 46 #include <crypto/aesni/aesencdec.h> 47 #define AESNI_ENC(d, k, nr) aesni_enc(nr-1, (const __m128i*)k, d) 48 49 #include <wmmintrin.h> 50 #include <emmintrin.h> 51 #include <smmintrin.h> 52 53 /* 54 * Encrypt a single 128-bit block after 55 * doing an xor. This is also used to 56 * decrypt (yay symmetric encryption). 57 */ 58 static inline __m128i 59 xor_and_encrypt(__m128i a, __m128i b, const unsigned char *k, int nr) 60 { 61 __m128i retval = _mm_xor_si128(a, b); 62 63 retval = AESNI_ENC(retval, k, nr); 64 return (retval); 65 } 66 67 /* 68 * Put value at the end of block, starting at offset. 69 * (This goes backwards, putting bytes in *until* it 70 * reaches offset.) 71 */ 72 static void 73 append_int(size_t value, __m128i *block, size_t offset) 74 { 75 int indx = sizeof(*block) - 1; 76 uint8_t *bp = (uint8_t*)block; 77 78 while (indx > (sizeof(*block) - offset)) { 79 bp[indx] = value & 0xff; 80 indx--; 81 value >>= 8; 82 } 83 } 84 85 /* 86 * Start the CBC-MAC process. This handles the auth data. 87 */ 88 static __m128i 89 cbc_mac_start(const unsigned char *auth_data, size_t auth_len, 90 const unsigned char *nonce, size_t nonce_len, 91 const unsigned char *key, int nr, 92 size_t data_len, size_t tag_len) 93 { 94 __m128i cbc_block, staging_block; 95 uint8_t *byte_ptr; 96 /* This defines where the message length goes */ 97 int L = sizeof(__m128i) - 1 - nonce_len; 98 99 /* 100 * Set up B0 here. This has the flags byte, 101 * followed by the nonce, followed by the 102 * length of the message. 103 */ 104 cbc_block = _mm_setzero_si128(); 105 byte_ptr = (uint8_t*)&cbc_block; 106 byte_ptr[0] = ((auth_len > 0) ? 1 : 0) * 64 | 107 (((tag_len - 2) / 2) * 8) | 108 (L - 1); 109 bcopy(nonce, byte_ptr + 1, nonce_len); 110 append_int(data_len, &cbc_block, L+1); 111 cbc_block = AESNI_ENC(cbc_block, key, nr); 112 113 if (auth_len != 0) { 114 /* 115 * We need to start by appending the length descriptor. 116 */ 117 uint32_t auth_amt; 118 size_t copy_amt; 119 const uint8_t *auth_ptr = auth_data; 120 121 staging_block = _mm_setzero_si128(); 122 123 /* 124 * The current OCF calling convention means that 125 * there can never be more than 4g of authentication 126 * data, so we don't handle the 0xffff case. 127 */ 128 KASSERT(auth_len < (1ULL << 32), 129 ("%s: auth_len (%zu) larger than 4GB", 130 __FUNCTION__, auth_len)); 131 132 if (auth_len < ((1 << 16) - (1 << 8))) { 133 /* 134 * If the auth data length is less than 135 * 0xff00, we don't need to encode a length 136 * specifier, just the length of the auth 137 * data. 138 */ 139 be16enc(&staging_block, auth_len); 140 auth_amt = 2; 141 } else if (auth_len < (1ULL << 32)) { 142 /* 143 * Two bytes for the length prefix, and then 144 * four bytes for the length. This makes a total 145 * of 6 bytes to describe the auth data length. 146 */ 147 be16enc(&staging_block, 0xfffe); 148 be32enc((char*)&staging_block + 2, auth_len); 149 auth_amt = 6; 150 } else 151 panic("%s: auth len too large", __FUNCTION__); 152 153 /* 154 * Need to copy abytes into blocks. The first block is 155 * already partially filled, by auth_amt, so we need 156 * to handle that. The last block needs to be zero padded. 157 */ 158 copy_amt = MIN(auth_len, 159 sizeof(staging_block) - auth_amt); 160 byte_ptr = (uint8_t*)&staging_block; 161 bcopy(auth_ptr, &byte_ptr[auth_amt], copy_amt); 162 auth_ptr += copy_amt; 163 164 cbc_block = xor_and_encrypt(cbc_block, staging_block, key, nr); 165 166 while (auth_ptr < auth_data + auth_len) { 167 copy_amt = MIN((auth_data + auth_len) - auth_ptr, 168 sizeof(staging_block)); 169 if (copy_amt < sizeof(staging_block)) 170 bzero(&staging_block, sizeof(staging_block)); 171 bcopy(auth_ptr, &staging_block, copy_amt); 172 cbc_block = xor_and_encrypt(cbc_block, staging_block, 173 key, nr); 174 auth_ptr += copy_amt; 175 } 176 } 177 return (cbc_block); 178 } 179 180 /* 181 * Implement AES CCM+CBC-MAC encryption and authentication. 182 * 183 * A couple of notes: 184 * The specification allows for a different number of tag lengths; 185 * however, they're always truncated from 16 bytes, and the tag 186 * length isn't passed in. (This could be fixed by changing the 187 * code in aesni.c:aesni_cipher_crypt().) 188 * Similarly, although the nonce length is passed in, the 189 * OpenCrypto API that calls us doesn't have a way to set the nonce 190 * other than by having different crypto algorithm types. As a result, 191 * this is currently always called with nlen=12; this means that we 192 * also have a maximum message length of 16 megabytes. And similarly, 193 * since abytes is limited to a 32 bit value here, the AAD is 194 * limited to 4 gigabytes or less. 195 */ 196 void 197 AES_CCM_encrypt(const unsigned char *in, unsigned char *out, 198 const unsigned char *addt, const unsigned char *nonce, 199 unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen, 200 const unsigned char *key, int nr) 201 { 202 static const int tag_length = 16; /* 128 bits */ 203 int L; 204 int counter = 1; /* S0 has 0, S1 has 1 */ 205 size_t copy_amt, total = 0; 206 uint8_t *byte_ptr; 207 __m128i s0, rolling_mac, s_x, staging_block; 208 209 if (nbytes == 0 && abytes == 0) 210 return; 211 212 /* NIST 800-38c section A.1 says n is [7, 13]. */ 213 if (nlen < 7 || nlen > 13) 214 panic("%s: bad nonce length %d", __FUNCTION__, nlen); 215 216 /* 217 * We need to know how many bytes to use to describe 218 * the length of the data. Normally, nlen should be 219 * 12, which leaves us 3 bytes to do that -- 16mbytes of 220 * data to encrypt. But it can be longer or shorter; 221 * this impacts the length of the message. 222 */ 223 L = sizeof(__m128i) - 1 - nlen; 224 225 /* 226 * Now, this shouldn't happen, but let's make sure that 227 * the data length isn't too big. 228 */ 229 KASSERT(nbytes <= ((1 << (8 * L)) - 1), 230 ("%s: nbytes is %u, but length field is %d bytes", 231 __FUNCTION__, nbytes, L)); 232 233 /* 234 * Clear out the blocks 235 */ 236 s0 = _mm_setzero_si128(); 237 238 rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen, 239 key, nr, nbytes, tag_length); 240 241 /* s0 has flags, nonce, and then 0 */ 242 byte_ptr = (uint8_t*)&s0; 243 byte_ptr[0] = L - 1; /* but the flags byte only has L' */ 244 bcopy(nonce, &byte_ptr[1], nlen); 245 246 /* 247 * Now to cycle through the rest of the data. 248 */ 249 bcopy(&s0, &s_x, sizeof(s0)); 250 251 while (total < nbytes) { 252 /* 253 * Copy the plain-text data into staging_block. 254 * This may need to be zero-padded. 255 */ 256 copy_amt = MIN(nbytes - total, sizeof(staging_block)); 257 bcopy(in+total, &staging_block, copy_amt); 258 if (copy_amt < sizeof(staging_block)) { 259 byte_ptr = (uint8_t*)&staging_block; 260 bzero(&byte_ptr[copy_amt], 261 sizeof(staging_block) - copy_amt); 262 } 263 rolling_mac = xor_and_encrypt(rolling_mac, staging_block, 264 key, nr); 265 /* Put the counter into the s_x block */ 266 append_int(counter++, &s_x, L+1); 267 /* Encrypt that */ 268 __m128i X = AESNI_ENC(s_x, key, nr); 269 /* XOR the plain-text with the encrypted counter block */ 270 staging_block = _mm_xor_si128(staging_block, X); 271 /* And copy it out */ 272 bcopy(&staging_block, out+total, copy_amt); 273 total += copy_amt; 274 } 275 /* 276 * Allegedly done with it! Except for the tag. 277 */ 278 s0 = AESNI_ENC(s0, key, nr); 279 staging_block = _mm_xor_si128(s0, rolling_mac); 280 bcopy(&staging_block, tag, tag_length); 281 explicit_bzero(&s0, sizeof(s0)); 282 explicit_bzero(&staging_block, sizeof(staging_block)); 283 explicit_bzero(&s_x, sizeof(s_x)); 284 explicit_bzero(&rolling_mac, sizeof(rolling_mac)); 285 } 286 287 /* 288 * Implement AES CCM+CBC-MAC decryption and authentication. 289 * Returns 0 on failure, 1 on success. 290 * 291 * The primary difference here is that each encrypted block 292 * needs to be hashed&encrypted after it is decrypted (since 293 * the CBC-MAC is based on the plain text). This means that 294 * we do the decryption twice -- first to verify the tag, 295 * and second to decrypt and copy it out. 296 * 297 * To avoid annoying code copying, we implement the main 298 * loop as a separate function. 299 * 300 * Call with out as NULL to not store the decrypted results; 301 * call with hashp as NULL to not run the authentication. 302 * Calling with neither as NULL does the decryption and 303 * authentication as a single pass (which is not allowed 304 * per the specification, really). 305 * 306 * If hashp is non-NULL, it points to the post-AAD computed 307 * checksum. 308 */ 309 static void 310 decrypt_loop(const unsigned char *in, unsigned char *out, size_t nbytes, 311 __m128i s0, size_t nonce_length, __m128i *macp, 312 const unsigned char *key, int nr) 313 { 314 size_t total = 0; 315 __m128i s_x = s0, mac_block; 316 int counter = 1; 317 const size_t L = sizeof(__m128i) - 1 - nonce_length; 318 __m128i pad_block, staging_block; 319 320 /* 321 * The starting mac (post AAD, if any). 322 */ 323 if (macp != NULL) 324 mac_block = *macp; 325 326 while (total < nbytes) { 327 size_t copy_amt = MIN(nbytes - total, sizeof(staging_block)); 328 329 if (copy_amt < sizeof(staging_block)) { 330 staging_block = _mm_setzero_si128(); 331 } 332 bcopy(in+total, &staging_block, copy_amt); 333 334 /* 335 * staging_block has the current block of input data, 336 * zero-padded if necessary. This is used in computing 337 * both the decrypted data, and the authentication tag. 338 */ 339 append_int(counter++, &s_x, L+1); 340 /* 341 * The tag is computed based on the decrypted data. 342 */ 343 pad_block = AESNI_ENC(s_x, key, nr); 344 if (copy_amt < sizeof(staging_block)) { 345 /* 346 * Need to pad out pad_block with 0. 347 * (staging_block was set to 0's above.) 348 */ 349 uint8_t *end_of_buffer = (uint8_t*)&pad_block; 350 bzero(end_of_buffer + copy_amt, 351 sizeof(pad_block) - copy_amt); 352 } 353 staging_block = _mm_xor_si128(staging_block, pad_block); 354 355 if (out) 356 bcopy(&staging_block, out+total, copy_amt); 357 358 if (macp) 359 mac_block = xor_and_encrypt(mac_block, staging_block, 360 key, nr); 361 total += copy_amt; 362 } 363 364 if (macp) 365 *macp = mac_block; 366 367 explicit_bzero(&pad_block, sizeof(pad_block)); 368 explicit_bzero(&staging_block, sizeof(staging_block)); 369 explicit_bzero(&mac_block, sizeof(mac_block)); 370 } 371 372 /* 373 * The exposed decryption routine. This is practically a 374 * copy of the encryption routine, except that the order 375 * in which the tag is created is changed. 376 * XXX combine the two functions at some point! 377 */ 378 int 379 AES_CCM_decrypt(const unsigned char *in, unsigned char *out, 380 const unsigned char *addt, const unsigned char *nonce, 381 const unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen, 382 const unsigned char *key, int nr) 383 { 384 static const int tag_length = 16; /* 128 bits */ 385 int L; 386 __m128i s0, rolling_mac, staging_block; 387 uint8_t *byte_ptr; 388 389 if (nbytes == 0 && abytes == 0) 390 return (1); // No message means no decryption! 391 if (nlen < 0 || nlen > 15) 392 panic("%s: bad nonce length %d", __FUNCTION__, nlen); 393 394 /* 395 * We need to know how many bytes to use to describe 396 * the length of the data. Normally, nlen should be 397 * 12, which leaves us 3 bytes to do that -- 16mbytes of 398 * data to encrypt. But it can be longer or shorter. 399 */ 400 L = sizeof(__m128i) - 1 - nlen; 401 402 /* 403 * Now, this shouldn't happen, but let's make sure that 404 * the data length isn't too big. 405 */ 406 if (nbytes > ((1 << (8 * L)) - 1)) 407 panic("%s: nbytes is %u, but length field is %d bytes", 408 __FUNCTION__, nbytes, L); 409 /* 410 * Clear out the blocks 411 */ 412 s0 = _mm_setzero_si128(); 413 414 rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen, 415 key, nr, nbytes, tag_length); 416 /* s0 has flags, nonce, and then 0 */ 417 byte_ptr = (uint8_t*)&s0; 418 byte_ptr[0] = L-1; /* but the flags byte only has L' */ 419 bcopy(nonce, &byte_ptr[1], nlen); 420 421 /* 422 * Now to cycle through the rest of the data. 423 */ 424 decrypt_loop(in, NULL, nbytes, s0, nlen, &rolling_mac, key, nr); 425 426 /* 427 * Compare the tag. 428 */ 429 staging_block = _mm_xor_si128(AESNI_ENC(s0, key, nr), rolling_mac); 430 if (timingsafe_bcmp(&staging_block, tag, tag_length) != 0) { 431 return (0); 432 } 433 434 /* 435 * Push out the decryption results this time. 436 */ 437 decrypt_loop(in, out, nbytes, s0, nlen, NULL, key, nr); 438 return (1); 439 } 440