1 /*- 2 * Copyright (c) 2014-2021 The FreeBSD Foundation 3 * Copyright (c) 2018 iXsystems, Inc 4 * All rights reserved. 5 * 6 * Portions of this software were developed by John-Mark Gurney 7 * under the sponsorship of the FreeBSD Foundation and 8 * Rubicon Communications, LLC (Netgate). 9 * 10 * Portions of this software were developed by Ararat River 11 * Consulting, LLC under sponsorship of the FreeBSD Foundation. 12 * 13 * Redistribution and use in source and binary forms, with or without 14 * modification, are permitted provided that the following conditions 15 * are met: 16 * 1. Redistributions of source code must retain the above copyright 17 * notice, this list of conditions and the following disclaimer. 18 * 2. Redistributions in binary form must reproduce the above copyright 19 * notice, this list of conditions and the following disclaimer in the 20 * documentation and/or other materials provided with the distribution. 21 * 22 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 23 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 24 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 25 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 26 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 27 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 28 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 29 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 30 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 31 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 32 * SUCH DAMAGE. 33 * 34 * 35 * $FreeBSD$ 36 * 37 * This file implements AES-CCM+CBC-MAC, as described 38 * at https://tools.ietf.org/html/rfc3610, using Intel's 39 * AES-NI instructions. 40 * 41 */ 42 43 #include <sys/types.h> 44 #include <sys/endian.h> 45 #include <sys/param.h> 46 47 #include <sys/systm.h> 48 #include <crypto/aesni/aesni.h> 49 #include <crypto/aesni/aesni_os.h> 50 #include <crypto/aesni/aesencdec.h> 51 #define AESNI_ENC(d, k, nr) aesni_enc(nr-1, (const __m128i*)k, d) 52 53 #include <wmmintrin.h> 54 #include <emmintrin.h> 55 #include <smmintrin.h> 56 57 /* 58 * Encrypt a single 128-bit block after 59 * doing an xor. This is also used to 60 * decrypt (yay symmetric encryption). 61 */ 62 static inline __m128i 63 xor_and_encrypt(__m128i a, __m128i b, const unsigned char *k, int nr) 64 { 65 __m128i retval = _mm_xor_si128(a, b); 66 67 retval = AESNI_ENC(retval, k, nr); 68 return (retval); 69 } 70 71 /* 72 * Put value at the end of block, starting at offset. 73 * (This goes backwards, putting bytes in *until* it 74 * reaches offset.) 75 */ 76 static void 77 append_int(size_t value, __m128i *block, size_t offset) 78 { 79 int indx = sizeof(*block) - 1; 80 uint8_t *bp = (uint8_t*)block; 81 82 while (indx > (sizeof(*block) - offset)) { 83 bp[indx] = value & 0xff; 84 indx--; 85 value >>= 8; 86 } 87 } 88 89 /* 90 * Start the CBC-MAC process. This handles the auth data. 91 */ 92 static __m128i 93 cbc_mac_start(const unsigned char *auth_data, size_t auth_len, 94 const unsigned char *nonce, size_t nonce_len, 95 const unsigned char *key, int nr, 96 size_t data_len, size_t tag_len) 97 { 98 __m128i cbc_block, staging_block; 99 uint8_t *byte_ptr; 100 /* This defines where the message length goes */ 101 int L = sizeof(__m128i) - 1 - nonce_len; 102 103 /* 104 * Set up B0 here. This has the flags byte, 105 * followed by the nonce, followed by the 106 * length of the message. 107 */ 108 cbc_block = _mm_setzero_si128(); 109 byte_ptr = (uint8_t*)&cbc_block; 110 byte_ptr[0] = ((auth_len > 0) ? 1 : 0) * 64 | 111 (((tag_len - 2) / 2) * 8) | 112 (L - 1); 113 bcopy(nonce, byte_ptr + 1, nonce_len); 114 append_int(data_len, &cbc_block, L+1); 115 cbc_block = AESNI_ENC(cbc_block, key, nr); 116 117 if (auth_len != 0) { 118 /* 119 * We need to start by appending the length descriptor. 120 */ 121 uint32_t auth_amt; 122 size_t copy_amt; 123 const uint8_t *auth_ptr = auth_data; 124 125 staging_block = _mm_setzero_si128(); 126 127 /* 128 * The current OCF calling convention means that 129 * there can never be more than 4g of authentication 130 * data, so we don't handle the 0xffff case. 131 */ 132 KASSERT(auth_len < (1ULL << 32), 133 ("%s: auth_len (%zu) larger than 4GB", 134 __FUNCTION__, auth_len)); 135 136 if (auth_len < ((1 << 16) - (1 << 8))) { 137 /* 138 * If the auth data length is less than 139 * 0xff00, we don't need to encode a length 140 * specifier, just the length of the auth 141 * data. 142 */ 143 be16enc(&staging_block, auth_len); 144 auth_amt = 2; 145 } else if (auth_len < (1ULL << 32)) { 146 /* 147 * Two bytes for the length prefix, and then 148 * four bytes for the length. This makes a total 149 * of 6 bytes to describe the auth data length. 150 */ 151 be16enc(&staging_block, 0xfffe); 152 be32enc((char*)&staging_block + 2, auth_len); 153 auth_amt = 6; 154 } else 155 panic("%s: auth len too large", __FUNCTION__); 156 157 /* 158 * Need to copy abytes into blocks. The first block is 159 * already partially filled, by auth_amt, so we need 160 * to handle that. The last block needs to be zero padded. 161 */ 162 copy_amt = MIN(auth_len, 163 sizeof(staging_block) - auth_amt); 164 byte_ptr = (uint8_t*)&staging_block; 165 bcopy(auth_ptr, &byte_ptr[auth_amt], copy_amt); 166 auth_ptr += copy_amt; 167 168 cbc_block = xor_and_encrypt(cbc_block, staging_block, key, nr); 169 170 while (auth_ptr < auth_data + auth_len) { 171 copy_amt = MIN((auth_data + auth_len) - auth_ptr, 172 sizeof(staging_block)); 173 if (copy_amt < sizeof(staging_block)) 174 bzero(&staging_block, sizeof(staging_block)); 175 bcopy(auth_ptr, &staging_block, copy_amt); 176 cbc_block = xor_and_encrypt(cbc_block, staging_block, 177 key, nr); 178 auth_ptr += copy_amt; 179 } 180 } 181 return (cbc_block); 182 } 183 184 /* 185 * Implement AES CCM+CBC-MAC encryption and authentication. 186 * 187 * A couple of notes: 188 * Since abytes is limited to a 32 bit value here, the AAD is 189 * limited to 4 gigabytes or less. 190 */ 191 void 192 AES_CCM_encrypt(const unsigned char *in, unsigned char *out, 193 const unsigned char *addt, const unsigned char *nonce, 194 unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen, 195 int tag_length, const unsigned char *key, int nr) 196 { 197 int L; 198 int counter = 1; /* S0 has 0, S1 has 1 */ 199 size_t copy_amt, total = 0; 200 uint8_t *byte_ptr; 201 __m128i s0, rolling_mac, s_x, staging_block; 202 203 /* NIST 800-38c section A.1 says n is [7, 13]. */ 204 if (nlen < 7 || nlen > 13) 205 panic("%s: bad nonce length %d", __FUNCTION__, nlen); 206 207 /* 208 * We need to know how many bytes to use to describe 209 * the length of the data. Normally, nlen should be 210 * 12, which leaves us 3 bytes to do that -- 16mbytes of 211 * data to encrypt. But it can be longer or shorter; 212 * this impacts the length of the message. 213 */ 214 L = sizeof(__m128i) - 1 - nlen; 215 216 /* 217 * Clear out the blocks 218 */ 219 s0 = _mm_setzero_si128(); 220 221 rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen, 222 key, nr, nbytes, tag_length); 223 224 /* s0 has flags, nonce, and then 0 */ 225 byte_ptr = (uint8_t*)&s0; 226 byte_ptr[0] = L - 1; /* but the flags byte only has L' */ 227 bcopy(nonce, &byte_ptr[1], nlen); 228 229 /* 230 * Now to cycle through the rest of the data. 231 */ 232 bcopy(&s0, &s_x, sizeof(s0)); 233 234 while (total < nbytes) { 235 /* 236 * Copy the plain-text data into staging_block. 237 * This may need to be zero-padded. 238 */ 239 copy_amt = MIN(nbytes - total, sizeof(staging_block)); 240 bcopy(in+total, &staging_block, copy_amt); 241 if (copy_amt < sizeof(staging_block)) { 242 byte_ptr = (uint8_t*)&staging_block; 243 bzero(&byte_ptr[copy_amt], 244 sizeof(staging_block) - copy_amt); 245 } 246 rolling_mac = xor_and_encrypt(rolling_mac, staging_block, 247 key, nr); 248 /* Put the counter into the s_x block */ 249 append_int(counter++, &s_x, L+1); 250 /* Encrypt that */ 251 __m128i X = AESNI_ENC(s_x, key, nr); 252 /* XOR the plain-text with the encrypted counter block */ 253 staging_block = _mm_xor_si128(staging_block, X); 254 /* And copy it out */ 255 bcopy(&staging_block, out+total, copy_amt); 256 total += copy_amt; 257 } 258 /* 259 * Allegedly done with it! Except for the tag. 260 */ 261 s0 = AESNI_ENC(s0, key, nr); 262 staging_block = _mm_xor_si128(s0, rolling_mac); 263 bcopy(&staging_block, tag, tag_length); 264 explicit_bzero(&s0, sizeof(s0)); 265 explicit_bzero(&staging_block, sizeof(staging_block)); 266 explicit_bzero(&s_x, sizeof(s_x)); 267 explicit_bzero(&rolling_mac, sizeof(rolling_mac)); 268 } 269 270 /* 271 * Implement AES CCM+CBC-MAC decryption and authentication. 272 * Returns 0 on failure, 1 on success. 273 * 274 * The primary difference here is that each encrypted block 275 * needs to be hashed&encrypted after it is decrypted (since 276 * the CBC-MAC is based on the plain text). This means that 277 * we do the decryption twice -- first to verify the tag, 278 * and second to decrypt and copy it out. 279 * 280 * To avoid annoying code copying, we implement the main 281 * loop as a separate function. 282 * 283 * Call with out as NULL to not store the decrypted results; 284 * call with hashp as NULL to not run the authentication. 285 * Calling with neither as NULL does the decryption and 286 * authentication as a single pass (which is not allowed 287 * per the specification, really). 288 * 289 * If hashp is non-NULL, it points to the post-AAD computed 290 * checksum. 291 */ 292 static void 293 decrypt_loop(const unsigned char *in, unsigned char *out, size_t nbytes, 294 __m128i s0, size_t nonce_length, __m128i *macp, 295 const unsigned char *key, int nr) 296 { 297 size_t total = 0; 298 __m128i s_x = s0, mac_block; 299 int counter = 1; 300 const size_t L = sizeof(__m128i) - 1 - nonce_length; 301 __m128i pad_block, staging_block; 302 303 /* 304 * The starting mac (post AAD, if any). 305 */ 306 if (macp != NULL) 307 mac_block = *macp; 308 309 while (total < nbytes) { 310 size_t copy_amt = MIN(nbytes - total, sizeof(staging_block)); 311 312 if (copy_amt < sizeof(staging_block)) { 313 staging_block = _mm_setzero_si128(); 314 } 315 bcopy(in+total, &staging_block, copy_amt); 316 317 /* 318 * staging_block has the current block of input data, 319 * zero-padded if necessary. This is used in computing 320 * both the decrypted data, and the authentication tag. 321 */ 322 append_int(counter++, &s_x, L+1); 323 /* 324 * The tag is computed based on the decrypted data. 325 */ 326 pad_block = AESNI_ENC(s_x, key, nr); 327 if (copy_amt < sizeof(staging_block)) { 328 /* 329 * Need to pad out pad_block with 0. 330 * (staging_block was set to 0's above.) 331 */ 332 uint8_t *end_of_buffer = (uint8_t*)&pad_block; 333 bzero(end_of_buffer + copy_amt, 334 sizeof(pad_block) - copy_amt); 335 } 336 staging_block = _mm_xor_si128(staging_block, pad_block); 337 338 if (out) 339 bcopy(&staging_block, out+total, copy_amt); 340 341 if (macp) 342 mac_block = xor_and_encrypt(mac_block, staging_block, 343 key, nr); 344 total += copy_amt; 345 } 346 347 if (macp) 348 *macp = mac_block; 349 350 explicit_bzero(&pad_block, sizeof(pad_block)); 351 explicit_bzero(&staging_block, sizeof(staging_block)); 352 explicit_bzero(&mac_block, sizeof(mac_block)); 353 } 354 355 /* 356 * The exposed decryption routine. This is practically a 357 * copy of the encryption routine, except that the order 358 * in which the tag is created is changed. 359 * XXX combine the two functions at some point! 360 */ 361 int 362 AES_CCM_decrypt(const unsigned char *in, unsigned char *out, 363 const unsigned char *addt, const unsigned char *nonce, 364 const unsigned char *tag, uint32_t nbytes, uint32_t abytes, int nlen, 365 int tag_length, const unsigned char *key, int nr) 366 { 367 int L; 368 __m128i s0, rolling_mac, staging_block; 369 uint8_t *byte_ptr; 370 371 if (nlen < 0 || nlen > 15) 372 panic("%s: bad nonce length %d", __FUNCTION__, nlen); 373 374 /* 375 * We need to know how many bytes to use to describe 376 * the length of the data. Normally, nlen should be 377 * 12, which leaves us 3 bytes to do that -- 16mbytes of 378 * data to encrypt. But it can be longer or shorter. 379 */ 380 L = sizeof(__m128i) - 1 - nlen; 381 382 /* 383 * Clear out the blocks 384 */ 385 s0 = _mm_setzero_si128(); 386 387 rolling_mac = cbc_mac_start(addt, abytes, nonce, nlen, 388 key, nr, nbytes, tag_length); 389 /* s0 has flags, nonce, and then 0 */ 390 byte_ptr = (uint8_t*)&s0; 391 byte_ptr[0] = L-1; /* but the flags byte only has L' */ 392 bcopy(nonce, &byte_ptr[1], nlen); 393 394 /* 395 * Now to cycle through the rest of the data. 396 */ 397 decrypt_loop(in, NULL, nbytes, s0, nlen, &rolling_mac, key, nr); 398 399 /* 400 * Compare the tag. 401 */ 402 staging_block = _mm_xor_si128(AESNI_ENC(s0, key, nr), rolling_mac); 403 if (timingsafe_bcmp(&staging_block, tag, tag_length) != 0) { 404 return (0); 405 } 406 407 /* 408 * Push out the decryption results this time. 409 */ 410 decrypt_loop(in, out, nbytes, s0, nlen, NULL, key, nr); 411 return (1); 412 } 413