/*-
 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/systm.h>
#include <crypto/aesni/aesni.h>

#include "aesencdec.h"

MALLOC_DECLARE(M_AESNI);

void
aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN])
{
	__m128i tot, ivreg;
	size_t i;

	len /= AES_BLOCK_LEN;
	ivreg = _mm_loadu_si128((const __m128i *)iv);
	for (i = 0; i < len; i++) {
		tot = aesni_enc(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from) ^ ivreg);
		ivreg = tot;
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}
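
/*
 * CBC encryption is inherently serial: each input block must be XORed with
 * the previous ciphertext block before it can be encrypted.  Decryption has
 * no such dependency, so the routine below pushes eight blocks at a time
 * through aesni_dec8() to keep the AES-NI pipeline busy and handles any
 * remainder one block at a time.  Unlike the encrypt side, it operates in
 * place on `buf'.
 */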
void
aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
    uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN])
{
	__m128i blocks[8];
	__m128i *bufs;
	__m128i ivreg, nextiv;
	size_t i, j, cnt;

	ivreg = _mm_loadu_si128((const __m128i *)iv);
	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		bufs = (__m128i *)buf;
		aesni_dec8(rounds - 1, key_schedule, bufs[0], bufs[1],
		    bufs[2], bufs[3], bufs[4], bufs[5], bufs[6],
		    bufs[7], &blocks[0]);
		for (j = 0; j < 8; j++) {
			nextiv = bufs[j];
			bufs[j] = blocks[j] ^ ivreg;
			ivreg = nextiv;
		}
		buf += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		bufs = (__m128i *)buf;
		nextiv = bufs[0];
		bufs[0] = aesni_dec(rounds - 1, key_schedule, bufs[0]) ^ ivreg;
		ivreg = nextiv;
		buf += AES_BLOCK_LEN;
	}
}

void
aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to)
{
	__m128i tot;
	const __m128i *blocks;
	size_t i, cnt;

	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blocks = (const __m128i *)from;
		aesni_enc8(rounds - 1, key_schedule, blocks[0], blocks[1],
		    blocks[2], blocks[3], blocks[4], blocks[5], blocks[6],
		    blocks[7], (__m128i *)to);
		from += AES_BLOCK_LEN * 8;
		to += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		tot = aesni_enc(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from));
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

void
aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len,
    const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN])
{
	__m128i tot;
	const __m128i *blocks;
	size_t i, cnt;

	cnt = len / AES_BLOCK_LEN / 8;
	for (i = 0; i < cnt; i++) {
		blocks = (const __m128i *)from;
		aesni_dec8(rounds - 1, key_schedule, blocks[0], blocks[1],
		    blocks[2], blocks[3], blocks[4], blocks[5], blocks[6],
		    blocks[7], (__m128i *)to);
		from += AES_BLOCK_LEN * 8;
		to += AES_BLOCK_LEN * 8;
	}
	i *= 8;
	cnt = len / AES_BLOCK_LEN;
	for (; i < cnt; i++) {
		tot = aesni_dec(rounds - 1, key_schedule,
		    _mm_loadu_si128((const __m128i *)from));
		_mm_storeu_si128((__m128i *)to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

#define	AES_XTS_BLOCKSIZE	16
#define	AES_XTS_IVSIZE		8
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */
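
/*
 * Advance the XTS tweak to the next block: multiply by x (alpha) in
 * GF(2^128), reducing modulo x^128 + x^7 + x^2 + x + 1 (AES_XTS_ALPHA,
 * 0x87, encodes the low coefficients).  The 128-bit tweak is handled as
 * four 32-bit lanes: the shuffle/srai pair rotates each lane's sign bit up
 * one lane and broadcasts it into a full-lane mask, so ANDing with
 * alphamask yields the carry into the next lane's LSB (or the 0x87
 * reduction when the top bit falls off the end), which is then XORed into
 * the lane-wise left shift.
 */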
static inline __m128i
xts_crank_lfsr(__m128i inp)
{
	const __m128i alphamask = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA);
	__m128i xtweak, ret;

	/* set up xor mask */
	xtweak = _mm_shuffle_epi32(inp, 0x93);
	xtweak = _mm_srai_epi32(xtweak, 31);
	xtweak &= alphamask;

	/* next term */
	ret = _mm_slli_epi32(inp, 1);
	ret ^= xtweak;

	return ret;
}

static void
aesni_crypt_xts_block(int rounds, const void *key_schedule, __m128i *tweak,
    const __m128i *from, __m128i *to, int do_encrypt)
{
	__m128i block;

	block = *from ^ *tweak;

	if (do_encrypt)
		block = aesni_enc(rounds - 1, key_schedule, block);
	else
		block = aesni_dec(rounds - 1, key_schedule, block);

	*to = block ^ *tweak;

	*tweak = xts_crank_lfsr(*tweak);
}

static void
aesni_crypt_xts_block8(int rounds, const void *key_schedule, __m128i *tweak,
    const __m128i *from, __m128i *to, int do_encrypt)
{
	__m128i tmptweak;
	__m128i a, b, c, d, e, f, g, h;
	__m128i tweaks[8];
	__m128i tmp[8];

	tmptweak = *tweak;

	/*
	 * unroll the loop.  This lets gcc put values directly in the
	 * register and saves memory accesses.
	 */
#define	PREPINP(v, pos)						\
	do {							\
		tweaks[(pos)] = tmptweak;			\
		(v) = from[(pos)] ^ tmptweak;			\
		tmptweak = xts_crank_lfsr(tmptweak);		\
	} while (0)
	PREPINP(a, 0);
	PREPINP(b, 1);
	PREPINP(c, 2);
	PREPINP(d, 3);
	PREPINP(e, 4);
	PREPINP(f, 5);
	PREPINP(g, 6);
	PREPINP(h, 7);
	*tweak = tmptweak;

	if (do_encrypt)
		aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
		    tmp);
	else
		aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h,
		    tmp);

	to[0] = tmp[0] ^ tweaks[0];
	to[1] = tmp[1] ^ tweaks[1];
	to[2] = tmp[2] ^ tweaks[2];
	to[3] = tmp[3] ^ tweaks[3];
	to[4] = tmp[4] ^ tweaks[4];
	to[5] = tmp[5] ^ tweaks[5];
	to[6] = tmp[6] ^ tweaks[6];
	to[7] = tmp[7] ^ tweaks[7];
}

static void
aesni_crypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[AES_BLOCK_LEN], int do_encrypt)
{
	__m128i tweakreg;
	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
	size_t i, cnt;

	/*
	 * Prepare tweak as E_k2(IV).  IV is specified as LE representation
	 * of a 64-bit block number which we allow to be passed in directly.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	bcopy(iv, tweak, AES_XTS_IVSIZE);
	/* Last 64 bits of IV are always zero. */
	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
	tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]);
	tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg);

	cnt = len / AES_XTS_BLOCKSIZE / 8;
	for (i = 0; i < cnt; i++) {
		aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg,
		    (const __m128i *)from, (__m128i *)to, do_encrypt);
		from += AES_XTS_BLOCKSIZE * 8;
		to += AES_XTS_BLOCKSIZE * 8;
	}
	i *= 8;
	cnt = len / AES_XTS_BLOCKSIZE;
	for (; i < cnt; i++) {
		aesni_crypt_xts_block(rounds, data_schedule, &tweakreg,
		    (const __m128i *)from, (__m128i *)to, do_encrypt);
		from += AES_XTS_BLOCKSIZE;
		to += AES_XTS_BLOCKSIZE;
	}
}

void
aesni_encrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[AES_BLOCK_LEN])
{

	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
	    iv, 1);
}

void
aesni_decrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[AES_BLOCK_LEN])
{

	aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to,
	    iv, 0);
}
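
/*
 * Key lengths are given in bits, as in opencrypto's cri_klen/crd_klen.
 * For XTS the supplied key is two equally sized AES keys back to back,
 * so 256 selects AES-128 and 512 selects AES-256; the second half, at
 * key + keylen / 16 bytes (keylen / 8 / 2), becomes the tweak schedule.
 */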
static int
aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key,
    int keylen)
{

	switch (ses->algo) {
	case CRYPTO_AES_CBC:
		switch (keylen) {
		case 128:
			ses->rounds = AES128_ROUNDS;
			break;
		case 192:
			ses->rounds = AES192_ROUNDS;
			break;
		case 256:
			ses->rounds = AES256_ROUNDS;
			break;
		default:
			return (EINVAL);
		}
		break;
	case CRYPTO_AES_XTS:
		switch (keylen) {
		case 256:
			ses->rounds = AES128_ROUNDS;
			break;
		case 512:
			ses->rounds = AES256_ROUNDS;
			break;
		default:
			return (EINVAL);
		}
		break;
	default:
		return (EINVAL);
	}

	aesni_set_enckey(key, ses->enc_schedule, ses->rounds);
	aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds);
	if (ses->algo == CRYPTO_AES_CBC)
		arc4rand(ses->iv, sizeof(ses->iv), 0);
	else /* if (ses->algo == CRYPTO_AES_XTS) */ {
		aesni_set_enckey(key + keylen / 16, ses->xts_schedule,
		    ses->rounds);
	}

	return (0);
}

int
aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini)
{
	struct thread *td;
	int error, saved_ctx;

	td = curthread;
	if (!is_fpu_kern_thread(0)) {
		/* Key expansion uses SSE instructions; borrow the FPU. */
		error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL);
		saved_ctx = 1;
	} else {
		error = 0;
		saved_ctx = 0;
	}
	if (error == 0) {
		error = aesni_cipher_setup_common(ses, encini->cri_key,
		    encini->cri_klen);
		if (saved_ctx)
			fpu_kern_leave(td, ses->fpu_ctx);
	}
	return (error);
}

int
aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd,
    struct cryptop *crp)
{
	struct thread *td;
	uint8_t *buf;
	int error, allocated, saved_ctx;

	buf = aesni_cipher_alloc(enccrd, crp, &allocated);
	if (buf == NULL)
		return (ENOMEM);

	td = curthread;
	if (!is_fpu_kern_thread(0)) {
		error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL);
		if (error != 0)
			goto out;
		saved_ctx = 1;
	} else {
		saved_ctx = 0;
		error = 0;
	}

	if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) {
		error = aesni_cipher_setup_common(ses, enccrd->crd_key,
		    enccrd->crd_klen);
		if (error != 0) {
			/* Don't leak the FPU context on the error exit. */
			if (saved_ctx)
				fpu_kern_leave(td, ses->fpu_ctx);
			goto out;
		}
	}

	if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) {
		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
			bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
		if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0)
			crypto_copyback(crp->crp_flags, crp->crp_buf,
			    enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
		if (ses->algo == CRYPTO_AES_CBC) {
			aesni_encrypt_cbc(ses->rounds, ses->enc_schedule,
			    enccrd->crd_len, buf, buf, ses->iv);
		} else /* if (ses->algo == CRYPTO_AES_XTS) */ {
			aesni_encrypt_xts(ses->rounds, ses->enc_schedule,
			    ses->xts_schedule, enccrd->crd_len, buf, buf,
			    ses->iv);
		}
	} else {
		if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0)
			bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN);
		else
			crypto_copydata(crp->crp_flags, crp->crp_buf,
			    enccrd->crd_inject, AES_BLOCK_LEN, ses->iv);
		if (ses->algo == CRYPTO_AES_CBC) {
			aesni_decrypt_cbc(ses->rounds, ses->dec_schedule,
			    enccrd->crd_len, buf, ses->iv);
		} else /* if (ses->algo == CRYPTO_AES_XTS) */ {
			aesni_decrypt_xts(ses->rounds, ses->dec_schedule,
			    ses->xts_schedule, enccrd->crd_len, buf, buf,
			    ses->iv);
		}
	}
	if (saved_ctx)
		fpu_kern_leave(td, ses->fpu_ctx);
	if (allocated)
		crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip,
		    enccrd->crd_len, buf);
	if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0)
		/* Save the last ciphertext block to chain the next IV. */
		crypto_copydata(crp->crp_flags, crp->crp_buf,
		    enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN,
		    AES_BLOCK_LEN, ses->iv);
out:
	if (allocated) {
		/* Don't leave plaintext behind in the bounce buffer. */
		bzero(buf, enccrd->crd_len);
		free(buf, M_AESNI);
	}
	return (error);
}
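
/*
 * Expected call flow, as an illustrative sketch (the actual callers live
 * in the aesni(4) driver front end, not in this file):
 *
 *	error = aesni_cipher_setup(ses, encini);	(once, at newsession)
 *	error = aesni_cipher_process(ses, enccrd, crp);	(per crypto request)
 *
 * Both entry points wrap their SSE usage in fpu_kern_enter() /
 * fpu_kern_leave() unless the current thread is already an FPU kernel
 * thread.
 */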