1 /*- 2 * Copyright (C) 2008 Damien Miller <djm@mindrot.org> 3 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org> 4 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net> 5 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org> 6 * All rights reserved. 7 * 8 * Redistribution and use in source and binary forms, with or without 9 * modification, are permitted provided that the following conditions 10 * are met: 11 * 1. Redistributions of source code must retain the above copyright 12 * notice, this list of conditions and the following disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND 18 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 19 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 20 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE 21 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 22 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 23 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 24 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 25 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 26 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 27 * SUCH DAMAGE. 28 */ 29 30 #include <sys/cdefs.h> 31 __FBSDID("$FreeBSD$"); 32 33 #include <sys/param.h> 34 #include <sys/libkern.h> 35 #include <sys/malloc.h> 36 #include <sys/proc.h> 37 #include <sys/systm.h> 38 #include <crypto/aesni/aesni.h> 39 40 #include "aesencdec.h" 41 42 MALLOC_DECLARE(M_AESNI); 43 44 struct blocks8 { 45 __m128i blk[8]; 46 } __packed; 47 48 void 49 aesni_encrypt_cbc(int rounds, const void *key_schedule, size_t len, 50 const uint8_t *from, uint8_t *to, const uint8_t iv[AES_BLOCK_LEN]) 51 { 52 __m128i tot, ivreg; 53 size_t i; 54 55 len /= AES_BLOCK_LEN; 56 ivreg = _mm_loadu_si128((const __m128i *)iv); 57 for (i = 0; i < len; i++) { 58 tot = aesni_enc(rounds - 1, key_schedule, 59 _mm_loadu_si128((const __m128i *)from) ^ ivreg); 60 ivreg = tot; 61 _mm_storeu_si128((__m128i *)to, tot); 62 from += AES_BLOCK_LEN; 63 to += AES_BLOCK_LEN; 64 } 65 } 66 67 void 68 aesni_decrypt_cbc(int rounds, const void *key_schedule, size_t len, 69 uint8_t *buf, const uint8_t iv[AES_BLOCK_LEN]) 70 { 71 __m128i blocks[8]; 72 struct blocks8 *blks; 73 __m128i ivreg, nextiv; 74 size_t i, j, cnt; 75 76 ivreg = _mm_loadu_si128((const __m128i *)iv); 77 cnt = len / AES_BLOCK_LEN / 8; 78 for (i = 0; i < cnt; i++) { 79 blks = (struct blocks8 *)buf; 80 aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1], 81 blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5], 82 blks->blk[6], blks->blk[7], &blocks[0]); 83 for (j = 0; j < 8; j++) { 84 nextiv = blks->blk[j]; 85 blks->blk[j] = blocks[j] ^ ivreg; 86 ivreg = nextiv; 87 } 88 buf += AES_BLOCK_LEN * 8; 89 } 90 i *= 8; 91 cnt = len / AES_BLOCK_LEN; 92 for (; i < cnt; i++) { 93 nextiv = _mm_loadu_si128((void *)buf); 94 _mm_storeu_si128((void *)buf, 95 aesni_dec(rounds - 1, key_schedule, nextiv) ^ ivreg); 96 ivreg = nextiv; 97 buf += AES_BLOCK_LEN; 98 } 99 } 100 101 void 102 aesni_encrypt_ecb(int rounds, const void *key_schedule, size_t len, 103 const uint8_t *from, uint8_t *to) 104 { 105 __m128i tot; 106 __m128i tout[8]; 107 struct blocks8 *top; 108 const struct blocks8 *blks; 109 size_t i, cnt; 110 111 cnt = len / AES_BLOCK_LEN / 8; 112 for (i = 0; i < cnt; i++) { 113 blks = (const struct blocks8 *)from; 114 top = (struct blocks8 *)to; 115 aesni_enc8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1], 116 blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5], 117 blks->blk[6], blks->blk[7], tout); 118 top->blk[0] = tout[0]; 119 top->blk[1] = tout[1]; 120 top->blk[2] = tout[2]; 121 top->blk[3] = tout[3]; 122 top->blk[4] = tout[4]; 123 top->blk[5] = tout[5]; 124 top->blk[6] = tout[6]; 125 top->blk[7] = tout[7]; 126 from += AES_BLOCK_LEN * 8; 127 to += AES_BLOCK_LEN * 8; 128 } 129 i *= 8; 130 cnt = len / AES_BLOCK_LEN; 131 for (; i < cnt; i++) { 132 tot = aesni_enc(rounds - 1, key_schedule, 133 _mm_loadu_si128((const __m128i *)from)); 134 _mm_storeu_si128((__m128i *)to, tot); 135 from += AES_BLOCK_LEN; 136 to += AES_BLOCK_LEN; 137 } 138 } 139 140 void 141 aesni_decrypt_ecb(int rounds, const void *key_schedule, size_t len, 142 const uint8_t from[AES_BLOCK_LEN], uint8_t to[AES_BLOCK_LEN]) 143 { 144 __m128i tot; 145 __m128i tout[8]; 146 const struct blocks8 *blks; 147 struct blocks8 *top; 148 size_t i, cnt; 149 150 cnt = len / AES_BLOCK_LEN / 8; 151 for (i = 0; i < cnt; i++) { 152 blks = (const struct blocks8 *)from; 153 top = (struct blocks8 *)to; 154 aesni_dec8(rounds - 1, key_schedule, blks->blk[0], blks->blk[1], 155 blks->blk[2], blks->blk[3], blks->blk[4], blks->blk[5], 156 blks->blk[6], blks->blk[7], tout); 157 top->blk[0] = tout[0]; 158 top->blk[1] = tout[1]; 159 top->blk[2] = tout[2]; 160 top->blk[3] = tout[3]; 161 top->blk[4] = tout[4]; 162 top->blk[5] = tout[5]; 163 top->blk[6] = tout[6]; 164 top->blk[7] = tout[7]; 165 from += AES_BLOCK_LEN * 8; 166 to += AES_BLOCK_LEN * 8; 167 } 168 i *= 8; 169 cnt = len / AES_BLOCK_LEN; 170 for (; i < cnt; i++) { 171 tot = aesni_dec(rounds - 1, key_schedule, 172 _mm_loadu_si128((const __m128i *)from)); 173 _mm_storeu_si128((__m128i *)to, tot); 174 from += AES_BLOCK_LEN; 175 to += AES_BLOCK_LEN; 176 } 177 } 178 179 #define AES_XTS_BLOCKSIZE 16 180 #define AES_XTS_IVSIZE 8 181 #define AES_XTS_ALPHA 0x87 /* GF(2^128) generator polynomial */ 182 183 static inline __m128i 184 xts_crank_lfsr(__m128i inp) 185 { 186 const __m128i alphamask = _mm_set_epi32(1, 1, 1, AES_XTS_ALPHA); 187 __m128i xtweak, ret; 188 189 /* set up xor mask */ 190 xtweak = _mm_shuffle_epi32(inp, 0x93); 191 xtweak = _mm_srai_epi32(xtweak, 31); 192 xtweak &= alphamask; 193 194 /* next term */ 195 ret = _mm_slli_epi32(inp, 1); 196 ret ^= xtweak; 197 198 return ret; 199 } 200 201 static void 202 aesni_crypt_xts_block(int rounds, const __m128i *key_schedule, __m128i *tweak, 203 const uint8_t *from, uint8_t *to, int do_encrypt) 204 { 205 __m128i block; 206 207 block = _mm_loadu_si128((const __m128i *)from) ^ *tweak; 208 209 if (do_encrypt) 210 block = aesni_enc(rounds - 1, key_schedule, block); 211 else 212 block = aesni_dec(rounds - 1, key_schedule, block); 213 214 _mm_storeu_si128((__m128i *)to, block ^ *tweak); 215 216 *tweak = xts_crank_lfsr(*tweak); 217 } 218 219 static void 220 aesni_crypt_xts_block8(int rounds, const __m128i *key_schedule, __m128i *tweak, 221 const uint8_t *from, uint8_t *to, int do_encrypt) 222 { 223 __m128i tmptweak; 224 __m128i a, b, c, d, e, f, g, h; 225 __m128i tweaks[8]; 226 __m128i tmp[8]; 227 __m128i *top; 228 const __m128i *fromp; 229 230 tmptweak = *tweak; 231 232 /* 233 * unroll the loop. This lets gcc put values directly in the 234 * register and saves memory accesses. 235 */ 236 fromp = (const __m128i *)from; 237 #define PREPINP(v, pos) \ 238 do { \ 239 tweaks[(pos)] = tmptweak; \ 240 (v) = _mm_loadu_si128(&fromp[pos]) ^ \ 241 tmptweak; \ 242 tmptweak = xts_crank_lfsr(tmptweak); \ 243 } while (0) 244 PREPINP(a, 0); 245 PREPINP(b, 1); 246 PREPINP(c, 2); 247 PREPINP(d, 3); 248 PREPINP(e, 4); 249 PREPINP(f, 5); 250 PREPINP(g, 6); 251 PREPINP(h, 7); 252 *tweak = tmptweak; 253 254 if (do_encrypt) 255 aesni_enc8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h, 256 tmp); 257 else 258 aesni_dec8(rounds - 1, key_schedule, a, b, c, d, e, f, g, h, 259 tmp); 260 261 top = (__m128i *)to; 262 _mm_storeu_si128(&top[0], tmp[0] ^ tweaks[0]); 263 _mm_storeu_si128(&top[1], tmp[1] ^ tweaks[1]); 264 _mm_storeu_si128(&top[2], tmp[2] ^ tweaks[2]); 265 _mm_storeu_si128(&top[3], tmp[3] ^ tweaks[3]); 266 _mm_storeu_si128(&top[4], tmp[4] ^ tweaks[4]); 267 _mm_storeu_si128(&top[5], tmp[5] ^ tweaks[5]); 268 _mm_storeu_si128(&top[6], tmp[6] ^ tweaks[6]); 269 _mm_storeu_si128(&top[7], tmp[7] ^ tweaks[7]); 270 } 271 272 static void 273 aesni_crypt_xts(int rounds, const __m128i *data_schedule, 274 const __m128i *tweak_schedule, size_t len, const uint8_t *from, 275 uint8_t *to, const uint8_t iv[AES_BLOCK_LEN], int do_encrypt) 276 { 277 __m128i tweakreg; 278 uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16); 279 size_t i, cnt; 280 281 /* 282 * Prepare tweak as E_k2(IV). IV is specified as LE representation 283 * of a 64-bit block number which we allow to be passed in directly. 284 */ 285 #if BYTE_ORDER == LITTLE_ENDIAN 286 bcopy(iv, tweak, AES_XTS_IVSIZE); 287 /* Last 64 bits of IV are always zero. */ 288 bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE); 289 #else 290 #error Only LITTLE_ENDIAN architectures are supported. 291 #endif 292 tweakreg = _mm_loadu_si128((__m128i *)&tweak[0]); 293 tweakreg = aesni_enc(rounds - 1, tweak_schedule, tweakreg); 294 295 cnt = len / AES_XTS_BLOCKSIZE / 8; 296 for (i = 0; i < cnt; i++) { 297 aesni_crypt_xts_block8(rounds, data_schedule, &tweakreg, 298 from, to, do_encrypt); 299 from += AES_XTS_BLOCKSIZE * 8; 300 to += AES_XTS_BLOCKSIZE * 8; 301 } 302 i *= 8; 303 cnt = len / AES_XTS_BLOCKSIZE; 304 for (; i < cnt; i++) { 305 aesni_crypt_xts_block(rounds, data_schedule, &tweakreg, 306 from, to, do_encrypt); 307 from += AES_XTS_BLOCKSIZE; 308 to += AES_XTS_BLOCKSIZE; 309 } 310 } 311 312 void 313 aesni_encrypt_xts(int rounds, const void *data_schedule, 314 const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to, 315 const uint8_t iv[AES_BLOCK_LEN]) 316 { 317 318 aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to, 319 iv, 1); 320 } 321 322 void 323 aesni_decrypt_xts(int rounds, const void *data_schedule, 324 const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to, 325 const uint8_t iv[AES_BLOCK_LEN]) 326 { 327 328 aesni_crypt_xts(rounds, data_schedule, tweak_schedule, len, from, to, 329 iv, 0); 330 } 331 332 static int 333 aesni_cipher_setup_common(struct aesni_session *ses, const uint8_t *key, 334 int keylen) 335 { 336 337 switch (ses->algo) { 338 case CRYPTO_AES_CBC: 339 switch (keylen) { 340 case 128: 341 ses->rounds = AES128_ROUNDS; 342 break; 343 case 192: 344 ses->rounds = AES192_ROUNDS; 345 break; 346 case 256: 347 ses->rounds = AES256_ROUNDS; 348 break; 349 default: 350 return (EINVAL); 351 } 352 break; 353 case CRYPTO_AES_XTS: 354 switch (keylen) { 355 case 256: 356 ses->rounds = AES128_ROUNDS; 357 break; 358 case 512: 359 ses->rounds = AES256_ROUNDS; 360 break; 361 default: 362 return (EINVAL); 363 } 364 break; 365 default: 366 return (EINVAL); 367 } 368 369 aesni_set_enckey(key, ses->enc_schedule, ses->rounds); 370 aesni_set_deckey(ses->enc_schedule, ses->dec_schedule, ses->rounds); 371 if (ses->algo == CRYPTO_AES_CBC) 372 arc4rand(ses->iv, sizeof(ses->iv), 0); 373 else /* if (ses->algo == CRYPTO_AES_XTS) */ { 374 aesni_set_enckey(key + keylen / 16, ses->xts_schedule, 375 ses->rounds); 376 } 377 378 return (0); 379 } 380 381 int 382 aesni_cipher_setup(struct aesni_session *ses, struct cryptoini *encini) 383 { 384 struct thread *td; 385 int error, saved_ctx; 386 387 td = curthread; 388 if (!is_fpu_kern_thread(0)) { 389 error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL); 390 saved_ctx = 1; 391 } else { 392 error = 0; 393 saved_ctx = 0; 394 } 395 if (error == 0) { 396 error = aesni_cipher_setup_common(ses, encini->cri_key, 397 encini->cri_klen); 398 if (saved_ctx) 399 fpu_kern_leave(td, ses->fpu_ctx); 400 } 401 return (error); 402 } 403 404 int 405 aesni_cipher_process(struct aesni_session *ses, struct cryptodesc *enccrd, 406 struct cryptop *crp) 407 { 408 struct thread *td; 409 uint8_t *buf; 410 int error, allocated, saved_ctx; 411 412 buf = aesni_cipher_alloc(enccrd, crp, &allocated); 413 if (buf == NULL) 414 return (ENOMEM); 415 416 td = curthread; 417 if (!is_fpu_kern_thread(0)) { 418 error = fpu_kern_enter(td, ses->fpu_ctx, FPU_KERN_NORMAL); 419 if (error != 0) 420 goto out; 421 saved_ctx = 1; 422 } else { 423 saved_ctx = 0; 424 error = 0; 425 } 426 427 if ((enccrd->crd_flags & CRD_F_KEY_EXPLICIT) != 0) { 428 error = aesni_cipher_setup_common(ses, enccrd->crd_key, 429 enccrd->crd_klen); 430 if (error != 0) 431 goto out; 432 } 433 434 if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) { 435 if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) 436 bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN); 437 if ((enccrd->crd_flags & CRD_F_IV_PRESENT) == 0) 438 crypto_copyback(crp->crp_flags, crp->crp_buf, 439 enccrd->crd_inject, AES_BLOCK_LEN, ses->iv); 440 if (ses->algo == CRYPTO_AES_CBC) { 441 aesni_encrypt_cbc(ses->rounds, ses->enc_schedule, 442 enccrd->crd_len, buf, buf, ses->iv); 443 } else /* if (ses->algo == CRYPTO_AES_XTS) */ { 444 aesni_encrypt_xts(ses->rounds, ses->enc_schedule, 445 ses->xts_schedule, enccrd->crd_len, buf, buf, 446 ses->iv); 447 } 448 } else { 449 if ((enccrd->crd_flags & CRD_F_IV_EXPLICIT) != 0) 450 bcopy(enccrd->crd_iv, ses->iv, AES_BLOCK_LEN); 451 else 452 crypto_copydata(crp->crp_flags, crp->crp_buf, 453 enccrd->crd_inject, AES_BLOCK_LEN, ses->iv); 454 if (ses->algo == CRYPTO_AES_CBC) { 455 aesni_decrypt_cbc(ses->rounds, ses->dec_schedule, 456 enccrd->crd_len, buf, ses->iv); 457 } else /* if (ses->algo == CRYPTO_AES_XTS) */ { 458 aesni_decrypt_xts(ses->rounds, ses->dec_schedule, 459 ses->xts_schedule, enccrd->crd_len, buf, buf, 460 ses->iv); 461 } 462 } 463 if (saved_ctx) 464 fpu_kern_leave(td, ses->fpu_ctx); 465 if (allocated) 466 crypto_copyback(crp->crp_flags, crp->crp_buf, enccrd->crd_skip, 467 enccrd->crd_len, buf); 468 if ((enccrd->crd_flags & CRD_F_ENCRYPT) != 0) 469 crypto_copydata(crp->crp_flags, crp->crp_buf, 470 enccrd->crd_skip + enccrd->crd_len - AES_BLOCK_LEN, 471 AES_BLOCK_LEN, ses->iv); 472 out: 473 if (allocated) { 474 bzero(buf, enccrd->crd_len); 475 free(buf, M_AESNI); 476 } 477 return (error); 478 } 479