/*-
 * Copyright (c) 2016 The FreeBSD Foundation
 * Copyright (c) 2020 Ampere Computing
 * All rights reserved.
 *
 * This software was developed by Andrew Turner under
 * sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * This file is derived from aesni_wrap.c:
 * Copyright (C) 2008 Damien Miller <djm@mindrot.org>
 * Copyright (c) 2010 Konstantin Belousov <kib@FreeBSD.org>
 * Copyright (c) 2010-2011 Pawel Jakub Dawidek <pawel@dawidek.net>
 * Copyright 2012-2013 John-Mark Gurney <jmg@FreeBSD.org>
 * Copyright (c) 2014 The FreeBSD Foundation
 */

/*
 * This code is built with floating-point enabled. Make sure to have entered
 * a floating-point context before calling any of these functions.
 */
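
/*
 * For in-kernel callers this typically means bracketing calls with the
 * FPU kernel-context API, along these lines (a sketch only; context
 * allocation and the exact flags depend on the caller):
 *
 *	fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL);
 *	armv8_aes_encrypt_cbc(&key, len, from, to, iv);
 *	fpu_kern_leave(curthread, ctx);
 */
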
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/queue.h>

#include <opencrypto/cryptodev.h>
#include <opencrypto/gmac.h>
#include <crypto/rijndael/rijndael.h>
#include <crypto/armv8/armv8_crypto.h>

#include <arm_neon.h>

static uint8x16_t
armv8_aes_enc(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t tmp;
	int i;

	tmp = from;
	for (i = 0; i < rounds - 1; i += 2) {
		tmp = vaeseq_u8(tmp, keysched[i]);
		tmp = vaesmcq_u8(tmp);
		tmp = vaeseq_u8(tmp, keysched[i + 1]);
		tmp = vaesmcq_u8(tmp);
	}

	tmp = vaeseq_u8(tmp, keysched[rounds - 1]);
	tmp = vaesmcq_u8(tmp);
	tmp = vaeseq_u8(tmp, keysched[rounds]);
	tmp = veorq_u8(tmp, keysched[rounds + 1]);

	return (tmp);
}

static uint8x16_t
armv8_aes_dec(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t tmp;
	int i;

	tmp = from;
	for (i = 0; i < rounds - 1; i += 2) {
		tmp = vaesdq_u8(tmp, keysched[i]);
		tmp = vaesimcq_u8(tmp);
		tmp = vaesdq_u8(tmp, keysched[i + 1]);
		tmp = vaesimcq_u8(tmp);
	}

	tmp = vaesdq_u8(tmp, keysched[rounds - 1]);
	tmp = vaesimcq_u8(tmp);
	tmp = vaesdq_u8(tmp, keysched[rounds]);
	tmp = veorq_u8(tmp, keysched[rounds + 1]);

	return (tmp);
}

void
armv8_aes_encrypt_cbc(const AES_key_t *key, size_t len,
    const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
{
	uint8x16_t tot, ivreg, tmp;
	size_t i;

	len /= AES_BLOCK_LEN;
	ivreg = vld1q_u8(iv);
	for (i = 0; i < len; i++) {
		tmp = vld1q_u8(from);
		tot = armv8_aes_enc(key->aes_rounds - 1,
		    (const void *)key->aes_key, veorq_u8(tmp, ivreg));
		ivreg = tot;
		vst1q_u8(to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

void
armv8_aes_decrypt_cbc(const AES_key_t *key, size_t len,
    uint8_t *buf, const uint8_t iv[static AES_BLOCK_LEN])
{
	uint8x16_t ivreg, nextiv, tmp;
	size_t i;

	len /= AES_BLOCK_LEN;
	ivreg = vld1q_u8(iv);
	for (i = 0; i < len; i++) {
		nextiv = vld1q_u8(buf);
		tmp = armv8_aes_dec(key->aes_rounds - 1,
		    (const void *)key->aes_key, nextiv);
		vst1q_u8(buf, veorq_u8(tmp, ivreg));
		ivreg = nextiv;
		buf += AES_BLOCK_LEN;
	}
}

#define	AES_XTS_BLOCKSIZE	16
#define	AES_XTS_IVSIZE		8
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */

static inline int32x4_t
xts_crank_lfsr(int32x4_t inp)
{
	const int32x4_t alphamask = {AES_XTS_ALPHA, 1, 1, 1};
	int32x4_t xtweak, ret;

	/* set up xor mask */
	xtweak = vextq_s32(inp, inp, 3);
	xtweak = vshrq_n_s32(xtweak, 31);
	xtweak &= alphamask;

	/* next term */
	ret = vshlq_n_s32(inp, 1);
	ret ^= xtweak;

	return (ret);
}
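
/*
 * The function above multiplies the 128-bit tweak by x in GF(2^128) and
 * reduces with the XTS polynomial: every 32-bit lane shifts left by one
 * bit, each lane's lost top bit carries into the next lane's LSB, and
 * the top bit of the whole tweak folds back in as 0x87.  A scalar C
 * equivalent, for reference only (t[0] and t[1] are the little-endian
 * 64-bit halves of the tweak):
 *
 *	uint64_t carry = t[1] >> 63;
 *	t[1] = (t[1] << 1) | (t[0] >> 63);
 *	t[0] = (t[0] << 1) ^ (carry * AES_XTS_ALPHA);
 */
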
static void
armv8_aes_crypt_xts_block(int rounds, const uint8x16_t *key_schedule,
    uint8x16_t *tweak, const uint8_t *from, uint8_t *to, int do_encrypt)
{
	uint8x16_t block;

	block = vld1q_u8(from) ^ *tweak;

	if (do_encrypt)
		block = armv8_aes_enc(rounds - 1, key_schedule, block);
	else
		block = armv8_aes_dec(rounds - 1, key_schedule, block);

	vst1q_u8(to, block ^ *tweak);

	*tweak = vreinterpretq_u8_s32(xts_crank_lfsr(vreinterpretq_s32_u8(*tweak)));
}

static void
armv8_aes_crypt_xts(int rounds, const uint8x16_t *data_schedule,
    const uint8x16_t *tweak_schedule, size_t len, const uint8_t *from,
    uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
{
	uint8x16_t tweakreg;
	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
	size_t i, cnt;

	/*
	 * Prepare tweak as E_k2(IV). IV is specified as LE representation
	 * of a 64-bit block number which we allow to be passed in directly.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	bcopy(iv, tweak, AES_XTS_IVSIZE);
	/* Last 64 bits of IV are always zero. */
	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
	tweakreg = vld1q_u8(tweak);
	tweakreg = armv8_aes_enc(rounds - 1, tweak_schedule, tweakreg);

	cnt = len / AES_XTS_BLOCKSIZE;
	for (i = 0; i < cnt; i++) {
		armv8_aes_crypt_xts_block(rounds, data_schedule, &tweakreg,
		    from, to, do_encrypt);
		from += AES_XTS_BLOCKSIZE;
		to += AES_XTS_BLOCKSIZE;
	}
}

void
armv8_aes_encrypt_xts(AES_key_t *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[static AES_BLOCK_LEN])
{

	armv8_aes_crypt_xts(data_schedule->aes_rounds,
	    (const void *)&data_schedule->aes_key, tweak_schedule, len, from,
	    to, iv, 1);
}

void
armv8_aes_decrypt_xts(AES_key_t *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[static AES_BLOCK_LEN])
{

	armv8_aes_crypt_xts(data_schedule->aes_rounds,
	    (const void *)&data_schedule->aes_key, tweak_schedule, len, from,
	    to, iv, 0);
}

#define	AES_INC_COUNTER(counter)				\
	do {							\
		for (int pos = AES_BLOCK_LEN - 1;		\
		     pos >= 0; pos--)				\
			if (++(counter)[pos])			\
				break;				\
	} while (0)

struct armv8_gcm_state {
	__uint128_val_t EK0;
	__uint128_val_t EKi;
	__uint128_val_t Xi;
	__uint128_val_t lenblock;
	uint8_t aes_counter[AES_BLOCK_LEN];
};
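
/*
 * With the 96-bit IVs used here, GCM defines the initial counter block
 * as the IV padded with a 32-bit big-endian counter set to 1.  E_K of
 * that block (EK0 below) masks the final GHASH value to form the tag;
 * bulk keystream generation starts from counter value 2:
 *
 *	IV || 00000001  ->  EK0 (tag mask)
 *	IV || 00000002  ->  keystream for the first data block
 */
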
void
armv8_aes_encrypt_gcm(AES_key_t *aes_key, size_t len,
    const uint8_t *from, uint8_t *to,
    size_t authdatalen, const uint8_t *authdata,
    uint8_t tag[static GMAC_DIGEST_LEN],
    const uint8_t iv[static AES_GCM_IV_LEN],
    const __uint128_val_t *Htable)
{
	struct armv8_gcm_state s;
	const uint64_t *from64;
	uint64_t *to64;
	uint8_t block[AES_BLOCK_LEN];
	size_t i, trailer;

	bzero(&s.aes_counter, AES_BLOCK_LEN);
	memcpy(s.aes_counter, iv, AES_GCM_IV_LEN);

	/* Set up the counter */
	s.aes_counter[AES_BLOCK_LEN - 1] = 1;

	/* EK0 for a final GMAC round */
	aes_v8_encrypt(s.aes_counter, s.EK0.c, aes_key);

	/* GCM starts with 2 as counter, 1 is used for final xor of tag. */
	s.aes_counter[AES_BLOCK_LEN - 1] = 2;

	memset(s.Xi.c, 0, sizeof(s.Xi.c));
	trailer = authdatalen % AES_BLOCK_LEN;
	if (authdatalen - trailer > 0) {
		gcm_ghash_v8(s.Xi.u, Htable, authdata, authdatalen - trailer);
		authdata += authdatalen - trailer;
	}
	if (trailer > 0 || authdatalen == 0) {
		memset(block, 0, sizeof(block));
		memcpy(block, authdata, trailer);
		gcm_ghash_v8(s.Xi.u, Htable, block, AES_BLOCK_LEN);
	}

	from64 = (const uint64_t *)from;
	to64 = (uint64_t *)to;
	trailer = len % AES_BLOCK_LEN;

	for (i = 0; i < (len - trailer); i += AES_BLOCK_LEN) {
		aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
		AES_INC_COUNTER(s.aes_counter);
		to64[0] = from64[0] ^ s.EKi.u[0];
		to64[1] = from64[1] ^ s.EKi.u[1];
		gcm_ghash_v8(s.Xi.u, Htable, (uint8_t *)to64, AES_BLOCK_LEN);

		to64 += 2;
		from64 += 2;
	}

	to += (len - trailer);
	from += (len - trailer);

	if (trailer) {
		aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
		AES_INC_COUNTER(s.aes_counter);
		memset(block, 0, sizeof(block));
		for (i = 0; i < trailer; i++) {
			block[i] = to[i] = from[i] ^ s.EKi.c[i];
		}

		gcm_ghash_v8(s.Xi.u, Htable, block, AES_BLOCK_LEN);
	}

	/* Lengths block */
	s.lenblock.u[0] = s.lenblock.u[1] = 0;
	s.lenblock.d[1] = htobe32(authdatalen * 8);
	s.lenblock.d[3] = htobe32(len * 8);
	gcm_ghash_v8(s.Xi.u, Htable, s.lenblock.c, AES_BLOCK_LEN);

	s.Xi.u[0] ^= s.EK0.u[0];
	s.Xi.u[1] ^= s.EK0.u[1];
	memcpy(tag, s.Xi.c, GMAC_DIGEST_LEN);

	explicit_bzero(&s, sizeof(s));
}
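
/*
 * Decryption runs GHASH over the ciphertext and verifies the tag before
 * generating any plaintext, so on EBADMSG the output buffer has not
 * been written.
 */
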
int
armv8_aes_decrypt_gcm(AES_key_t *aes_key, size_t len,
    const uint8_t *from, uint8_t *to,
    size_t authdatalen, const uint8_t *authdata,
    const uint8_t tag[static GMAC_DIGEST_LEN],
    const uint8_t iv[static AES_GCM_IV_LEN],
    const __uint128_val_t *Htable)
{
	struct armv8_gcm_state s;
	const uint64_t *from64;
	uint64_t *to64;
	uint8_t block[AES_BLOCK_LEN];
	size_t i, trailer;
	int error;

	error = 0;
	bzero(&s.aes_counter, AES_BLOCK_LEN);
	memcpy(s.aes_counter, iv, AES_GCM_IV_LEN);

	/* Set up the counter */
	s.aes_counter[AES_BLOCK_LEN - 1] = 1;

	/* EK0 for a final GMAC round */
	aes_v8_encrypt(s.aes_counter, s.EK0.c, aes_key);

	memset(s.Xi.c, 0, sizeof(s.Xi.c));
	trailer = authdatalen % AES_BLOCK_LEN;
	if (authdatalen - trailer > 0) {
		gcm_ghash_v8(s.Xi.u, Htable, authdata, authdatalen - trailer);
		authdata += authdatalen - trailer;
	}
	if (trailer > 0 || authdatalen == 0) {
		memset(block, 0, sizeof(block));
		memcpy(block, authdata, trailer);
		gcm_ghash_v8(s.Xi.u, Htable, block, AES_BLOCK_LEN);
	}

	trailer = len % AES_BLOCK_LEN;
	if (len - trailer > 0)
		gcm_ghash_v8(s.Xi.u, Htable, from, len - trailer);
	if (trailer > 0) {
		memset(block, 0, sizeof(block));
		memcpy(block, from + len - trailer, trailer);
		gcm_ghash_v8(s.Xi.u, Htable, block, AES_BLOCK_LEN);
	}

	/* Lengths block */
	s.lenblock.u[0] = s.lenblock.u[1] = 0;
	s.lenblock.d[1] = htobe32(authdatalen * 8);
	s.lenblock.d[3] = htobe32(len * 8);
	gcm_ghash_v8(s.Xi.u, Htable, s.lenblock.c, AES_BLOCK_LEN);

	s.Xi.u[0] ^= s.EK0.u[0];
	s.Xi.u[1] ^= s.EK0.u[1];
	if (timingsafe_bcmp(tag, s.Xi.c, GMAC_DIGEST_LEN) != 0) {
		error = EBADMSG;
		goto out;
	}

	/* GCM starts with 2 as counter, 1 is used for final xor of tag. */
	s.aes_counter[AES_BLOCK_LEN - 1] = 2;

	from64 = (const uint64_t *)from;
	to64 = (uint64_t *)to;

	for (i = 0; i < (len - trailer); i += AES_BLOCK_LEN) {
		aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
		AES_INC_COUNTER(s.aes_counter);
		to64[0] = from64[0] ^ s.EKi.u[0];
		to64[1] = from64[1] ^ s.EKi.u[1];
		to64 += 2;
		from64 += 2;
	}

	to += (len - trailer);
	from += (len - trailer);

	if (trailer) {
		aes_v8_encrypt(s.aes_counter, s.EKi.c, aes_key);
		AES_INC_COUNTER(s.aes_counter);
		for (i = 0; i < trailer; i++)
			to[i] = from[i] ^ s.EKi.c[i];
	}

out:
	explicit_bzero(&s, sizeof(s));
	return (error);
}
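
/*
 * Example use of the routine above (a sketch; the session layout is
 * illustrative, not part of this file, and note that GCM uses the
 * encryption key schedule in both directions):
 *
 *	error = armv8_aes_decrypt_gcm(&sess->enc_schedule, ctlen, ct, pt,
 *	    aadlen, aad, tag, iv, sess->Htable);
 *	if (error == EBADMSG) {
 *		... reject the message; 'pt' was not written ...
 *	}
 */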