/*-
 * Copyright (c) 2016 The FreeBSD Foundation
 * All rights reserved.
 *
 * This software was developed by Andrew Turner under
 * sponsorship from the FreeBSD Foundation.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

/*
 * This code is built with floating-point enabled. Make sure to have entered
 * a floating-point context before calling any of these functions; an
 * illustrative sketch of how a caller might do so appears at the end of
 * this file.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>
#include <sys/queue.h>

#include <opencrypto/cryptodev.h>
#include <crypto/armv8/armv8_crypto.h>

#include <arm_neon.h>

/*
 * Encrypt a single 16-byte block, performing "rounds + 1" AES rounds with
 * the "rounds + 2" round keys in keysched. The loop handles the rounds
 * pairwise; the tail performs the last two, with the final round omitting
 * MixColumns and xoring in the last round key instead.
 */
static uint8x16_t
armv8_aes_enc(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t tmp;
	int i;

	tmp = from;
	for (i = 0; i < rounds - 1; i += 2) {
		tmp = vaeseq_u8(tmp, keysched[i]);
		tmp = vaesmcq_u8(tmp);
		tmp = vaeseq_u8(tmp, keysched[i + 1]);
		tmp = vaesmcq_u8(tmp);
	}

	tmp = vaeseq_u8(tmp, keysched[rounds - 1]);
	tmp = vaesmcq_u8(tmp);
	tmp = vaeseq_u8(tmp, keysched[rounds]);
	tmp = veorq_u8(tmp, keysched[rounds + 1]);

	return (tmp);
}

/*
 * Decrypt a single 16-byte block with the inverse cipher. This mirrors
 * armv8_aes_enc() using the AESD/AESIMC instructions, so keysched must
 * hold the decryption round keys.
 */
static uint8x16_t
armv8_aes_dec(int rounds, const uint8x16_t *keysched, const uint8x16_t from)
{
	uint8x16_t tmp;
	int i;

	tmp = from;
	for (i = 0; i < rounds - 1; i += 2) {
		tmp = vaesdq_u8(tmp, keysched[i]);
		tmp = vaesimcq_u8(tmp);
		tmp = vaesdq_u8(tmp, keysched[i + 1]);
		tmp = vaesimcq_u8(tmp);
	}

	tmp = vaesdq_u8(tmp, keysched[rounds - 1]);
	tmp = vaesimcq_u8(tmp);
	tmp = vaesdq_u8(tmp, keysched[rounds]);
	tmp = veorq_u8(tmp, keysched[rounds + 1]);

	return (tmp);
}

/*
 * CBC encryption is inherently serial: each plaintext block is xored with
 * the previous ciphertext block (the IV for the first block) before being
 * encrypted.
 */
void
armv8_aes_encrypt_cbc(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
{
	uint8x16_t tot, ivreg, tmp;
	size_t i;

	len /= AES_BLOCK_LEN;
	ivreg = vld1q_u8(iv);
	for (i = 0; i < len; i++) {
		tmp = vld1q_u8(from);
		tot = armv8_aes_enc(rounds - 1, key_schedule,
		    veorq_u8(tmp, ivreg));
		ivreg = tot;
		vst1q_u8(to, tot);
		from += AES_BLOCK_LEN;
		to += AES_BLOCK_LEN;
	}
}

/*
 * CBC decryption, in place: each decrypted block is xored with the
 * previous ciphertext block (the IV for the first block), which is saved
 * before the buffer is overwritten.
 */
void
armv8_aes_decrypt_cbc(int rounds, const void *key_schedule, size_t len,
    uint8_t *buf, const uint8_t iv[static AES_BLOCK_LEN])
{
	uint8x16_t ivreg, nextiv, tmp;
	size_t i;

	len /= AES_BLOCK_LEN;
	ivreg = vld1q_u8(iv);
	for (i = 0; i < len; i++) {
		nextiv = vld1q_u8(buf);
		tmp = armv8_aes_dec(rounds - 1, key_schedule, nextiv);
		vst1q_u8(buf, veorq_u8(tmp, ivreg));
		ivreg = nextiv;
		buf += AES_BLOCK_LEN;
	}
}

#define	AES_XTS_BLOCKSIZE	16
#define	AES_XTS_IVSIZE		8
#define	AES_XTS_ALPHA		0x87	/* GF(2^128) generator polynomial */

/*
 * Advance the XTS tweak: multiply it by x (alpha) in GF(2^128). Each
 * 32-bit lane is shifted left by one; a lane whose sign bit was set
 * carries a one into the next lane, and a carry out of the top lane is
 * reduced back into the low lane with the polynomial 0x87.
 */
static inline int32x4_t
xts_crank_lfsr(int32x4_t inp)
{
	const int32x4_t alphamask = {AES_XTS_ALPHA, 1, 1, 1};
	int32x4_t xtweak, ret;

	/* set up xor mask */
	xtweak = vextq_s32(inp, inp, 3);
	xtweak = vshrq_n_s32(xtweak, 31);
	xtweak &= alphamask;

	/* next term */
	ret = vshlq_n_s32(inp, 1);
	ret ^= xtweak;

	return (ret);
}

/*
 * Process one XTS block: xor with the tweak, encrypt or decrypt, xor with
 * the tweak again, then advance the tweak for the next block.
 */
static void
armv8_aes_crypt_xts_block(int rounds, const uint8x16_t *key_schedule,
    uint8x16_t *tweak, const uint8_t *from, uint8_t *to, int do_encrypt)
{
	uint8x16_t block;

	block = vld1q_u8(from) ^ *tweak;

	if (do_encrypt)
		block = armv8_aes_enc(rounds - 1, key_schedule, block);
	else
		block = armv8_aes_dec(rounds - 1, key_schedule, block);

	vst1q_u8(to, block ^ *tweak);

	*tweak = vreinterpretq_u8_s32(
	    xts_crank_lfsr(vreinterpretq_s32_u8(*tweak)));
}

static void
armv8_aes_crypt_xts(int rounds, const uint8x16_t *data_schedule,
    const uint8x16_t *tweak_schedule, size_t len, const uint8_t *from,
    uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN], int do_encrypt)
{
	uint8x16_t tweakreg;
	uint8_t tweak[AES_XTS_BLOCKSIZE] __aligned(16);
	size_t i, cnt;

	/*
	 * Prepare tweak as E_k2(IV). IV is specified as LE representation
	 * of a 64-bit block number which we allow to be passed in directly.
	 */
#if BYTE_ORDER == LITTLE_ENDIAN
	bcopy(iv, tweak, AES_XTS_IVSIZE);
	/* Last 64 bits of IV are always zero. */
	bzero(tweak + AES_XTS_IVSIZE, AES_XTS_IVSIZE);
#else
#error Only LITTLE_ENDIAN architectures are supported.
#endif
	tweakreg = vld1q_u8(tweak);
	tweakreg = armv8_aes_enc(rounds - 1, tweak_schedule, tweakreg);

	cnt = len / AES_XTS_BLOCKSIZE;
	for (i = 0; i < cnt; i++) {
		armv8_aes_crypt_xts_block(rounds, data_schedule, &tweakreg,
		    from, to, do_encrypt);
		from += AES_XTS_BLOCKSIZE;
		to += AES_XTS_BLOCKSIZE;
	}
}

void
armv8_aes_encrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[static AES_BLOCK_LEN])
{

	armv8_aes_crypt_xts(rounds, data_schedule, tweak_schedule, len, from,
	    to, iv, 1);
}

void
armv8_aes_decrypt_xts(int rounds, const void *data_schedule,
    const void *tweak_schedule, size_t len, const uint8_t *from, uint8_t *to,
    const uint8_t iv[static AES_BLOCK_LEN])
{

	armv8_aes_crypt_xts(rounds, data_schedule, tweak_schedule, len, from,
	    to, iv, 0);
}
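
/*
 * Illustrative sketch (not part of the original file): how a caller might
 * enter a floating-point context before using the functions above. It
 * assumes FreeBSD's fpu_kern_*() kernel-FPU API; the wrapper name, the
 * headers, and the exact flags are assumptions and may differ from what a
 * real driver uses, so the block is kept disabled.
 */
#if 0
#include <sys/proc.h>		/* curthread */
#include <machine/vfp.h>	/* fpu_kern_enter() and friends (assumed) */

static void
example_cbc_encrypt(int rounds, const void *key_schedule, size_t len,
    const uint8_t *from, uint8_t *to, const uint8_t iv[static AES_BLOCK_LEN])
{
	struct fpu_kern_ctx *ctx;

	/* Enter a floating-point context before touching the NEON unit. */
	ctx = fpu_kern_alloc_ctx(FPU_KERN_NORMAL);
	fpu_kern_enter(curthread, ctx, FPU_KERN_NORMAL);

	armv8_aes_encrypt_cbc(rounds, key_schedule, len, from, to, iv);

	/* Leave the context and release it once the NEON work is done. */
	fpu_kern_leave(curthread, ctx);
	fpu_kern_free_ctx(ctx);
}
#endif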