1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Glue Code for 3-way parallel assembler optimized version of Twofish 4 * 5 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 6 */ 7 8 #include <asm/crypto/glue_helper.h> 9 #include <asm/crypto/twofish.h> 10 #include <crypto/algapi.h> 11 #include <crypto/b128ops.h> 12 #include <crypto/internal/skcipher.h> 13 #include <crypto/twofish.h> 14 #include <linux/crypto.h> 15 #include <linux/init.h> 16 #include <linux/module.h> 17 #include <linux/types.h> 18 19 EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); 20 EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); 21 22 static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, 23 const u8 *key, unsigned int keylen) 24 { 25 return twofish_setkey(&tfm->base, key, keylen); 26 } 27 28 static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, 29 const u8 *src) 30 { 31 __twofish_enc_blk_3way(ctx, dst, src, false); 32 } 33 34 static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, 35 const u8 *src) 36 { 37 __twofish_enc_blk_3way(ctx, dst, src, true); 38 } 39 40 void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) 41 { 42 u128 ivs[2]; 43 44 ivs[0] = src[0]; 45 ivs[1] = src[1]; 46 47 twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); 48 49 u128_xor(&dst[1], &dst[1], &ivs[0]); 50 u128_xor(&dst[2], &dst[2], &ivs[1]); 51 } 52 EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); 53 54 void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) 55 { 56 be128 ctrblk; 57 58 if (dst != src) 59 *dst = *src; 60 61 le128_to_be128(&ctrblk, iv); 62 le128_inc(iv); 63 64 twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); 65 u128_xor(dst, dst, (u128 *)&ctrblk); 66 } 67 EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); 68 69 void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, 70 le128 *iv) 71 { 72 be128 ctrblks[3]; 73 74 if (dst != src) { 75 dst[0] = src[0]; 76 dst[1] = src[1]; 77 dst[2] = src[2]; 78 } 79 80 le128_to_be128(&ctrblks[0], iv); 81 le128_inc(iv); 82 le128_to_be128(&ctrblks[1], iv); 83 le128_inc(iv); 84 le128_to_be128(&ctrblks[2], iv); 85 le128_inc(iv); 86 87 twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); 88 } 89 EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way); 90 91 static const struct common_glue_ctx twofish_enc = { 92 .num_funcs = 2, 93 .fpu_blocks_limit = -1, 94 95 .funcs = { { 96 .num_blocks = 3, 97 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } 98 }, { 99 .num_blocks = 1, 100 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } 101 } } 102 }; 103 104 static const struct common_glue_ctx twofish_ctr = { 105 .num_funcs = 2, 106 .fpu_blocks_limit = -1, 107 108 .funcs = { { 109 .num_blocks = 3, 110 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) } 111 }, { 112 .num_blocks = 1, 113 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) } 114 } } 115 }; 116 117 static const struct common_glue_ctx twofish_dec = { 118 .num_funcs = 2, 119 .fpu_blocks_limit = -1, 120 121 .funcs = { { 122 .num_blocks = 3, 123 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } 124 }, { 125 .num_blocks = 1, 126 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } 127 } } 128 }; 129 130 static const struct common_glue_ctx twofish_dec_cbc = { 131 .num_funcs = 2, 132 .fpu_blocks_limit = -1, 133 134 .funcs = { { 135 .num_blocks = 3, 136 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } 137 }, { 138 .num_blocks = 1, 139 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } 140 } } 141 }; 142 143 static int ecb_encrypt(struct skcipher_request *req) 144 { 145 return glue_ecb_req_128bit(&twofish_enc, req); 146 } 147 148 static int ecb_decrypt(struct skcipher_request *req) 149 { 150 return glue_ecb_req_128bit(&twofish_dec, req); 151 } 152 153 static int cbc_encrypt(struct skcipher_request *req) 154 { 155 return glue_cbc_encrypt_req_128bit(GLUE_FUNC_CAST(twofish_enc_blk), 156 req); 157 } 158 159 static int cbc_decrypt(struct skcipher_request *req) 160 { 161 return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); 162 } 163 164 static int ctr_crypt(struct skcipher_request *req) 165 { 166 return glue_ctr_req_128bit(&twofish_ctr, req); 167 } 168 169 static struct skcipher_alg tf_skciphers[] = { 170 { 171 .base.cra_name = "ecb(twofish)", 172 .base.cra_driver_name = "ecb-twofish-3way", 173 .base.cra_priority = 300, 174 .base.cra_blocksize = TF_BLOCK_SIZE, 175 .base.cra_ctxsize = sizeof(struct twofish_ctx), 176 .base.cra_module = THIS_MODULE, 177 .min_keysize = TF_MIN_KEY_SIZE, 178 .max_keysize = TF_MAX_KEY_SIZE, 179 .setkey = twofish_setkey_skcipher, 180 .encrypt = ecb_encrypt, 181 .decrypt = ecb_decrypt, 182 }, { 183 .base.cra_name = "cbc(twofish)", 184 .base.cra_driver_name = "cbc-twofish-3way", 185 .base.cra_priority = 300, 186 .base.cra_blocksize = TF_BLOCK_SIZE, 187 .base.cra_ctxsize = sizeof(struct twofish_ctx), 188 .base.cra_module = THIS_MODULE, 189 .min_keysize = TF_MIN_KEY_SIZE, 190 .max_keysize = TF_MAX_KEY_SIZE, 191 .ivsize = TF_BLOCK_SIZE, 192 .setkey = twofish_setkey_skcipher, 193 .encrypt = cbc_encrypt, 194 .decrypt = cbc_decrypt, 195 }, { 196 .base.cra_name = "ctr(twofish)", 197 .base.cra_driver_name = "ctr-twofish-3way", 198 .base.cra_priority = 300, 199 .base.cra_blocksize = 1, 200 .base.cra_ctxsize = sizeof(struct twofish_ctx), 201 .base.cra_module = THIS_MODULE, 202 .min_keysize = TF_MIN_KEY_SIZE, 203 .max_keysize = TF_MAX_KEY_SIZE, 204 .ivsize = TF_BLOCK_SIZE, 205 .chunksize = TF_BLOCK_SIZE, 206 .setkey = twofish_setkey_skcipher, 207 .encrypt = ctr_crypt, 208 .decrypt = ctr_crypt, 209 }, 210 }; 211 212 static bool is_blacklisted_cpu(void) 213 { 214 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) 215 return false; 216 217 if (boot_cpu_data.x86 == 0x06 && 218 (boot_cpu_data.x86_model == 0x1c || 219 boot_cpu_data.x86_model == 0x26 || 220 boot_cpu_data.x86_model == 0x36)) { 221 /* 222 * On Atom, twofish-3way is slower than original assembler 223 * implementation. Twofish-3way trades off some performance in 224 * storing blocks in 64bit registers to allow three blocks to 225 * be processed parallel. Parallel operation then allows gaining 226 * more performance than was trade off, on out-of-order CPUs. 227 * However Atom does not benefit from this parallellism and 228 * should be blacklisted. 229 */ 230 return true; 231 } 232 233 if (boot_cpu_data.x86 == 0x0f) { 234 /* 235 * On Pentium 4, twofish-3way is slower than original assembler 236 * implementation because excessive uses of 64bit rotate and 237 * left-shifts (which are really slow on P4) needed to store and 238 * handle 128bit block in two 64bit registers. 239 */ 240 return true; 241 } 242 243 return false; 244 } 245 246 static int force; 247 module_param(force, int, 0); 248 MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); 249 250 static int __init init(void) 251 { 252 if (!force && is_blacklisted_cpu()) { 253 printk(KERN_INFO 254 "twofish-x86_64-3way: performance on this CPU " 255 "would be suboptimal: disabling " 256 "twofish-x86_64-3way.\n"); 257 return -ENODEV; 258 } 259 260 return crypto_register_skciphers(tf_skciphers, 261 ARRAY_SIZE(tf_skciphers)); 262 } 263 264 static void __exit fini(void) 265 { 266 crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers)); 267 } 268 269 module_init(init); 270 module_exit(fini); 271 272 MODULE_LICENSE("GPL"); 273 MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); 274 MODULE_ALIAS_CRYPTO("twofish"); 275 MODULE_ALIAS_CRYPTO("twofish-asm"); 276