1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Glue Code for 3-way parallel assembler optimized version of Twofish 4 * 5 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 6 */ 7 8 #include <asm/crypto/glue_helper.h> 9 #include <asm/crypto/twofish.h> 10 #include <crypto/algapi.h> 11 #include <crypto/b128ops.h> 12 #include <crypto/internal/skcipher.h> 13 #include <crypto/twofish.h> 14 #include <linux/crypto.h> 15 #include <linux/init.h> 16 #include <linux/module.h> 17 #include <linux/types.h> 18 19 EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); 20 EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); 21 22 static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, 23 const u8 *key, unsigned int keylen) 24 { 25 return twofish_setkey(&tfm->base, key, keylen); 26 } 27 28 static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) 29 { 30 __twofish_enc_blk_3way(ctx, dst, src, false); 31 } 32 33 static inline void twofish_enc_blk_xor_3way(const void *ctx, u8 *dst, 34 const u8 *src) 35 { 36 __twofish_enc_blk_3way(ctx, dst, src, true); 37 } 38 39 void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s) 40 { 41 u128 ivs[2]; 42 u128 *dst = (u128 *)d; 43 const u128 *src = (const u128 *)s; 44 45 ivs[0] = src[0]; 46 ivs[1] = src[1]; 47 48 twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); 49 50 u128_xor(&dst[1], &dst[1], &ivs[0]); 51 u128_xor(&dst[2], &dst[2], &ivs[1]); 52 } 53 EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); 54 55 void twofish_enc_blk_ctr(const void *ctx, u8 *d, const u8 *s, le128 *iv) 56 { 57 be128 ctrblk; 58 u128 *dst = (u128 *)d; 59 const u128 *src = (const u128 *)s; 60 61 if (dst != src) 62 *dst = *src; 63 64 le128_to_be128(&ctrblk, iv); 65 le128_inc(iv); 66 67 twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); 68 u128_xor(dst, dst, (u128 *)&ctrblk); 69 } 70 EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); 71 72 void twofish_enc_blk_ctr_3way(const void *ctx, u8 *d, const u8 *s, le128 *iv) 73 { 74 be128 ctrblks[3]; 75 u128 *dst = (u128 *)d; 76 const u128 *src = (const u128 *)s; 77 78 if (dst != src) { 79 dst[0] = src[0]; 80 dst[1] = src[1]; 81 dst[2] = src[2]; 82 } 83 84 le128_to_be128(&ctrblks[0], iv); 85 le128_inc(iv); 86 le128_to_be128(&ctrblks[1], iv); 87 le128_inc(iv); 88 le128_to_be128(&ctrblks[2], iv); 89 le128_inc(iv); 90 91 twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); 92 } 93 EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way); 94 95 static const struct common_glue_ctx twofish_enc = { 96 .num_funcs = 2, 97 .fpu_blocks_limit = -1, 98 99 .funcs = { { 100 .num_blocks = 3, 101 .fn_u = { .ecb = twofish_enc_blk_3way } 102 }, { 103 .num_blocks = 1, 104 .fn_u = { .ecb = twofish_enc_blk } 105 } } 106 }; 107 108 static const struct common_glue_ctx twofish_ctr = { 109 .num_funcs = 2, 110 .fpu_blocks_limit = -1, 111 112 .funcs = { { 113 .num_blocks = 3, 114 .fn_u = { .ctr = twofish_enc_blk_ctr_3way } 115 }, { 116 .num_blocks = 1, 117 .fn_u = { .ctr = twofish_enc_blk_ctr } 118 } } 119 }; 120 121 static const struct common_glue_ctx twofish_dec = { 122 .num_funcs = 2, 123 .fpu_blocks_limit = -1, 124 125 .funcs = { { 126 .num_blocks = 3, 127 .fn_u = { .ecb = twofish_dec_blk_3way } 128 }, { 129 .num_blocks = 1, 130 .fn_u = { .ecb = twofish_dec_blk } 131 } } 132 }; 133 134 static const struct common_glue_ctx twofish_dec_cbc = { 135 .num_funcs = 2, 136 .fpu_blocks_limit = -1, 137 138 .funcs = { { 139 .num_blocks = 3, 140 .fn_u = { .cbc = twofish_dec_blk_cbc_3way } 141 }, { 142 .num_blocks = 1, 143 .fn_u = { .cbc = twofish_dec_blk } 144 } } 145 }; 146 147 static int ecb_encrypt(struct skcipher_request *req) 148 { 149 return glue_ecb_req_128bit(&twofish_enc, req); 150 } 151 152 static int ecb_decrypt(struct skcipher_request *req) 153 { 154 return glue_ecb_req_128bit(&twofish_dec, req); 155 } 156 157 static int cbc_encrypt(struct skcipher_request *req) 158 { 159 return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req); 160 } 161 162 static int cbc_decrypt(struct skcipher_request *req) 163 { 164 return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); 165 } 166 167 static int ctr_crypt(struct skcipher_request *req) 168 { 169 return glue_ctr_req_128bit(&twofish_ctr, req); 170 } 171 172 static struct skcipher_alg tf_skciphers[] = { 173 { 174 .base.cra_name = "ecb(twofish)", 175 .base.cra_driver_name = "ecb-twofish-3way", 176 .base.cra_priority = 300, 177 .base.cra_blocksize = TF_BLOCK_SIZE, 178 .base.cra_ctxsize = sizeof(struct twofish_ctx), 179 .base.cra_module = THIS_MODULE, 180 .min_keysize = TF_MIN_KEY_SIZE, 181 .max_keysize = TF_MAX_KEY_SIZE, 182 .setkey = twofish_setkey_skcipher, 183 .encrypt = ecb_encrypt, 184 .decrypt = ecb_decrypt, 185 }, { 186 .base.cra_name = "cbc(twofish)", 187 .base.cra_driver_name = "cbc-twofish-3way", 188 .base.cra_priority = 300, 189 .base.cra_blocksize = TF_BLOCK_SIZE, 190 .base.cra_ctxsize = sizeof(struct twofish_ctx), 191 .base.cra_module = THIS_MODULE, 192 .min_keysize = TF_MIN_KEY_SIZE, 193 .max_keysize = TF_MAX_KEY_SIZE, 194 .ivsize = TF_BLOCK_SIZE, 195 .setkey = twofish_setkey_skcipher, 196 .encrypt = cbc_encrypt, 197 .decrypt = cbc_decrypt, 198 }, { 199 .base.cra_name = "ctr(twofish)", 200 .base.cra_driver_name = "ctr-twofish-3way", 201 .base.cra_priority = 300, 202 .base.cra_blocksize = 1, 203 .base.cra_ctxsize = sizeof(struct twofish_ctx), 204 .base.cra_module = THIS_MODULE, 205 .min_keysize = TF_MIN_KEY_SIZE, 206 .max_keysize = TF_MAX_KEY_SIZE, 207 .ivsize = TF_BLOCK_SIZE, 208 .chunksize = TF_BLOCK_SIZE, 209 .setkey = twofish_setkey_skcipher, 210 .encrypt = ctr_crypt, 211 .decrypt = ctr_crypt, 212 }, 213 }; 214 215 static bool is_blacklisted_cpu(void) 216 { 217 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) 218 return false; 219 220 if (boot_cpu_data.x86 == 0x06 && 221 (boot_cpu_data.x86_model == 0x1c || 222 boot_cpu_data.x86_model == 0x26 || 223 boot_cpu_data.x86_model == 0x36)) { 224 /* 225 * On Atom, twofish-3way is slower than original assembler 226 * implementation. Twofish-3way trades off some performance in 227 * storing blocks in 64bit registers to allow three blocks to 228 * be processed parallel. Parallel operation then allows gaining 229 * more performance than was trade off, on out-of-order CPUs. 230 * However Atom does not benefit from this parallellism and 231 * should be blacklisted. 232 */ 233 return true; 234 } 235 236 if (boot_cpu_data.x86 == 0x0f) { 237 /* 238 * On Pentium 4, twofish-3way is slower than original assembler 239 * implementation because excessive uses of 64bit rotate and 240 * left-shifts (which are really slow on P4) needed to store and 241 * handle 128bit block in two 64bit registers. 242 */ 243 return true; 244 } 245 246 return false; 247 } 248 249 static int force; 250 module_param(force, int, 0); 251 MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); 252 253 static int __init init(void) 254 { 255 if (!force && is_blacklisted_cpu()) { 256 printk(KERN_INFO 257 "twofish-x86_64-3way: performance on this CPU " 258 "would be suboptimal: disabling " 259 "twofish-x86_64-3way.\n"); 260 return -ENODEV; 261 } 262 263 return crypto_register_skciphers(tf_skciphers, 264 ARRAY_SIZE(tf_skciphers)); 265 } 266 267 static void __exit fini(void) 268 { 269 crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers)); 270 } 271 272 module_init(init); 273 module_exit(fini); 274 275 MODULE_LICENSE("GPL"); 276 MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); 277 MODULE_ALIAS_CRYPTO("twofish"); 278 MODULE_ALIAS_CRYPTO("twofish-asm"); 279