1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Glue Code for 3-way parallel assembler optimized version of Twofish 4 * 5 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 6 */ 7 8 #include <asm/crypto/glue_helper.h> 9 #include <asm/crypto/twofish.h> 10 #include <crypto/algapi.h> 11 #include <crypto/b128ops.h> 12 #include <crypto/internal/skcipher.h> 13 #include <crypto/twofish.h> 14 #include <linux/crypto.h> 15 #include <linux/init.h> 16 #include <linux/module.h> 17 #include <linux/types.h> 18 19 EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); 20 EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); 21 22 static int twofish_setkey_skcipher(struct crypto_skcipher *tfm, 23 const u8 *key, unsigned int keylen) 24 { 25 return twofish_setkey(&tfm->base, key, keylen); 26 } 27 28 static inline void twofish_enc_blk_3way(const void *ctx, u8 *dst, const u8 *src) 29 { 30 __twofish_enc_blk_3way(ctx, dst, src, false); 31 } 32 33 void twofish_dec_blk_cbc_3way(const void *ctx, u8 *d, const u8 *s) 34 { 35 u128 ivs[2]; 36 u128 *dst = (u128 *)d; 37 const u128 *src = (const u128 *)s; 38 39 ivs[0] = src[0]; 40 ivs[1] = src[1]; 41 42 twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); 43 44 u128_xor(&dst[1], &dst[1], &ivs[0]); 45 u128_xor(&dst[2], &dst[2], &ivs[1]); 46 } 47 EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); 48 49 static const struct common_glue_ctx twofish_enc = { 50 .num_funcs = 2, 51 .fpu_blocks_limit = -1, 52 53 .funcs = { { 54 .num_blocks = 3, 55 .fn_u = { .ecb = twofish_enc_blk_3way } 56 }, { 57 .num_blocks = 1, 58 .fn_u = { .ecb = twofish_enc_blk } 59 } } 60 }; 61 62 static const struct common_glue_ctx twofish_dec = { 63 .num_funcs = 2, 64 .fpu_blocks_limit = -1, 65 66 .funcs = { { 67 .num_blocks = 3, 68 .fn_u = { .ecb = twofish_dec_blk_3way } 69 }, { 70 .num_blocks = 1, 71 .fn_u = { .ecb = twofish_dec_blk } 72 } } 73 }; 74 75 static const struct common_glue_ctx twofish_dec_cbc = { 76 .num_funcs = 2, 77 .fpu_blocks_limit = -1, 78 79 .funcs = { { 80 .num_blocks = 3, 81 .fn_u = { .cbc = twofish_dec_blk_cbc_3way } 82 }, { 83 .num_blocks = 1, 84 .fn_u = { .cbc = twofish_dec_blk } 85 } } 86 }; 87 88 static int ecb_encrypt(struct skcipher_request *req) 89 { 90 return glue_ecb_req_128bit(&twofish_enc, req); 91 } 92 93 static int ecb_decrypt(struct skcipher_request *req) 94 { 95 return glue_ecb_req_128bit(&twofish_dec, req); 96 } 97 98 static int cbc_encrypt(struct skcipher_request *req) 99 { 100 return glue_cbc_encrypt_req_128bit(twofish_enc_blk, req); 101 } 102 103 static int cbc_decrypt(struct skcipher_request *req) 104 { 105 return glue_cbc_decrypt_req_128bit(&twofish_dec_cbc, req); 106 } 107 108 static struct skcipher_alg tf_skciphers[] = { 109 { 110 .base.cra_name = "ecb(twofish)", 111 .base.cra_driver_name = "ecb-twofish-3way", 112 .base.cra_priority = 300, 113 .base.cra_blocksize = TF_BLOCK_SIZE, 114 .base.cra_ctxsize = sizeof(struct twofish_ctx), 115 .base.cra_module = THIS_MODULE, 116 .min_keysize = TF_MIN_KEY_SIZE, 117 .max_keysize = TF_MAX_KEY_SIZE, 118 .setkey = twofish_setkey_skcipher, 119 .encrypt = ecb_encrypt, 120 .decrypt = ecb_decrypt, 121 }, { 122 .base.cra_name = "cbc(twofish)", 123 .base.cra_driver_name = "cbc-twofish-3way", 124 .base.cra_priority = 300, 125 .base.cra_blocksize = TF_BLOCK_SIZE, 126 .base.cra_ctxsize = sizeof(struct twofish_ctx), 127 .base.cra_module = THIS_MODULE, 128 .min_keysize = TF_MIN_KEY_SIZE, 129 .max_keysize = TF_MAX_KEY_SIZE, 130 .ivsize = TF_BLOCK_SIZE, 131 .setkey = twofish_setkey_skcipher, 132 .encrypt = cbc_encrypt, 133 .decrypt = cbc_decrypt, 134 }, 135 }; 136 137 static bool is_blacklisted_cpu(void) 138 { 139 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) 140 return false; 141 142 if (boot_cpu_data.x86 == 0x06 && 143 (boot_cpu_data.x86_model == 0x1c || 144 boot_cpu_data.x86_model == 0x26 || 145 boot_cpu_data.x86_model == 0x36)) { 146 /* 147 * On Atom, twofish-3way is slower than original assembler 148 * implementation. Twofish-3way trades off some performance in 149 * storing blocks in 64bit registers to allow three blocks to 150 * be processed parallel. Parallel operation then allows gaining 151 * more performance than was trade off, on out-of-order CPUs. 152 * However Atom does not benefit from this parallellism and 153 * should be blacklisted. 154 */ 155 return true; 156 } 157 158 if (boot_cpu_data.x86 == 0x0f) { 159 /* 160 * On Pentium 4, twofish-3way is slower than original assembler 161 * implementation because excessive uses of 64bit rotate and 162 * left-shifts (which are really slow on P4) needed to store and 163 * handle 128bit block in two 64bit registers. 164 */ 165 return true; 166 } 167 168 return false; 169 } 170 171 static int force; 172 module_param(force, int, 0); 173 MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); 174 175 static int __init init(void) 176 { 177 if (!force && is_blacklisted_cpu()) { 178 printk(KERN_INFO 179 "twofish-x86_64-3way: performance on this CPU " 180 "would be suboptimal: disabling " 181 "twofish-x86_64-3way.\n"); 182 return -ENODEV; 183 } 184 185 return crypto_register_skciphers(tf_skciphers, 186 ARRAY_SIZE(tf_skciphers)); 187 } 188 189 static void __exit fini(void) 190 { 191 crypto_unregister_skciphers(tf_skciphers, ARRAY_SIZE(tf_skciphers)); 192 } 193 194 module_init(init); 195 module_exit(fini); 196 197 MODULE_LICENSE("GPL"); 198 MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); 199 MODULE_ALIAS_CRYPTO("twofish"); 200 MODULE_ALIAS_CRYPTO("twofish-asm"); 201