1 /* 2 * Glue Code for 3-way parallel assembler optimized version of Twofish 3 * 4 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 19 * USA 20 * 21 */ 22 23 #include <asm/processor.h> 24 #include <linux/crypto.h> 25 #include <linux/init.h> 26 #include <linux/module.h> 27 #include <linux/types.h> 28 #include <crypto/algapi.h> 29 #include <crypto/twofish.h> 30 #include <crypto/b128ops.h> 31 #include <asm/crypto/twofish.h> 32 #include <asm/crypto/glue_helper.h> 33 34 EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); 35 EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); 36 37 static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, 38 const u8 *src) 39 { 40 __twofish_enc_blk_3way(ctx, dst, src, false); 41 } 42 43 static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, 44 const u8 *src) 45 { 46 __twofish_enc_blk_3way(ctx, dst, src, true); 47 } 48 49 void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) 50 { 51 u128 ivs[2]; 52 53 ivs[0] = src[0]; 54 ivs[1] = src[1]; 55 56 twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); 57 58 u128_xor(&dst[1], &dst[1], &ivs[0]); 59 u128_xor(&dst[2], &dst[2], &ivs[1]); 60 } 61 EXPORT_SYMBOL_GPL(twofish_dec_blk_cbc_3way); 62 63 void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, le128 *iv) 64 { 65 be128 ctrblk; 66 67 if (dst != src) 68 *dst = *src; 69 70 le128_to_be128(&ctrblk, iv); 71 le128_inc(iv); 72 73 twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); 74 u128_xor(dst, dst, (u128 *)&ctrblk); 75 } 76 EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr); 77 78 void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, 79 le128 *iv) 80 { 81 be128 ctrblks[3]; 82 83 if (dst != src) { 84 dst[0] = src[0]; 85 dst[1] = src[1]; 86 dst[2] = src[2]; 87 } 88 89 le128_to_be128(&ctrblks[0], iv); 90 le128_inc(iv); 91 le128_to_be128(&ctrblks[1], iv); 92 le128_inc(iv); 93 le128_to_be128(&ctrblks[2], iv); 94 le128_inc(iv); 95 96 twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); 97 } 98 EXPORT_SYMBOL_GPL(twofish_enc_blk_ctr_3way); 99 100 static const struct common_glue_ctx twofish_enc = { 101 .num_funcs = 2, 102 .fpu_blocks_limit = -1, 103 104 .funcs = { { 105 .num_blocks = 3, 106 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } 107 }, { 108 .num_blocks = 1, 109 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } 110 } } 111 }; 112 113 static const struct common_glue_ctx twofish_ctr = { 114 .num_funcs = 2, 115 .fpu_blocks_limit = -1, 116 117 .funcs = { { 118 .num_blocks = 3, 119 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) } 120 }, { 121 .num_blocks = 1, 122 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) } 123 } } 124 }; 125 126 static const struct common_glue_ctx twofish_dec = { 127 .num_funcs = 2, 128 .fpu_blocks_limit = -1, 129 130 .funcs = { { 131 .num_blocks = 3, 132 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } 133 }, { 134 .num_blocks = 1, 135 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } 136 } } 137 }; 138 139 static const struct common_glue_ctx twofish_dec_cbc = { 140 .num_funcs = 2, 141 .fpu_blocks_limit = -1, 142 143 .funcs = { { 144 .num_blocks = 3, 145 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } 146 }, { 147 .num_blocks = 1, 148 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } 149 } } 150 }; 151 152 static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 153 struct scatterlist *src, unsigned int nbytes) 154 { 155 return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); 156 } 157 158 static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 159 struct scatterlist *src, unsigned int nbytes) 160 { 161 return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); 162 } 163 164 static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 165 struct scatterlist *src, unsigned int nbytes) 166 { 167 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, 168 dst, src, nbytes); 169 } 170 171 static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 172 struct scatterlist *src, unsigned int nbytes) 173 { 174 return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, 175 nbytes); 176 } 177 178 static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, 179 struct scatterlist *src, unsigned int nbytes) 180 { 181 return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); 182 } 183 184 static struct crypto_alg tf_algs[] = { { 185 .cra_name = "ecb(twofish)", 186 .cra_driver_name = "ecb-twofish-3way", 187 .cra_priority = 300, 188 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 189 .cra_blocksize = TF_BLOCK_SIZE, 190 .cra_ctxsize = sizeof(struct twofish_ctx), 191 .cra_alignmask = 0, 192 .cra_type = &crypto_blkcipher_type, 193 .cra_module = THIS_MODULE, 194 .cra_u = { 195 .blkcipher = { 196 .min_keysize = TF_MIN_KEY_SIZE, 197 .max_keysize = TF_MAX_KEY_SIZE, 198 .setkey = twofish_setkey, 199 .encrypt = ecb_encrypt, 200 .decrypt = ecb_decrypt, 201 }, 202 }, 203 }, { 204 .cra_name = "cbc(twofish)", 205 .cra_driver_name = "cbc-twofish-3way", 206 .cra_priority = 300, 207 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 208 .cra_blocksize = TF_BLOCK_SIZE, 209 .cra_ctxsize = sizeof(struct twofish_ctx), 210 .cra_alignmask = 0, 211 .cra_type = &crypto_blkcipher_type, 212 .cra_module = THIS_MODULE, 213 .cra_u = { 214 .blkcipher = { 215 .min_keysize = TF_MIN_KEY_SIZE, 216 .max_keysize = TF_MAX_KEY_SIZE, 217 .ivsize = TF_BLOCK_SIZE, 218 .setkey = twofish_setkey, 219 .encrypt = cbc_encrypt, 220 .decrypt = cbc_decrypt, 221 }, 222 }, 223 }, { 224 .cra_name = "ctr(twofish)", 225 .cra_driver_name = "ctr-twofish-3way", 226 .cra_priority = 300, 227 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 228 .cra_blocksize = 1, 229 .cra_ctxsize = sizeof(struct twofish_ctx), 230 .cra_alignmask = 0, 231 .cra_type = &crypto_blkcipher_type, 232 .cra_module = THIS_MODULE, 233 .cra_u = { 234 .blkcipher = { 235 .min_keysize = TF_MIN_KEY_SIZE, 236 .max_keysize = TF_MAX_KEY_SIZE, 237 .ivsize = TF_BLOCK_SIZE, 238 .setkey = twofish_setkey, 239 .encrypt = ctr_crypt, 240 .decrypt = ctr_crypt, 241 }, 242 }, 243 } }; 244 245 static bool is_blacklisted_cpu(void) 246 { 247 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) 248 return false; 249 250 if (boot_cpu_data.x86 == 0x06 && 251 (boot_cpu_data.x86_model == 0x1c || 252 boot_cpu_data.x86_model == 0x26 || 253 boot_cpu_data.x86_model == 0x36)) { 254 /* 255 * On Atom, twofish-3way is slower than original assembler 256 * implementation. Twofish-3way trades off some performance in 257 * storing blocks in 64bit registers to allow three blocks to 258 * be processed parallel. Parallel operation then allows gaining 259 * more performance than was trade off, on out-of-order CPUs. 260 * However Atom does not benefit from this parallellism and 261 * should be blacklisted. 262 */ 263 return true; 264 } 265 266 if (boot_cpu_data.x86 == 0x0f) { 267 /* 268 * On Pentium 4, twofish-3way is slower than original assembler 269 * implementation because excessive uses of 64bit rotate and 270 * left-shifts (which are really slow on P4) needed to store and 271 * handle 128bit block in two 64bit registers. 272 */ 273 return true; 274 } 275 276 return false; 277 } 278 279 static int force; 280 module_param(force, int, 0); 281 MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); 282 283 static int __init init(void) 284 { 285 if (!force && is_blacklisted_cpu()) { 286 printk(KERN_INFO 287 "twofish-x86_64-3way: performance on this CPU " 288 "would be suboptimal: disabling " 289 "twofish-x86_64-3way.\n"); 290 return -ENODEV; 291 } 292 293 return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs)); 294 } 295 296 static void __exit fini(void) 297 { 298 crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs)); 299 } 300 301 module_init(init); 302 module_exit(fini); 303 304 MODULE_LICENSE("GPL"); 305 MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); 306 MODULE_ALIAS_CRYPTO("twofish"); 307 MODULE_ALIAS_CRYPTO("twofish-asm"); 308