1 /* 2 * Glue Code for 3-way parallel assembler optimized version of Twofish 3 * 4 * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi> 5 * 6 * This program is free software; you can redistribute it and/or modify 7 * it under the terms of the GNU General Public License as published by 8 * the Free Software Foundation; either version 2 of the License, or 9 * (at your option) any later version. 10 * 11 * This program is distributed in the hope that it will be useful, 12 * but WITHOUT ANY WARRANTY; without even the implied warranty of 13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 14 * GNU General Public License for more details. 15 * 16 * You should have received a copy of the GNU General Public License 17 * along with this program; if not, write to the Free Software 18 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 19 * USA 20 * 21 */ 22 23 #include <asm/processor.h> 24 #include <linux/crypto.h> 25 #include <linux/init.h> 26 #include <linux/module.h> 27 #include <linux/types.h> 28 #include <crypto/algapi.h> 29 #include <crypto/twofish.h> 30 #include <crypto/b128ops.h> 31 #include <asm/crypto/glue_helper.h> 32 #include <crypto/lrw.h> 33 #include <crypto/xts.h> 34 35 /* regular block cipher functions from twofish_x86_64 module */ 36 asmlinkage void twofish_enc_blk(struct twofish_ctx *ctx, u8 *dst, 37 const u8 *src); 38 asmlinkage void twofish_dec_blk(struct twofish_ctx *ctx, u8 *dst, 39 const u8 *src); 40 41 /* 3-way parallel cipher functions */ 42 asmlinkage void __twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, 43 const u8 *src, bool xor); 44 EXPORT_SYMBOL_GPL(__twofish_enc_blk_3way); 45 asmlinkage void twofish_dec_blk_3way(struct twofish_ctx *ctx, u8 *dst, 46 const u8 *src); 47 EXPORT_SYMBOL_GPL(twofish_dec_blk_3way); 48 49 static inline void twofish_enc_blk_3way(struct twofish_ctx *ctx, u8 *dst, 50 const u8 *src) 51 { 52 __twofish_enc_blk_3way(ctx, dst, src, false); 53 } 54 55 static inline void twofish_enc_blk_xor_3way(struct twofish_ctx *ctx, u8 *dst, 56 const u8 *src) 57 { 58 __twofish_enc_blk_3way(ctx, dst, src, true); 59 } 60 61 static void twofish_dec_blk_cbc_3way(void *ctx, u128 *dst, const u128 *src) 62 { 63 u128 ivs[2]; 64 65 ivs[0] = src[0]; 66 ivs[1] = src[1]; 67 68 twofish_dec_blk_3way(ctx, (u8 *)dst, (u8 *)src); 69 70 u128_xor(&dst[1], &dst[1], &ivs[0]); 71 u128_xor(&dst[2], &dst[2], &ivs[1]); 72 } 73 74 static void twofish_enc_blk_ctr(void *ctx, u128 *dst, const u128 *src, u128 *iv) 75 { 76 be128 ctrblk; 77 78 if (dst != src) 79 *dst = *src; 80 81 u128_to_be128(&ctrblk, iv); 82 u128_inc(iv); 83 84 twofish_enc_blk(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk); 85 u128_xor(dst, dst, (u128 *)&ctrblk); 86 } 87 88 static void twofish_enc_blk_ctr_3way(void *ctx, u128 *dst, const u128 *src, 89 u128 *iv) 90 { 91 be128 ctrblks[3]; 92 93 if (dst != src) { 94 dst[0] = src[0]; 95 dst[1] = src[1]; 96 dst[2] = src[2]; 97 } 98 99 u128_to_be128(&ctrblks[0], iv); 100 u128_inc(iv); 101 u128_to_be128(&ctrblks[1], iv); 102 u128_inc(iv); 103 u128_to_be128(&ctrblks[2], iv); 104 u128_inc(iv); 105 106 twofish_enc_blk_xor_3way(ctx, (u8 *)dst, (u8 *)ctrblks); 107 } 108 109 static const struct common_glue_ctx twofish_enc = { 110 .num_funcs = 2, 111 .fpu_blocks_limit = -1, 112 113 .funcs = { { 114 .num_blocks = 3, 115 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_3way) } 116 }, { 117 .num_blocks = 1, 118 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk) } 119 } } 120 }; 121 122 static const struct common_glue_ctx twofish_ctr = { 123 .num_funcs = 2, 124 .fpu_blocks_limit = -1, 125 126 .funcs = { { 127 .num_blocks = 3, 128 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr_3way) } 129 }, { 130 .num_blocks = 1, 131 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_enc_blk_ctr) } 132 } } 133 }; 134 135 static const struct common_glue_ctx twofish_dec = { 136 .num_funcs = 2, 137 .fpu_blocks_limit = -1, 138 139 .funcs = { { 140 .num_blocks = 3, 141 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk_3way) } 142 }, { 143 .num_blocks = 1, 144 .fn_u = { .ecb = GLUE_FUNC_CAST(twofish_dec_blk) } 145 } } 146 }; 147 148 static const struct common_glue_ctx twofish_dec_cbc = { 149 .num_funcs = 2, 150 .fpu_blocks_limit = -1, 151 152 .funcs = { { 153 .num_blocks = 3, 154 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk_cbc_3way) } 155 }, { 156 .num_blocks = 1, 157 .fn_u = { .cbc = GLUE_CBC_FUNC_CAST(twofish_dec_blk) } 158 } } 159 }; 160 161 static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 162 struct scatterlist *src, unsigned int nbytes) 163 { 164 return glue_ecb_crypt_128bit(&twofish_enc, desc, dst, src, nbytes); 165 } 166 167 static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 168 struct scatterlist *src, unsigned int nbytes) 169 { 170 return glue_ecb_crypt_128bit(&twofish_dec, desc, dst, src, nbytes); 171 } 172 173 static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 174 struct scatterlist *src, unsigned int nbytes) 175 { 176 return glue_cbc_encrypt_128bit(GLUE_FUNC_CAST(twofish_enc_blk), desc, 177 dst, src, nbytes); 178 } 179 180 static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 181 struct scatterlist *src, unsigned int nbytes) 182 { 183 return glue_cbc_decrypt_128bit(&twofish_dec_cbc, desc, dst, src, 184 nbytes); 185 } 186 187 static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst, 188 struct scatterlist *src, unsigned int nbytes) 189 { 190 return glue_ctr_crypt_128bit(&twofish_ctr, desc, dst, src, nbytes); 191 } 192 193 static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) 194 { 195 const unsigned int bsize = TF_BLOCK_SIZE; 196 struct twofish_ctx *ctx = priv; 197 int i; 198 199 if (nbytes == 3 * bsize) { 200 twofish_enc_blk_3way(ctx, srcdst, srcdst); 201 return; 202 } 203 204 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) 205 twofish_enc_blk(ctx, srcdst, srcdst); 206 } 207 208 static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes) 209 { 210 const unsigned int bsize = TF_BLOCK_SIZE; 211 struct twofish_ctx *ctx = priv; 212 int i; 213 214 if (nbytes == 3 * bsize) { 215 twofish_dec_blk_3way(ctx, srcdst, srcdst); 216 return; 217 } 218 219 for (i = 0; i < nbytes / bsize; i++, srcdst += bsize) 220 twofish_dec_blk(ctx, srcdst, srcdst); 221 } 222 223 struct twofish_lrw_ctx { 224 struct lrw_table_ctx lrw_table; 225 struct twofish_ctx twofish_ctx; 226 }; 227 228 static int lrw_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, 229 unsigned int keylen) 230 { 231 struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); 232 int err; 233 234 err = __twofish_setkey(&ctx->twofish_ctx, key, keylen - TF_BLOCK_SIZE, 235 &tfm->crt_flags); 236 if (err) 237 return err; 238 239 return lrw_init_table(&ctx->lrw_table, key + keylen - TF_BLOCK_SIZE); 240 } 241 242 static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 243 struct scatterlist *src, unsigned int nbytes) 244 { 245 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 246 be128 buf[3]; 247 struct lrw_crypt_req req = { 248 .tbuf = buf, 249 .tbuflen = sizeof(buf), 250 251 .table_ctx = &ctx->lrw_table, 252 .crypt_ctx = &ctx->twofish_ctx, 253 .crypt_fn = encrypt_callback, 254 }; 255 256 return lrw_crypt(desc, dst, src, nbytes, &req); 257 } 258 259 static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 260 struct scatterlist *src, unsigned int nbytes) 261 { 262 struct twofish_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 263 be128 buf[3]; 264 struct lrw_crypt_req req = { 265 .tbuf = buf, 266 .tbuflen = sizeof(buf), 267 268 .table_ctx = &ctx->lrw_table, 269 .crypt_ctx = &ctx->twofish_ctx, 270 .crypt_fn = decrypt_callback, 271 }; 272 273 return lrw_crypt(desc, dst, src, nbytes, &req); 274 } 275 276 static void lrw_exit_tfm(struct crypto_tfm *tfm) 277 { 278 struct twofish_lrw_ctx *ctx = crypto_tfm_ctx(tfm); 279 280 lrw_free_table(&ctx->lrw_table); 281 } 282 283 struct twofish_xts_ctx { 284 struct twofish_ctx tweak_ctx; 285 struct twofish_ctx crypt_ctx; 286 }; 287 288 static int xts_twofish_setkey(struct crypto_tfm *tfm, const u8 *key, 289 unsigned int keylen) 290 { 291 struct twofish_xts_ctx *ctx = crypto_tfm_ctx(tfm); 292 u32 *flags = &tfm->crt_flags; 293 int err; 294 295 /* key consists of keys of equal size concatenated, therefore 296 * the length must be even 297 */ 298 if (keylen % 2) { 299 *flags |= CRYPTO_TFM_RES_BAD_KEY_LEN; 300 return -EINVAL; 301 } 302 303 /* first half of xts-key is for crypt */ 304 err = __twofish_setkey(&ctx->crypt_ctx, key, keylen / 2, flags); 305 if (err) 306 return err; 307 308 /* second half of xts-key is for tweak */ 309 return __twofish_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2, 310 flags); 311 } 312 313 static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 314 struct scatterlist *src, unsigned int nbytes) 315 { 316 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 317 be128 buf[3]; 318 struct xts_crypt_req req = { 319 .tbuf = buf, 320 .tbuflen = sizeof(buf), 321 322 .tweak_ctx = &ctx->tweak_ctx, 323 .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), 324 .crypt_ctx = &ctx->crypt_ctx, 325 .crypt_fn = encrypt_callback, 326 }; 327 328 return xts_crypt(desc, dst, src, nbytes, &req); 329 } 330 331 static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst, 332 struct scatterlist *src, unsigned int nbytes) 333 { 334 struct twofish_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm); 335 be128 buf[3]; 336 struct xts_crypt_req req = { 337 .tbuf = buf, 338 .tbuflen = sizeof(buf), 339 340 .tweak_ctx = &ctx->tweak_ctx, 341 .tweak_fn = XTS_TWEAK_CAST(twofish_enc_blk), 342 .crypt_ctx = &ctx->crypt_ctx, 343 .crypt_fn = decrypt_callback, 344 }; 345 346 return xts_crypt(desc, dst, src, nbytes, &req); 347 } 348 349 static struct crypto_alg tf_algs[5] = { { 350 .cra_name = "ecb(twofish)", 351 .cra_driver_name = "ecb-twofish-3way", 352 .cra_priority = 300, 353 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 354 .cra_blocksize = TF_BLOCK_SIZE, 355 .cra_ctxsize = sizeof(struct twofish_ctx), 356 .cra_alignmask = 0, 357 .cra_type = &crypto_blkcipher_type, 358 .cra_module = THIS_MODULE, 359 .cra_list = LIST_HEAD_INIT(tf_algs[0].cra_list), 360 .cra_u = { 361 .blkcipher = { 362 .min_keysize = TF_MIN_KEY_SIZE, 363 .max_keysize = TF_MAX_KEY_SIZE, 364 .setkey = twofish_setkey, 365 .encrypt = ecb_encrypt, 366 .decrypt = ecb_decrypt, 367 }, 368 }, 369 }, { 370 .cra_name = "cbc(twofish)", 371 .cra_driver_name = "cbc-twofish-3way", 372 .cra_priority = 300, 373 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 374 .cra_blocksize = TF_BLOCK_SIZE, 375 .cra_ctxsize = sizeof(struct twofish_ctx), 376 .cra_alignmask = 0, 377 .cra_type = &crypto_blkcipher_type, 378 .cra_module = THIS_MODULE, 379 .cra_list = LIST_HEAD_INIT(tf_algs[1].cra_list), 380 .cra_u = { 381 .blkcipher = { 382 .min_keysize = TF_MIN_KEY_SIZE, 383 .max_keysize = TF_MAX_KEY_SIZE, 384 .ivsize = TF_BLOCK_SIZE, 385 .setkey = twofish_setkey, 386 .encrypt = cbc_encrypt, 387 .decrypt = cbc_decrypt, 388 }, 389 }, 390 }, { 391 .cra_name = "ctr(twofish)", 392 .cra_driver_name = "ctr-twofish-3way", 393 .cra_priority = 300, 394 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 395 .cra_blocksize = 1, 396 .cra_ctxsize = sizeof(struct twofish_ctx), 397 .cra_alignmask = 0, 398 .cra_type = &crypto_blkcipher_type, 399 .cra_module = THIS_MODULE, 400 .cra_list = LIST_HEAD_INIT(tf_algs[2].cra_list), 401 .cra_u = { 402 .blkcipher = { 403 .min_keysize = TF_MIN_KEY_SIZE, 404 .max_keysize = TF_MAX_KEY_SIZE, 405 .ivsize = TF_BLOCK_SIZE, 406 .setkey = twofish_setkey, 407 .encrypt = ctr_crypt, 408 .decrypt = ctr_crypt, 409 }, 410 }, 411 }, { 412 .cra_name = "lrw(twofish)", 413 .cra_driver_name = "lrw-twofish-3way", 414 .cra_priority = 300, 415 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 416 .cra_blocksize = TF_BLOCK_SIZE, 417 .cra_ctxsize = sizeof(struct twofish_lrw_ctx), 418 .cra_alignmask = 0, 419 .cra_type = &crypto_blkcipher_type, 420 .cra_module = THIS_MODULE, 421 .cra_list = LIST_HEAD_INIT(tf_algs[3].cra_list), 422 .cra_exit = lrw_exit_tfm, 423 .cra_u = { 424 .blkcipher = { 425 .min_keysize = TF_MIN_KEY_SIZE + TF_BLOCK_SIZE, 426 .max_keysize = TF_MAX_KEY_SIZE + TF_BLOCK_SIZE, 427 .ivsize = TF_BLOCK_SIZE, 428 .setkey = lrw_twofish_setkey, 429 .encrypt = lrw_encrypt, 430 .decrypt = lrw_decrypt, 431 }, 432 }, 433 }, { 434 .cra_name = "xts(twofish)", 435 .cra_driver_name = "xts-twofish-3way", 436 .cra_priority = 300, 437 .cra_flags = CRYPTO_ALG_TYPE_BLKCIPHER, 438 .cra_blocksize = TF_BLOCK_SIZE, 439 .cra_ctxsize = sizeof(struct twofish_xts_ctx), 440 .cra_alignmask = 0, 441 .cra_type = &crypto_blkcipher_type, 442 .cra_module = THIS_MODULE, 443 .cra_list = LIST_HEAD_INIT(tf_algs[4].cra_list), 444 .cra_u = { 445 .blkcipher = { 446 .min_keysize = TF_MIN_KEY_SIZE * 2, 447 .max_keysize = TF_MAX_KEY_SIZE * 2, 448 .ivsize = TF_BLOCK_SIZE, 449 .setkey = xts_twofish_setkey, 450 .encrypt = xts_encrypt, 451 .decrypt = xts_decrypt, 452 }, 453 }, 454 } }; 455 456 static bool is_blacklisted_cpu(void) 457 { 458 if (boot_cpu_data.x86_vendor != X86_VENDOR_INTEL) 459 return false; 460 461 if (boot_cpu_data.x86 == 0x06 && 462 (boot_cpu_data.x86_model == 0x1c || 463 boot_cpu_data.x86_model == 0x26 || 464 boot_cpu_data.x86_model == 0x36)) { 465 /* 466 * On Atom, twofish-3way is slower than original assembler 467 * implementation. Twofish-3way trades off some performance in 468 * storing blocks in 64bit registers to allow three blocks to 469 * be processed parallel. Parallel operation then allows gaining 470 * more performance than was trade off, on out-of-order CPUs. 471 * However Atom does not benefit from this parallellism and 472 * should be blacklisted. 473 */ 474 return true; 475 } 476 477 if (boot_cpu_data.x86 == 0x0f) { 478 /* 479 * On Pentium 4, twofish-3way is slower than original assembler 480 * implementation because excessive uses of 64bit rotate and 481 * left-shifts (which are really slow on P4) needed to store and 482 * handle 128bit block in two 64bit registers. 483 */ 484 return true; 485 } 486 487 return false; 488 } 489 490 static int force; 491 module_param(force, int, 0); 492 MODULE_PARM_DESC(force, "Force module load, ignore CPU blacklist"); 493 494 static int __init init(void) 495 { 496 if (!force && is_blacklisted_cpu()) { 497 printk(KERN_INFO 498 "twofish-x86_64-3way: performance on this CPU " 499 "would be suboptimal: disabling " 500 "twofish-x86_64-3way.\n"); 501 return -ENODEV; 502 } 503 504 return crypto_register_algs(tf_algs, ARRAY_SIZE(tf_algs)); 505 } 506 507 static void __exit fini(void) 508 { 509 crypto_unregister_algs(tf_algs, ARRAY_SIZE(tf_algs)); 510 } 511 512 module_init(init); 513 module_exit(fini); 514 515 MODULE_LICENSE("GPL"); 516 MODULE_DESCRIPTION("Twofish Cipher Algorithm, 3-way parallel asm optimized"); 517 MODULE_ALIAS("twofish"); 518 MODULE_ALIAS("twofish-asm"); 519