xref: /linux/arch/x86/crypto/serpent_sse2_glue.c (revision b43ab901d671e3e3cad425ea5e9a3c74e266dcdd)
1 /*
2  * Glue Code for SSE2 assembler versions of Serpent Cipher
3  *
4  * Copyright (c) 2011 Jussi Kivilinna <jussi.kivilinna@mbnet.fi>
5  *
6  * Glue code based on aesni-intel_glue.c by:
7  *  Copyright (C) 2008, Intel Corp.
8  *    Author: Huang Ying <ying.huang@intel.com>
9  *
10  * CBC & ECB parts based on code (crypto/cbc.c,ecb.c) by:
11  *   Copyright (c) 2006 Herbert Xu <herbert@gondor.apana.org.au>
12  * CTR part based on code (crypto/ctr.c) by:
13  *   (C) Copyright IBM Corp. 2007 - Joy Latten <latten@us.ibm.com>
14  *
15  * This program is free software; you can redistribute it and/or modify
16  * it under the terms of the GNU General Public License as published by
17  * the Free Software Foundation; either version 2 of the License, or
18  * (at your option) any later version.
19  *
20  * This program is distributed in the hope that it will be useful,
21  * but WITHOUT ANY WARRANTY; without even the implied warranty of
22  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
23  * GNU General Public License for more details.
24  *
25  * You should have received a copy of the GNU General Public License
26  * along with this program; if not, write to the Free Software
27  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
28  * USA
29  *
30  */
31 
32 #include <linux/module.h>
33 #include <linux/hardirq.h>
34 #include <linux/types.h>
35 #include <linux/crypto.h>
36 #include <linux/err.h>
37 #include <crypto/algapi.h>
38 #include <crypto/serpent.h>
39 #include <crypto/cryptd.h>
40 #include <crypto/b128ops.h>
41 #include <crypto/ctr.h>
42 #include <crypto/lrw.h>
43 #include <crypto/xts.h>
44 #include <asm/i387.h>
45 #include <asm/serpent.h>
46 #include <crypto/scatterwalk.h>
47 #include <linux/workqueue.h>
48 #include <linux/spinlock.h>
49 
/*
 * Per-transform context of the asynchronous (ablkcipher) wrappers in this
 * file.  It only holds the cryptd transform that ablk_init_common() stores
 * and ablk_exit() frees.
 */
struct async_serpent_ctx {
	struct cryptd_ablkcipher *cryptd_tfm;
};
53 
54 static inline bool serpent_fpu_begin(bool fpu_enabled, unsigned int nbytes)
55 {
56 	if (fpu_enabled)
57 		return true;
58 
59 	/* SSE2 is only used when chunk to be processed is large enough, so
60 	 * do not enable FPU until it is necessary.
61 	 */
62 	if (nbytes < SERPENT_BLOCK_SIZE * SERPENT_PARALLEL_BLOCKS)
63 		return false;
64 
65 	kernel_fpu_begin();
66 	return true;
67 }
68 
/*
 * Close the kernel FPU section, if serpent_fpu_begin() reported that one was
 * opened.
 */
static inline void serpent_fpu_end(bool fpu_enabled)
{
	if (!fpu_enabled)
		return;

	kernel_fpu_end();
}
74 
75 static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
76 		     bool enc)
77 {
78 	bool fpu_enabled = false;
79 	struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
80 	const unsigned int bsize = SERPENT_BLOCK_SIZE;
81 	unsigned int nbytes;
82 	int err;
83 
84 	err = blkcipher_walk_virt(desc, walk);
85 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
86 
87 	while ((nbytes = walk->nbytes)) {
88 		u8 *wsrc = walk->src.virt.addr;
89 		u8 *wdst = walk->dst.virt.addr;
90 
91 		fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
92 
93 		/* Process multi-block batch */
94 		if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
95 			do {
96 				if (enc)
97 					serpent_enc_blk_xway(ctx, wdst, wsrc);
98 				else
99 					serpent_dec_blk_xway(ctx, wdst, wsrc);
100 
101 				wsrc += bsize * SERPENT_PARALLEL_BLOCKS;
102 				wdst += bsize * SERPENT_PARALLEL_BLOCKS;
103 				nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
104 			} while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
105 
106 			if (nbytes < bsize)
107 				goto done;
108 		}
109 
110 		/* Handle leftovers */
111 		do {
112 			if (enc)
113 				__serpent_encrypt(ctx, wdst, wsrc);
114 			else
115 				__serpent_decrypt(ctx, wdst, wsrc);
116 
117 			wsrc += bsize;
118 			wdst += bsize;
119 			nbytes -= bsize;
120 		} while (nbytes >= bsize);
121 
122 done:
123 		err = blkcipher_walk_done(desc, walk, nbytes);
124 	}
125 
126 	serpent_fpu_end(fpu_enabled);
127 	return err;
128 }
129 
130 static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
131 		       struct scatterlist *src, unsigned int nbytes)
132 {
133 	struct blkcipher_walk walk;
134 
135 	blkcipher_walk_init(&walk, dst, src, nbytes);
136 	return ecb_crypt(desc, &walk, true);
137 }
138 
139 static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
140 		       struct scatterlist *src, unsigned int nbytes)
141 {
142 	struct blkcipher_walk walk;
143 
144 	blkcipher_walk_init(&walk, dst, src, nbytes);
145 	return ecb_crypt(desc, &walk, false);
146 }
147 
/*
 * Synchronous ECB blkcipher, registered with priority 0 under an internal
 * "__"-prefixed name.  ablk_ecb_init() looks it up by its driver name to
 * back the "ecb(serpent)" algorithm defined further down.
 */
static struct crypto_alg blk_ecb_alg = {
	.cra_name		= "__ecb-serpent-sse2",
	.cra_driver_name	= "__driver-ecb-serpent-sse2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct serpent_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(blk_ecb_alg.cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.setkey		= serpent_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
};
169 
170 static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
171 				  struct blkcipher_walk *walk)
172 {
173 	struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
174 	const unsigned int bsize = SERPENT_BLOCK_SIZE;
175 	unsigned int nbytes = walk->nbytes;
176 	u128 *src = (u128 *)walk->src.virt.addr;
177 	u128 *dst = (u128 *)walk->dst.virt.addr;
178 	u128 *iv = (u128 *)walk->iv;
179 
180 	do {
181 		u128_xor(dst, src, iv);
182 		__serpent_encrypt(ctx, (u8 *)dst, (u8 *)dst);
183 		iv = dst;
184 
185 		src += 1;
186 		dst += 1;
187 		nbytes -= bsize;
188 	} while (nbytes >= bsize);
189 
190 	u128_xor((u128 *)walk->iv, (u128 *)walk->iv, iv);
191 	return nbytes;
192 }
193 
194 static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
195 		       struct scatterlist *src, unsigned int nbytes)
196 {
197 	struct blkcipher_walk walk;
198 	int err;
199 
200 	blkcipher_walk_init(&walk, dst, src, nbytes);
201 	err = blkcipher_walk_virt(desc, &walk);
202 
203 	while ((nbytes = walk.nbytes)) {
204 		nbytes = __cbc_encrypt(desc, &walk);
205 		err = blkcipher_walk_done(desc, &walk, nbytes);
206 	}
207 
208 	return err;
209 }
210 
/*
 * CBC-decrypt the whole blocks of the current walk segment, working
 * backwards from the last block to the first.
 *
 * Full groups of SERPENT_PARALLEL_BLOCKS blocks are decrypted with
 * serpent_dec_blk_xway(); the remaining blocks are decrypted one at a time
 * with __serpent_decrypt().  On return walk->iv holds the last ciphertext
 * block of the segment.
 *
 * Returns the number of trailing bytes (less than one block) that were not
 * processed.  The only caller in this file invokes it with a non-zero
 * walk->nbytes.
 */
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = SERPENT_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u128 *src = (u128 *)walk->src.virt.addr;
	u128 *dst = (u128 *)walk->dst.virt.addr;
	u128 ivs[SERPENT_PARALLEL_BLOCKS - 1];
	u128 last_iv;
	int i;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	/* Saved now and written to walk->iv at the end.
	 * NOTE(review): presumably needed because src and dst may be the
	 * same buffer, so the block could be overwritten below - confirm.
	 */
	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
		do {
			/* Step back to the first block of the group. */
			nbytes -= bsize * (SERPENT_PARALLEL_BLOCKS - 1);
			src -= SERPENT_PARALLEL_BLOCKS - 1;
			dst -= SERPENT_PARALLEL_BLOCKS - 1;

			/* Copy the ciphertext blocks that serve as chaining
			 * values for blocks 1..N-1 of the group before the
			 * group is decrypted.
			 */
			for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
				ivs[i] = src[i];

			serpent_dec_blk_xway(ctx, (u8 *)dst, (u8 *)src);

			/* Each block after the first is XORed with the
			 * ciphertext block that preceded it.
			 */
			for (i = 0; i < SERPENT_PARALLEL_BLOCKS - 1; i++)
				u128_xor(dst + (i + 1), dst + (i + 1), ivs + i);

			/* No whole block left before this group: its first
			 * block is XORed with walk->iv at "done".
			 */
			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			/* Otherwise the first block of the group chains on
			 * the ciphertext block just before it.
			 */
			u128_xor(dst, dst, src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	for (;;) {
		__serpent_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		/* Chain on the preceding ciphertext block, then move to it. */
		u128_xor(dst, dst, src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	/* dst now points at the first block of the segment, which chains on
	 * the incoming IV; afterwards publish the IV for the next segment.
	 */
	u128_xor(dst, dst, (u128 *)walk->iv);
	*(u128 *)walk->iv = last_iv;

	return nbytes;
}
276 
277 static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
278 		       struct scatterlist *src, unsigned int nbytes)
279 {
280 	bool fpu_enabled = false;
281 	struct blkcipher_walk walk;
282 	int err;
283 
284 	blkcipher_walk_init(&walk, dst, src, nbytes);
285 	err = blkcipher_walk_virt(desc, &walk);
286 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
287 
288 	while ((nbytes = walk.nbytes)) {
289 		fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
290 		nbytes = __cbc_decrypt(desc, &walk);
291 		err = blkcipher_walk_done(desc, &walk, nbytes);
292 	}
293 
294 	serpent_fpu_end(fpu_enabled);
295 	return err;
296 }
297 
/*
 * Synchronous CBC blkcipher, registered with priority 0 under an internal
 * "__"-prefixed name.  ablk_cbc_init() looks it up by its driver name to
 * back the "cbc(serpent)" algorithm defined further down.
 *
 * NOTE(review): no .ivsize is set here, although the ctr/lrw/xts internal
 * algorithms and the public cbc wrapper all set one - confirm this is
 * intentional.
 */
static struct crypto_alg blk_cbc_alg = {
	.cra_name		= "__cbc-serpent-sse2",
	.cra_driver_name	= "__driver-cbc-serpent-sse2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct serpent_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(blk_cbc_alg.cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.setkey		= serpent_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
};
319 
320 static inline void u128_to_be128(be128 *dst, const u128 *src)
321 {
322 	dst->a = cpu_to_be64(src->a);
323 	dst->b = cpu_to_be64(src->b);
324 }
325 
326 static inline void be128_to_u128(u128 *dst, const be128 *src)
327 {
328 	dst->a = be64_to_cpu(src->a);
329 	dst->b = be64_to_cpu(src->b);
330 }
331 
332 static inline void u128_inc(u128 *i)
333 {
334 	i->b++;
335 	if (!i->b)
336 		i->a++;
337 }
338 
339 static void ctr_crypt_final(struct blkcipher_desc *desc,
340 			    struct blkcipher_walk *walk)
341 {
342 	struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
343 	u8 *ctrblk = walk->iv;
344 	u8 keystream[SERPENT_BLOCK_SIZE];
345 	u8 *src = walk->src.virt.addr;
346 	u8 *dst = walk->dst.virt.addr;
347 	unsigned int nbytes = walk->nbytes;
348 
349 	__serpent_encrypt(ctx, keystream, ctrblk);
350 	crypto_xor(keystream, src, nbytes);
351 	memcpy(dst, keystream, nbytes);
352 
353 	crypto_inc(ctrblk, SERPENT_BLOCK_SIZE);
354 }
355 
356 static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
357 				struct blkcipher_walk *walk)
358 {
359 	struct serpent_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
360 	const unsigned int bsize = SERPENT_BLOCK_SIZE;
361 	unsigned int nbytes = walk->nbytes;
362 	u128 *src = (u128 *)walk->src.virt.addr;
363 	u128 *dst = (u128 *)walk->dst.virt.addr;
364 	u128 ctrblk;
365 	be128 ctrblocks[SERPENT_PARALLEL_BLOCKS];
366 	int i;
367 
368 	be128_to_u128(&ctrblk, (be128 *)walk->iv);
369 
370 	/* Process multi-block batch */
371 	if (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS) {
372 		do {
373 			/* create ctrblks for parallel encrypt */
374 			for (i = 0; i < SERPENT_PARALLEL_BLOCKS; i++) {
375 				if (dst != src)
376 					dst[i] = src[i];
377 
378 				u128_to_be128(&ctrblocks[i], &ctrblk);
379 				u128_inc(&ctrblk);
380 			}
381 
382 			serpent_enc_blk_xway_xor(ctx, (u8 *)dst,
383 						 (u8 *)ctrblocks);
384 
385 			src += SERPENT_PARALLEL_BLOCKS;
386 			dst += SERPENT_PARALLEL_BLOCKS;
387 			nbytes -= bsize * SERPENT_PARALLEL_BLOCKS;
388 		} while (nbytes >= bsize * SERPENT_PARALLEL_BLOCKS);
389 
390 		if (nbytes < bsize)
391 			goto done;
392 	}
393 
394 	/* Handle leftovers */
395 	do {
396 		if (dst != src)
397 			*dst = *src;
398 
399 		u128_to_be128(&ctrblocks[0], &ctrblk);
400 		u128_inc(&ctrblk);
401 
402 		__serpent_encrypt(ctx, (u8 *)ctrblocks, (u8 *)ctrblocks);
403 		u128_xor(dst, dst, (u128 *)ctrblocks);
404 
405 		src += 1;
406 		dst += 1;
407 		nbytes -= bsize;
408 	} while (nbytes >= bsize);
409 
410 done:
411 	u128_to_be128((be128 *)walk->iv, &ctrblk);
412 	return nbytes;
413 }
414 
415 static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
416 		     struct scatterlist *src, unsigned int nbytes)
417 {
418 	bool fpu_enabled = false;
419 	struct blkcipher_walk walk;
420 	int err;
421 
422 	blkcipher_walk_init(&walk, dst, src, nbytes);
423 	err = blkcipher_walk_virt_block(desc, &walk, SERPENT_BLOCK_SIZE);
424 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
425 
426 	while ((nbytes = walk.nbytes) >= SERPENT_BLOCK_SIZE) {
427 		fpu_enabled = serpent_fpu_begin(fpu_enabled, nbytes);
428 		nbytes = __ctr_crypt(desc, &walk);
429 		err = blkcipher_walk_done(desc, &walk, nbytes);
430 	}
431 
432 	serpent_fpu_end(fpu_enabled);
433 
434 	if (walk.nbytes) {
435 		ctr_crypt_final(desc, &walk);
436 		err = blkcipher_walk_done(desc, &walk, 0);
437 	}
438 
439 	return err;
440 }
441 
/*
 * Synchronous CTR blkcipher, registered with priority 0 under an internal
 * "__"-prefixed name.  ablk_ctr_init() looks it up by its driver name to
 * back the "ctr(serpent)" algorithm defined further down.  The block size
 * is 1, and encrypt and decrypt share ctr_crypt().
 */
static struct crypto_alg blk_ctr_alg = {
	.cra_name		= "__ctr-serpent-sse2",
	.cra_driver_name	= "__driver-ctr-serpent-sse2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct serpent_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(blk_ctr_alg.cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= serpent_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
};
464 
/*
 * State handed to encrypt_callback()/decrypt_callback() through the LRW and
 * XTS request structures.
 */
struct crypt_priv {
	struct serpent_ctx *ctx;	/* key schedule used by the callbacks */
	bool fpu_enabled;		/* result of the last serpent_fpu_begin() */
};
469 
470 static void encrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
471 {
472 	const unsigned int bsize = SERPENT_BLOCK_SIZE;
473 	struct crypt_priv *ctx = priv;
474 	int i;
475 
476 	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
477 
478 	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
479 		serpent_enc_blk_xway(ctx->ctx, srcdst, srcdst);
480 		return;
481 	}
482 
483 	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
484 		__serpent_encrypt(ctx->ctx, srcdst, srcdst);
485 }
486 
487 static void decrypt_callback(void *priv, u8 *srcdst, unsigned int nbytes)
488 {
489 	const unsigned int bsize = SERPENT_BLOCK_SIZE;
490 	struct crypt_priv *ctx = priv;
491 	int i;
492 
493 	ctx->fpu_enabled = serpent_fpu_begin(ctx->fpu_enabled, nbytes);
494 
495 	if (nbytes == bsize * SERPENT_PARALLEL_BLOCKS) {
496 		serpent_dec_blk_xway(ctx->ctx, srcdst, srcdst);
497 		return;
498 	}
499 
500 	for (i = 0; i < nbytes / bsize; i++, srcdst += bsize)
501 		__serpent_decrypt(ctx->ctx, srcdst, srcdst);
502 }
503 
/* Transform context of the internal LRW blkcipher. */
struct serpent_lrw_ctx {
	struct lrw_table_ctx lrw_table;	/* set up by lrw_init_table() */
	struct serpent_ctx serpent_ctx;	/* Serpent key schedule */
};
508 
509 static int lrw_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
510 			      unsigned int keylen)
511 {
512 	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
513 	int err;
514 
515 	err = __serpent_setkey(&ctx->serpent_ctx, key, keylen -
516 							SERPENT_BLOCK_SIZE);
517 	if (err)
518 		return err;
519 
520 	return lrw_init_table(&ctx->lrw_table, key + keylen -
521 						SERPENT_BLOCK_SIZE);
522 }
523 
524 static int lrw_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
525 		       struct scatterlist *src, unsigned int nbytes)
526 {
527 	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
528 	be128 buf[SERPENT_PARALLEL_BLOCKS];
529 	struct crypt_priv crypt_ctx = {
530 		.ctx = &ctx->serpent_ctx,
531 		.fpu_enabled = false,
532 	};
533 	struct lrw_crypt_req req = {
534 		.tbuf = buf,
535 		.tbuflen = sizeof(buf),
536 
537 		.table_ctx = &ctx->lrw_table,
538 		.crypt_ctx = &crypt_ctx,
539 		.crypt_fn = encrypt_callback,
540 	};
541 	int ret;
542 
543 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
544 	ret = lrw_crypt(desc, dst, src, nbytes, &req);
545 	serpent_fpu_end(crypt_ctx.fpu_enabled);
546 
547 	return ret;
548 }
549 
550 static int lrw_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
551 		       struct scatterlist *src, unsigned int nbytes)
552 {
553 	struct serpent_lrw_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
554 	be128 buf[SERPENT_PARALLEL_BLOCKS];
555 	struct crypt_priv crypt_ctx = {
556 		.ctx = &ctx->serpent_ctx,
557 		.fpu_enabled = false,
558 	};
559 	struct lrw_crypt_req req = {
560 		.tbuf = buf,
561 		.tbuflen = sizeof(buf),
562 
563 		.table_ctx = &ctx->lrw_table,
564 		.crypt_ctx = &crypt_ctx,
565 		.crypt_fn = decrypt_callback,
566 	};
567 	int ret;
568 
569 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
570 	ret = lrw_crypt(desc, dst, src, nbytes, &req);
571 	serpent_fpu_end(crypt_ctx.fpu_enabled);
572 
573 	return ret;
574 }
575 
576 static void lrw_exit_tfm(struct crypto_tfm *tfm)
577 {
578 	struct serpent_lrw_ctx *ctx = crypto_tfm_ctx(tfm);
579 
580 	lrw_free_table(&ctx->lrw_table);
581 }
582 
/*
 * Synchronous LRW blkcipher, registered with priority 0 under an internal
 * "__"-prefixed name.  ablk_lrw_init() looks it up by its driver name to
 * back the "lrw(serpent)" algorithm defined further down.  Key sizes are
 * the Serpent key sizes plus one block, which lrw_serpent_setkey() splits
 * off for the LRW table.
 */
static struct crypto_alg blk_lrw_alg = {
	.cra_name		= "__lrw-serpent-sse2",
	.cra_driver_name	= "__driver-lrw-serpent-sse2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct serpent_lrw_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(blk_lrw_alg.cra_list),
	.cra_exit		= lrw_exit_tfm,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE +
					  SERPENT_BLOCK_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE +
					  SERPENT_BLOCK_SIZE,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= lrw_serpent_setkey,
			.encrypt	= lrw_encrypt,
			.decrypt	= lrw_decrypt,
		},
	},
};
608 
/* Transform context of the internal XTS blkcipher: two Serpent key schedules. */
struct serpent_xts_ctx {
	struct serpent_ctx tweak_ctx;	/* keyed with the second half of the XTS key */
	struct serpent_ctx crypt_ctx;	/* keyed with the first half of the XTS key */
};
613 
614 static int xts_serpent_setkey(struct crypto_tfm *tfm, const u8 *key,
615 			      unsigned int keylen)
616 {
617 	struct serpent_xts_ctx *ctx = crypto_tfm_ctx(tfm);
618 	u32 *flags = &tfm->crt_flags;
619 	int err;
620 
621 	/* key consists of keys of equal size concatenated, therefore
622 	 * the length must be even
623 	 */
624 	if (keylen % 2) {
625 		*flags |= CRYPTO_TFM_RES_BAD_KEY_LEN;
626 		return -EINVAL;
627 	}
628 
629 	/* first half of xts-key is for crypt */
630 	err = __serpent_setkey(&ctx->crypt_ctx, key, keylen / 2);
631 	if (err)
632 		return err;
633 
634 	/* second half of xts-key is for tweak */
635 	return __serpent_setkey(&ctx->tweak_ctx, key + keylen / 2, keylen / 2);
636 }
637 
638 static int xts_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
639 		       struct scatterlist *src, unsigned int nbytes)
640 {
641 	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
642 	be128 buf[SERPENT_PARALLEL_BLOCKS];
643 	struct crypt_priv crypt_ctx = {
644 		.ctx = &ctx->crypt_ctx,
645 		.fpu_enabled = false,
646 	};
647 	struct xts_crypt_req req = {
648 		.tbuf = buf,
649 		.tbuflen = sizeof(buf),
650 
651 		.tweak_ctx = &ctx->tweak_ctx,
652 		.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
653 		.crypt_ctx = &crypt_ctx,
654 		.crypt_fn = encrypt_callback,
655 	};
656 	int ret;
657 
658 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
659 	ret = xts_crypt(desc, dst, src, nbytes, &req);
660 	serpent_fpu_end(crypt_ctx.fpu_enabled);
661 
662 	return ret;
663 }
664 
665 static int xts_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
666 		       struct scatterlist *src, unsigned int nbytes)
667 {
668 	struct serpent_xts_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
669 	be128 buf[SERPENT_PARALLEL_BLOCKS];
670 	struct crypt_priv crypt_ctx = {
671 		.ctx = &ctx->crypt_ctx,
672 		.fpu_enabled = false,
673 	};
674 	struct xts_crypt_req req = {
675 		.tbuf = buf,
676 		.tbuflen = sizeof(buf),
677 
678 		.tweak_ctx = &ctx->tweak_ctx,
679 		.tweak_fn = XTS_TWEAK_CAST(__serpent_encrypt),
680 		.crypt_ctx = &crypt_ctx,
681 		.crypt_fn = decrypt_callback,
682 	};
683 	int ret;
684 
685 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
686 	ret = xts_crypt(desc, dst, src, nbytes, &req);
687 	serpent_fpu_end(crypt_ctx.fpu_enabled);
688 
689 	return ret;
690 }
691 
/*
 * Synchronous XTS blkcipher, registered with priority 0 under an internal
 * "__"-prefixed name.  ablk_xts_init() looks it up by its driver name to
 * back the "xts(serpent)" algorithm defined further down.  Key sizes are
 * twice the Serpent key sizes because xts_serpent_setkey() splits the key
 * into two halves.
 */
static struct crypto_alg blk_xts_alg = {
	.cra_name		= "__xts-serpent-sse2",
	.cra_driver_name	= "__driver-xts-serpent-sse2",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct serpent_xts_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(blk_xts_alg.cra_list),
	.cra_u = {
		.blkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= xts_serpent_setkey,
			.encrypt	= xts_encrypt,
			.decrypt	= xts_decrypt,
		},
	},
};
714 
715 static int ablk_set_key(struct crypto_ablkcipher *tfm, const u8 *key,
716 			unsigned int key_len)
717 {
718 	struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
719 	struct crypto_ablkcipher *child = &ctx->cryptd_tfm->base;
720 	int err;
721 
722 	crypto_ablkcipher_clear_flags(child, CRYPTO_TFM_REQ_MASK);
723 	crypto_ablkcipher_set_flags(child, crypto_ablkcipher_get_flags(tfm)
724 				    & CRYPTO_TFM_REQ_MASK);
725 	err = crypto_ablkcipher_setkey(child, key, key_len);
726 	crypto_ablkcipher_set_flags(tfm, crypto_ablkcipher_get_flags(child)
727 				    & CRYPTO_TFM_RES_MASK);
728 	return err;
729 }
730 
731 static int __ablk_encrypt(struct ablkcipher_request *req)
732 {
733 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
734 	struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
735 	struct blkcipher_desc desc;
736 
737 	desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
738 	desc.info = req->info;
739 	desc.flags = 0;
740 
741 	return crypto_blkcipher_crt(desc.tfm)->encrypt(
742 		&desc, req->dst, req->src, req->nbytes);
743 }
744 
745 static int ablk_encrypt(struct ablkcipher_request *req)
746 {
747 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
748 	struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
749 
750 	if (!irq_fpu_usable()) {
751 		struct ablkcipher_request *cryptd_req =
752 			ablkcipher_request_ctx(req);
753 
754 		memcpy(cryptd_req, req, sizeof(*req));
755 		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
756 
757 		return crypto_ablkcipher_encrypt(cryptd_req);
758 	} else {
759 		return __ablk_encrypt(req);
760 	}
761 }
762 
763 static int ablk_decrypt(struct ablkcipher_request *req)
764 {
765 	struct crypto_ablkcipher *tfm = crypto_ablkcipher_reqtfm(req);
766 	struct async_serpent_ctx *ctx = crypto_ablkcipher_ctx(tfm);
767 
768 	if (!irq_fpu_usable()) {
769 		struct ablkcipher_request *cryptd_req =
770 			ablkcipher_request_ctx(req);
771 
772 		memcpy(cryptd_req, req, sizeof(*req));
773 		ablkcipher_request_set_tfm(cryptd_req, &ctx->cryptd_tfm->base);
774 
775 		return crypto_ablkcipher_decrypt(cryptd_req);
776 	} else {
777 		struct blkcipher_desc desc;
778 
779 		desc.tfm = cryptd_ablkcipher_child(ctx->cryptd_tfm);
780 		desc.info = req->info;
781 		desc.flags = 0;
782 
783 		return crypto_blkcipher_crt(desc.tfm)->decrypt(
784 			&desc, req->dst, req->src, req->nbytes);
785 	}
786 }
787 
788 static void ablk_exit(struct crypto_tfm *tfm)
789 {
790 	struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
791 
792 	cryptd_free_ablkcipher(ctx->cryptd_tfm);
793 }
794 
795 static void ablk_init_common(struct crypto_tfm *tfm,
796 			     struct cryptd_ablkcipher *cryptd_tfm)
797 {
798 	struct async_serpent_ctx *ctx = crypto_tfm_ctx(tfm);
799 
800 	ctx->cryptd_tfm = cryptd_tfm;
801 	tfm->crt_ablkcipher.reqsize = sizeof(struct ablkcipher_request) +
802 		crypto_ablkcipher_reqsize(&cryptd_tfm->base);
803 }
804 
/*
 * .cra_init hook of "ecb(serpent)": allocate a cryptd transform on top of
 * the internal ECB driver.  Returns 0 or the allocation error.
 */
static int ablk_ecb_init(struct crypto_tfm *tfm)
{
	struct cryptd_ablkcipher *cryptd_tfm =
		cryptd_alloc_ablkcipher("__driver-ecb-serpent-sse2", 0, 0);

	if (!IS_ERR(cryptd_tfm)) {
		ablk_init_common(tfm, cryptd_tfm);
		return 0;
	}

	return PTR_ERR(cryptd_tfm);
}
815 
/*
 * "ecb(serpent)" asynchronous algorithm, priority 400.  Requests are served
 * through ablk_encrypt()/ablk_decrypt(), which use the internal ECB driver
 * either directly or via cryptd.
 */
static struct crypto_alg ablk_ecb_alg = {
	.cra_name		= "ecb(serpent)",
	.cra_driver_name	= "ecb-serpent-sse2",
	.cra_priority		= 400,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_serpent_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(ablk_ecb_alg.cra_list),
	.cra_init		= ablk_ecb_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
};
839 
/*
 * .cra_init hook of "cbc(serpent)": allocate a cryptd transform on top of
 * the internal CBC driver.  Returns 0 or the allocation error.
 */
static int ablk_cbc_init(struct crypto_tfm *tfm)
{
	struct cryptd_ablkcipher *cryptd_tfm =
		cryptd_alloc_ablkcipher("__driver-cbc-serpent-sse2", 0, 0);

	if (!IS_ERR(cryptd_tfm)) {
		ablk_init_common(tfm, cryptd_tfm);
		return 0;
	}

	return PTR_ERR(cryptd_tfm);
}
850 
/*
 * "cbc(serpent)" asynchronous algorithm, priority 400.
 *
 * Encryption is wired to __ablk_encrypt(), i.e. it always runs synchronously
 * without the irq_fpu_usable() check; cbc_encrypt() in this file never opens
 * an FPU section.  Decryption goes through ablk_decrypt().
 */
static struct crypto_alg ablk_cbc_alg = {
	.cra_name		= "cbc(serpent)",
	.cra_driver_name	= "cbc-serpent-sse2",
	.cra_priority		= 400,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_serpent_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(ablk_cbc_alg.cra_list),
	.cra_init		= ablk_cbc_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= __ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
};
875 
/*
 * .cra_init hook of "ctr(serpent)": allocate a cryptd transform on top of
 * the internal CTR driver.  Returns 0 or the allocation error.
 */
static int ablk_ctr_init(struct crypto_tfm *tfm)
{
	struct cryptd_ablkcipher *cryptd_tfm =
		cryptd_alloc_ablkcipher("__driver-ctr-serpent-sse2", 0, 0);

	if (!IS_ERR(cryptd_tfm)) {
		ablk_init_common(tfm, cryptd_tfm);
		return 0;
	}

	return PTR_ERR(cryptd_tfm);
}
886 
/*
 * "ctr(serpent)" asynchronous algorithm, priority 400, block size 1.
 *
 * Both .encrypt and .decrypt point at ablk_encrypt(); this mirrors the
 * internal CTR driver, whose encrypt and decrypt are the same ctr_crypt().
 */
static struct crypto_alg ablk_ctr_alg = {
	.cra_name		= "ctr(serpent)",
	.cra_driver_name	= "ctr-serpent-sse2",
	.cra_priority		= 400,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct async_serpent_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(ablk_ctr_alg.cra_list),
	.cra_init		= ablk_ctr_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_encrypt,
			.geniv		= "chainiv",
		},
	},
};
912 
/*
 * .cra_init hook of "lrw(serpent)": allocate a cryptd transform on top of
 * the internal LRW driver.  Returns 0 or the allocation error.
 */
static int ablk_lrw_init(struct crypto_tfm *tfm)
{
	struct cryptd_ablkcipher *cryptd_tfm =
		cryptd_alloc_ablkcipher("__driver-lrw-serpent-sse2", 0, 0);

	if (!IS_ERR(cryptd_tfm)) {
		ablk_init_common(tfm, cryptd_tfm);
		return 0;
	}

	return PTR_ERR(cryptd_tfm);
}
923 
/*
 * "lrw(serpent)" asynchronous algorithm, priority 400.  Key sizes match the
 * internal LRW driver: a Serpent key plus one extra block.
 */
static struct crypto_alg ablk_lrw_alg = {
	.cra_name		= "lrw(serpent)",
	.cra_driver_name	= "lrw-serpent-sse2",
	.cra_priority		= 400,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_serpent_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(ablk_lrw_alg.cra_list),
	.cra_init		= ablk_lrw_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE +
					  SERPENT_BLOCK_SIZE,
			.max_keysize	= SERPENT_MAX_KEY_SIZE +
					  SERPENT_BLOCK_SIZE,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
};
950 
/*
 * .cra_init hook of "xts(serpent)": allocate a cryptd transform on top of
 * the internal XTS driver.  Returns 0 or the allocation error.
 */
static int ablk_xts_init(struct crypto_tfm *tfm)
{
	struct cryptd_ablkcipher *cryptd_tfm =
		cryptd_alloc_ablkcipher("__driver-xts-serpent-sse2", 0, 0);

	if (!IS_ERR(cryptd_tfm)) {
		ablk_init_common(tfm, cryptd_tfm);
		return 0;
	}

	return PTR_ERR(cryptd_tfm);
}
961 
/*
 * "xts(serpent)" asynchronous algorithm, priority 400.  Key sizes match the
 * internal XTS driver: two Serpent keys of equal size.
 */
static struct crypto_alg ablk_xts_alg = {
	.cra_name		= "xts(serpent)",
	.cra_driver_name	= "xts-serpent-sse2",
	.cra_priority		= 400,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= SERPENT_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_serpent_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_list		= LIST_HEAD_INIT(ablk_xts_alg.cra_list),
	.cra_init		= ablk_xts_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= SERPENT_MIN_KEY_SIZE * 2,
			.max_keysize	= SERPENT_MAX_KEY_SIZE * 2,
			.ivsize		= SERPENT_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
};
986 
987 static int __init serpent_sse2_init(void)
988 {
989 	int err;
990 
991 	if (!cpu_has_xmm2) {
992 		printk(KERN_INFO "SSE2 instructions are not detected.\n");
993 		return -ENODEV;
994 	}
995 
996 	err = crypto_register_alg(&blk_ecb_alg);
997 	if (err)
998 		goto blk_ecb_err;
999 	err = crypto_register_alg(&blk_cbc_alg);
1000 	if (err)
1001 		goto blk_cbc_err;
1002 	err = crypto_register_alg(&blk_ctr_alg);
1003 	if (err)
1004 		goto blk_ctr_err;
1005 	err = crypto_register_alg(&ablk_ecb_alg);
1006 	if (err)
1007 		goto ablk_ecb_err;
1008 	err = crypto_register_alg(&ablk_cbc_alg);
1009 	if (err)
1010 		goto ablk_cbc_err;
1011 	err = crypto_register_alg(&ablk_ctr_alg);
1012 	if (err)
1013 		goto ablk_ctr_err;
1014 	err = crypto_register_alg(&blk_lrw_alg);
1015 	if (err)
1016 		goto blk_lrw_err;
1017 	err = crypto_register_alg(&ablk_lrw_alg);
1018 	if (err)
1019 		goto ablk_lrw_err;
1020 	err = crypto_register_alg(&blk_xts_alg);
1021 	if (err)
1022 		goto blk_xts_err;
1023 	err = crypto_register_alg(&ablk_xts_alg);
1024 	if (err)
1025 		goto ablk_xts_err;
1026 	return err;
1027 
1028 	crypto_unregister_alg(&ablk_xts_alg);
1029 ablk_xts_err:
1030 	crypto_unregister_alg(&blk_xts_alg);
1031 blk_xts_err:
1032 	crypto_unregister_alg(&ablk_lrw_alg);
1033 ablk_lrw_err:
1034 	crypto_unregister_alg(&blk_lrw_alg);
1035 blk_lrw_err:
1036 	crypto_unregister_alg(&ablk_ctr_alg);
1037 ablk_ctr_err:
1038 	crypto_unregister_alg(&ablk_cbc_alg);
1039 ablk_cbc_err:
1040 	crypto_unregister_alg(&ablk_ecb_alg);
1041 ablk_ecb_err:
1042 	crypto_unregister_alg(&blk_ctr_alg);
1043 blk_ctr_err:
1044 	crypto_unregister_alg(&blk_cbc_alg);
1045 blk_cbc_err:
1046 	crypto_unregister_alg(&blk_ecb_alg);
1047 blk_ecb_err:
1048 	return err;
1049 }
1050 
1051 static void __exit serpent_sse2_exit(void)
1052 {
1053 	crypto_unregister_alg(&ablk_xts_alg);
1054 	crypto_unregister_alg(&blk_xts_alg);
1055 	crypto_unregister_alg(&ablk_lrw_alg);
1056 	crypto_unregister_alg(&blk_lrw_alg);
1057 	crypto_unregister_alg(&ablk_ctr_alg);
1058 	crypto_unregister_alg(&ablk_cbc_alg);
1059 	crypto_unregister_alg(&ablk_ecb_alg);
1060 	crypto_unregister_alg(&blk_ctr_alg);
1061 	crypto_unregister_alg(&blk_cbc_alg);
1062 	crypto_unregister_alg(&blk_ecb_alg);
1063 }
1064 
/* Module entry and exit points. */
module_init(serpent_sse2_init);
module_exit(serpent_sse2_exit);

/* Module metadata. */
MODULE_DESCRIPTION("Serpent Cipher Algorithm, SSE2 optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS("serpent");
1071