xref: /linux/arch/x86/crypto/cast5_avx_glue.c (revision 0883c2c06fb5bcf5b9e008270827e63c09a88c1e)
1 /*
2  * Glue Code for the AVX assembler implementation of the Cast5 Cipher
3  *
4  * Copyright (C) 2012 Johannes Goetzfried
5  *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
20  * USA
21  *
22  */
23 
24 #include <linux/module.h>
25 #include <linux/hardirq.h>
26 #include <linux/types.h>
27 #include <linux/crypto.h>
28 #include <linux/err.h>
29 #include <crypto/ablk_helper.h>
30 #include <crypto/algapi.h>
31 #include <crypto/cast5.h>
32 #include <crypto/cryptd.h>
33 #include <crypto/ctr.h>
34 #include <asm/fpu/api.h>
35 #include <asm/crypto/glue_helper.h>
36 
37 #define CAST5_PARALLEL_BLOCKS 16
38 
39 asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
40 				    const u8 *src);
41 asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
42 				    const u8 *src);
43 asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
44 				    const u8 *src);
45 asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
46 				__be64 *iv);
47 
48 static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
49 {
50 	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
51 			      NULL, fpu_enabled, nbytes);
52 }
53 
54 static inline void cast5_fpu_end(bool fpu_enabled)
55 {
56 	return glue_fpu_end(fpu_enabled);
57 }
58 
59 static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
60 		     bool enc)
61 {
62 	bool fpu_enabled = false;
63 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
64 	const unsigned int bsize = CAST5_BLOCK_SIZE;
65 	unsigned int nbytes;
66 	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
67 	int err;
68 
69 	fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
70 
71 	err = blkcipher_walk_virt(desc, walk);
72 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
73 
74 	while ((nbytes = walk->nbytes)) {
75 		u8 *wsrc = walk->src.virt.addr;
76 		u8 *wdst = walk->dst.virt.addr;
77 
78 		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
79 
80 		/* Process multi-block batch */
81 		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
82 			do {
83 				fn(ctx, wdst, wsrc);
84 
85 				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
86 				wdst += bsize * CAST5_PARALLEL_BLOCKS;
87 				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
88 			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
89 
90 			if (nbytes < bsize)
91 				goto done;
92 		}
93 
94 		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;
95 
96 		/* Handle leftovers */
97 		do {
98 			fn(ctx, wdst, wsrc);
99 
100 			wsrc += bsize;
101 			wdst += bsize;
102 			nbytes -= bsize;
103 		} while (nbytes >= bsize);
104 
105 done:
106 		err = blkcipher_walk_done(desc, walk, nbytes);
107 	}
108 
109 	cast5_fpu_end(fpu_enabled);
110 	return err;
111 }
112 
113 static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
114 		       struct scatterlist *src, unsigned int nbytes)
115 {
116 	struct blkcipher_walk walk;
117 
118 	blkcipher_walk_init(&walk, dst, src, nbytes);
119 	return ecb_crypt(desc, &walk, true);
120 }
121 
122 static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
123 		       struct scatterlist *src, unsigned int nbytes)
124 {
125 	struct blkcipher_walk walk;
126 
127 	blkcipher_walk_init(&walk, dst, src, nbytes);
128 	return ecb_crypt(desc, &walk, false);
129 }
130 
131 static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
132 				  struct blkcipher_walk *walk)
133 {
134 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
135 	const unsigned int bsize = CAST5_BLOCK_SIZE;
136 	unsigned int nbytes = walk->nbytes;
137 	u64 *src = (u64 *)walk->src.virt.addr;
138 	u64 *dst = (u64 *)walk->dst.virt.addr;
139 	u64 *iv = (u64 *)walk->iv;
140 
141 	do {
142 		*dst = *src ^ *iv;
143 		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
144 		iv = dst;
145 
146 		src += 1;
147 		dst += 1;
148 		nbytes -= bsize;
149 	} while (nbytes >= bsize);
150 
151 	*(u64 *)walk->iv = *iv;
152 	return nbytes;
153 }
154 
155 static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
156 		       struct scatterlist *src, unsigned int nbytes)
157 {
158 	struct blkcipher_walk walk;
159 	int err;
160 
161 	blkcipher_walk_init(&walk, dst, src, nbytes);
162 	err = blkcipher_walk_virt(desc, &walk);
163 
164 	while ((nbytes = walk.nbytes)) {
165 		nbytes = __cbc_encrypt(desc, &walk);
166 		err = blkcipher_walk_done(desc, &walk, nbytes);
167 	}
168 
169 	return err;
170 }
171 
172 static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
173 				  struct blkcipher_walk *walk)
174 {
175 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
176 	const unsigned int bsize = CAST5_BLOCK_SIZE;
177 	unsigned int nbytes = walk->nbytes;
178 	u64 *src = (u64 *)walk->src.virt.addr;
179 	u64 *dst = (u64 *)walk->dst.virt.addr;
180 	u64 last_iv;
181 
182 	/* Start of the last block. */
183 	src += nbytes / bsize - 1;
184 	dst += nbytes / bsize - 1;
185 
186 	last_iv = *src;
187 
188 	/* Process multi-block batch */
189 	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
190 		do {
191 			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
192 			src -= CAST5_PARALLEL_BLOCKS - 1;
193 			dst -= CAST5_PARALLEL_BLOCKS - 1;
194 
195 			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);
196 
197 			nbytes -= bsize;
198 			if (nbytes < bsize)
199 				goto done;
200 
201 			*dst ^= *(src - 1);
202 			src -= 1;
203 			dst -= 1;
204 		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
205 	}
206 
207 	/* Handle leftovers */
208 	for (;;) {
209 		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
210 
211 		nbytes -= bsize;
212 		if (nbytes < bsize)
213 			break;
214 
215 		*dst ^= *(src - 1);
216 		src -= 1;
217 		dst -= 1;
218 	}
219 
220 done:
221 	*dst ^= *(u64 *)walk->iv;
222 	*(u64 *)walk->iv = last_iv;
223 
224 	return nbytes;
225 }
226 
227 static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
228 		       struct scatterlist *src, unsigned int nbytes)
229 {
230 	bool fpu_enabled = false;
231 	struct blkcipher_walk walk;
232 	int err;
233 
234 	blkcipher_walk_init(&walk, dst, src, nbytes);
235 	err = blkcipher_walk_virt(desc, &walk);
236 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
237 
238 	while ((nbytes = walk.nbytes)) {
239 		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
240 		nbytes = __cbc_decrypt(desc, &walk);
241 		err = blkcipher_walk_done(desc, &walk, nbytes);
242 	}
243 
244 	cast5_fpu_end(fpu_enabled);
245 	return err;
246 }
247 
248 static void ctr_crypt_final(struct blkcipher_desc *desc,
249 			    struct blkcipher_walk *walk)
250 {
251 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
252 	u8 *ctrblk = walk->iv;
253 	u8 keystream[CAST5_BLOCK_SIZE];
254 	u8 *src = walk->src.virt.addr;
255 	u8 *dst = walk->dst.virt.addr;
256 	unsigned int nbytes = walk->nbytes;
257 
258 	__cast5_encrypt(ctx, keystream, ctrblk);
259 	crypto_xor(keystream, src, nbytes);
260 	memcpy(dst, keystream, nbytes);
261 
262 	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
263 }
264 
265 static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
266 				struct blkcipher_walk *walk)
267 {
268 	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
269 	const unsigned int bsize = CAST5_BLOCK_SIZE;
270 	unsigned int nbytes = walk->nbytes;
271 	u64 *src = (u64 *)walk->src.virt.addr;
272 	u64 *dst = (u64 *)walk->dst.virt.addr;
273 
274 	/* Process multi-block batch */
275 	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
276 		do {
277 			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
278 					(__be64 *)walk->iv);
279 
280 			src += CAST5_PARALLEL_BLOCKS;
281 			dst += CAST5_PARALLEL_BLOCKS;
282 			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
283 		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
284 
285 		if (nbytes < bsize)
286 			goto done;
287 	}
288 
289 	/* Handle leftovers */
290 	do {
291 		u64 ctrblk;
292 
293 		if (dst != src)
294 			*dst = *src;
295 
296 		ctrblk = *(u64 *)walk->iv;
297 		be64_add_cpu((__be64 *)walk->iv, 1);
298 
299 		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
300 		*dst ^= ctrblk;
301 
302 		src += 1;
303 		dst += 1;
304 		nbytes -= bsize;
305 	} while (nbytes >= bsize);
306 
307 done:
308 	return nbytes;
309 }
310 
311 static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
312 		     struct scatterlist *src, unsigned int nbytes)
313 {
314 	bool fpu_enabled = false;
315 	struct blkcipher_walk walk;
316 	int err;
317 
318 	blkcipher_walk_init(&walk, dst, src, nbytes);
319 	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
320 	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;
321 
322 	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
323 		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
324 		nbytes = __ctr_crypt(desc, &walk);
325 		err = blkcipher_walk_done(desc, &walk, nbytes);
326 	}
327 
328 	cast5_fpu_end(fpu_enabled);
329 
330 	if (walk.nbytes) {
331 		ctr_crypt_final(desc, &walk);
332 		err = blkcipher_walk_done(desc, &walk, 0);
333 	}
334 
335 	return err;
336 }
337 
338 
339 static struct crypto_alg cast5_algs[6] = { {
340 	.cra_name		= "__ecb-cast5-avx",
341 	.cra_driver_name	= "__driver-ecb-cast5-avx",
342 	.cra_priority		= 0,
343 	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
344 				  CRYPTO_ALG_INTERNAL,
345 	.cra_blocksize		= CAST5_BLOCK_SIZE,
346 	.cra_ctxsize		= sizeof(struct cast5_ctx),
347 	.cra_alignmask		= 0,
348 	.cra_type		= &crypto_blkcipher_type,
349 	.cra_module		= THIS_MODULE,
350 	.cra_u = {
351 		.blkcipher = {
352 			.min_keysize	= CAST5_MIN_KEY_SIZE,
353 			.max_keysize	= CAST5_MAX_KEY_SIZE,
354 			.setkey		= cast5_setkey,
355 			.encrypt	= ecb_encrypt,
356 			.decrypt	= ecb_decrypt,
357 		},
358 	},
359 }, {
360 	.cra_name		= "__cbc-cast5-avx",
361 	.cra_driver_name	= "__driver-cbc-cast5-avx",
362 	.cra_priority		= 0,
363 	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
364 				  CRYPTO_ALG_INTERNAL,
365 	.cra_blocksize		= CAST5_BLOCK_SIZE,
366 	.cra_ctxsize		= sizeof(struct cast5_ctx),
367 	.cra_alignmask		= 0,
368 	.cra_type		= &crypto_blkcipher_type,
369 	.cra_module		= THIS_MODULE,
370 	.cra_u = {
371 		.blkcipher = {
372 			.min_keysize	= CAST5_MIN_KEY_SIZE,
373 			.max_keysize	= CAST5_MAX_KEY_SIZE,
374 			.setkey		= cast5_setkey,
375 			.encrypt	= cbc_encrypt,
376 			.decrypt	= cbc_decrypt,
377 		},
378 	},
379 }, {
380 	.cra_name		= "__ctr-cast5-avx",
381 	.cra_driver_name	= "__driver-ctr-cast5-avx",
382 	.cra_priority		= 0,
383 	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER |
384 				  CRYPTO_ALG_INTERNAL,
385 	.cra_blocksize		= 1,
386 	.cra_ctxsize		= sizeof(struct cast5_ctx),
387 	.cra_alignmask		= 0,
388 	.cra_type		= &crypto_blkcipher_type,
389 	.cra_module		= THIS_MODULE,
390 	.cra_u = {
391 		.blkcipher = {
392 			.min_keysize	= CAST5_MIN_KEY_SIZE,
393 			.max_keysize	= CAST5_MAX_KEY_SIZE,
394 			.ivsize		= CAST5_BLOCK_SIZE,
395 			.setkey		= cast5_setkey,
396 			.encrypt	= ctr_crypt,
397 			.decrypt	= ctr_crypt,
398 		},
399 	},
400 }, {
401 	.cra_name		= "ecb(cast5)",
402 	.cra_driver_name	= "ecb-cast5-avx",
403 	.cra_priority		= 200,
404 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
405 	.cra_blocksize		= CAST5_BLOCK_SIZE,
406 	.cra_ctxsize		= sizeof(struct async_helper_ctx),
407 	.cra_alignmask		= 0,
408 	.cra_type		= &crypto_ablkcipher_type,
409 	.cra_module		= THIS_MODULE,
410 	.cra_init		= ablk_init,
411 	.cra_exit		= ablk_exit,
412 	.cra_u = {
413 		.ablkcipher = {
414 			.min_keysize	= CAST5_MIN_KEY_SIZE,
415 			.max_keysize	= CAST5_MAX_KEY_SIZE,
416 			.setkey		= ablk_set_key,
417 			.encrypt	= ablk_encrypt,
418 			.decrypt	= ablk_decrypt,
419 		},
420 	},
421 }, {
422 	.cra_name		= "cbc(cast5)",
423 	.cra_driver_name	= "cbc-cast5-avx",
424 	.cra_priority		= 200,
425 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
426 	.cra_blocksize		= CAST5_BLOCK_SIZE,
427 	.cra_ctxsize		= sizeof(struct async_helper_ctx),
428 	.cra_alignmask		= 0,
429 	.cra_type		= &crypto_ablkcipher_type,
430 	.cra_module		= THIS_MODULE,
431 	.cra_init		= ablk_init,
432 	.cra_exit		= ablk_exit,
433 	.cra_u = {
434 		.ablkcipher = {
435 			.min_keysize	= CAST5_MIN_KEY_SIZE,
436 			.max_keysize	= CAST5_MAX_KEY_SIZE,
437 			.ivsize		= CAST5_BLOCK_SIZE,
438 			.setkey		= ablk_set_key,
439 			.encrypt	= __ablk_encrypt,
440 			.decrypt	= ablk_decrypt,
441 		},
442 	},
443 }, {
444 	.cra_name		= "ctr(cast5)",
445 	.cra_driver_name	= "ctr-cast5-avx",
446 	.cra_priority		= 200,
447 	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
448 	.cra_blocksize		= 1,
449 	.cra_ctxsize		= sizeof(struct async_helper_ctx),
450 	.cra_alignmask		= 0,
451 	.cra_type		= &crypto_ablkcipher_type,
452 	.cra_module		= THIS_MODULE,
453 	.cra_init		= ablk_init,
454 	.cra_exit		= ablk_exit,
455 	.cra_u = {
456 		.ablkcipher = {
457 			.min_keysize	= CAST5_MIN_KEY_SIZE,
458 			.max_keysize	= CAST5_MAX_KEY_SIZE,
459 			.ivsize		= CAST5_BLOCK_SIZE,
460 			.setkey		= ablk_set_key,
461 			.encrypt	= ablk_encrypt,
462 			.decrypt	= ablk_encrypt,
463 			.geniv		= "chainiv",
464 		},
465 	},
466 } };
467 
468 static int __init cast5_init(void)
469 {
470 	const char *feature_name;
471 
472 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
473 				&feature_name)) {
474 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
475 		return -ENODEV;
476 	}
477 
478 	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
479 }
480 
481 static void __exit cast5_exit(void)
482 {
483 	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
484 }
485 
486 module_init(cast5_init);
487 module_exit(cast5_exit);
488 
489 MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
490 MODULE_LICENSE("GPL");
491 MODULE_ALIAS_CRYPTO("cast5");
492