/*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
 *
 * Copyright (C) 2012 Johannes Goetzfried
 *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
 * USA
 *
 */

#include <linux/module.h>
#include <linux/hardirq.h>
#include <linux/types.h>
#include <linux/crypto.h>
#include <linux/err.h>
#include <crypto/ablk_helper.h>
#include <crypto/algapi.h>
#include <crypto/cast5.h>
#include <crypto/cryptd.h>
#include <crypto/ctr.h>
#include <asm/xcr.h>
#include <asm/xsave.h>
#include <asm/crypto/glue_helper.h>

#define CAST5_PARALLEL_BLOCKS 16

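/*
 * 16-way routines implemented in the accompanying AVX assembly file;
 * each call processes CAST5_PARALLEL_BLOCKS * CAST5_BLOCK_SIZE =
 * 16 * 8 = 128 bytes at once.
 */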
asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
				    const u8 *src);
asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
				__be64 *iv);

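/*
 * Thin wrappers around the glue_helper FPU section management:
 * glue_fpu_begin() only claims the FPU (and with it the AVX state) once
 * at least one full 16-block batch is available, and cast5_fpu_end()
 * releases it again after the walk completes.
 */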
static inline bool cast5_fpu_begin(bool fpu_enabled, unsigned int nbytes)
{
	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
			      NULL, fpu_enabled, nbytes);
}

static inline void cast5_fpu_end(bool fpu_enabled)
{
	return glue_fpu_end(fpu_enabled);
}

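/*
 * ECB: run as many 16-block batches as possible through the AVX
 * routine, then finish the tail of each walk chunk with the generic
 * one-block cipher.
 */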
static int ecb_crypt(struct blkcipher_desc *desc, struct blkcipher_walk *walk,
		     bool enc)
{
	bool fpu_enabled = false;
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes;
	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
	int err;

	err = blkcipher_walk_virt(desc, walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk->nbytes)) {
		u8 *wsrc = walk->src.virt.addr;
		u8 *wdst = walk->dst.virt.addr;

		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);

		/* Process multi-block batch */
		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
			/*
			 * Select the 16-way routine here rather than
			 * before the walk loop, so a later iteration
			 * cannot reuse the one-block fn left behind by
			 * the leftover path below.
			 */
			fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
			do {
				fn(ctx, wdst, wsrc);

				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
				wdst += bsize * CAST5_PARALLEL_BLOCKS;
				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

			if (nbytes < bsize)
				goto done;
		}

		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;

		/* Handle leftovers */
		do {
			fn(ctx, wdst, wsrc);

			wsrc += bsize;
			wdst += bsize;
			nbytes -= bsize;
		} while (nbytes >= bsize);

done:
		err = blkcipher_walk_done(desc, walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

static int ecb_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, true);
}

static int ecb_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	return ecb_crypt(desc, &walk, false);
}

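/*
 * CBC encryption is inherently serial (each block chains into the
 * next), so it is done one 64-bit block at a time without the AVX
 * routines.
 */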
static unsigned int __cbc_encrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 *iv = (u64 *)walk->iv;

	do {
		*dst = *src ^ *iv;
		__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
		iv = dst;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

	*(u64 *)walk->iv = *iv;
	return nbytes;
}

static int cbc_encrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);

	while ((nbytes = walk.nbytes)) {
		nbytes = __cbc_encrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	return err;
}

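/*
 * CBC decryption walks each chunk back to front so that the ciphertext
 * a block needs for its final XOR is still intact when decrypting in
 * place; full 16-block batches go through the AVX routine.
 */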
static unsigned int __cbc_decrypt(struct blkcipher_desc *desc,
				  struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;
	u64 last_iv;

	/* Start of the last block. */
	src += nbytes / bsize - 1;
	dst += nbytes / bsize - 1;

	last_iv = *src;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
			src -= CAST5_PARALLEL_BLOCKS - 1;
			dst -= CAST5_PARALLEL_BLOCKS - 1;

			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);

			nbytes -= bsize;
			if (nbytes < bsize)
				goto done;

			*dst ^= *(src - 1);
			src -= 1;
			dst -= 1;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
	}

	/* Handle leftovers */
	for (;;) {
		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);

		nbytes -= bsize;
		if (nbytes < bsize)
			break;

		*dst ^= *(src - 1);
		src -= 1;
		dst -= 1;
	}

done:
	*dst ^= *(u64 *)walk->iv;
	*(u64 *)walk->iv = last_iv;

	return nbytes;
}

static int cbc_decrypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		       struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt(desc, &walk);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes)) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __cbc_decrypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);
	return err;
}

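/*
 * Produce keystream for a final partial block by encrypting the counter
 * block once more, XOR it into the data, and advance the counter.
 */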
static void ctr_crypt_final(struct blkcipher_desc *desc,
			    struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	u8 *ctrblk = walk->iv;
	u8 keystream[CAST5_BLOCK_SIZE];
	u8 *src = walk->src.virt.addr;
	u8 *dst = walk->dst.virt.addr;
	unsigned int nbytes = walk->nbytes;

	__cast5_encrypt(ctx, keystream, ctrblk);
	crypto_xor(keystream, src, nbytes);
	memcpy(dst, keystream, nbytes);

	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
}

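/*
 * CTR: 16-block batches go through the AVX routine, which also advances
 * the big-endian counter in walk->iv; remaining whole blocks encrypt a
 * local counter copy with the generic cipher and XOR it in.
 */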
static unsigned int __ctr_crypt(struct blkcipher_desc *desc,
				struct blkcipher_walk *walk)
{
	struct cast5_ctx *ctx = crypto_blkcipher_ctx(desc->tfm);
	const unsigned int bsize = CAST5_BLOCK_SIZE;
	unsigned int nbytes = walk->nbytes;
	u64 *src = (u64 *)walk->src.virt.addr;
	u64 *dst = (u64 *)walk->dst.virt.addr;

	/* Process multi-block batch */
	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
		do {
			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
					(__be64 *)walk->iv);

			src += CAST5_PARALLEL_BLOCKS;
			dst += CAST5_PARALLEL_BLOCKS;
			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);

		if (nbytes < bsize)
			goto done;
	}

	/* Handle leftovers */
	do {
		u64 ctrblk;

		if (dst != src)
			*dst = *src;

		ctrblk = *(u64 *)walk->iv;
		be64_add_cpu((__be64 *)walk->iv, 1);

		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
		*dst ^= ctrblk;

		src += 1;
		dst += 1;
		nbytes -= bsize;
	} while (nbytes >= bsize);

done:
	return nbytes;
}

static int ctr_crypt(struct blkcipher_desc *desc, struct scatterlist *dst,
		     struct scatterlist *src, unsigned int nbytes)
{
	bool fpu_enabled = false;
	struct blkcipher_walk walk;
	int err;

	blkcipher_walk_init(&walk, dst, src, nbytes);
	err = blkcipher_walk_virt_block(desc, &walk, CAST5_BLOCK_SIZE);
	desc->flags &= ~CRYPTO_TFM_REQ_MAY_SLEEP;

	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
		fpu_enabled = cast5_fpu_begin(fpu_enabled, nbytes);
		nbytes = __ctr_crypt(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, nbytes);
	}

	cast5_fpu_end(fpu_enabled);

	if (walk.nbytes) {
		ctr_crypt_final(desc, &walk);
		err = blkcipher_walk_done(desc, &walk, 0);
	}

	return err;
}

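/*
 * Six registrations: three internal "__*" blkciphers that must only run
 * while the FPU is usable, and three async ablkcipher front-ends that
 * use the ablk_helper/cryptd machinery to fall back to process context
 * when it is not.
 */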
static struct crypto_alg cast5_algs[6] = { {
	.cra_name		= "__ecb-cast5-avx",
	.cra_driver_name	= "__driver-ecb-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ecb_encrypt,
			.decrypt	= ecb_decrypt,
		},
	},
}, {
	.cra_name		= "__cbc-cast5-avx",
	.cra_driver_name	= "__driver-cbc-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= cbc_encrypt,
			.decrypt	= cbc_decrypt,
		},
	},
}, {
	.cra_name		= "__ctr-cast5-avx",
	.cra_driver_name	= "__driver-ctr-cast5-avx",
	.cra_priority		= 0,
	.cra_flags		= CRYPTO_ALG_TYPE_BLKCIPHER,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct cast5_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_blkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_u = {
		.blkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= cast5_setkey,
			.encrypt	= ctr_crypt,
			.decrypt	= ctr_crypt,
		},
	},
}, {
	.cra_name		= "ecb(cast5)",
	.cra_driver_name	= "ecb-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "cbc(cast5)",
	.cra_driver_name	= "cbc-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= CAST5_BLOCK_SIZE,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= __ablk_encrypt,
			.decrypt	= ablk_decrypt,
		},
	},
}, {
	.cra_name		= "ctr(cast5)",
	.cra_driver_name	= "ctr-cast5-avx",
	.cra_priority		= 200,
	.cra_flags		= CRYPTO_ALG_TYPE_ABLKCIPHER | CRYPTO_ALG_ASYNC,
	.cra_blocksize		= 1,
	.cra_ctxsize		= sizeof(struct async_helper_ctx),
	.cra_alignmask		= 0,
	.cra_type		= &crypto_ablkcipher_type,
	.cra_module		= THIS_MODULE,
	.cra_init		= ablk_init,
	.cra_exit		= ablk_exit,
	.cra_u = {
		.ablkcipher = {
			.min_keysize	= CAST5_MIN_KEY_SIZE,
			.max_keysize	= CAST5_MAX_KEY_SIZE,
			.ivsize		= CAST5_BLOCK_SIZE,
			.setkey		= ablk_set_key,
			.encrypt	= ablk_encrypt,
			.decrypt	= ablk_encrypt,
			.geniv		= "chainiv",
		},
	},
} };

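/*
 * Only register if the CPU advertises AVX and OSXSAVE and the OS has
 * enabled both SSE and YMM state saving in XCR0.
 */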
static int __init cast5_init(void)
{
	u64 xcr0;

	if (!cpu_has_avx || !cpu_has_osxsave) {
		pr_info("AVX instructions are not detected.\n");
		return -ENODEV;
	}

	xcr0 = xgetbv(XCR_XFEATURE_ENABLED_MASK);
	if ((xcr0 & (XSTATE_SSE | XSTATE_YMM)) != (XSTATE_SSE | XSTATE_YMM)) {
		pr_info("AVX detected but unusable.\n");
		return -ENODEV;
	}

	return crypto_register_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

static void __exit cast5_exit(void)
{
	crypto_unregister_algs(cast5_algs, ARRAY_SIZE(cast5_algs));
}

module_init(cast5_init);
module_exit(cast5_exit);

MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
MODULE_LICENSE("GPL");
MODULE_ALIAS_CRYPTO("cast5");