xref: /linux/arch/x86/crypto/cast5_avx_glue.c (revision b9b77222d4ff6b5bb8f5d87fca20de0910618bb9)
1 /*
 * Glue Code for the AVX assembler implementation of the Cast5 Cipher
3  *
4  * Copyright (C) 2012 Johannes Goetzfried
5  *     <Johannes.Goetzfried@informatik.stud.uni-erlangen.de>
6  *
7  * This program is free software; you can redistribute it and/or modify
8  * it under the terms of the GNU General Public License as published by
9  * the Free Software Foundation; either version 2 of the License, or
10  * (at your option) any later version.
11  *
12  * This program is distributed in the hope that it will be useful,
13  * but WITHOUT ANY WARRANTY; without even the implied warranty of
14  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
15  * GNU General Public License for more details.
16  *
17  * You should have received a copy of the GNU General Public License
18  * along with this program; if not, write to the Free Software
19  * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA  02111-1307
20  * USA
21  *
22  */
23 
24 #include <asm/crypto/glue_helper.h>
25 #include <crypto/algapi.h>
26 #include <crypto/cast5.h>
27 #include <crypto/internal/simd.h>
28 #include <linux/crypto.h>
29 #include <linux/err.h>
30 #include <linux/module.h>
31 #include <linux/types.h>
32 
33 #define CAST5_PARALLEL_BLOCKS 16
34 
35 asmlinkage void cast5_ecb_enc_16way(struct cast5_ctx *ctx, u8 *dst,
36 				    const u8 *src);
37 asmlinkage void cast5_ecb_dec_16way(struct cast5_ctx *ctx, u8 *dst,
38 				    const u8 *src);
39 asmlinkage void cast5_cbc_dec_16way(struct cast5_ctx *ctx, u8 *dst,
40 				    const u8 *src);
41 asmlinkage void cast5_ctr_16way(struct cast5_ctx *ctx, u8 *dst, const u8 *src,
42 				__be64 *iv);
43 
44 static int cast5_setkey_skcipher(struct crypto_skcipher *tfm, const u8 *key,
45 				 unsigned int keylen)
46 {
47 	return cast5_setkey(&tfm->base, key, keylen);
48 }
49 
50 static inline bool cast5_fpu_begin(bool fpu_enabled, struct skcipher_walk *walk,
51 				   unsigned int nbytes)
52 {
53 	return glue_fpu_begin(CAST5_BLOCK_SIZE, CAST5_PARALLEL_BLOCKS,
54 			      walk, fpu_enabled, nbytes);
55 }
56 
/*
 * Counterpart of cast5_fpu_begin(): passes the flag accumulated over a walk
 * on to glue_fpu_end().
 *
 * @fpu_enabled: value returned by the last cast5_fpu_begin() call
 *
 * Nothing is returned, so the helper is called as a plain statement; a
 * "return <expression>;" is not valid ISO C in a function returning void.
 */
static inline void cast5_fpu_end(bool fpu_enabled)
{
	glue_fpu_end(fpu_enabled);
}
61 
62 static int ecb_crypt(struct skcipher_request *req, bool enc)
63 {
64 	bool fpu_enabled = false;
65 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
66 	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
67 	struct skcipher_walk walk;
68 	const unsigned int bsize = CAST5_BLOCK_SIZE;
69 	unsigned int nbytes;
70 	void (*fn)(struct cast5_ctx *ctx, u8 *dst, const u8 *src);
71 	int err;
72 
73 	err = skcipher_walk_virt(&walk, req, false);
74 
75 	while ((nbytes = walk.nbytes)) {
76 		u8 *wsrc = walk.src.virt.addr;
77 		u8 *wdst = walk.dst.virt.addr;
78 
79 		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
80 
81 		/* Process multi-block batch */
82 		if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
83 			fn = (enc) ? cast5_ecb_enc_16way : cast5_ecb_dec_16way;
84 			do {
85 				fn(ctx, wdst, wsrc);
86 
87 				wsrc += bsize * CAST5_PARALLEL_BLOCKS;
88 				wdst += bsize * CAST5_PARALLEL_BLOCKS;
89 				nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
90 			} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
91 
92 			if (nbytes < bsize)
93 				goto done;
94 		}
95 
96 		fn = (enc) ? __cast5_encrypt : __cast5_decrypt;
97 
98 		/* Handle leftovers */
99 		do {
100 			fn(ctx, wdst, wsrc);
101 
102 			wsrc += bsize;
103 			wdst += bsize;
104 			nbytes -= bsize;
105 		} while (nbytes >= bsize);
106 
107 done:
108 		err = skcipher_walk_done(&walk, nbytes);
109 	}
110 
111 	cast5_fpu_end(fpu_enabled);
112 	return err;
113 }
114 
/* skcipher ->encrypt hook for ECB: runs ecb_crypt() in encrypt direction. */
static int ecb_encrypt(struct skcipher_request *req)
{
	const bool encrypt = true;

	return ecb_crypt(req, encrypt);
}
119 
/* skcipher ->decrypt hook for ECB: runs ecb_crypt() in decrypt direction. */
static int ecb_decrypt(struct skcipher_request *req)
{
	const bool encrypt = false;

	return ecb_crypt(req, encrypt);
}
124 
125 static int cbc_encrypt(struct skcipher_request *req)
126 {
127 	const unsigned int bsize = CAST5_BLOCK_SIZE;
128 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
129 	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
130 	struct skcipher_walk walk;
131 	unsigned int nbytes;
132 	int err;
133 
134 	err = skcipher_walk_virt(&walk, req, false);
135 
136 	while ((nbytes = walk.nbytes)) {
137 		u64 *src = (u64 *)walk.src.virt.addr;
138 		u64 *dst = (u64 *)walk.dst.virt.addr;
139 		u64 *iv = (u64 *)walk.iv;
140 
141 		do {
142 			*dst = *src ^ *iv;
143 			__cast5_encrypt(ctx, (u8 *)dst, (u8 *)dst);
144 			iv = dst;
145 			src++;
146 			dst++;
147 			nbytes -= bsize;
148 		} while (nbytes >= bsize);
149 
150 		*(u64 *)walk.iv = *iv;
151 		err = skcipher_walk_done(&walk, nbytes);
152 	}
153 
154 	return err;
155 }
156 
157 static unsigned int __cbc_decrypt(struct cast5_ctx *ctx,
158 				  struct skcipher_walk *walk)
159 {
160 	const unsigned int bsize = CAST5_BLOCK_SIZE;
161 	unsigned int nbytes = walk->nbytes;
162 	u64 *src = (u64 *)walk->src.virt.addr;
163 	u64 *dst = (u64 *)walk->dst.virt.addr;
164 	u64 last_iv;
165 
166 	/* Start of the last block. */
167 	src += nbytes / bsize - 1;
168 	dst += nbytes / bsize - 1;
169 
170 	last_iv = *src;
171 
172 	/* Process multi-block batch */
173 	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
174 		do {
175 			nbytes -= bsize * (CAST5_PARALLEL_BLOCKS - 1);
176 			src -= CAST5_PARALLEL_BLOCKS - 1;
177 			dst -= CAST5_PARALLEL_BLOCKS - 1;
178 
179 			cast5_cbc_dec_16way(ctx, (u8 *)dst, (u8 *)src);
180 
181 			nbytes -= bsize;
182 			if (nbytes < bsize)
183 				goto done;
184 
185 			*dst ^= *(src - 1);
186 			src -= 1;
187 			dst -= 1;
188 		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
189 	}
190 
191 	/* Handle leftovers */
192 	for (;;) {
193 		__cast5_decrypt(ctx, (u8 *)dst, (u8 *)src);
194 
195 		nbytes -= bsize;
196 		if (nbytes < bsize)
197 			break;
198 
199 		*dst ^= *(src - 1);
200 		src -= 1;
201 		dst -= 1;
202 	}
203 
204 done:
205 	*dst ^= *(u64 *)walk->iv;
206 	*(u64 *)walk->iv = last_iv;
207 
208 	return nbytes;
209 }
210 
211 static int cbc_decrypt(struct skcipher_request *req)
212 {
213 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
214 	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
215 	bool fpu_enabled = false;
216 	struct skcipher_walk walk;
217 	unsigned int nbytes;
218 	int err;
219 
220 	err = skcipher_walk_virt(&walk, req, false);
221 
222 	while ((nbytes = walk.nbytes)) {
223 		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
224 		nbytes = __cbc_decrypt(ctx, &walk);
225 		err = skcipher_walk_done(&walk, nbytes);
226 	}
227 
228 	cast5_fpu_end(fpu_enabled);
229 	return err;
230 }
231 
232 static void ctr_crypt_final(struct skcipher_walk *walk, struct cast5_ctx *ctx)
233 {
234 	u8 *ctrblk = walk->iv;
235 	u8 keystream[CAST5_BLOCK_SIZE];
236 	u8 *src = walk->src.virt.addr;
237 	u8 *dst = walk->dst.virt.addr;
238 	unsigned int nbytes = walk->nbytes;
239 
240 	__cast5_encrypt(ctx, keystream, ctrblk);
241 	crypto_xor_cpy(dst, keystream, src, nbytes);
242 
243 	crypto_inc(ctrblk, CAST5_BLOCK_SIZE);
244 }
245 
246 static unsigned int __ctr_crypt(struct skcipher_walk *walk,
247 				struct cast5_ctx *ctx)
248 {
249 	const unsigned int bsize = CAST5_BLOCK_SIZE;
250 	unsigned int nbytes = walk->nbytes;
251 	u64 *src = (u64 *)walk->src.virt.addr;
252 	u64 *dst = (u64 *)walk->dst.virt.addr;
253 
254 	/* Process multi-block batch */
255 	if (nbytes >= bsize * CAST5_PARALLEL_BLOCKS) {
256 		do {
257 			cast5_ctr_16way(ctx, (u8 *)dst, (u8 *)src,
258 					(__be64 *)walk->iv);
259 
260 			src += CAST5_PARALLEL_BLOCKS;
261 			dst += CAST5_PARALLEL_BLOCKS;
262 			nbytes -= bsize * CAST5_PARALLEL_BLOCKS;
263 		} while (nbytes >= bsize * CAST5_PARALLEL_BLOCKS);
264 
265 		if (nbytes < bsize)
266 			goto done;
267 	}
268 
269 	/* Handle leftovers */
270 	do {
271 		u64 ctrblk;
272 
273 		if (dst != src)
274 			*dst = *src;
275 
276 		ctrblk = *(u64 *)walk->iv;
277 		be64_add_cpu((__be64 *)walk->iv, 1);
278 
279 		__cast5_encrypt(ctx, (u8 *)&ctrblk, (u8 *)&ctrblk);
280 		*dst ^= ctrblk;
281 
282 		src += 1;
283 		dst += 1;
284 		nbytes -= bsize;
285 	} while (nbytes >= bsize);
286 
287 done:
288 	return nbytes;
289 }
290 
291 static int ctr_crypt(struct skcipher_request *req)
292 {
293 	struct crypto_skcipher *tfm = crypto_skcipher_reqtfm(req);
294 	struct cast5_ctx *ctx = crypto_skcipher_ctx(tfm);
295 	bool fpu_enabled = false;
296 	struct skcipher_walk walk;
297 	unsigned int nbytes;
298 	int err;
299 
300 	err = skcipher_walk_virt(&walk, req, false);
301 
302 	while ((nbytes = walk.nbytes) >= CAST5_BLOCK_SIZE) {
303 		fpu_enabled = cast5_fpu_begin(fpu_enabled, &walk, nbytes);
304 		nbytes = __ctr_crypt(&walk, ctx);
305 		err = skcipher_walk_done(&walk, nbytes);
306 	}
307 
308 	cast5_fpu_end(fpu_enabled);
309 
310 	if (walk.nbytes) {
311 		ctr_crypt_final(&walk, ctx);
312 		err = skcipher_walk_done(&walk, 0);
313 	}
314 
315 	return err;
316 }
317 
318 static struct skcipher_alg cast5_algs[] = {
319 	{
320 		.base.cra_name		= "__ecb(cast5)",
321 		.base.cra_driver_name	= "__ecb-cast5-avx",
322 		.base.cra_priority	= 200,
323 		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
324 		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
325 		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
326 		.base.cra_module	= THIS_MODULE,
327 		.min_keysize		= CAST5_MIN_KEY_SIZE,
328 		.max_keysize		= CAST5_MAX_KEY_SIZE,
329 		.setkey			= cast5_setkey_skcipher,
330 		.encrypt		= ecb_encrypt,
331 		.decrypt		= ecb_decrypt,
332 	}, {
333 		.base.cra_name		= "__cbc(cast5)",
334 		.base.cra_driver_name	= "__cbc-cast5-avx",
335 		.base.cra_priority	= 200,
336 		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
337 		.base.cra_blocksize	= CAST5_BLOCK_SIZE,
338 		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
339 		.base.cra_module	= THIS_MODULE,
340 		.min_keysize		= CAST5_MIN_KEY_SIZE,
341 		.max_keysize		= CAST5_MAX_KEY_SIZE,
342 		.ivsize			= CAST5_BLOCK_SIZE,
343 		.setkey			= cast5_setkey_skcipher,
344 		.encrypt		= cbc_encrypt,
345 		.decrypt		= cbc_decrypt,
346 	}, {
347 		.base.cra_name		= "__ctr(cast5)",
348 		.base.cra_driver_name	= "__ctr-cast5-avx",
349 		.base.cra_priority	= 200,
350 		.base.cra_flags		= CRYPTO_ALG_INTERNAL,
351 		.base.cra_blocksize	= 1,
352 		.base.cra_ctxsize	= sizeof(struct cast5_ctx),
353 		.base.cra_module	= THIS_MODULE,
354 		.min_keysize		= CAST5_MIN_KEY_SIZE,
355 		.max_keysize		= CAST5_MAX_KEY_SIZE,
356 		.ivsize			= CAST5_BLOCK_SIZE,
357 		.chunksize		= CAST5_BLOCK_SIZE,
358 		.setkey			= cast5_setkey_skcipher,
359 		.encrypt		= ctr_crypt,
360 		.decrypt		= ctr_crypt,
361 	}
362 };
363 
364 static struct simd_skcipher_alg *cast5_simd_algs[ARRAY_SIZE(cast5_algs)];
365 
366 static int __init cast5_init(void)
367 {
368 	const char *feature_name;
369 
370 	if (!cpu_has_xfeatures(XFEATURE_MASK_SSE | XFEATURE_MASK_YMM,
371 				&feature_name)) {
372 		pr_info("CPU feature '%s' is not supported.\n", feature_name);
373 		return -ENODEV;
374 	}
375 
376 	return simd_register_skciphers_compat(cast5_algs,
377 					      ARRAY_SIZE(cast5_algs),
378 					      cast5_simd_algs);
379 }
380 
381 static void __exit cast5_exit(void)
382 {
383 	simd_unregister_skciphers(cast5_algs, ARRAY_SIZE(cast5_algs),
384 				  cast5_simd_algs);
385 }
386 
387 module_init(cast5_init);
388 module_exit(cast5_exit);
389 
390 MODULE_DESCRIPTION("Cast5 Cipher Algorithm, AVX optimized");
391 MODULE_LICENSE("GPL");
392 MODULE_ALIAS_CRYPTO("cast5");
393