xref: /linux/drivers/crypto/padlock-sha.c (revision bfd5bb6f90af092aa345b15cd78143956a13c2a8)
1 /*
2  * Cryptographic API.
3  *
4  * Support for VIA PadLock hardware crypto engine.
5  *
6  * Copyright (c) 2006  Michal Ludvig <michal@logix.cz>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  */
14 
15 #include <crypto/internal/hash.h>
16 #include <crypto/padlock.h>
17 #include <crypto/sha.h>
18 #include <linux/err.h>
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/errno.h>
22 #include <linux/interrupt.h>
23 #include <linux/kernel.h>
24 #include <linux/scatterlist.h>
25 #include <asm/cpu_device_id.h>
26 #include <asm/fpu/api.h>
27 
28 struct padlock_sha_desc {
29 	struct shash_desc fallback;
30 };
31 
/*
 * Per-transform context of the non-Nano algorithms: the fallback shash that
 * padlock_cra_init() allocates and padlock_cra_exit() frees.
 */
struct padlock_sha_ctx {
	struct crypto_shash *fallback;
};
35 
36 static int padlock_sha_init(struct shash_desc *desc)
37 {
38 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
39 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
40 
41 	dctx->fallback.tfm = ctx->fallback;
42 	dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
43 	return crypto_shash_init(&dctx->fallback);
44 }
45 
46 static int padlock_sha_update(struct shash_desc *desc,
47 			      const u8 *data, unsigned int length)
48 {
49 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
50 
51 	dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
52 	return crypto_shash_update(&dctx->fallback, data, length);
53 }
54 
55 static int padlock_sha_export(struct shash_desc *desc, void *out)
56 {
57 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
58 
59 	return crypto_shash_export(&dctx->fallback, out);
60 }
61 
62 static int padlock_sha_import(struct shash_desc *desc, const void *in)
63 {
64 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
65 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
66 
67 	dctx->fallback.tfm = ctx->fallback;
68 	dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
69 	return crypto_shash_import(&dctx->fallback, in);
70 }
71 
/*
 * Copy @count 32-bit words from @src to @dst, byte-swapping each word with
 * swab32() on the way.
 */
static inline void padlock_output_block(uint32_t *src,
					uint32_t *dst, size_t count)
{
	size_t i;

	for (i = 0; i < count; i++)
		dst[i] = swab32(src[i]);
}
78 
79 static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
80 			      unsigned int count, u8 *out)
81 {
82 	/* We can't store directly to *out as it may be unaligned. */
83 	/* BTW Don't reduce the buffer size below 128 Bytes!
84 	 *     PadLock microcode needs it that big. */
85 	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
86 		((aligned(STACK_ALIGN)));
87 	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
88 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
89 	struct sha1_state state;
90 	unsigned int space;
91 	unsigned int leftover;
92 	int err;
93 
94 	dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
95 	err = crypto_shash_export(&dctx->fallback, &state);
96 	if (err)
97 		goto out;
98 
99 	if (state.count + count > ULONG_MAX)
100 		return crypto_shash_finup(&dctx->fallback, in, count, out);
101 
102 	leftover = ((state.count - 1) & (SHA1_BLOCK_SIZE - 1)) + 1;
103 	space =  SHA1_BLOCK_SIZE - leftover;
104 	if (space) {
105 		if (count > space) {
106 			err = crypto_shash_update(&dctx->fallback, in, space) ?:
107 			      crypto_shash_export(&dctx->fallback, &state);
108 			if (err)
109 				goto out;
110 			count -= space;
111 			in += space;
112 		} else {
113 			memcpy(state.buffer + leftover, in, count);
114 			in = state.buffer;
115 			count += leftover;
116 			state.count &= ~(SHA1_BLOCK_SIZE - 1);
117 		}
118 	}
119 
120 	memcpy(result, &state.state, SHA1_DIGEST_SIZE);
121 
122 	asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
123 		      : \
124 		      : "c"((unsigned long)state.count + count), \
125 			"a"((unsigned long)state.count), \
126 			"S"(in), "D"(result));
127 
128 	padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);
129 
130 out:
131 	return err;
132 }
133 
134 static int padlock_sha1_final(struct shash_desc *desc, u8 *out)
135 {
136 	u8 buf[4];
137 
138 	return padlock_sha1_finup(desc, buf, 0, out);
139 }
140 
141 static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
142 				unsigned int count, u8 *out)
143 {
144 	/* We can't store directly to *out as it may be unaligned. */
145 	/* BTW Don't reduce the buffer size below 128 Bytes!
146 	 *     PadLock microcode needs it that big. */
147 	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
148 		((aligned(STACK_ALIGN)));
149 	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
150 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
151 	struct sha256_state state;
152 	unsigned int space;
153 	unsigned int leftover;
154 	int err;
155 
156 	dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
157 	err = crypto_shash_export(&dctx->fallback, &state);
158 	if (err)
159 		goto out;
160 
161 	if (state.count + count > ULONG_MAX)
162 		return crypto_shash_finup(&dctx->fallback, in, count, out);
163 
164 	leftover = ((state.count - 1) & (SHA256_BLOCK_SIZE - 1)) + 1;
165 	space =  SHA256_BLOCK_SIZE - leftover;
166 	if (space) {
167 		if (count > space) {
168 			err = crypto_shash_update(&dctx->fallback, in, space) ?:
169 			      crypto_shash_export(&dctx->fallback, &state);
170 			if (err)
171 				goto out;
172 			count -= space;
173 			in += space;
174 		} else {
175 			memcpy(state.buf + leftover, in, count);
176 			in = state.buf;
177 			count += leftover;
178 			state.count &= ~(SHA1_BLOCK_SIZE - 1);
179 		}
180 	}
181 
182 	memcpy(result, &state.state, SHA256_DIGEST_SIZE);
183 
184 	asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
185 		      : \
186 		      : "c"((unsigned long)state.count + count), \
187 			"a"((unsigned long)state.count), \
188 			"S"(in), "D"(result));
189 
190 	padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);
191 
192 out:
193 	return err;
194 }
195 
196 static int padlock_sha256_final(struct shash_desc *desc, u8 *out)
197 {
198 	u8 buf[4];
199 
200 	return padlock_sha256_finup(desc, buf, 0, out);
201 }
202 
203 static int padlock_cra_init(struct crypto_tfm *tfm)
204 {
205 	struct crypto_shash *hash = __crypto_shash_cast(tfm);
206 	const char *fallback_driver_name = crypto_tfm_alg_name(tfm);
207 	struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);
208 	struct crypto_shash *fallback_tfm;
209 	int err = -ENOMEM;
210 
211 	/* Allocate a fallback and abort if it failed. */
212 	fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
213 					  CRYPTO_ALG_NEED_FALLBACK);
214 	if (IS_ERR(fallback_tfm)) {
215 		printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
216 		       fallback_driver_name);
217 		err = PTR_ERR(fallback_tfm);
218 		goto out;
219 	}
220 
221 	ctx->fallback = fallback_tfm;
222 	hash->descsize += crypto_shash_descsize(fallback_tfm);
223 	return 0;
224 
225 out:
226 	return err;
227 }
228 
229 static void padlock_cra_exit(struct crypto_tfm *tfm)
230 {
231 	struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);
232 
233 	crypto_free_shash(ctx->fallback);
234 }
235 
236 static struct shash_alg sha1_alg = {
237 	.digestsize	=	SHA1_DIGEST_SIZE,
238 	.init   	= 	padlock_sha_init,
239 	.update 	=	padlock_sha_update,
240 	.finup  	=	padlock_sha1_finup,
241 	.final  	=	padlock_sha1_final,
242 	.export		=	padlock_sha_export,
243 	.import		=	padlock_sha_import,
244 	.descsize	=	sizeof(struct padlock_sha_desc),
245 	.statesize	=	sizeof(struct sha1_state),
246 	.base		=	{
247 		.cra_name		=	"sha1",
248 		.cra_driver_name	=	"sha1-padlock",
249 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
250 		.cra_flags		=	CRYPTO_ALG_TYPE_SHASH |
251 						CRYPTO_ALG_NEED_FALLBACK,
252 		.cra_blocksize		=	SHA1_BLOCK_SIZE,
253 		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
254 		.cra_module		=	THIS_MODULE,
255 		.cra_init		=	padlock_cra_init,
256 		.cra_exit		=	padlock_cra_exit,
257 	}
258 };
259 
260 static struct shash_alg sha256_alg = {
261 	.digestsize	=	SHA256_DIGEST_SIZE,
262 	.init   	= 	padlock_sha_init,
263 	.update 	=	padlock_sha_update,
264 	.finup  	=	padlock_sha256_finup,
265 	.final  	=	padlock_sha256_final,
266 	.export		=	padlock_sha_export,
267 	.import		=	padlock_sha_import,
268 	.descsize	=	sizeof(struct padlock_sha_desc),
269 	.statesize	=	sizeof(struct sha256_state),
270 	.base		=	{
271 		.cra_name		=	"sha256",
272 		.cra_driver_name	=	"sha256-padlock",
273 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
274 		.cra_flags		=	CRYPTO_ALG_TYPE_SHASH |
275 						CRYPTO_ALG_NEED_FALLBACK,
276 		.cra_blocksize		=	SHA256_BLOCK_SIZE,
277 		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
278 		.cra_module		=	THIS_MODULE,
279 		.cra_init		=	padlock_cra_init,
280 		.cra_exit		=	padlock_cra_exit,
281 	}
282 };
283 
/*
 * Two additional shash_alg instances for the hardware-implemented
 * multi-part hash supported by the VIA Nano processor.
 */
286 static int padlock_sha1_init_nano(struct shash_desc *desc)
287 {
288 	struct sha1_state *sctx = shash_desc_ctx(desc);
289 
290 	*sctx = (struct sha1_state){
291 		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
292 	};
293 
294 	return 0;
295 }
296 
297 static int padlock_sha1_update_nano(struct shash_desc *desc,
298 			const u8 *data,	unsigned int len)
299 {
300 	struct sha1_state *sctx = shash_desc_ctx(desc);
301 	unsigned int partial, done;
302 	const u8 *src;
303 	/*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
304 	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
305 		((aligned(STACK_ALIGN)));
306 	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
307 
308 	partial = sctx->count & 0x3f;
309 	sctx->count += len;
310 	done = 0;
311 	src = data;
312 	memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);
313 
314 	if ((partial + len) >= SHA1_BLOCK_SIZE) {
315 
316 		/* Append the bytes in state's buffer to a block to handle */
317 		if (partial) {
318 			done = -partial;
319 			memcpy(sctx->buffer + partial, data,
320 				done + SHA1_BLOCK_SIZE);
321 			src = sctx->buffer;
322 			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
323 			: "+S"(src), "+D"(dst) \
324 			: "a"((long)-1), "c"((unsigned long)1));
325 			done += SHA1_BLOCK_SIZE;
326 			src = data + done;
327 		}
328 
329 		/* Process the left bytes from the input data */
330 		if (len - done >= SHA1_BLOCK_SIZE) {
331 			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
332 			: "+S"(src), "+D"(dst)
333 			: "a"((long)-1),
334 			"c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
335 			done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
336 			src = data + done;
337 		}
338 		partial = 0;
339 	}
340 	memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);
341 	memcpy(sctx->buffer + partial, src, len - done);
342 
343 	return 0;
344 }
345 
346 static int padlock_sha1_final_nano(struct shash_desc *desc, u8 *out)
347 {
348 	struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc);
349 	unsigned int partial, padlen;
350 	__be64 bits;
351 	static const u8 padding[64] = { 0x80, };
352 
353 	bits = cpu_to_be64(state->count << 3);
354 
355 	/* Pad out to 56 mod 64 */
356 	partial = state->count & 0x3f;
357 	padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
358 	padlock_sha1_update_nano(desc, padding, padlen);
359 
360 	/* Append length field bytes */
361 	padlock_sha1_update_nano(desc, (const u8 *)&bits, sizeof(bits));
362 
363 	/* Swap to output */
364 	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5);
365 
366 	return 0;
367 }
368 
369 static int padlock_sha256_init_nano(struct shash_desc *desc)
370 {
371 	struct sha256_state *sctx = shash_desc_ctx(desc);
372 
373 	*sctx = (struct sha256_state){
374 		.state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, \
375 				SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7},
376 	};
377 
378 	return 0;
379 }
380 
381 static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
382 			  unsigned int len)
383 {
384 	struct sha256_state *sctx = shash_desc_ctx(desc);
385 	unsigned int partial, done;
386 	const u8 *src;
387 	/*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
388 	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
389 		((aligned(STACK_ALIGN)));
390 	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
391 
392 	partial = sctx->count & 0x3f;
393 	sctx->count += len;
394 	done = 0;
395 	src = data;
396 	memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);
397 
398 	if ((partial + len) >= SHA256_BLOCK_SIZE) {
399 
400 		/* Append the bytes in state's buffer to a block to handle */
401 		if (partial) {
402 			done = -partial;
403 			memcpy(sctx->buf + partial, data,
404 				done + SHA256_BLOCK_SIZE);
405 			src = sctx->buf;
406 			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
407 			: "+S"(src), "+D"(dst)
408 			: "a"((long)-1), "c"((unsigned long)1));
409 			done += SHA256_BLOCK_SIZE;
410 			src = data + done;
411 		}
412 
413 		/* Process the left bytes from input data*/
414 		if (len - done >= SHA256_BLOCK_SIZE) {
415 			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
416 			: "+S"(src), "+D"(dst)
417 			: "a"((long)-1),
418 			"c"((unsigned long)((len - done) / 64)));
419 			done += ((len - done) - (len - done) % 64);
420 			src = data + done;
421 		}
422 		partial = 0;
423 	}
424 	memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE);
425 	memcpy(sctx->buf + partial, src, len - done);
426 
427 	return 0;
428 }
429 
430 static int padlock_sha256_final_nano(struct shash_desc *desc, u8 *out)
431 {
432 	struct sha256_state *state =
433 		(struct sha256_state *)shash_desc_ctx(desc);
434 	unsigned int partial, padlen;
435 	__be64 bits;
436 	static const u8 padding[64] = { 0x80, };
437 
438 	bits = cpu_to_be64(state->count << 3);
439 
440 	/* Pad out to 56 mod 64 */
441 	partial = state->count & 0x3f;
442 	padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
443 	padlock_sha256_update_nano(desc, padding, padlen);
444 
445 	/* Append length field bytes */
446 	padlock_sha256_update_nano(desc, (const u8 *)&bits, sizeof(bits));
447 
448 	/* Swap to output */
449 	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8);
450 
451 	return 0;
452 }
453 
454 static int padlock_sha_export_nano(struct shash_desc *desc,
455 				void *out)
456 {
457 	int statesize = crypto_shash_statesize(desc->tfm);
458 	void *sctx = shash_desc_ctx(desc);
459 
460 	memcpy(out, sctx, statesize);
461 	return 0;
462 }
463 
464 static int padlock_sha_import_nano(struct shash_desc *desc,
465 				const void *in)
466 {
467 	int statesize = crypto_shash_statesize(desc->tfm);
468 	void *sctx = shash_desc_ctx(desc);
469 
470 	memcpy(sctx, in, statesize);
471 	return 0;
472 }
473 
474 static struct shash_alg sha1_alg_nano = {
475 	.digestsize	=	SHA1_DIGEST_SIZE,
476 	.init		=	padlock_sha1_init_nano,
477 	.update		=	padlock_sha1_update_nano,
478 	.final		=	padlock_sha1_final_nano,
479 	.export		=	padlock_sha_export_nano,
480 	.import		=	padlock_sha_import_nano,
481 	.descsize	=	sizeof(struct sha1_state),
482 	.statesize	=	sizeof(struct sha1_state),
483 	.base		=	{
484 		.cra_name		=	"sha1",
485 		.cra_driver_name	=	"sha1-padlock-nano",
486 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
487 		.cra_flags		=	CRYPTO_ALG_TYPE_SHASH,
488 		.cra_blocksize		=	SHA1_BLOCK_SIZE,
489 		.cra_module		=	THIS_MODULE,
490 	}
491 };
492 
493 static struct shash_alg sha256_alg_nano = {
494 	.digestsize	=	SHA256_DIGEST_SIZE,
495 	.init		=	padlock_sha256_init_nano,
496 	.update		=	padlock_sha256_update_nano,
497 	.final		=	padlock_sha256_final_nano,
498 	.export		=	padlock_sha_export_nano,
499 	.import		=	padlock_sha_import_nano,
500 	.descsize	=	sizeof(struct sha256_state),
501 	.statesize	=	sizeof(struct sha256_state),
502 	.base		=	{
503 		.cra_name		=	"sha256",
504 		.cra_driver_name	=	"sha256-padlock-nano",
505 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
506 		.cra_flags		=	CRYPTO_ALG_TYPE_SHASH,
507 		.cra_blocksize		=	SHA256_BLOCK_SIZE,
508 		.cra_module		=	THIS_MODULE,
509 	}
510 };
511 
512 static const struct x86_cpu_id padlock_sha_ids[] = {
513 	X86_FEATURE_MATCH(X86_FEATURE_PHE),
514 	{}
515 };
516 MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);
517 
518 static int __init padlock_init(void)
519 {
520 	int rc = -ENODEV;
521 	struct cpuinfo_x86 *c = &cpu_data(0);
522 	struct shash_alg *sha1;
523 	struct shash_alg *sha256;
524 
525 	if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
526 		return -ENODEV;
527 
528 	/* Register the newly added algorithm module if on *
529 	* VIA Nano processor, or else just do as before */
530 	if (c->x86_model < 0x0f) {
531 		sha1 = &sha1_alg;
532 		sha256 = &sha256_alg;
533 	} else {
534 		sha1 = &sha1_alg_nano;
535 		sha256 = &sha256_alg_nano;
536 	}
537 
538 	rc = crypto_register_shash(sha1);
539 	if (rc)
540 		goto out;
541 
542 	rc = crypto_register_shash(sha256);
543 	if (rc)
544 		goto out_unreg1;
545 
546 	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");
547 
548 	return 0;
549 
550 out_unreg1:
551 	crypto_unregister_shash(sha1);
552 
553 out:
554 	printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
555 	return rc;
556 }
557 
558 static void __exit padlock_fini(void)
559 {
560 	struct cpuinfo_x86 *c = &cpu_data(0);
561 
562 	if (c->x86_model >= 0x0f) {
563 		crypto_unregister_shash(&sha1_alg_nano);
564 		crypto_unregister_shash(&sha256_alg_nano);
565 	} else {
566 		crypto_unregister_shash(&sha1_alg);
567 		crypto_unregister_shash(&sha256_alg);
568 	}
569 }
570 
571 module_init(padlock_init);
572 module_exit(padlock_fini);
573 
574 MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
575 MODULE_LICENSE("GPL");
576 MODULE_AUTHOR("Michal Ludvig");
577 
578 MODULE_ALIAS_CRYPTO("sha1-all");
579 MODULE_ALIAS_CRYPTO("sha256-all");
580 MODULE_ALIAS_CRYPTO("sha1-padlock");
581 MODULE_ALIAS_CRYPTO("sha256-padlock");
582