xref: /linux/drivers/crypto/padlock-sha.c (revision e9f0878c4b2004ac19581274c1ae4c61ae3ca70e)
1 /*
2  * Cryptographic API.
3  *
4  * Support for VIA PadLock hardware crypto engine.
5  *
6  * Copyright (c) 2006  Michal Ludvig <michal@logix.cz>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or
11  * (at your option) any later version.
12  *
13  */
14 
15 #include <crypto/internal/hash.h>
16 #include <crypto/padlock.h>
17 #include <crypto/sha.h>
18 #include <linux/err.h>
19 #include <linux/module.h>
20 #include <linux/init.h>
21 #include <linux/errno.h>
22 #include <linux/interrupt.h>
23 #include <linux/kernel.h>
24 #include <linux/scatterlist.h>
25 #include <asm/cpu_device_id.h>
26 #include <asm/fpu/api.h>
27 
/*
 * Per-request descriptor context of the fallback-based algorithms.
 * It holds only the descriptor that is handed to the fallback
 * crypto_shash_*() calls.
 */
struct padlock_sha_desc {
	struct shash_desc fallback;	/* descriptor used with the fallback shash */
};
31 
/*
 * Per-transform context of the fallback-based algorithms.
 * The fallback is allocated in padlock_cra_init() and released in
 * padlock_cra_exit().
 */
struct padlock_sha_ctx {
	struct crypto_shash *fallback;	/* fallback hash transform */
};
35 
36 static int padlock_sha_init(struct shash_desc *desc)
37 {
38 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
39 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
40 
41 	dctx->fallback.tfm = ctx->fallback;
42 	dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
43 	return crypto_shash_init(&dctx->fallback);
44 }
45 
46 static int padlock_sha_update(struct shash_desc *desc,
47 			      const u8 *data, unsigned int length)
48 {
49 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
50 
51 	dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
52 	return crypto_shash_update(&dctx->fallback, data, length);
53 }
54 
55 static int padlock_sha_export(struct shash_desc *desc, void *out)
56 {
57 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
58 
59 	return crypto_shash_export(&dctx->fallback, out);
60 }
61 
62 static int padlock_sha_import(struct shash_desc *desc, const void *in)
63 {
64 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
65 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
66 
67 	dctx->fallback.tfm = ctx->fallback;
68 	dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
69 	return crypto_shash_import(&dctx->fallback, in);
70 }
71 
/*
 * Copy @count 32-bit words from @src to @dst, byte-swapping each word
 * with swab32() on the way.
 */
static inline void padlock_output_block(uint32_t *src,
		 	uint32_t *dst, size_t count)
{
	size_t i;

	for (i = 0; i < count; i++)
		dst[i] = swab32(src[i]);
}
78 
/*
 * Finish a SHA-1 hash: absorb the last @count bytes at @in and write the
 * digest to @out.
 *
 * The running state is taken from the fallback descriptor via
 * crypto_shash_export().  If state.count + count does not fit in an
 * unsigned long, the whole operation is left to the fallback's finup.
 * Otherwise the exported intermediate digest is copied into an aligned
 * scratch buffer, the "rep xsha1" instruction is issued on it, and the
 * result is byte-swapped into @out.
 *
 * Returns 0 on success or the error reported by the fallback calls.
 */
static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
			      unsigned int count, u8 *out)
{
	/* We can't store directly to *out as it may be unaligned. */
	/* BTW Don't reduce the buffer size below 128 Bytes!
	 *     PadLock microcode needs it that big. */
	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
	struct sha1_state state;
	unsigned int space;
	unsigned int leftover;
	int err;

	dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
	err = crypto_shash_export(&dctx->fallback, &state);
	if (err)
		goto out;

	/* Total length not representable in a register: let the fallback finish. */
	if (state.count + count > ULONG_MAX)
		return crypto_shash_finup(&dctx->fallback, in, count, out);

	/*
	 * leftover is in 1..SHA1_BLOCK_SIZE; it equals SHA1_BLOCK_SIZE (and
	 * space is 0) when state.count is a multiple of the block size.
	 */
	leftover = ((state.count - 1) & (SHA1_BLOCK_SIZE - 1)) + 1;
	space =  SHA1_BLOCK_SIZE - leftover;
	if (space) {
		if (count > space) {
			/*
			 * Let the fallback consume enough input to complete
			 * the pending block, then re-read the updated state.
			 */
			err = crypto_shash_update(&dctx->fallback, in, space) ?:
			      crypto_shash_export(&dctx->fallback, &state);
			if (err)
				goto out;
			count -= space;
			in += space;
		} else {
			/*
			 * Everything fits into the exported block buffer:
			 * append the new bytes there, hash from that buffer
			 * and round state.count down to a block boundary.
			 */
			memcpy(state.buffer + leftover, in, count);
			in = state.buffer;
			count += leftover;
			state.count &= ~(SHA1_BLOCK_SIZE - 1);
		}
	}

	/* Seed the scratch buffer with the intermediate digest. */
	memcpy(result, &state.state, SHA1_DIGEST_SIZE);

	/*
	 * Register setup: ECX = state.count + count, EAX = state.count,
	 * ESI = input pointer, EDI = scratch buffer.
	 */
	asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
		      : \
		      : "c"((unsigned long)state.count + count), \
			"a"((unsigned long)state.count), \
			"S"(in), "D"(result));

	/* Byte-swap the 5 digest words from the scratch buffer into *out. */
	padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);

out:
	return err;
}
133 
134 static int padlock_sha1_final(struct shash_desc *desc, u8 *out)
135 {
136 	u8 buf[4];
137 
138 	return padlock_sha1_finup(desc, buf, 0, out);
139 }
140 
141 static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
142 				unsigned int count, u8 *out)
143 {
144 	/* We can't store directly to *out as it may be unaligned. */
145 	/* BTW Don't reduce the buffer size below 128 Bytes!
146 	 *     PadLock microcode needs it that big. */
147 	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
148 		((aligned(STACK_ALIGN)));
149 	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
150 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
151 	struct sha256_state state;
152 	unsigned int space;
153 	unsigned int leftover;
154 	int err;
155 
156 	dctx->fallback.flags = desc->flags & CRYPTO_TFM_REQ_MAY_SLEEP;
157 	err = crypto_shash_export(&dctx->fallback, &state);
158 	if (err)
159 		goto out;
160 
161 	if (state.count + count > ULONG_MAX)
162 		return crypto_shash_finup(&dctx->fallback, in, count, out);
163 
164 	leftover = ((state.count - 1) & (SHA256_BLOCK_SIZE - 1)) + 1;
165 	space =  SHA256_BLOCK_SIZE - leftover;
166 	if (space) {
167 		if (count > space) {
168 			err = crypto_shash_update(&dctx->fallback, in, space) ?:
169 			      crypto_shash_export(&dctx->fallback, &state);
170 			if (err)
171 				goto out;
172 			count -= space;
173 			in += space;
174 		} else {
175 			memcpy(state.buf + leftover, in, count);
176 			in = state.buf;
177 			count += leftover;
178 			state.count &= ~(SHA1_BLOCK_SIZE - 1);
179 		}
180 	}
181 
182 	memcpy(result, &state.state, SHA256_DIGEST_SIZE);
183 
184 	asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
185 		      : \
186 		      : "c"((unsigned long)state.count + count), \
187 			"a"((unsigned long)state.count), \
188 			"S"(in), "D"(result));
189 
190 	padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);
191 
192 out:
193 	return err;
194 }
195 
196 static int padlock_sha256_final(struct shash_desc *desc, u8 *out)
197 {
198 	u8 buf[4];
199 
200 	return padlock_sha256_finup(desc, buf, 0, out);
201 }
202 
203 static int padlock_cra_init(struct crypto_tfm *tfm)
204 {
205 	struct crypto_shash *hash = __crypto_shash_cast(tfm);
206 	const char *fallback_driver_name = crypto_tfm_alg_name(tfm);
207 	struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);
208 	struct crypto_shash *fallback_tfm;
209 	int err = -ENOMEM;
210 
211 	/* Allocate a fallback and abort if it failed. */
212 	fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
213 					  CRYPTO_ALG_NEED_FALLBACK);
214 	if (IS_ERR(fallback_tfm)) {
215 		printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
216 		       fallback_driver_name);
217 		err = PTR_ERR(fallback_tfm);
218 		goto out;
219 	}
220 
221 	ctx->fallback = fallback_tfm;
222 	hash->descsize += crypto_shash_descsize(fallback_tfm);
223 	return 0;
224 
225 out:
226 	return err;
227 }
228 
229 static void padlock_cra_exit(struct crypto_tfm *tfm)
230 {
231 	struct padlock_sha_ctx *ctx = crypto_tfm_ctx(tfm);
232 
233 	crypto_free_shash(ctx->fallback);
234 }
235 
/*
 * SHA-1 algorithm descriptor for the fallback-based implementation
 * ("sha1-padlock").  init/update/export/import are the shared wrappers
 * around the fallback; finup/final use the PadLock instruction.
 * padlock_init() registers this variant when the CPU model is below 0x0f.
 */
static struct shash_alg sha1_alg = {
	.digestsize	=	SHA1_DIGEST_SIZE,
	.init   	= 	padlock_sha_init,
	.update 	=	padlock_sha_update,
	.finup  	=	padlock_sha1_finup,
	.final  	=	padlock_sha1_final,
	.export		=	padlock_sha_export,
	.import		=	padlock_sha_import,
	/* Base size only; padlock_cra_init() adds the fallback's descsize. */
	.descsize	=	sizeof(struct padlock_sha_desc),
	.statesize	=	sizeof(struct sha1_state),
	.base		=	{
		.cra_name		=	"sha1",
		.cra_driver_name	=	"sha1-padlock",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
		.cra_blocksize		=	SHA1_BLOCK_SIZE,
		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
		.cra_module		=	THIS_MODULE,
		.cra_init		=	padlock_cra_init,
		.cra_exit		=	padlock_cra_exit,
	}
};
258 
/*
 * SHA-256 algorithm descriptor for the fallback-based implementation
 * ("sha256-padlock").  init/update/export/import are the shared wrappers
 * around the fallback; finup/final use the PadLock instruction.
 * padlock_init() registers this variant when the CPU model is below 0x0f.
 */
static struct shash_alg sha256_alg = {
	.digestsize	=	SHA256_DIGEST_SIZE,
	.init   	= 	padlock_sha_init,
	.update 	=	padlock_sha_update,
	.finup  	=	padlock_sha256_finup,
	.final  	=	padlock_sha256_final,
	.export		=	padlock_sha_export,
	.import		=	padlock_sha_import,
	/* Base size only; padlock_cra_init() adds the fallback's descsize. */
	.descsize	=	sizeof(struct padlock_sha_desc),
	.statesize	=	sizeof(struct sha256_state),
	.base		=	{
		.cra_name		=	"sha256",
		.cra_driver_name	=	"sha256-padlock",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
		.cra_blocksize		=	SHA256_BLOCK_SIZE,
		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
		.cra_module		=	THIS_MODULE,
		.cra_init		=	padlock_cra_init,
		.cra_exit		=	padlock_cra_exit,
	}
};
281 
/*
 * Two additional shash_alg instances for the hardware-implemented
 * multi-part hashing supported by the VIA Nano processor.
 */
284 static int padlock_sha1_init_nano(struct shash_desc *desc)
285 {
286 	struct sha1_state *sctx = shash_desc_ctx(desc);
287 
288 	*sctx = (struct sha1_state){
289 		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
290 	};
291 
292 	return 0;
293 }
294 
/*
 * Absorb @len bytes at @data into the SHA-1 state kept in the descriptor.
 *
 * Complete 64-byte blocks are pushed through the "rep xsha1" instruction
 * (opcode bytes 0xf3,0x0f,0xa6,0xc8); any remaining tail is stored in
 * sctx->buffer for a later call.  Always returns 0.
 */
static int padlock_sha1_update_nano(struct shash_desc *desc,
			const u8 *data,	unsigned int len)
{
	struct sha1_state *sctx = shash_desc_ctx(desc);
	unsigned int partial, done;
	const u8 *src;
	/* The PHE requires the output buffer to be 128 bytes and 16-byte aligned. */
	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
		((aligned(STACK_ALIGN)));
	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);

	/* Bytes already waiting in sctx->buffer from earlier calls. */
	partial = sctx->count & 0x3f;
	sctx->count += len;
	done = 0;
	src = data;
	/* Work on an aligned copy of the chaining value. */
	memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);

	if ((partial + len) >= SHA1_BLOCK_SIZE) {

		/* Append the bytes in state's buffer to a block to handle */
		if (partial) {
			/*
			 * done is unsigned, so -partial wraps; adding
			 * SHA1_BLOCK_SIZE then yields the number of input
			 * bytes needed to fill the buffered block.
			 */
			done = -partial;
			memcpy(sctx->buffer + partial, data,
				done + SHA1_BLOCK_SIZE);
			src = sctx->buffer;
			/*
			 * One block: EAX = -1, ECX = 1.
			 * NOTE(review): the meaning of EAX = -1 is not visible
			 * here - confirm against the PadLock programming guide.
			 */
			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
			: "+S"(src), "+D"(dst) \
			: "a"((long)-1), "c"((unsigned long)1));
			done += SHA1_BLOCK_SIZE;
			src = data + done;
		}

		/* Process the left bytes from the input data */
		if (len - done >= SHA1_BLOCK_SIZE) {
			/* ECX = number of whole blocks still in the input. */
			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
			: "+S"(src), "+D"(dst)
			: "a"((long)-1),
			"c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
			done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
			src = data + done;
		}
		/* The buffered block has been consumed. */
		partial = 0;
	}
	/* Store the chaining value back and keep the unprocessed tail. */
	memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);
	memcpy(sctx->buffer + partial, src, len - done);

	return 0;
}
343 
344 static int padlock_sha1_final_nano(struct shash_desc *desc, u8 *out)
345 {
346 	struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc);
347 	unsigned int partial, padlen;
348 	__be64 bits;
349 	static const u8 padding[64] = { 0x80, };
350 
351 	bits = cpu_to_be64(state->count << 3);
352 
353 	/* Pad out to 56 mod 64 */
354 	partial = state->count & 0x3f;
355 	padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
356 	padlock_sha1_update_nano(desc, padding, padlen);
357 
358 	/* Append length field bytes */
359 	padlock_sha1_update_nano(desc, (const u8 *)&bits, sizeof(bits));
360 
361 	/* Swap to output */
362 	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5);
363 
364 	return 0;
365 }
366 
367 static int padlock_sha256_init_nano(struct shash_desc *desc)
368 {
369 	struct sha256_state *sctx = shash_desc_ctx(desc);
370 
371 	*sctx = (struct sha256_state){
372 		.state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, \
373 				SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7},
374 	};
375 
376 	return 0;
377 }
378 
379 static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
380 			  unsigned int len)
381 {
382 	struct sha256_state *sctx = shash_desc_ctx(desc);
383 	unsigned int partial, done;
384 	const u8 *src;
385 	/*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
386 	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
387 		((aligned(STACK_ALIGN)));
388 	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
389 
390 	partial = sctx->count & 0x3f;
391 	sctx->count += len;
392 	done = 0;
393 	src = data;
394 	memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);
395 
396 	if ((partial + len) >= SHA256_BLOCK_SIZE) {
397 
398 		/* Append the bytes in state's buffer to a block to handle */
399 		if (partial) {
400 			done = -partial;
401 			memcpy(sctx->buf + partial, data,
402 				done + SHA256_BLOCK_SIZE);
403 			src = sctx->buf;
404 			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
405 			: "+S"(src), "+D"(dst)
406 			: "a"((long)-1), "c"((unsigned long)1));
407 			done += SHA256_BLOCK_SIZE;
408 			src = data + done;
409 		}
410 
411 		/* Process the left bytes from input data*/
412 		if (len - done >= SHA256_BLOCK_SIZE) {
413 			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
414 			: "+S"(src), "+D"(dst)
415 			: "a"((long)-1),
416 			"c"((unsigned long)((len - done) / 64)));
417 			done += ((len - done) - (len - done) % 64);
418 			src = data + done;
419 		}
420 		partial = 0;
421 	}
422 	memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE);
423 	memcpy(sctx->buf + partial, src, len - done);
424 
425 	return 0;
426 }
427 
428 static int padlock_sha256_final_nano(struct shash_desc *desc, u8 *out)
429 {
430 	struct sha256_state *state =
431 		(struct sha256_state *)shash_desc_ctx(desc);
432 	unsigned int partial, padlen;
433 	__be64 bits;
434 	static const u8 padding[64] = { 0x80, };
435 
436 	bits = cpu_to_be64(state->count << 3);
437 
438 	/* Pad out to 56 mod 64 */
439 	partial = state->count & 0x3f;
440 	padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
441 	padlock_sha256_update_nano(desc, padding, padlen);
442 
443 	/* Append length field bytes */
444 	padlock_sha256_update_nano(desc, (const u8 *)&bits, sizeof(bits));
445 
446 	/* Swap to output */
447 	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8);
448 
449 	return 0;
450 }
451 
452 static int padlock_sha_export_nano(struct shash_desc *desc,
453 				void *out)
454 {
455 	int statesize = crypto_shash_statesize(desc->tfm);
456 	void *sctx = shash_desc_ctx(desc);
457 
458 	memcpy(out, sctx, statesize);
459 	return 0;
460 }
461 
462 static int padlock_sha_import_nano(struct shash_desc *desc,
463 				const void *in)
464 {
465 	int statesize = crypto_shash_statesize(desc->tfm);
466 	void *sctx = shash_desc_ctx(desc);
467 
468 	memcpy(sctx, in, statesize);
469 	return 0;
470 }
471 
/*
 * SHA-1 algorithm descriptor for the Nano implementation
 * ("sha1-padlock-nano").  The descriptor context is a plain
 * struct sha1_state and no fallback transform is set up.
 * padlock_init() registers this variant when the CPU model is 0x0f or above.
 */
static struct shash_alg sha1_alg_nano = {
	.digestsize	=	SHA1_DIGEST_SIZE,
	.init		=	padlock_sha1_init_nano,
	.update		=	padlock_sha1_update_nano,
	.final		=	padlock_sha1_final_nano,
	.export		=	padlock_sha_export_nano,
	.import		=	padlock_sha_import_nano,
	.descsize	=	sizeof(struct sha1_state),
	.statesize	=	sizeof(struct sha1_state),
	.base		=	{
		.cra_name		=	"sha1",
		.cra_driver_name	=	"sha1-padlock-nano",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_blocksize		=	SHA1_BLOCK_SIZE,
		.cra_module		=	THIS_MODULE,
	}
};
489 
/*
 * SHA-256 algorithm descriptor for the Nano implementation
 * ("sha256-padlock-nano").  The descriptor context is a plain
 * struct sha256_state and no fallback transform is set up.
 * padlock_init() registers this variant when the CPU model is 0x0f or above.
 */
static struct shash_alg sha256_alg_nano = {
	.digestsize	=	SHA256_DIGEST_SIZE,
	.init		=	padlock_sha256_init_nano,
	.update		=	padlock_sha256_update_nano,
	.final		=	padlock_sha256_final_nano,
	.export		=	padlock_sha_export_nano,
	.import		=	padlock_sha_import_nano,
	.descsize	=	sizeof(struct sha256_state),
	.statesize	=	sizeof(struct sha256_state),
	.base		=	{
		.cra_name		=	"sha256",
		.cra_driver_name	=	"sha256-padlock-nano",
		.cra_priority		=	PADLOCK_CRA_PRIORITY,
		.cra_blocksize		=	SHA256_BLOCK_SIZE,
		.cra_module		=	THIS_MODULE,
	}
};
507 
/*
 * CPU match table: a single entry matching the X86_FEATURE_PHE feature
 * flag, terminated by an empty entry.  It is checked by padlock_init() via
 * x86_match_cpu() and exported through MODULE_DEVICE_TABLE(x86cpu, ...).
 */
static const struct x86_cpu_id padlock_sha_ids[] = {
	X86_FEATURE_MATCH(X86_FEATURE_PHE),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);
513 
514 static int __init padlock_init(void)
515 {
516 	int rc = -ENODEV;
517 	struct cpuinfo_x86 *c = &cpu_data(0);
518 	struct shash_alg *sha1;
519 	struct shash_alg *sha256;
520 
521 	if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
522 		return -ENODEV;
523 
524 	/* Register the newly added algorithm module if on *
525 	* VIA Nano processor, or else just do as before */
526 	if (c->x86_model < 0x0f) {
527 		sha1 = &sha1_alg;
528 		sha256 = &sha256_alg;
529 	} else {
530 		sha1 = &sha1_alg_nano;
531 		sha256 = &sha256_alg_nano;
532 	}
533 
534 	rc = crypto_register_shash(sha1);
535 	if (rc)
536 		goto out;
537 
538 	rc = crypto_register_shash(sha256);
539 	if (rc)
540 		goto out_unreg1;
541 
542 	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");
543 
544 	return 0;
545 
546 out_unreg1:
547 	crypto_unregister_shash(sha1);
548 
549 out:
550 	printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
551 	return rc;
552 }
553 
554 static void __exit padlock_fini(void)
555 {
556 	struct cpuinfo_x86 *c = &cpu_data(0);
557 
558 	if (c->x86_model >= 0x0f) {
559 		crypto_unregister_shash(&sha1_alg_nano);
560 		crypto_unregister_shash(&sha256_alg_nano);
561 	} else {
562 		crypto_unregister_shash(&sha1_alg);
563 		crypto_unregister_shash(&sha256_alg);
564 	}
565 }
566 
567 module_init(padlock_init);
568 module_exit(padlock_fini);
569 
570 MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
571 MODULE_LICENSE("GPL");
572 MODULE_AUTHOR("Michal Ludvig");
573 
574 MODULE_ALIAS_CRYPTO("sha1-all");
575 MODULE_ALIAS_CRYPTO("sha256-all");
576 MODULE_ALIAS_CRYPTO("sha1-padlock");
577 MODULE_ALIAS_CRYPTO("sha256-padlock");
578