xref: /linux/drivers/crypto/padlock-sha.c (revision 3d0fe49454652117522f60bfbefb978ba0e5300b)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * Cryptographic API.
4  *
5  * Support for VIA PadLock hardware crypto engine.
6  *
7  * Copyright (c) 2006  Michal Ludvig <michal@logix.cz>
8  */
9 
10 #include <crypto/internal/hash.h>
11 #include <crypto/padlock.h>
12 #include <crypto/sha1.h>
13 #include <crypto/sha2.h>
14 #include <linux/err.h>
15 #include <linux/module.h>
16 #include <linux/init.h>
17 #include <linux/errno.h>
18 #include <linux/interrupt.h>
19 #include <linux/kernel.h>
20 #include <linux/scatterlist.h>
21 #include <asm/cpu_device_id.h>
22 #include <asm/fpu/api.h>
23 
/*
 * Per-request descriptor context of the fallback-based algorithms.
 * It consists solely of the descriptor handed to the fallback transform;
 * padlock_init_tfm() enlarges the descriptor size by the fallback's own
 * descriptor size.
 */
struct padlock_sha_desc {
	struct shash_desc fallback;
};
27 
/*
 * Per-transform context of the fallback-based algorithms: the fallback
 * hash allocated in padlock_init_tfm() and freed in padlock_exit_tfm().
 */
struct padlock_sha_ctx {
	struct crypto_shash *fallback;
};
31 
32 static int padlock_sha_init(struct shash_desc *desc)
33 {
34 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
35 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
36 
37 	dctx->fallback.tfm = ctx->fallback;
38 	return crypto_shash_init(&dctx->fallback);
39 }
40 
41 static int padlock_sha_update(struct shash_desc *desc,
42 			      const u8 *data, unsigned int length)
43 {
44 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
45 
46 	return crypto_shash_update(&dctx->fallback, data, length);
47 }
48 
49 static int padlock_sha_export(struct shash_desc *desc, void *out)
50 {
51 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
52 
53 	return crypto_shash_export(&dctx->fallback, out);
54 }
55 
56 static int padlock_sha_import(struct shash_desc *desc, const void *in)
57 {
58 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
59 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(desc->tfm);
60 
61 	dctx->fallback.tfm = ctx->fallback;
62 	return crypto_shash_import(&dctx->fallback, in);
63 }
64 
/*
 * Copy @count 32-bit words from @src to @dst, byte-swapping each word
 * on the way.
 */
static inline void padlock_output_block(uint32_t *src,
					uint32_t *dst, size_t count)
{
	size_t i;

	for (i = 0; i < count; i++)
		dst[i] = swab32(src[i]);
}
71 
72 static int padlock_sha1_finup(struct shash_desc *desc, const u8 *in,
73 			      unsigned int count, u8 *out)
74 {
75 	/* We can't store directly to *out as it may be unaligned. */
76 	/* BTW Don't reduce the buffer size below 128 Bytes!
77 	 *     PadLock microcode needs it that big. */
78 	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
79 		((aligned(STACK_ALIGN)));
80 	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
81 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
82 	struct sha1_state state;
83 	unsigned int space;
84 	unsigned int leftover;
85 	int err;
86 
87 	err = crypto_shash_export(&dctx->fallback, &state);
88 	if (err)
89 		goto out;
90 
91 	if (state.count + count > ULONG_MAX)
92 		return crypto_shash_finup(&dctx->fallback, in, count, out);
93 
94 	leftover = ((state.count - 1) & (SHA1_BLOCK_SIZE - 1)) + 1;
95 	space =  SHA1_BLOCK_SIZE - leftover;
96 	if (space) {
97 		if (count > space) {
98 			err = crypto_shash_update(&dctx->fallback, in, space) ?:
99 			      crypto_shash_export(&dctx->fallback, &state);
100 			if (err)
101 				goto out;
102 			count -= space;
103 			in += space;
104 		} else {
105 			memcpy(state.buffer + leftover, in, count);
106 			in = state.buffer;
107 			count += leftover;
108 			state.count &= ~(SHA1_BLOCK_SIZE - 1);
109 		}
110 	}
111 
112 	memcpy(result, &state.state, SHA1_DIGEST_SIZE);
113 
114 	asm volatile (".byte 0xf3,0x0f,0xa6,0xc8" /* rep xsha1 */
115 		      : \
116 		      : "c"((unsigned long)state.count + count), \
117 			"a"((unsigned long)state.count), \
118 			"S"(in), "D"(result));
119 
120 	padlock_output_block((uint32_t *)result, (uint32_t *)out, 5);
121 
122 out:
123 	return err;
124 }
125 
126 static int padlock_sha1_final(struct shash_desc *desc, u8 *out)
127 {
128 	u8 buf[4];
129 
130 	return padlock_sha1_finup(desc, buf, 0, out);
131 }
132 
133 static int padlock_sha256_finup(struct shash_desc *desc, const u8 *in,
134 				unsigned int count, u8 *out)
135 {
136 	/* We can't store directly to *out as it may be unaligned. */
137 	/* BTW Don't reduce the buffer size below 128 Bytes!
138 	 *     PadLock microcode needs it that big. */
139 	char buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
140 		((aligned(STACK_ALIGN)));
141 	char *result = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
142 	struct padlock_sha_desc *dctx = shash_desc_ctx(desc);
143 	struct sha256_state state;
144 	unsigned int space;
145 	unsigned int leftover;
146 	int err;
147 
148 	err = crypto_shash_export(&dctx->fallback, &state);
149 	if (err)
150 		goto out;
151 
152 	if (state.count + count > ULONG_MAX)
153 		return crypto_shash_finup(&dctx->fallback, in, count, out);
154 
155 	leftover = ((state.count - 1) & (SHA256_BLOCK_SIZE - 1)) + 1;
156 	space =  SHA256_BLOCK_SIZE - leftover;
157 	if (space) {
158 		if (count > space) {
159 			err = crypto_shash_update(&dctx->fallback, in, space) ?:
160 			      crypto_shash_export(&dctx->fallback, &state);
161 			if (err)
162 				goto out;
163 			count -= space;
164 			in += space;
165 		} else {
166 			memcpy(state.buf + leftover, in, count);
167 			in = state.buf;
168 			count += leftover;
169 			state.count &= ~(SHA1_BLOCK_SIZE - 1);
170 		}
171 	}
172 
173 	memcpy(result, &state.state, SHA256_DIGEST_SIZE);
174 
175 	asm volatile (".byte 0xf3,0x0f,0xa6,0xd0" /* rep xsha256 */
176 		      : \
177 		      : "c"((unsigned long)state.count + count), \
178 			"a"((unsigned long)state.count), \
179 			"S"(in), "D"(result));
180 
181 	padlock_output_block((uint32_t *)result, (uint32_t *)out, 8);
182 
183 out:
184 	return err;
185 }
186 
187 static int padlock_sha256_final(struct shash_desc *desc, u8 *out)
188 {
189 	u8 buf[4];
190 
191 	return padlock_sha256_finup(desc, buf, 0, out);
192 }
193 
194 static int padlock_init_tfm(struct crypto_shash *hash)
195 {
196 	const char *fallback_driver_name = crypto_shash_alg_name(hash);
197 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);
198 	struct crypto_shash *fallback_tfm;
199 
200 	/* Allocate a fallback and abort if it failed. */
201 	fallback_tfm = crypto_alloc_shash(fallback_driver_name, 0,
202 					  CRYPTO_ALG_NEED_FALLBACK);
203 	if (IS_ERR(fallback_tfm)) {
204 		printk(KERN_WARNING PFX "Fallback driver '%s' could not be loaded!\n",
205 		       fallback_driver_name);
206 		return PTR_ERR(fallback_tfm);
207 	}
208 
209 	ctx->fallback = fallback_tfm;
210 	hash->descsize += crypto_shash_descsize(fallback_tfm);
211 	return 0;
212 }
213 
214 static void padlock_exit_tfm(struct crypto_shash *hash)
215 {
216 	struct padlock_sha_ctx *ctx = crypto_shash_ctx(hash);
217 
218 	crypto_free_shash(ctx->fallback);
219 }
220 
221 static struct shash_alg sha1_alg = {
222 	.digestsize	=	SHA1_DIGEST_SIZE,
223 	.init   	= 	padlock_sha_init,
224 	.update 	=	padlock_sha_update,
225 	.finup  	=	padlock_sha1_finup,
226 	.final  	=	padlock_sha1_final,
227 	.export		=	padlock_sha_export,
228 	.import		=	padlock_sha_import,
229 	.init_tfm	=	padlock_init_tfm,
230 	.exit_tfm	=	padlock_exit_tfm,
231 	.descsize	=	sizeof(struct padlock_sha_desc),
232 	.statesize	=	sizeof(struct sha1_state),
233 	.base		=	{
234 		.cra_name		=	"sha1",
235 		.cra_driver_name	=	"sha1-padlock",
236 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
237 		.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
238 		.cra_blocksize		=	SHA1_BLOCK_SIZE,
239 		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
240 		.cra_module		=	THIS_MODULE,
241 	}
242 };
243 
244 static struct shash_alg sha256_alg = {
245 	.digestsize	=	SHA256_DIGEST_SIZE,
246 	.init   	= 	padlock_sha_init,
247 	.update 	=	padlock_sha_update,
248 	.finup  	=	padlock_sha256_finup,
249 	.final  	=	padlock_sha256_final,
250 	.export		=	padlock_sha_export,
251 	.import		=	padlock_sha_import,
252 	.init_tfm	=	padlock_init_tfm,
253 	.exit_tfm	=	padlock_exit_tfm,
254 	.descsize	=	sizeof(struct padlock_sha_desc),
255 	.statesize	=	sizeof(struct sha256_state),
256 	.base		=	{
257 		.cra_name		=	"sha256",
258 		.cra_driver_name	=	"sha256-padlock",
259 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
260 		.cra_flags		=	CRYPTO_ALG_NEED_FALLBACK,
261 		.cra_blocksize		=	SHA256_BLOCK_SIZE,
262 		.cra_ctxsize		=	sizeof(struct padlock_sha_ctx),
263 		.cra_module		=	THIS_MODULE,
264 	}
265 };
266 
/*
 * The functions and the two shash_alg instances below implement
 * hardware multi-part (incremental) hashing as supported by the
 * VIA Nano processor.
 */
269 static int padlock_sha1_init_nano(struct shash_desc *desc)
270 {
271 	struct sha1_state *sctx = shash_desc_ctx(desc);
272 
273 	*sctx = (struct sha1_state){
274 		.state = { SHA1_H0, SHA1_H1, SHA1_H2, SHA1_H3, SHA1_H4 },
275 	};
276 
277 	return 0;
278 }
279 
280 static int padlock_sha1_update_nano(struct shash_desc *desc,
281 			const u8 *data,	unsigned int len)
282 {
283 	struct sha1_state *sctx = shash_desc_ctx(desc);
284 	unsigned int partial, done;
285 	const u8 *src;
286 	/*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
287 	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
288 		((aligned(STACK_ALIGN)));
289 	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
290 
291 	partial = sctx->count & 0x3f;
292 	sctx->count += len;
293 	done = 0;
294 	src = data;
295 	memcpy(dst, (u8 *)(sctx->state), SHA1_DIGEST_SIZE);
296 
297 	if ((partial + len) >= SHA1_BLOCK_SIZE) {
298 
299 		/* Append the bytes in state's buffer to a block to handle */
300 		if (partial) {
301 			done = -partial;
302 			memcpy(sctx->buffer + partial, data,
303 				done + SHA1_BLOCK_SIZE);
304 			src = sctx->buffer;
305 			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
306 			: "+S"(src), "+D"(dst) \
307 			: "a"((long)-1), "c"((unsigned long)1));
308 			done += SHA1_BLOCK_SIZE;
309 			src = data + done;
310 		}
311 
312 		/* Process the left bytes from the input data */
313 		if (len - done >= SHA1_BLOCK_SIZE) {
314 			asm volatile (".byte 0xf3,0x0f,0xa6,0xc8"
315 			: "+S"(src), "+D"(dst)
316 			: "a"((long)-1),
317 			"c"((unsigned long)((len - done) / SHA1_BLOCK_SIZE)));
318 			done += ((len - done) - (len - done) % SHA1_BLOCK_SIZE);
319 			src = data + done;
320 		}
321 		partial = 0;
322 	}
323 	memcpy((u8 *)(sctx->state), dst, SHA1_DIGEST_SIZE);
324 	memcpy(sctx->buffer + partial, src, len - done);
325 
326 	return 0;
327 }
328 
329 static int padlock_sha1_final_nano(struct shash_desc *desc, u8 *out)
330 {
331 	struct sha1_state *state = (struct sha1_state *)shash_desc_ctx(desc);
332 	unsigned int partial, padlen;
333 	__be64 bits;
334 	static const u8 padding[64] = { 0x80, };
335 
336 	bits = cpu_to_be64(state->count << 3);
337 
338 	/* Pad out to 56 mod 64 */
339 	partial = state->count & 0x3f;
340 	padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
341 	padlock_sha1_update_nano(desc, padding, padlen);
342 
343 	/* Append length field bytes */
344 	padlock_sha1_update_nano(desc, (const u8 *)&bits, sizeof(bits));
345 
346 	/* Swap to output */
347 	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 5);
348 
349 	return 0;
350 }
351 
352 static int padlock_sha256_init_nano(struct shash_desc *desc)
353 {
354 	struct sha256_state *sctx = shash_desc_ctx(desc);
355 
356 	*sctx = (struct sha256_state){
357 		.state = { SHA256_H0, SHA256_H1, SHA256_H2, SHA256_H3, \
358 				SHA256_H4, SHA256_H5, SHA256_H6, SHA256_H7},
359 	};
360 
361 	return 0;
362 }
363 
364 static int padlock_sha256_update_nano(struct shash_desc *desc, const u8 *data,
365 			  unsigned int len)
366 {
367 	struct sha256_state *sctx = shash_desc_ctx(desc);
368 	unsigned int partial, done;
369 	const u8 *src;
370 	/*The PHE require the out buffer must 128 bytes and 16-bytes aligned*/
371 	u8 buf[128 + PADLOCK_ALIGNMENT - STACK_ALIGN] __attribute__
372 		((aligned(STACK_ALIGN)));
373 	u8 *dst = PTR_ALIGN(&buf[0], PADLOCK_ALIGNMENT);
374 
375 	partial = sctx->count & 0x3f;
376 	sctx->count += len;
377 	done = 0;
378 	src = data;
379 	memcpy(dst, (u8 *)(sctx->state), SHA256_DIGEST_SIZE);
380 
381 	if ((partial + len) >= SHA256_BLOCK_SIZE) {
382 
383 		/* Append the bytes in state's buffer to a block to handle */
384 		if (partial) {
385 			done = -partial;
386 			memcpy(sctx->buf + partial, data,
387 				done + SHA256_BLOCK_SIZE);
388 			src = sctx->buf;
389 			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
390 			: "+S"(src), "+D"(dst)
391 			: "a"((long)-1), "c"((unsigned long)1));
392 			done += SHA256_BLOCK_SIZE;
393 			src = data + done;
394 		}
395 
396 		/* Process the left bytes from input data*/
397 		if (len - done >= SHA256_BLOCK_SIZE) {
398 			asm volatile (".byte 0xf3,0x0f,0xa6,0xd0"
399 			: "+S"(src), "+D"(dst)
400 			: "a"((long)-1),
401 			"c"((unsigned long)((len - done) / 64)));
402 			done += ((len - done) - (len - done) % 64);
403 			src = data + done;
404 		}
405 		partial = 0;
406 	}
407 	memcpy((u8 *)(sctx->state), dst, SHA256_DIGEST_SIZE);
408 	memcpy(sctx->buf + partial, src, len - done);
409 
410 	return 0;
411 }
412 
413 static int padlock_sha256_final_nano(struct shash_desc *desc, u8 *out)
414 {
415 	struct sha256_state *state =
416 		(struct sha256_state *)shash_desc_ctx(desc);
417 	unsigned int partial, padlen;
418 	__be64 bits;
419 	static const u8 padding[64] = { 0x80, };
420 
421 	bits = cpu_to_be64(state->count << 3);
422 
423 	/* Pad out to 56 mod 64 */
424 	partial = state->count & 0x3f;
425 	padlen = (partial < 56) ? (56 - partial) : ((64+56) - partial);
426 	padlock_sha256_update_nano(desc, padding, padlen);
427 
428 	/* Append length field bytes */
429 	padlock_sha256_update_nano(desc, (const u8 *)&bits, sizeof(bits));
430 
431 	/* Swap to output */
432 	padlock_output_block((uint32_t *)(state->state), (uint32_t *)out, 8);
433 
434 	return 0;
435 }
436 
437 static int padlock_sha_export_nano(struct shash_desc *desc,
438 				void *out)
439 {
440 	int statesize = crypto_shash_statesize(desc->tfm);
441 	void *sctx = shash_desc_ctx(desc);
442 
443 	memcpy(out, sctx, statesize);
444 	return 0;
445 }
446 
447 static int padlock_sha_import_nano(struct shash_desc *desc,
448 				const void *in)
449 {
450 	int statesize = crypto_shash_statesize(desc->tfm);
451 	void *sctx = shash_desc_ctx(desc);
452 
453 	memcpy(sctx, in, statesize);
454 	return 0;
455 }
456 
457 static struct shash_alg sha1_alg_nano = {
458 	.digestsize	=	SHA1_DIGEST_SIZE,
459 	.init		=	padlock_sha1_init_nano,
460 	.update		=	padlock_sha1_update_nano,
461 	.final		=	padlock_sha1_final_nano,
462 	.export		=	padlock_sha_export_nano,
463 	.import		=	padlock_sha_import_nano,
464 	.descsize	=	sizeof(struct sha1_state),
465 	.statesize	=	sizeof(struct sha1_state),
466 	.base		=	{
467 		.cra_name		=	"sha1",
468 		.cra_driver_name	=	"sha1-padlock-nano",
469 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
470 		.cra_blocksize		=	SHA1_BLOCK_SIZE,
471 		.cra_module		=	THIS_MODULE,
472 	}
473 };
474 
475 static struct shash_alg sha256_alg_nano = {
476 	.digestsize	=	SHA256_DIGEST_SIZE,
477 	.init		=	padlock_sha256_init_nano,
478 	.update		=	padlock_sha256_update_nano,
479 	.final		=	padlock_sha256_final_nano,
480 	.export		=	padlock_sha_export_nano,
481 	.import		=	padlock_sha_import_nano,
482 	.descsize	=	sizeof(struct sha256_state),
483 	.statesize	=	sizeof(struct sha256_state),
484 	.base		=	{
485 		.cra_name		=	"sha256",
486 		.cra_driver_name	=	"sha256-padlock-nano",
487 		.cra_priority		=	PADLOCK_CRA_PRIORITY,
488 		.cra_blocksize		=	SHA256_BLOCK_SIZE,
489 		.cra_module		=	THIS_MODULE,
490 	}
491 };
492 
/*
 * CPU match table checked by padlock_init(): it lists the
 * X86_FEATURE_PHE feature flag, followed by an empty terminating entry.
 * The table is also published through MODULE_DEVICE_TABLE (presumably so
 * the module can be auto-loaded on matching CPUs).
 */
static const struct x86_cpu_id padlock_sha_ids[] = {
	X86_MATCH_FEATURE(X86_FEATURE_PHE, NULL),
	{}
};
MODULE_DEVICE_TABLE(x86cpu, padlock_sha_ids);
498 
499 static int __init padlock_init(void)
500 {
501 	int rc = -ENODEV;
502 	struct cpuinfo_x86 *c = &cpu_data(0);
503 	struct shash_alg *sha1;
504 	struct shash_alg *sha256;
505 
506 	if (!x86_match_cpu(padlock_sha_ids) || !boot_cpu_has(X86_FEATURE_PHE_EN))
507 		return -ENODEV;
508 
509 	/* Register the newly added algorithm module if on *
510 	* VIA Nano processor, or else just do as before */
511 	if (c->x86_model < 0x0f) {
512 		sha1 = &sha1_alg;
513 		sha256 = &sha256_alg;
514 	} else {
515 		sha1 = &sha1_alg_nano;
516 		sha256 = &sha256_alg_nano;
517 	}
518 
519 	rc = crypto_register_shash(sha1);
520 	if (rc)
521 		goto out;
522 
523 	rc = crypto_register_shash(sha256);
524 	if (rc)
525 		goto out_unreg1;
526 
527 	printk(KERN_NOTICE PFX "Using VIA PadLock ACE for SHA1/SHA256 algorithms.\n");
528 
529 	return 0;
530 
531 out_unreg1:
532 	crypto_unregister_shash(sha1);
533 
534 out:
535 	printk(KERN_ERR PFX "VIA PadLock SHA1/SHA256 initialization failed.\n");
536 	return rc;
537 }
538 
539 static void __exit padlock_fini(void)
540 {
541 	struct cpuinfo_x86 *c = &cpu_data(0);
542 
543 	if (c->x86_model >= 0x0f) {
544 		crypto_unregister_shash(&sha1_alg_nano);
545 		crypto_unregister_shash(&sha256_alg_nano);
546 	} else {
547 		crypto_unregister_shash(&sha1_alg);
548 		crypto_unregister_shash(&sha256_alg);
549 	}
550 }
551 
/* Hook padlock_init()/padlock_fini() up as the module's init and exit. */
module_init(padlock_init);
module_exit(padlock_fini);

/* Module metadata. */
MODULE_DESCRIPTION("VIA PadLock SHA1/SHA256 algorithms support.");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Michal Ludvig");

/* Crypto-API aliases under which this module can be requested. */
MODULE_ALIAS_CRYPTO("sha1-all");
MODULE_ALIAS_CRYPTO("sha256-all");
MODULE_ALIAS_CRYPTO("sha1-padlock");
MODULE_ALIAS_CRYPTO("sha256-padlock");
563