xref: /linux/drivers/crypto/atmel-sha.c (revision 95e9fd10f06cb5642028b6b851e32b8c8afb4571)
1 /*
2  * Cryptographic API.
3  *
4  * Support for ATMEL SHA1/SHA256 HW acceleration.
5  *
6  * Copyright (c) 2012 Eukréa Electromatique - ATMEL
7  * Author: Nicolas Royer <nicolas@eukrea.com>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License version 2 as published
11  * by the Free Software Foundation.
12  *
13  * Some ideas are from omap-sham.c drivers.
14  */
15 
16 
17 #include <linux/kernel.h>
18 #include <linux/module.h>
19 #include <linux/slab.h>
20 #include <linux/err.h>
21 #include <linux/clk.h>
22 #include <linux/io.h>
23 #include <linux/hw_random.h>
24 #include <linux/platform_device.h>
25 
26 #include <linux/device.h>
27 #include <linux/module.h>
28 #include <linux/init.h>
29 #include <linux/errno.h>
30 #include <linux/interrupt.h>
31 #include <linux/kernel.h>
32 #include <linux/clk.h>
33 #include <linux/irq.h>
34 #include <linux/io.h>
35 #include <linux/platform_device.h>
36 #include <linux/scatterlist.h>
37 #include <linux/dma-mapping.h>
38 #include <linux/delay.h>
39 #include <linux/crypto.h>
40 #include <linux/cryptohash.h>
41 #include <crypto/scatterwalk.h>
42 #include <crypto/algapi.h>
43 #include <crypto/sha.h>
44 #include <crypto/hash.h>
45 #include <crypto/internal/hash.h>
46 #include "atmel-sha-regs.h"
47 
/*
 * SHA flags.
 *
 * Bits 0-6 are used on the device state (dd->flags):
 *  - BUSY:         a request has been dequeued and is being processed
 *  - FINAL:        the data being transmitted is the last chunk
 *  - DMA_ACTIVE:   a PDC transfer has been started
 *  - OUTPUT_READY: set by the interrupt handler
 *  - INIT:         one-time hardware setup marker
 *  - CPU:          data is written to the input registers by the CPU
 *                  (also set on the request flags for short finup transfers)
 *  - DMA_READY:    set by the interrupt handler for non-CPU transfers
 */
#define SHA_FLAGS_BUSY			BIT(0)
#define	SHA_FLAGS_FINAL			BIT(1)
#define SHA_FLAGS_DMA_ACTIVE	BIT(2)
#define SHA_FLAGS_OUTPUT_READY	BIT(3)
#define SHA_FLAGS_INIT			BIT(4)
#define SHA_FLAGS_CPU			BIT(5)
#define SHA_FLAGS_DMA_READY		BIT(6)

/* Per-request state kept in atmel_sha_reqctx.flags. */
#define SHA_FLAGS_FINUP		BIT(16)
#define SHA_FLAGS_SG		BIT(17)
#define SHA_FLAGS_SHA1		BIT(18)
#define SHA_FLAGS_SHA256	BIT(19)
#define SHA_FLAGS_ERROR		BIT(20)
#define SHA_FLAGS_PAD		BIT(21)

/* Device flag: SHA_MR_DUALBUFF could be written and read back. */
#define SHA_FLAGS_DUALBUFF	BIT(24)

/* Operation recorded in atmel_sha_reqctx.op when a request is queued. */
#define SHA_OP_UPDATE	1
#define SHA_OP_FINAL	2

/* Capacity used for the per-request bounce buffer (ctx->buflen). */
#define SHA_BUFFER_LEN		PAGE_SIZE

/* Byte count below which data is pushed by the CPU rather than by DMA. */
#define ATMEL_SHA_DMA_THRESHOLD		56
72 
73 
74 struct atmel_sha_dev;
75 
76 struct atmel_sha_reqctx {
77 	struct atmel_sha_dev	*dd;
78 	unsigned long	flags;
79 	unsigned long	op;
80 
81 	u8	digest[SHA256_DIGEST_SIZE] __aligned(sizeof(u32));
82 	size_t	digcnt;
83 	size_t	bufcnt;
84 	size_t	buflen;
85 	dma_addr_t	dma_addr;
86 
87 	/* walk state */
88 	struct scatterlist	*sg;
89 	unsigned int	offset;	/* offset in current sg */
90 	unsigned int	total;	/* total request */
91 
92 	u8	buffer[0] __aligned(sizeof(u32));
93 };
94 
/* Per-transform context. */
struct atmel_sha_ctx {
	/* device bound to this transform; assigned on first init() */
	struct atmel_sha_dev	*dd;

	unsigned long		flags;

	/* fallback stuff */
	/* software shash allocated in cra_init and released in cra_exit */
	struct crypto_shash	*fallback;

};
104 
/* Maximum length passed to crypto_init_queue() for the request queue. */
#define ATMEL_SHA_QUEUE_LENGTH	1

/* State of one SHA hardware instance. */
struct atmel_sha_dev {
	struct list_head	list;		/* entry in atmel_sha.dev_list */
	unsigned long		phys_base;	/* start of the MEM resource */
	struct device		*dev;
	struct clk			*iclk;		/* clock enabled around each request */
	int					irq;
	void __iomem		*io_base;	/* ioremap()ed register window */

	spinlock_t		lock;		/* protects queue and the BUSY hand-over */
	int			err;
	struct tasklet_struct	done_task;	/* bottom half scheduled by the IRQ */

	unsigned long		flags;		/* SHA_FLAGS_* device bits */
	struct crypto_queue	queue;		/* pending ahash requests */
	struct ahash_request	*req;		/* request currently being processed */
};
123 
/* Driver-wide registry of probed SHA devices. */
struct atmel_sha_drv {
	struct list_head	dev_list;	/* list of struct atmel_sha_dev */
	spinlock_t		lock;		/* protects dev_list */
};

/* Single, statically initialised registry instance. */
static struct atmel_sha_drv atmel_sha = {
	.dev_list = LIST_HEAD_INIT(atmel_sha.dev_list),
	.lock = __SPIN_LOCK_UNLOCKED(atmel_sha.lock),
};
133 
134 static inline u32 atmel_sha_read(struct atmel_sha_dev *dd, u32 offset)
135 {
136 	return readl_relaxed(dd->io_base + offset);
137 }
138 
139 static inline void atmel_sha_write(struct atmel_sha_dev *dd,
140 					u32 offset, u32 value)
141 {
142 	writel_relaxed(value, dd->io_base + offset);
143 }
144 
145 static void atmel_sha_dualbuff_test(struct atmel_sha_dev *dd)
146 {
147 	atmel_sha_write(dd, SHA_MR, SHA_MR_DUALBUFF);
148 
149 	if (atmel_sha_read(dd, SHA_MR) & SHA_MR_DUALBUFF)
150 		dd->flags |= SHA_FLAGS_DUALBUFF;
151 }
152 
153 static size_t atmel_sha_append_sg(struct atmel_sha_reqctx *ctx)
154 {
155 	size_t count;
156 
157 	while ((ctx->bufcnt < ctx->buflen) && ctx->total) {
158 		count = min(ctx->sg->length - ctx->offset, ctx->total);
159 		count = min(count, ctx->buflen - ctx->bufcnt);
160 
161 		if (count <= 0)
162 			break;
163 
164 		scatterwalk_map_and_copy(ctx->buffer + ctx->bufcnt, ctx->sg,
165 			ctx->offset, count, 0);
166 
167 		ctx->bufcnt += count;
168 		ctx->offset += count;
169 		ctx->total -= count;
170 
171 		if (ctx->offset == ctx->sg->length) {
172 			ctx->sg = sg_next(ctx->sg);
173 			if (ctx->sg)
174 				ctx->offset = 0;
175 			else
176 				ctx->total = 0;
177 		}
178 	}
179 
180 	return 0;
181 }
182 
183 /*
184  * The purpose of this padding is to ensure that the padded message
185  * is a multiple of 512 bits. The bit "1" is appended at the end of
186  * the message followed by "padlen-1" zero bits. Then a 64 bits block
187  * equals to the message length in bits is appended.
188  *
189  * padlen is calculated as followed:
190  *  - if message length < 56 bytes then padlen = 56 - message length
191  *  - else padlen = 64 + 56 - message length
192  */
193 static void atmel_sha_fill_padding(struct atmel_sha_reqctx *ctx, int length)
194 {
195 	unsigned int index, padlen;
196 	u64 bits;
197 	u64 size;
198 
199 	bits = (ctx->bufcnt + ctx->digcnt + length) << 3;
200 	size = cpu_to_be64(bits);
201 
202 	index = ctx->bufcnt & 0x3f;
203 	padlen = (index < 56) ? (56 - index) : ((64+56) - index);
204 	*(ctx->buffer + ctx->bufcnt) = 0x80;
205 	memset(ctx->buffer + ctx->bufcnt + 1, 0, padlen-1);
206 	memcpy(ctx->buffer + ctx->bufcnt + padlen, &size, 8);
207 	ctx->bufcnt += padlen + 8;
208 	ctx->flags |= SHA_FLAGS_PAD;
209 }
210 
211 static int atmel_sha_init(struct ahash_request *req)
212 {
213 	struct crypto_ahash *tfm = crypto_ahash_reqtfm(req);
214 	struct atmel_sha_ctx *tctx = crypto_ahash_ctx(tfm);
215 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
216 	struct atmel_sha_dev *dd = NULL;
217 	struct atmel_sha_dev *tmp;
218 
219 	spin_lock_bh(&atmel_sha.lock);
220 	if (!tctx->dd) {
221 		list_for_each_entry(tmp, &atmel_sha.dev_list, list) {
222 			dd = tmp;
223 			break;
224 		}
225 		tctx->dd = dd;
226 	} else {
227 		dd = tctx->dd;
228 	}
229 
230 	spin_unlock_bh(&atmel_sha.lock);
231 
232 	ctx->dd = dd;
233 
234 	ctx->flags = 0;
235 
236 	dev_dbg(dd->dev, "init: digest size: %d\n",
237 		crypto_ahash_digestsize(tfm));
238 
239 	if (crypto_ahash_digestsize(tfm) == SHA1_DIGEST_SIZE)
240 		ctx->flags |= SHA_FLAGS_SHA1;
241 	else if (crypto_ahash_digestsize(tfm) == SHA256_DIGEST_SIZE)
242 		ctx->flags |= SHA_FLAGS_SHA256;
243 
244 	ctx->bufcnt = 0;
245 	ctx->digcnt = 0;
246 	ctx->buflen = SHA_BUFFER_LEN;
247 
248 	return 0;
249 }
250 
251 static void atmel_sha_write_ctrl(struct atmel_sha_dev *dd, int dma)
252 {
253 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
254 	u32 valcr = 0, valmr = SHA_MR_MODE_AUTO;
255 
256 	if (likely(dma)) {
257 		atmel_sha_write(dd, SHA_IER, SHA_INT_TXBUFE);
258 		valmr = SHA_MR_MODE_PDC;
259 		if (dd->flags & SHA_FLAGS_DUALBUFF)
260 			valmr = SHA_MR_DUALBUFF;
261 	} else {
262 		atmel_sha_write(dd, SHA_IER, SHA_INT_DATARDY);
263 	}
264 
265 	if (ctx->flags & SHA_FLAGS_SHA256)
266 		valmr |= SHA_MR_ALGO_SHA256;
267 
268 	/* Setting CR_FIRST only for the first iteration */
269 	if (!ctx->digcnt)
270 		valcr = SHA_CR_FIRST;
271 
272 	atmel_sha_write(dd, SHA_CR, valcr);
273 	atmel_sha_write(dd, SHA_MR, valmr);
274 }
275 
276 static int atmel_sha_xmit_cpu(struct atmel_sha_dev *dd, const u8 *buf,
277 			      size_t length, int final)
278 {
279 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
280 	int count, len32;
281 	const u32 *buffer = (const u32 *)buf;
282 
283 	dev_dbg(dd->dev, "xmit_cpu: digcnt: %d, length: %d, final: %d\n",
284 						ctx->digcnt, length, final);
285 
286 	atmel_sha_write_ctrl(dd, 0);
287 
288 	/* should be non-zero before next lines to disable clocks later */
289 	ctx->digcnt += length;
290 
291 	if (final)
292 		dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */
293 
294 	len32 = DIV_ROUND_UP(length, sizeof(u32));
295 
296 	dd->flags |= SHA_FLAGS_CPU;
297 
298 	for (count = 0; count < len32; count++)
299 		atmel_sha_write(dd, SHA_REG_DIN(count), buffer[count]);
300 
301 	return -EINPROGRESS;
302 }
303 
304 static int atmel_sha_xmit_pdc(struct atmel_sha_dev *dd, dma_addr_t dma_addr1,
305 		size_t length1, dma_addr_t dma_addr2, size_t length2, int final)
306 {
307 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
308 	int len32;
309 
310 	dev_dbg(dd->dev, "xmit_pdc: digcnt: %d, length: %d, final: %d\n",
311 						ctx->digcnt, length1, final);
312 
313 	len32 = DIV_ROUND_UP(length1, sizeof(u32));
314 	atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTDIS);
315 	atmel_sha_write(dd, SHA_TPR, dma_addr1);
316 	atmel_sha_write(dd, SHA_TCR, len32);
317 
318 	len32 = DIV_ROUND_UP(length2, sizeof(u32));
319 	atmel_sha_write(dd, SHA_TNPR, dma_addr2);
320 	atmel_sha_write(dd, SHA_TNCR, len32);
321 
322 	atmel_sha_write_ctrl(dd, 1);
323 
324 	/* should be non-zero before next lines to disable clocks later */
325 	ctx->digcnt += length1;
326 
327 	if (final)
328 		dd->flags |= SHA_FLAGS_FINAL; /* catch last interrupt */
329 
330 	dd->flags |=  SHA_FLAGS_DMA_ACTIVE;
331 
332 	/* Start DMA transfer */
333 	atmel_sha_write(dd, SHA_PTCR, SHA_PTCR_TXTEN);
334 
335 	return -EINPROGRESS;
336 }
337 
338 static int atmel_sha_update_cpu(struct atmel_sha_dev *dd)
339 {
340 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
341 	int bufcnt;
342 
343 	atmel_sha_append_sg(ctx);
344 	atmel_sha_fill_padding(ctx, 0);
345 
346 	bufcnt = ctx->bufcnt;
347 	ctx->bufcnt = 0;
348 
349 	return atmel_sha_xmit_cpu(dd, ctx->buffer, bufcnt, 1);
350 }
351 
352 static int atmel_sha_xmit_dma_map(struct atmel_sha_dev *dd,
353 					struct atmel_sha_reqctx *ctx,
354 					size_t length, int final)
355 {
356 	ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer,
357 				ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
358 	if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
359 		dev_err(dd->dev, "dma %u bytes error\n", ctx->buflen +
360 				SHA1_BLOCK_SIZE);
361 		return -EINVAL;
362 	}
363 
364 	ctx->flags &= ~SHA_FLAGS_SG;
365 
366 	/* next call does not fail... so no unmap in the case of error */
367 	return atmel_sha_xmit_pdc(dd, ctx->dma_addr, length, 0, 0, final);
368 }
369 
370 static int atmel_sha_update_dma_slow(struct atmel_sha_dev *dd)
371 {
372 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
373 	unsigned int final;
374 	size_t count;
375 
376 	atmel_sha_append_sg(ctx);
377 
378 	final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total;
379 
380 	dev_dbg(dd->dev, "slow: bufcnt: %u, digcnt: %d, final: %d\n",
381 					 ctx->bufcnt, ctx->digcnt, final);
382 
383 	if (final)
384 		atmel_sha_fill_padding(ctx, 0);
385 
386 	if (final || (ctx->bufcnt == ctx->buflen && ctx->total)) {
387 		count = ctx->bufcnt;
388 		ctx->bufcnt = 0;
389 		return atmel_sha_xmit_dma_map(dd, ctx, count, final);
390 	}
391 
392 	return 0;
393 }
394 
395 static int atmel_sha_update_dma_start(struct atmel_sha_dev *dd)
396 {
397 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
398 	unsigned int length, final, tail;
399 	struct scatterlist *sg;
400 	unsigned int count;
401 
402 	if (!ctx->total)
403 		return 0;
404 
405 	if (ctx->bufcnt || ctx->offset)
406 		return atmel_sha_update_dma_slow(dd);
407 
408 	dev_dbg(dd->dev, "fast: digcnt: %d, bufcnt: %u, total: %u\n",
409 			ctx->digcnt, ctx->bufcnt, ctx->total);
410 
411 	sg = ctx->sg;
412 
413 	if (!IS_ALIGNED(sg->offset, sizeof(u32)))
414 		return atmel_sha_update_dma_slow(dd);
415 
416 	if (!sg_is_last(sg) && !IS_ALIGNED(sg->length, SHA1_BLOCK_SIZE))
417 		/* size is not SHA1_BLOCK_SIZE aligned */
418 		return atmel_sha_update_dma_slow(dd);
419 
420 	length = min(ctx->total, sg->length);
421 
422 	if (sg_is_last(sg)) {
423 		if (!(ctx->flags & SHA_FLAGS_FINUP)) {
424 			/* not last sg must be SHA1_BLOCK_SIZE aligned */
425 			tail = length & (SHA1_BLOCK_SIZE - 1);
426 			length -= tail;
427 			if (length == 0) {
428 				/* offset where to start slow */
429 				ctx->offset = length;
430 				return atmel_sha_update_dma_slow(dd);
431 			}
432 		}
433 	}
434 
435 	ctx->total -= length;
436 	ctx->offset = length; /* offset where to start slow */
437 
438 	final = (ctx->flags & SHA_FLAGS_FINUP) && !ctx->total;
439 
440 	/* Add padding */
441 	if (final) {
442 		tail = length & (SHA1_BLOCK_SIZE - 1);
443 		length -= tail;
444 		ctx->total += tail;
445 		ctx->offset = length; /* offset where to start slow */
446 
447 		sg = ctx->sg;
448 		atmel_sha_append_sg(ctx);
449 
450 		atmel_sha_fill_padding(ctx, length);
451 
452 		ctx->dma_addr = dma_map_single(dd->dev, ctx->buffer,
453 			ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
454 		if (dma_mapping_error(dd->dev, ctx->dma_addr)) {
455 			dev_err(dd->dev, "dma %u bytes error\n",
456 				ctx->buflen + SHA1_BLOCK_SIZE);
457 			return -EINVAL;
458 		}
459 
460 		if (length == 0) {
461 			ctx->flags &= ~SHA_FLAGS_SG;
462 			count = ctx->bufcnt;
463 			ctx->bufcnt = 0;
464 			return atmel_sha_xmit_pdc(dd, ctx->dma_addr, count, 0,
465 					0, final);
466 		} else {
467 			ctx->sg = sg;
468 			if (!dma_map_sg(dd->dev, ctx->sg, 1,
469 				DMA_TO_DEVICE)) {
470 					dev_err(dd->dev, "dma_map_sg  error\n");
471 					return -EINVAL;
472 			}
473 
474 			ctx->flags |= SHA_FLAGS_SG;
475 
476 			count = ctx->bufcnt;
477 			ctx->bufcnt = 0;
478 			return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg),
479 					length, ctx->dma_addr, count, final);
480 		}
481 	}
482 
483 	if (!dma_map_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE)) {
484 		dev_err(dd->dev, "dma_map_sg  error\n");
485 		return -EINVAL;
486 	}
487 
488 	ctx->flags |= SHA_FLAGS_SG;
489 
490 	/* next call does not fail... so no unmap in the case of error */
491 	return atmel_sha_xmit_pdc(dd, sg_dma_address(ctx->sg), length, 0,
492 								0, final);
493 }
494 
495 static int atmel_sha_update_dma_stop(struct atmel_sha_dev *dd)
496 {
497 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(dd->req);
498 
499 	if (ctx->flags & SHA_FLAGS_SG) {
500 		dma_unmap_sg(dd->dev, ctx->sg, 1, DMA_TO_DEVICE);
501 		if (ctx->sg->length == ctx->offset) {
502 			ctx->sg = sg_next(ctx->sg);
503 			if (ctx->sg)
504 				ctx->offset = 0;
505 		}
506 		if (ctx->flags & SHA_FLAGS_PAD)
507 			dma_unmap_single(dd->dev, ctx->dma_addr,
508 				ctx->buflen + SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
509 	} else {
510 		dma_unmap_single(dd->dev, ctx->dma_addr, ctx->buflen +
511 						SHA1_BLOCK_SIZE, DMA_TO_DEVICE);
512 	}
513 
514 	return 0;
515 }
516 
517 static int atmel_sha_update_req(struct atmel_sha_dev *dd)
518 {
519 	struct ahash_request *req = dd->req;
520 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
521 	int err;
522 
523 	dev_dbg(dd->dev, "update_req: total: %u, digcnt: %d, finup: %d\n",
524 		 ctx->total, ctx->digcnt, (ctx->flags & SHA_FLAGS_FINUP) != 0);
525 
526 	if (ctx->flags & SHA_FLAGS_CPU)
527 		err = atmel_sha_update_cpu(dd);
528 	else
529 		err = atmel_sha_update_dma_start(dd);
530 
531 	/* wait for dma completion before can take more data */
532 	dev_dbg(dd->dev, "update: err: %d, digcnt: %d\n",
533 			err, ctx->digcnt);
534 
535 	return err;
536 }
537 
538 static int atmel_sha_final_req(struct atmel_sha_dev *dd)
539 {
540 	struct ahash_request *req = dd->req;
541 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
542 	int err = 0;
543 	int count;
544 
545 	if (ctx->bufcnt >= ATMEL_SHA_DMA_THRESHOLD) {
546 		atmel_sha_fill_padding(ctx, 0);
547 		count = ctx->bufcnt;
548 		ctx->bufcnt = 0;
549 		err = atmel_sha_xmit_dma_map(dd, ctx, count, 1);
550 	}
551 	/* faster to handle last block with cpu */
552 	else {
553 		atmel_sha_fill_padding(ctx, 0);
554 		count = ctx->bufcnt;
555 		ctx->bufcnt = 0;
556 		err = atmel_sha_xmit_cpu(dd, ctx->buffer, count, 1);
557 	}
558 
559 	dev_dbg(dd->dev, "final_req: err: %d\n", err);
560 
561 	return err;
562 }
563 
564 static void atmel_sha_copy_hash(struct ahash_request *req)
565 {
566 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
567 	u32 *hash = (u32 *)ctx->digest;
568 	int i;
569 
570 	if (likely(ctx->flags & SHA_FLAGS_SHA1))
571 		for (i = 0; i < SHA1_DIGEST_SIZE / sizeof(u32); i++)
572 			hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
573 	else
574 		for (i = 0; i < SHA256_DIGEST_SIZE / sizeof(u32); i++)
575 			hash[i] = atmel_sha_read(ctx->dd, SHA_REG_DIGEST(i));
576 }
577 
578 static void atmel_sha_copy_ready_hash(struct ahash_request *req)
579 {
580 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
581 
582 	if (!req->result)
583 		return;
584 
585 	if (likely(ctx->flags & SHA_FLAGS_SHA1))
586 		memcpy(req->result, ctx->digest, SHA1_DIGEST_SIZE);
587 	else
588 		memcpy(req->result, ctx->digest, SHA256_DIGEST_SIZE);
589 }
590 
591 static int atmel_sha_finish(struct ahash_request *req)
592 {
593 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
594 	struct atmel_sha_dev *dd = ctx->dd;
595 	int err = 0;
596 
597 	if (ctx->digcnt)
598 		atmel_sha_copy_ready_hash(req);
599 
600 	dev_dbg(dd->dev, "digcnt: %d, bufcnt: %d\n", ctx->digcnt,
601 		ctx->bufcnt);
602 
603 	return err;
604 }
605 
606 static void atmel_sha_finish_req(struct ahash_request *req, int err)
607 {
608 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
609 	struct atmel_sha_dev *dd = ctx->dd;
610 
611 	if (!err) {
612 		atmel_sha_copy_hash(req);
613 		if (SHA_FLAGS_FINAL & dd->flags)
614 			err = atmel_sha_finish(req);
615 	} else {
616 		ctx->flags |= SHA_FLAGS_ERROR;
617 	}
618 
619 	/* atomic operation is not needed here */
620 	dd->flags &= ~(SHA_FLAGS_BUSY | SHA_FLAGS_FINAL | SHA_FLAGS_CPU |
621 			SHA_FLAGS_DMA_READY | SHA_FLAGS_OUTPUT_READY);
622 
623 	clk_disable_unprepare(dd->iclk);
624 
625 	if (req->base.complete)
626 		req->base.complete(&req->base, err);
627 
628 	/* handle new request */
629 	tasklet_schedule(&dd->done_task);
630 }
631 
632 static int atmel_sha_hw_init(struct atmel_sha_dev *dd)
633 {
634 	clk_prepare_enable(dd->iclk);
635 
636 	if (SHA_FLAGS_INIT & dd->flags) {
637 		atmel_sha_write(dd, SHA_CR, SHA_CR_SWRST);
638 		atmel_sha_dualbuff_test(dd);
639 		dd->flags |= SHA_FLAGS_INIT;
640 		dd->err = 0;
641 	}
642 
643 	return 0;
644 }
645 
646 static int atmel_sha_handle_queue(struct atmel_sha_dev *dd,
647 				  struct ahash_request *req)
648 {
649 	struct crypto_async_request *async_req, *backlog;
650 	struct atmel_sha_reqctx *ctx;
651 	unsigned long flags;
652 	int err = 0, ret = 0;
653 
654 	spin_lock_irqsave(&dd->lock, flags);
655 	if (req)
656 		ret = ahash_enqueue_request(&dd->queue, req);
657 
658 	if (SHA_FLAGS_BUSY & dd->flags) {
659 		spin_unlock_irqrestore(&dd->lock, flags);
660 		return ret;
661 	}
662 
663 	backlog = crypto_get_backlog(&dd->queue);
664 	async_req = crypto_dequeue_request(&dd->queue);
665 	if (async_req)
666 		dd->flags |= SHA_FLAGS_BUSY;
667 
668 	spin_unlock_irqrestore(&dd->lock, flags);
669 
670 	if (!async_req)
671 		return ret;
672 
673 	if (backlog)
674 		backlog->complete(backlog, -EINPROGRESS);
675 
676 	req = ahash_request_cast(async_req);
677 	dd->req = req;
678 	ctx = ahash_request_ctx(req);
679 
680 	dev_dbg(dd->dev, "handling new req, op: %lu, nbytes: %d\n",
681 						ctx->op, req->nbytes);
682 
683 	err = atmel_sha_hw_init(dd);
684 
685 	if (err)
686 		goto err1;
687 
688 	if (ctx->op == SHA_OP_UPDATE) {
689 		err = atmel_sha_update_req(dd);
690 		if (err != -EINPROGRESS && (ctx->flags & SHA_FLAGS_FINUP)) {
691 			/* no final() after finup() */
692 			err = atmel_sha_final_req(dd);
693 		}
694 	} else if (ctx->op == SHA_OP_FINAL) {
695 		err = atmel_sha_final_req(dd);
696 	}
697 
698 err1:
699 	if (err != -EINPROGRESS)
700 		/* done_task will not finish it, so do it here */
701 		atmel_sha_finish_req(req, err);
702 
703 	dev_dbg(dd->dev, "exit, err: %d\n", err);
704 
705 	return ret;
706 }
707 
708 static int atmel_sha_enqueue(struct ahash_request *req, unsigned int op)
709 {
710 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
711 	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
712 	struct atmel_sha_dev *dd = tctx->dd;
713 
714 	ctx->op = op;
715 
716 	return atmel_sha_handle_queue(dd, req);
717 }
718 
719 static int atmel_sha_update(struct ahash_request *req)
720 {
721 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
722 
723 	if (!req->nbytes)
724 		return 0;
725 
726 	ctx->total = req->nbytes;
727 	ctx->sg = req->src;
728 	ctx->offset = 0;
729 
730 	if (ctx->flags & SHA_FLAGS_FINUP) {
731 		if (ctx->bufcnt + ctx->total < ATMEL_SHA_DMA_THRESHOLD)
732 			/* faster to use CPU for short transfers */
733 			ctx->flags |= SHA_FLAGS_CPU;
734 	} else if (ctx->bufcnt + ctx->total < ctx->buflen) {
735 		atmel_sha_append_sg(ctx);
736 		return 0;
737 	}
738 	return atmel_sha_enqueue(req, SHA_OP_UPDATE);
739 }
740 
741 static int atmel_sha_final(struct ahash_request *req)
742 {
743 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
744 	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(req->base.tfm);
745 	struct atmel_sha_dev *dd = tctx->dd;
746 
747 	int err = 0;
748 
749 	ctx->flags |= SHA_FLAGS_FINUP;
750 
751 	if (ctx->flags & SHA_FLAGS_ERROR)
752 		return 0; /* uncompleted hash is not needed */
753 
754 	if (ctx->bufcnt) {
755 		return atmel_sha_enqueue(req, SHA_OP_FINAL);
756 	} else if (!(ctx->flags & SHA_FLAGS_PAD)) { /* add padding */
757 		err = atmel_sha_hw_init(dd);
758 		if (err)
759 			goto err1;
760 
761 		dd->flags |= SHA_FLAGS_BUSY;
762 		err = atmel_sha_final_req(dd);
763 	} else {
764 		/* copy ready hash (+ finalize hmac) */
765 		return atmel_sha_finish(req);
766 	}
767 
768 err1:
769 	if (err != -EINPROGRESS)
770 		/* done_task will not finish it, so do it here */
771 		atmel_sha_finish_req(req, err);
772 
773 	return err;
774 }
775 
776 static int atmel_sha_finup(struct ahash_request *req)
777 {
778 	struct atmel_sha_reqctx *ctx = ahash_request_ctx(req);
779 	int err1, err2;
780 
781 	ctx->flags |= SHA_FLAGS_FINUP;
782 
783 	err1 = atmel_sha_update(req);
784 	if (err1 == -EINPROGRESS || err1 == -EBUSY)
785 		return err1;
786 
787 	/*
788 	 * final() has to be always called to cleanup resources
789 	 * even if udpate() failed, except EINPROGRESS
790 	 */
791 	err2 = atmel_sha_final(req);
792 
793 	return err1 ?: err2;
794 }
795 
/* ahash .digest: init followed by finup; stops at the first init error. */
static int atmel_sha_digest(struct ahash_request *req)
{
	int err = atmel_sha_init(req);

	if (err)
		return err;

	return atmel_sha_finup(req);
}
800 
801 static int atmel_sha_cra_init_alg(struct crypto_tfm *tfm, const char *alg_base)
802 {
803 	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(tfm);
804 	const char *alg_name = crypto_tfm_alg_name(tfm);
805 
806 	/* Allocate a fallback and abort if it failed. */
807 	tctx->fallback = crypto_alloc_shash(alg_name, 0,
808 					    CRYPTO_ALG_NEED_FALLBACK);
809 	if (IS_ERR(tctx->fallback)) {
810 		pr_err("atmel-sha: fallback driver '%s' could not be loaded.\n",
811 				alg_name);
812 		return PTR_ERR(tctx->fallback);
813 	}
814 	crypto_ahash_set_reqsize(__crypto_ahash_cast(tfm),
815 				 sizeof(struct atmel_sha_reqctx) +
816 				 SHA_BUFFER_LEN + SHA256_BLOCK_SIZE);
817 
818 	return 0;
819 }
820 
821 static int atmel_sha_cra_init(struct crypto_tfm *tfm)
822 {
823 	return atmel_sha_cra_init_alg(tfm, NULL);
824 }
825 
826 static void atmel_sha_cra_exit(struct crypto_tfm *tfm)
827 {
828 	struct atmel_sha_ctx *tctx = crypto_tfm_ctx(tfm);
829 
830 	crypto_free_shash(tctx->fallback);
831 	tctx->fallback = NULL;
832 }
833 
/*
 * Algorithms registered by this driver.  Both entries share the same
 * callbacks; atmel_sha_init() selects SHA1 or SHA256 from the digest size.
 */
static struct ahash_alg sha_algs[] = {
/* SHA1 */
{
	.init		= atmel_sha_init,
	.update		= atmel_sha_update,
	.final		= atmel_sha_final,
	.finup		= atmel_sha_finup,
	.digest		= atmel_sha_digest,
	.halg = {
		.digestsize	= SHA1_DIGEST_SIZE,
		.base	= {
			.cra_name		= "sha1",
			.cra_driver_name	= "atmel-sha1",
			.cra_priority		= 100,
			.cra_flags		= CRYPTO_ALG_ASYNC |
						CRYPTO_ALG_NEED_FALLBACK,
			.cra_blocksize		= SHA1_BLOCK_SIZE,
			.cra_ctxsize		= sizeof(struct atmel_sha_ctx),
			.cra_alignmask		= 0,
			.cra_module		= THIS_MODULE,
			.cra_init		= atmel_sha_cra_init,
			.cra_exit		= atmel_sha_cra_exit,
		}
	}
},
/* SHA256 */
{
	.init		= atmel_sha_init,
	.update		= atmel_sha_update,
	.final		= atmel_sha_final,
	.finup		= atmel_sha_finup,
	.digest		= atmel_sha_digest,
	.halg = {
		.digestsize	= SHA256_DIGEST_SIZE,
		.base	= {
			.cra_name		= "sha256",
			.cra_driver_name	= "atmel-sha256",
			.cra_priority		= 100,
			.cra_flags		= CRYPTO_ALG_ASYNC |
						CRYPTO_ALG_NEED_FALLBACK,
			.cra_blocksize		= SHA256_BLOCK_SIZE,
			.cra_ctxsize		= sizeof(struct atmel_sha_ctx),
			.cra_alignmask		= 0,
			.cra_module		= THIS_MODULE,
			.cra_init		= atmel_sha_cra_init,
			.cra_exit		= atmel_sha_cra_exit,
		}
	}
},
};
882 
883 static void atmel_sha_done_task(unsigned long data)
884 {
885 	struct atmel_sha_dev *dd = (struct atmel_sha_dev *)data;
886 	int err = 0;
887 
888 	if (!(SHA_FLAGS_BUSY & dd->flags)) {
889 		atmel_sha_handle_queue(dd, NULL);
890 		return;
891 	}
892 
893 	if (SHA_FLAGS_CPU & dd->flags) {
894 		if (SHA_FLAGS_OUTPUT_READY & dd->flags) {
895 			dd->flags &= ~SHA_FLAGS_OUTPUT_READY;
896 			goto finish;
897 		}
898 	} else if (SHA_FLAGS_DMA_READY & dd->flags) {
899 		if (SHA_FLAGS_DMA_ACTIVE & dd->flags) {
900 			dd->flags &= ~SHA_FLAGS_DMA_ACTIVE;
901 			atmel_sha_update_dma_stop(dd);
902 			if (dd->err) {
903 				err = dd->err;
904 				goto finish;
905 			}
906 		}
907 		if (SHA_FLAGS_OUTPUT_READY & dd->flags) {
908 			/* hash or semi-hash ready */
909 			dd->flags &= ~(SHA_FLAGS_DMA_READY |
910 						SHA_FLAGS_OUTPUT_READY);
911 			err = atmel_sha_update_dma_start(dd);
912 			if (err != -EINPROGRESS)
913 				goto finish;
914 		}
915 	}
916 	return;
917 
918 finish:
919 	/* finish curent request */
920 	atmel_sha_finish_req(dd->req, err);
921 }
922 
923 static irqreturn_t atmel_sha_irq(int irq, void *dev_id)
924 {
925 	struct atmel_sha_dev *sha_dd = dev_id;
926 	u32 reg;
927 
928 	reg = atmel_sha_read(sha_dd, SHA_ISR);
929 	if (reg & atmel_sha_read(sha_dd, SHA_IMR)) {
930 		atmel_sha_write(sha_dd, SHA_IDR, reg);
931 		if (SHA_FLAGS_BUSY & sha_dd->flags) {
932 			sha_dd->flags |= SHA_FLAGS_OUTPUT_READY;
933 			if (!(SHA_FLAGS_CPU & sha_dd->flags))
934 				sha_dd->flags |= SHA_FLAGS_DMA_READY;
935 			tasklet_schedule(&sha_dd->done_task);
936 		} else {
937 			dev_warn(sha_dd->dev, "SHA interrupt when no active requests.\n");
938 		}
939 		return IRQ_HANDLED;
940 	}
941 
942 	return IRQ_NONE;
943 }
944 
945 static void atmel_sha_unregister_algs(struct atmel_sha_dev *dd)
946 {
947 	int i;
948 
949 	for (i = 0; i < ARRAY_SIZE(sha_algs); i++)
950 		crypto_unregister_ahash(&sha_algs[i]);
951 }
952 
953 static int atmel_sha_register_algs(struct atmel_sha_dev *dd)
954 {
955 	int err, i, j;
956 
957 	for (i = 0; i < ARRAY_SIZE(sha_algs); i++) {
958 		err = crypto_register_ahash(&sha_algs[i]);
959 		if (err)
960 			goto err_sha_algs;
961 	}
962 
963 	return 0;
964 
965 err_sha_algs:
966 	for (j = 0; j < i; j++)
967 		crypto_unregister_ahash(&sha_algs[j]);
968 
969 	return err;
970 }
971 
972 static int __devinit atmel_sha_probe(struct platform_device *pdev)
973 {
974 	struct atmel_sha_dev *sha_dd;
975 	struct device *dev = &pdev->dev;
976 	struct resource *sha_res;
977 	unsigned long sha_phys_size;
978 	int err;
979 
980 	sha_dd = kzalloc(sizeof(struct atmel_sha_dev), GFP_KERNEL);
981 	if (sha_dd == NULL) {
982 		dev_err(dev, "unable to alloc data struct.\n");
983 		err = -ENOMEM;
984 		goto sha_dd_err;
985 	}
986 
987 	sha_dd->dev = dev;
988 
989 	platform_set_drvdata(pdev, sha_dd);
990 
991 	INIT_LIST_HEAD(&sha_dd->list);
992 
993 	tasklet_init(&sha_dd->done_task, atmel_sha_done_task,
994 					(unsigned long)sha_dd);
995 
996 	crypto_init_queue(&sha_dd->queue, ATMEL_SHA_QUEUE_LENGTH);
997 
998 	sha_dd->irq = -1;
999 
1000 	/* Get the base address */
1001 	sha_res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
1002 	if (!sha_res) {
1003 		dev_err(dev, "no MEM resource info\n");
1004 		err = -ENODEV;
1005 		goto res_err;
1006 	}
1007 	sha_dd->phys_base = sha_res->start;
1008 	sha_phys_size = resource_size(sha_res);
1009 
1010 	/* Get the IRQ */
1011 	sha_dd->irq = platform_get_irq(pdev,  0);
1012 	if (sha_dd->irq < 0) {
1013 		dev_err(dev, "no IRQ resource info\n");
1014 		err = sha_dd->irq;
1015 		goto res_err;
1016 	}
1017 
1018 	err = request_irq(sha_dd->irq, atmel_sha_irq, IRQF_SHARED, "atmel-sha",
1019 						sha_dd);
1020 	if (err) {
1021 		dev_err(dev, "unable to request sha irq.\n");
1022 		goto res_err;
1023 	}
1024 
1025 	/* Initializing the clock */
1026 	sha_dd->iclk = clk_get(&pdev->dev, NULL);
1027 	if (IS_ERR(sha_dd->iclk)) {
1028 		dev_err(dev, "clock intialization failed.\n");
1029 		err = PTR_ERR(sha_dd->iclk);
1030 		goto clk_err;
1031 	}
1032 
1033 	sha_dd->io_base = ioremap(sha_dd->phys_base, sha_phys_size);
1034 	if (!sha_dd->io_base) {
1035 		dev_err(dev, "can't ioremap\n");
1036 		err = -ENOMEM;
1037 		goto sha_io_err;
1038 	}
1039 
1040 	spin_lock(&atmel_sha.lock);
1041 	list_add_tail(&sha_dd->list, &atmel_sha.dev_list);
1042 	spin_unlock(&atmel_sha.lock);
1043 
1044 	err = atmel_sha_register_algs(sha_dd);
1045 	if (err)
1046 		goto err_algs;
1047 
1048 	dev_info(dev, "Atmel SHA1/SHA256\n");
1049 
1050 	return 0;
1051 
1052 err_algs:
1053 	spin_lock(&atmel_sha.lock);
1054 	list_del(&sha_dd->list);
1055 	spin_unlock(&atmel_sha.lock);
1056 	iounmap(sha_dd->io_base);
1057 sha_io_err:
1058 	clk_put(sha_dd->iclk);
1059 clk_err:
1060 	free_irq(sha_dd->irq, sha_dd);
1061 res_err:
1062 	tasklet_kill(&sha_dd->done_task);
1063 	kfree(sha_dd);
1064 	sha_dd = NULL;
1065 sha_dd_err:
1066 	dev_err(dev, "initialization failed.\n");
1067 
1068 	return err;
1069 }
1070 
1071 static int __devexit atmel_sha_remove(struct platform_device *pdev)
1072 {
1073 	static struct atmel_sha_dev *sha_dd;
1074 
1075 	sha_dd = platform_get_drvdata(pdev);
1076 	if (!sha_dd)
1077 		return -ENODEV;
1078 	spin_lock(&atmel_sha.lock);
1079 	list_del(&sha_dd->list);
1080 	spin_unlock(&atmel_sha.lock);
1081 
1082 	atmel_sha_unregister_algs(sha_dd);
1083 
1084 	tasklet_kill(&sha_dd->done_task);
1085 
1086 	iounmap(sha_dd->io_base);
1087 
1088 	clk_put(sha_dd->iclk);
1089 
1090 	if (sha_dd->irq >= 0)
1091 		free_irq(sha_dd->irq, sha_dd);
1092 
1093 	kfree(sha_dd);
1094 	sha_dd = NULL;
1095 
1096 	return 0;
1097 }
1098 
/* Platform driver glue; matches platform devices named "atmel_sha". */
static struct platform_driver atmel_sha_driver = {
	.probe		= atmel_sha_probe,
	.remove		= __devexit_p(atmel_sha_remove),
	.driver		= {
		.name	= "atmel_sha",
		.owner	= THIS_MODULE,
	},
};

/* Expands to the module init/exit boilerplate for this platform driver. */
module_platform_driver(atmel_sha_driver);

MODULE_DESCRIPTION("Atmel SHA1/SHA256 hw acceleration support.");
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR("Nicolas Royer - Eukréa Electromatique");
1113