xref: /linux/drivers/crypto/ccp/ccp-ops.c (revision 9fd2da71c301184d98fe37674ca8d017d1ce6600)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * AMD Cryptographic Coprocessor (CCP) driver
4  *
5  * Copyright (C) 2013-2019 Advanced Micro Devices, Inc.
6  *
7  * Author: Tom Lendacky <thomas.lendacky@amd.com>
8  * Author: Gary R Hook <gary.hook@amd.com>
9  */
10 
#include <crypto/des.h>
#include <crypto/scatterwalk.h>
#include <crypto/utils.h>
#include <linux/ccp.h>
#include <linux/cleanup.h>
#include <linux/dma-mapping.h>
#include <linux/errno.h>
#include <linux/kernel.h>
#include <linux/module.h>
#include <linux/slab.h>
19 
20 #include "ccp-dev.h"
21 
22 /* SHA initial context values */
23 static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = {
24 	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
25 	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
26 	cpu_to_be32(SHA1_H4),
27 };
28 
29 static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
30 	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
31 	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
32 	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
33 	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
34 };
35 
36 static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
37 	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
38 	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
39 	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
40 	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
41 };
42 
43 static const __be64 ccp_sha384_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
44 	cpu_to_be64(SHA384_H0), cpu_to_be64(SHA384_H1),
45 	cpu_to_be64(SHA384_H2), cpu_to_be64(SHA384_H3),
46 	cpu_to_be64(SHA384_H4), cpu_to_be64(SHA384_H5),
47 	cpu_to_be64(SHA384_H6), cpu_to_be64(SHA384_H7),
48 };
49 
50 static const __be64 ccp_sha512_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
51 	cpu_to_be64(SHA512_H0), cpu_to_be64(SHA512_H1),
52 	cpu_to_be64(SHA512_H2), cpu_to_be64(SHA512_H3),
53 	cpu_to_be64(SHA512_H4), cpu_to_be64(SHA512_H5),
54 	cpu_to_be64(SHA512_H6), cpu_to_be64(SHA512_H7),
55 };
56 
/*
 * Produce a job id for a new command: version 3.0 devices get a freshly
 * generated id, every other device version uses 0.
 *
 * The argument is parenthesized so that any pointer expression can be
 * passed safely. It may be evaluated more than once, so it must not have
 * side effects.
 */
#define	CCP_NEW_JOBID(ccp)	(((ccp)->vdata->version == CCP_VERSION(3, 0)) ? \
					ccp_gen_jobid(ccp) : 0)
59 
60 static u32 ccp_gen_jobid(struct ccp_device *ccp)
61 {
62 	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
63 }
64 
65 static void ccp_sg_free(struct ccp_sg_workarea *wa)
66 {
67 	if (wa->dma_count)
68 		dma_unmap_sg(wa->dma_dev, wa->dma_sg_head, wa->nents, wa->dma_dir);
69 
70 	wa->dma_count = 0;
71 }
72 
73 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
74 				struct scatterlist *sg, u64 len,
75 				enum dma_data_direction dma_dir)
76 {
77 	memset(wa, 0, sizeof(*wa));
78 
79 	wa->sg = sg;
80 	if (!sg)
81 		return 0;
82 
83 	wa->nents = sg_nents_for_len(sg, len);
84 	if (wa->nents < 0)
85 		return wa->nents;
86 
87 	wa->bytes_left = len;
88 	wa->sg_used = 0;
89 
90 	if (len == 0)
91 		return 0;
92 
93 	if (dma_dir == DMA_NONE)
94 		return 0;
95 
96 	wa->dma_sg = sg;
97 	wa->dma_sg_head = sg;
98 	wa->dma_dev = dev;
99 	wa->dma_dir = dma_dir;
100 	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
101 	if (!wa->dma_count)
102 		return -ENOMEM;
103 
104 	return 0;
105 }
106 
107 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
108 {
109 	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
110 	unsigned int sg_combined_len = 0;
111 
112 	if (!wa->sg)
113 		return;
114 
115 	wa->sg_used += nbytes;
116 	wa->bytes_left -= nbytes;
117 	if (wa->sg_used == sg_dma_len(wa->dma_sg)) {
118 		/* Advance to the next DMA scatterlist entry */
119 		wa->dma_sg = sg_next(wa->dma_sg);
120 
121 		/* In the case that the DMA mapped scatterlist has entries
122 		 * that have been merged, the non-DMA mapped scatterlist
123 		 * must be advanced multiple times for each merged entry.
124 		 * This ensures that the current non-DMA mapped entry
125 		 * corresponds to the current DMA mapped entry.
126 		 */
127 		do {
128 			sg_combined_len += wa->sg->length;
129 			wa->sg = sg_next(wa->sg);
130 		} while (wa->sg_used > sg_combined_len);
131 
132 		wa->sg_used = 0;
133 	}
134 }
135 
136 static void ccp_dm_free(struct ccp_dm_workarea *wa)
137 {
138 	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
139 		if (wa->address)
140 			dma_pool_free(wa->dma_pool, wa->address,
141 				      wa->dma.address);
142 	} else {
143 		if (wa->dma.address)
144 			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
145 					 wa->dma.dir);
146 		kfree(wa->address);
147 	}
148 
149 	wa->address = NULL;
150 	wa->dma.address = 0;
151 }
152 
153 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
154 				struct ccp_cmd_queue *cmd_q,
155 				unsigned int len,
156 				enum dma_data_direction dir)
157 {
158 	memset(wa, 0, sizeof(*wa));
159 
160 	if (!len)
161 		return 0;
162 
163 	wa->dev = cmd_q->ccp->dev;
164 	wa->length = len;
165 
166 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
167 		wa->dma_pool = cmd_q->dma_pool;
168 
169 		wa->address = dma_pool_zalloc(wa->dma_pool, GFP_KERNEL,
170 					     &wa->dma.address);
171 		if (!wa->address)
172 			return -ENOMEM;
173 
174 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
175 
176 	} else {
177 		wa->address = kzalloc(len, GFP_KERNEL);
178 		if (!wa->address)
179 			return -ENOMEM;
180 
181 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
182 						 dir);
183 		if (dma_mapping_error(wa->dev, wa->dma.address)) {
184 			kfree(wa->address);
185 			wa->address = NULL;
186 			return -ENOMEM;
187 		}
188 
189 		wa->dma.length = len;
190 	}
191 	wa->dma.dir = dir;
192 
193 	return 0;
194 }
195 
196 static int ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
197 			   struct scatterlist *sg, unsigned int sg_offset,
198 			   unsigned int len)
199 {
200 	WARN_ON(!wa->address);
201 
202 	if (len > (wa->length - wa_offset))
203 		return -EINVAL;
204 
205 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
206 				 0);
207 	return 0;
208 }
209 
210 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
211 			    struct scatterlist *sg, unsigned int sg_offset,
212 			    unsigned int len)
213 {
214 	WARN_ON(!wa->address);
215 
216 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
217 				 1);
218 }
219 
220 static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
221 				   unsigned int wa_offset,
222 				   struct scatterlist *sg,
223 				   unsigned int sg_offset,
224 				   unsigned int len)
225 {
226 	u8 *p, *q;
227 	int	rc;
228 
229 	rc = ccp_set_dm_area(wa, wa_offset, sg, sg_offset, len);
230 	if (rc)
231 		return rc;
232 
233 	p = wa->address + wa_offset;
234 	q = p + len - 1;
235 	while (p < q) {
236 		*p = *p ^ *q;
237 		*q = *p ^ *q;
238 		*p = *p ^ *q;
239 		p++;
240 		q--;
241 	}
242 	return 0;
243 }
244 
245 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
246 				    unsigned int wa_offset,
247 				    struct scatterlist *sg,
248 				    unsigned int sg_offset,
249 				    unsigned int len)
250 {
251 	u8 *p, *q;
252 
253 	p = wa->address + wa_offset;
254 	q = p + len - 1;
255 	while (p < q) {
256 		*p = *p ^ *q;
257 		*q = *p ^ *q;
258 		*p = *p ^ *q;
259 		p++;
260 		q--;
261 	}
262 
263 	ccp_get_dm_area(wa, wa_offset, sg, sg_offset, len);
264 }
265 
266 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
267 {
268 	ccp_dm_free(&data->dm_wa);
269 	ccp_sg_free(&data->sg_wa);
270 }
271 
272 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
273 			 struct scatterlist *sg, u64 sg_len,
274 			 unsigned int dm_len,
275 			 enum dma_data_direction dir)
276 {
277 	int ret;
278 
279 	memset(data, 0, sizeof(*data));
280 
281 	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
282 				   dir);
283 	if (ret)
284 		goto e_err;
285 
286 	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
287 	if (ret)
288 		goto e_err;
289 
290 	return 0;
291 
292 e_err:
293 	ccp_free_data(data, cmd_q);
294 
295 	return ret;
296 }
297 
298 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
299 {
300 	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
301 	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
302 	unsigned int buf_count, nbytes;
303 
304 	/* Clear the buffer if setting it */
305 	if (!from)
306 		memset(dm_wa->address, 0, dm_wa->length);
307 
308 	if (!sg_wa->sg)
309 		return 0;
310 
311 	/* Perform the copy operation
312 	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
313 	 *   an unsigned int
314 	 */
315 	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
316 	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
317 				 nbytes, from);
318 
319 	/* Update the structures and generate the count */
320 	buf_count = 0;
321 	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
322 		nbytes = min(sg_dma_len(sg_wa->dma_sg) - sg_wa->sg_used,
323 			     dm_wa->length - buf_count);
324 		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
325 
326 		buf_count += nbytes;
327 		ccp_update_sg_workarea(sg_wa, nbytes);
328 	}
329 
330 	return buf_count;
331 }
332 
/*
 * Fill the bounce buffer of @data from its scatterlist.
 * Returns the byte count reported by ccp_queue_buf().
 */
static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
{
	/* from == 0: copy scatterlist -> buffer */
	const unsigned int from = 0;

	return ccp_queue_buf(data, from);
}
337 
/*
 * Drain the bounce buffer of @data out to its scatterlist.
 * Returns the byte count reported by ccp_queue_buf().
 */
static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
{
	/* from == 1: copy buffer -> scatterlist */
	const unsigned int from = 1;

	return ccp_queue_buf(data, from);
}
342 
343 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
344 			     struct ccp_op *op, unsigned int block_size,
345 			     bool blocksize_op)
346 {
347 	unsigned int sg_src_len, sg_dst_len, op_len;
348 
349 	/* The CCP can only DMA from/to one address each per operation. This
350 	 * requires that we find the smallest DMA area between the source
351 	 * and destination. The resulting len values will always be <= UINT_MAX
352 	 * because the dma length is an unsigned int.
353 	 */
354 	sg_src_len = sg_dma_len(src->sg_wa.dma_sg) - src->sg_wa.sg_used;
355 	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
356 
357 	if (dst) {
358 		sg_dst_len = sg_dma_len(dst->sg_wa.dma_sg) - dst->sg_wa.sg_used;
359 		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
360 		op_len = min(sg_src_len, sg_dst_len);
361 	} else {
362 		op_len = sg_src_len;
363 	}
364 
365 	/* The data operation length will be at least block_size in length
366 	 * or the smaller of available sg room remaining for the source or
367 	 * the destination
368 	 */
369 	op_len = max(op_len, block_size);
370 
371 	/* Unless we have to buffer data, there's no reason to wait */
372 	op->soc = 0;
373 
374 	if (sg_src_len < block_size) {
375 		/* Not enough data in the sg element, so it
376 		 * needs to be buffered into a blocksize chunk
377 		 */
378 		int cp_len = ccp_fill_queue_buf(src);
379 
380 		op->soc = 1;
381 		op->src.u.dma.address = src->dm_wa.dma.address;
382 		op->src.u.dma.offset = 0;
383 		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
384 	} else {
385 		/* Enough data in the sg element, but we need to
386 		 * adjust for any previously copied data
387 		 */
388 		op->src.u.dma.address = sg_dma_address(src->sg_wa.dma_sg);
389 		op->src.u.dma.offset = src->sg_wa.sg_used;
390 		op->src.u.dma.length = op_len & ~(block_size - 1);
391 
392 		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
393 	}
394 
395 	if (dst) {
396 		if (sg_dst_len < block_size) {
397 			/* Not enough room in the sg element or we're on the
398 			 * last piece of data (when using padding), so the
399 			 * output needs to be buffered into a blocksize chunk
400 			 */
401 			op->soc = 1;
402 			op->dst.u.dma.address = dst->dm_wa.dma.address;
403 			op->dst.u.dma.offset = 0;
404 			op->dst.u.dma.length = op->src.u.dma.length;
405 		} else {
406 			/* Enough room in the sg element, but we need to
407 			 * adjust for any previously used area
408 			 */
409 			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.dma_sg);
410 			op->dst.u.dma.offset = dst->sg_wa.sg_used;
411 			op->dst.u.dma.length = op->src.u.dma.length;
412 		}
413 	}
414 }
415 
416 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
417 			     struct ccp_op *op)
418 {
419 	op->init = 0;
420 
421 	if (dst) {
422 		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
423 			ccp_empty_queue_buf(dst);
424 		else
425 			ccp_update_sg_workarea(&dst->sg_wa,
426 					       op->dst.u.dma.length);
427 	}
428 }
429 
430 static int ccp_copy_to_from_sb(struct ccp_cmd_queue *cmd_q,
431 			       struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
432 			       u32 byte_swap, bool from)
433 {
434 	struct ccp_op op;
435 
436 	memset(&op, 0, sizeof(op));
437 
438 	op.cmd_q = cmd_q;
439 	op.jobid = jobid;
440 	op.eom = 1;
441 
442 	if (from) {
443 		op.soc = 1;
444 		op.src.type = CCP_MEMTYPE_SB;
445 		op.src.u.sb = sb;
446 		op.dst.type = CCP_MEMTYPE_SYSTEM;
447 		op.dst.u.dma.address = wa->dma.address;
448 		op.dst.u.dma.length = wa->length;
449 	} else {
450 		op.src.type = CCP_MEMTYPE_SYSTEM;
451 		op.src.u.dma.address = wa->dma.address;
452 		op.src.u.dma.length = wa->length;
453 		op.dst.type = CCP_MEMTYPE_SB;
454 		op.dst.u.sb = sb;
455 	}
456 
457 	op.u.passthru.byte_swap = byte_swap;
458 
459 	return cmd_q->ccp->vdata->perform->passthru(&op);
460 }
461 
462 static int ccp_copy_to_sb(struct ccp_cmd_queue *cmd_q,
463 			  struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
464 			  u32 byte_swap)
465 {
466 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, false);
467 }
468 
469 static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q,
470 			    struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
471 			    u32 byte_swap)
472 {
473 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true);
474 }
475 
476 static noinline_for_stack int
477 ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
478 {
479 	struct ccp_aes_engine *aes = &cmd->u.aes;
480 	struct ccp_dm_workarea key, ctx;
481 	struct ccp_data src;
482 	struct ccp_op op;
483 	unsigned int dm_offset;
484 	int ret;
485 
486 	if (!((aes->key_len == AES_KEYSIZE_128) ||
487 	      (aes->key_len == AES_KEYSIZE_192) ||
488 	      (aes->key_len == AES_KEYSIZE_256)))
489 		return -EINVAL;
490 
491 	if (aes->src_len & (AES_BLOCK_SIZE - 1))
492 		return -EINVAL;
493 
494 	if (aes->iv_len != AES_BLOCK_SIZE)
495 		return -EINVAL;
496 
497 	if (!aes->key || !aes->iv || !aes->src)
498 		return -EINVAL;
499 
500 	if (aes->cmac_final) {
501 		if (aes->cmac_key_len != AES_BLOCK_SIZE)
502 			return -EINVAL;
503 
504 		if (!aes->cmac_key)
505 			return -EINVAL;
506 	}
507 
508 	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
509 	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
510 
511 	ret = -EIO;
512 	memset(&op, 0, sizeof(op));
513 	op.cmd_q = cmd_q;
514 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
515 	op.sb_key = cmd_q->sb_key;
516 	op.sb_ctx = cmd_q->sb_ctx;
517 	op.init = 1;
518 	op.u.aes.type = aes->type;
519 	op.u.aes.mode = aes->mode;
520 	op.u.aes.action = aes->action;
521 
522 	/* All supported key sizes fit in a single (32-byte) SB entry
523 	 * and must be in little endian format. Use the 256-bit byte
524 	 * swap passthru option to convert from big endian to little
525 	 * endian.
526 	 */
527 	ret = ccp_init_dm_workarea(&key, cmd_q,
528 				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
529 				   DMA_TO_DEVICE);
530 	if (ret)
531 		return ret;
532 
533 	dm_offset = CCP_SB_BYTES - aes->key_len;
534 	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
535 	if (ret)
536 		goto e_key;
537 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
538 			     CCP_PASSTHRU_BYTESWAP_256BIT);
539 	if (ret) {
540 		cmd->engine_error = cmd_q->cmd_error;
541 		goto e_key;
542 	}
543 
544 	/* The AES context fits in a single (32-byte) SB entry and
545 	 * must be in little endian format. Use the 256-bit byte swap
546 	 * passthru option to convert from big endian to little endian.
547 	 */
548 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
549 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
550 				   DMA_BIDIRECTIONAL);
551 	if (ret)
552 		goto e_key;
553 
554 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
555 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
556 	if (ret)
557 		goto e_ctx;
558 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
559 			     CCP_PASSTHRU_BYTESWAP_256BIT);
560 	if (ret) {
561 		cmd->engine_error = cmd_q->cmd_error;
562 		goto e_ctx;
563 	}
564 
565 	/* Send data to the CCP AES engine */
566 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
567 			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
568 	if (ret)
569 		goto e_ctx;
570 
571 	while (src.sg_wa.bytes_left) {
572 		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
573 		if (aes->cmac_final && !src.sg_wa.bytes_left) {
574 			op.eom = 1;
575 
576 			/* Push the K1/K2 key to the CCP now */
577 			ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid,
578 					       op.sb_ctx,
579 					       CCP_PASSTHRU_BYTESWAP_256BIT);
580 			if (ret) {
581 				cmd->engine_error = cmd_q->cmd_error;
582 				goto e_src;
583 			}
584 
585 			ret = ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
586 					      aes->cmac_key_len);
587 			if (ret)
588 				goto e_src;
589 			ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
590 					     CCP_PASSTHRU_BYTESWAP_256BIT);
591 			if (ret) {
592 				cmd->engine_error = cmd_q->cmd_error;
593 				goto e_src;
594 			}
595 		}
596 
597 		ret = cmd_q->ccp->vdata->perform->aes(&op);
598 		if (ret) {
599 			cmd->engine_error = cmd_q->cmd_error;
600 			goto e_src;
601 		}
602 
603 		ccp_process_data(&src, NULL, &op);
604 	}
605 
606 	/* Retrieve the AES context - convert from LE to BE using
607 	 * 32-byte (256-bit) byteswapping
608 	 */
609 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
610 			       CCP_PASSTHRU_BYTESWAP_256BIT);
611 	if (ret) {
612 		cmd->engine_error = cmd_q->cmd_error;
613 		goto e_src;
614 	}
615 
616 	/* ...but we only need AES_BLOCK_SIZE bytes */
617 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
618 	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
619 
620 e_src:
621 	ccp_free_data(&src, cmd_q);
622 
623 e_ctx:
624 	ccp_dm_free(&ctx);
625 
626 e_key:
627 	ccp_dm_free(&key);
628 
629 	return ret;
630 }
631 
632 static noinline_for_stack int
633 ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
634 {
635 	struct ccp_aes_engine *aes = &cmd->u.aes;
636 	struct {
637 		struct ccp_dm_workarea key;
638 		struct ccp_dm_workarea ctx;
639 		struct ccp_dm_workarea final;
640 		struct ccp_dm_workarea tag;
641 		struct ccp_data src;
642 		struct ccp_data dst;
643 		struct ccp_data aad;
644 		struct ccp_op op;
645 	} *wa __cleanup(kfree) = kzalloc(sizeof *wa, GFP_KERNEL);
646 	unsigned int dm_offset;
647 	unsigned int authsize;
648 	unsigned int jobid;
649 	unsigned int ilen;
650 	bool in_place = true; /* Default value */
651 	__be64 *final;
652 	int ret;
653 
654 	struct scatterlist *p_inp, sg_inp[2];
655 	struct scatterlist *p_tag, sg_tag[2];
656 	struct scatterlist *p_outp, sg_outp[2];
657 	struct scatterlist *p_aad;
658 
659 	if (!wa)
660 		return -ENOMEM;
661 
662 	if (!aes->iv)
663 		return -EINVAL;
664 
665 	if (!((aes->key_len == AES_KEYSIZE_128) ||
666 		(aes->key_len == AES_KEYSIZE_192) ||
667 		(aes->key_len == AES_KEYSIZE_256)))
668 		return -EINVAL;
669 
670 	if (!aes->key) /* Gotta have a key SGL */
671 		return -EINVAL;
672 
673 	/* Zero defaults to 16 bytes, the maximum size */
674 	authsize = aes->authsize ? aes->authsize : AES_BLOCK_SIZE;
675 	switch (authsize) {
676 	case 16:
677 	case 15:
678 	case 14:
679 	case 13:
680 	case 12:
681 	case 8:
682 	case 4:
683 		break;
684 	default:
685 		return -EINVAL;
686 	}
687 
688 	/* First, decompose the source buffer into AAD & PT,
689 	 * and the destination buffer into AAD, CT & tag, or
690 	 * the input into CT & tag.
691 	 * It is expected that the input and output SGs will
692 	 * be valid, even if the AAD and input lengths are 0.
693 	 */
694 	p_aad = aes->src;
695 	p_inp = scatterwalk_ffwd(sg_inp, aes->src, aes->aad_len);
696 	p_outp = scatterwalk_ffwd(sg_outp, aes->dst, aes->aad_len);
697 	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
698 		ilen = aes->src_len;
699 		p_tag = scatterwalk_ffwd(sg_tag, p_outp, ilen);
700 	} else {
701 		/* Input length for decryption includes tag */
702 		ilen = aes->src_len - authsize;
703 		p_tag = scatterwalk_ffwd(sg_tag, p_inp, ilen);
704 	}
705 
706 	jobid = CCP_NEW_JOBID(cmd_q->ccp);
707 
708 	memset(&wa->op, 0, sizeof(wa->op));
709 	wa->op.cmd_q = cmd_q;
710 	wa->op.jobid = jobid;
711 	wa->op.sb_key = cmd_q->sb_key; /* Pre-allocated */
712 	wa->op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
713 	wa->op.init = 1;
714 	wa->op.u.aes.type = aes->type;
715 
716 	/* Copy the key to the LSB */
717 	ret = ccp_init_dm_workarea(&wa->key, cmd_q,
718 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
719 				   DMA_TO_DEVICE);
720 	if (ret)
721 		return ret;
722 
723 	dm_offset = CCP_SB_BYTES - aes->key_len;
724 	ret = ccp_set_dm_area(&wa->key, dm_offset, aes->key, 0, aes->key_len);
725 	if (ret)
726 		goto e_key;
727 	ret = ccp_copy_to_sb(cmd_q, &wa->key, wa->op.jobid, wa->op.sb_key,
728 			     CCP_PASSTHRU_BYTESWAP_256BIT);
729 	if (ret) {
730 		cmd->engine_error = cmd_q->cmd_error;
731 		goto e_key;
732 	}
733 
734 	/* Copy the context (IV) to the LSB.
735 	 * There is an assumption here that the IV is 96 bits in length, plus
736 	 * a nonce of 32 bits. If no IV is present, use a zeroed buffer.
737 	 */
738 	ret = ccp_init_dm_workarea(&wa->ctx, cmd_q,
739 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
740 				   DMA_BIDIRECTIONAL);
741 	if (ret)
742 		goto e_key;
743 
744 	dm_offset = CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES - aes->iv_len;
745 	ret = ccp_set_dm_area(&wa->ctx, dm_offset, aes->iv, 0, aes->iv_len);
746 	if (ret)
747 		goto e_ctx;
748 
749 	ret = ccp_copy_to_sb(cmd_q, &wa->ctx, wa->op.jobid, wa->op.sb_ctx,
750 			     CCP_PASSTHRU_BYTESWAP_256BIT);
751 	if (ret) {
752 		cmd->engine_error = cmd_q->cmd_error;
753 		goto e_ctx;
754 	}
755 
756 	wa->op.init = 1;
757 	if (aes->aad_len > 0) {
758 		/* Step 1: Run a GHASH over the Additional Authenticated Data */
759 		ret = ccp_init_data(&wa->aad, cmd_q, p_aad, aes->aad_len,
760 				    AES_BLOCK_SIZE,
761 				    DMA_TO_DEVICE);
762 		if (ret)
763 			goto e_ctx;
764 
765 		wa->op.u.aes.mode = CCP_AES_MODE_GHASH;
766 		wa->op.u.aes.action = CCP_AES_GHASHAAD;
767 
768 		while (wa->aad.sg_wa.bytes_left) {
769 			ccp_prepare_data(&wa->aad, NULL, &wa->op, AES_BLOCK_SIZE, true);
770 
771 			ret = cmd_q->ccp->vdata->perform->aes(&wa->op);
772 			if (ret) {
773 				cmd->engine_error = cmd_q->cmd_error;
774 				goto e_aad;
775 			}
776 
777 			ccp_process_data(&wa->aad, NULL, &wa->op);
778 			wa->op.init = 0;
779 		}
780 	}
781 
782 	wa->op.u.aes.mode = CCP_AES_MODE_GCTR;
783 	wa->op.u.aes.action = aes->action;
784 
785 	if (ilen > 0) {
786 		/* Step 2: Run a GCTR over the plaintext */
787 		in_place = (sg_virt(p_inp) == sg_virt(p_outp)) ? true : false;
788 
789 		ret = ccp_init_data(&wa->src, cmd_q, p_inp, ilen,
790 				    AES_BLOCK_SIZE,
791 				    in_place ? DMA_BIDIRECTIONAL
792 					     : DMA_TO_DEVICE);
793 		if (ret)
794 			goto e_aad;
795 
796 		if (in_place) {
797 			wa->dst = wa->src;
798 		} else {
799 			ret = ccp_init_data(&wa->dst, cmd_q, p_outp, ilen,
800 					    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
801 			if (ret)
802 				goto e_src;
803 		}
804 
805 		wa->op.soc = 0;
806 		wa->op.eom = 0;
807 		wa->op.init = 1;
808 		while (wa->src.sg_wa.bytes_left) {
809 			ccp_prepare_data(&wa->src, &wa->dst, &wa->op, AES_BLOCK_SIZE, true);
810 			if (!wa->src.sg_wa.bytes_left) {
811 				unsigned int nbytes = ilen % AES_BLOCK_SIZE;
812 
813 				if (nbytes) {
814 					wa->op.eom = 1;
815 					wa->op.u.aes.size = (nbytes * 8) - 1;
816 				}
817 			}
818 
819 			ret = cmd_q->ccp->vdata->perform->aes(&wa->op);
820 			if (ret) {
821 				cmd->engine_error = cmd_q->cmd_error;
822 				goto e_dst;
823 			}
824 
825 			ccp_process_data(&wa->src, &wa->dst, &wa->op);
826 			wa->op.init = 0;
827 		}
828 	}
829 
830 	/* Step 3: Update the IV portion of the context with the original IV */
831 	ret = ccp_copy_from_sb(cmd_q, &wa->ctx, wa->op.jobid, wa->op.sb_ctx,
832 			       CCP_PASSTHRU_BYTESWAP_256BIT);
833 	if (ret) {
834 		cmd->engine_error = cmd_q->cmd_error;
835 		goto e_dst;
836 	}
837 
838 	ret = ccp_set_dm_area(&wa->ctx, dm_offset, aes->iv, 0, aes->iv_len);
839 	if (ret)
840 		goto e_dst;
841 
842 	ret = ccp_copy_to_sb(cmd_q, &wa->ctx, wa->op.jobid, wa->op.sb_ctx,
843 			     CCP_PASSTHRU_BYTESWAP_256BIT);
844 	if (ret) {
845 		cmd->engine_error = cmd_q->cmd_error;
846 		goto e_dst;
847 	}
848 
849 	/* Step 4: Concatenate the lengths of the AAD and source, and
850 	 * hash that 16 byte buffer.
851 	 */
852 	ret = ccp_init_dm_workarea(&wa->final, cmd_q, AES_BLOCK_SIZE,
853 				   DMA_BIDIRECTIONAL);
854 	if (ret)
855 		goto e_dst;
856 	final = (__be64 *)wa->final.address;
857 	final[0] = cpu_to_be64(aes->aad_len * 8);
858 	final[1] = cpu_to_be64(ilen * 8);
859 
860 	memset(&wa->op, 0, sizeof(wa->op));
861 	wa->op.cmd_q = cmd_q;
862 	wa->op.jobid = jobid;
863 	wa->op.sb_key = cmd_q->sb_key; /* Pre-allocated */
864 	wa->op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
865 	wa->op.init = 1;
866 	wa->op.u.aes.type = aes->type;
867 	wa->op.u.aes.mode = CCP_AES_MODE_GHASH;
868 	wa->op.u.aes.action = CCP_AES_GHASHFINAL;
869 	wa->op.src.type = CCP_MEMTYPE_SYSTEM;
870 	wa->op.src.u.dma.address = wa->final.dma.address;
871 	wa->op.src.u.dma.length = AES_BLOCK_SIZE;
872 	wa->op.dst.type = CCP_MEMTYPE_SYSTEM;
873 	wa->op.dst.u.dma.address = wa->final.dma.address;
874 	wa->op.dst.u.dma.length = AES_BLOCK_SIZE;
875 	wa->op.eom = 1;
876 	wa->op.u.aes.size = 0;
877 	ret = cmd_q->ccp->vdata->perform->aes(&wa->op);
878 	if (ret)
879 		goto e_final_wa;
880 
881 	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
882 		/* Put the ciphered tag after the ciphertext. */
883 		ccp_get_dm_area(&wa->final, 0, p_tag, 0, authsize);
884 	} else {
885 		/* Does this ciphered tag match the input? */
886 		ret = ccp_init_dm_workarea(&wa->tag, cmd_q, authsize,
887 					   DMA_BIDIRECTIONAL);
888 		if (ret)
889 			goto e_final_wa;
890 		ret = ccp_set_dm_area(&wa->tag, 0, p_tag, 0, authsize);
891 		if (ret) {
892 			ccp_dm_free(&wa->tag);
893 			goto e_final_wa;
894 		}
895 
896 		ret = crypto_memneq(wa->tag.address, wa->final.address,
897 				    authsize) ? -EBADMSG : 0;
898 		ccp_dm_free(&wa->tag);
899 	}
900 
901 e_final_wa:
902 	ccp_dm_free(&wa->final);
903 
904 e_dst:
905 	if (ilen > 0 && !in_place)
906 		ccp_free_data(&wa->dst, cmd_q);
907 
908 e_src:
909 	if (ilen > 0)
910 		ccp_free_data(&wa->src, cmd_q);
911 
912 e_aad:
913 	if (aes->aad_len)
914 		ccp_free_data(&wa->aad, cmd_q);
915 
916 e_ctx:
917 	ccp_dm_free(&wa->ctx);
918 
919 e_key:
920 	ccp_dm_free(&wa->key);
921 
922 	return ret;
923 }
924 
925 static noinline_for_stack int
926 ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
927 {
928 	struct ccp_aes_engine *aes = &cmd->u.aes;
929 	struct ccp_dm_workarea key, ctx;
930 	struct ccp_data src, dst;
931 	struct ccp_op op;
932 	unsigned int dm_offset;
933 	bool in_place = false;
934 	int ret;
935 
936 	if (!((aes->key_len == AES_KEYSIZE_128) ||
937 	      (aes->key_len == AES_KEYSIZE_192) ||
938 	      (aes->key_len == AES_KEYSIZE_256)))
939 		return -EINVAL;
940 
941 	if (((aes->mode == CCP_AES_MODE_ECB) ||
942 	     (aes->mode == CCP_AES_MODE_CBC)) &&
943 	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
944 		return -EINVAL;
945 
946 	if (!aes->key || !aes->src || !aes->dst)
947 		return -EINVAL;
948 
949 	if (aes->mode != CCP_AES_MODE_ECB) {
950 		if (aes->iv_len != AES_BLOCK_SIZE)
951 			return -EINVAL;
952 
953 		if (!aes->iv)
954 			return -EINVAL;
955 	}
956 
957 	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
958 	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
959 
960 	ret = -EIO;
961 	memset(&op, 0, sizeof(op));
962 	op.cmd_q = cmd_q;
963 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
964 	op.sb_key = cmd_q->sb_key;
965 	op.sb_ctx = cmd_q->sb_ctx;
966 	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
967 	op.u.aes.type = aes->type;
968 	op.u.aes.mode = aes->mode;
969 	op.u.aes.action = aes->action;
970 
971 	/* All supported key sizes fit in a single (32-byte) SB entry
972 	 * and must be in little endian format. Use the 256-bit byte
973 	 * swap passthru option to convert from big endian to little
974 	 * endian.
975 	 */
976 	ret = ccp_init_dm_workarea(&key, cmd_q,
977 				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
978 				   DMA_TO_DEVICE);
979 	if (ret)
980 		return ret;
981 
982 	dm_offset = CCP_SB_BYTES - aes->key_len;
983 	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
984 	if (ret)
985 		goto e_key;
986 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
987 			     CCP_PASSTHRU_BYTESWAP_256BIT);
988 	if (ret) {
989 		cmd->engine_error = cmd_q->cmd_error;
990 		goto e_key;
991 	}
992 
993 	/* The AES context fits in a single (32-byte) SB entry and
994 	 * must be in little endian format. Use the 256-bit byte swap
995 	 * passthru option to convert from big endian to little endian.
996 	 */
997 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
998 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
999 				   DMA_BIDIRECTIONAL);
1000 	if (ret)
1001 		goto e_key;
1002 
1003 	if (aes->mode != CCP_AES_MODE_ECB) {
1004 		/* Load the AES context - convert to LE */
1005 		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
1006 		ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1007 		if (ret)
1008 			goto e_ctx;
1009 		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1010 				     CCP_PASSTHRU_BYTESWAP_256BIT);
1011 		if (ret) {
1012 			cmd->engine_error = cmd_q->cmd_error;
1013 			goto e_ctx;
1014 		}
1015 	}
1016 	switch (aes->mode) {
1017 	case CCP_AES_MODE_CFB: /* CFB128 only */
1018 	case CCP_AES_MODE_CTR:
1019 		op.u.aes.size = AES_BLOCK_SIZE * BITS_PER_BYTE - 1;
1020 		break;
1021 	default:
1022 		op.u.aes.size = 0;
1023 	}
1024 
1025 	/* Prepare the input and output data workareas. For in-place
1026 	 * operations we need to set the dma direction to BIDIRECTIONAL
1027 	 * and copy the src workarea to the dst workarea.
1028 	 */
1029 	if (sg_virt(aes->src) == sg_virt(aes->dst))
1030 		in_place = true;
1031 
1032 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
1033 			    AES_BLOCK_SIZE,
1034 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1035 	if (ret)
1036 		goto e_ctx;
1037 
1038 	if (in_place) {
1039 		dst = src;
1040 	} else {
1041 		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
1042 				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
1043 		if (ret)
1044 			goto e_src;
1045 	}
1046 
1047 	/* Send data to the CCP AES engine */
1048 	while (src.sg_wa.bytes_left) {
1049 		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
1050 		if (!src.sg_wa.bytes_left) {
1051 			op.eom = 1;
1052 
1053 			/* Since we don't retrieve the AES context in ECB
1054 			 * mode we have to wait for the operation to complete
1055 			 * on the last piece of data
1056 			 */
1057 			if (aes->mode == CCP_AES_MODE_ECB)
1058 				op.soc = 1;
1059 		}
1060 
1061 		ret = cmd_q->ccp->vdata->perform->aes(&op);
1062 		if (ret) {
1063 			cmd->engine_error = cmd_q->cmd_error;
1064 			goto e_dst;
1065 		}
1066 
1067 		ccp_process_data(&src, &dst, &op);
1068 	}
1069 
1070 	if (aes->mode != CCP_AES_MODE_ECB) {
1071 		/* Retrieve the AES context - convert from LE to BE using
1072 		 * 32-byte (256-bit) byteswapping
1073 		 */
1074 		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1075 				       CCP_PASSTHRU_BYTESWAP_256BIT);
1076 		if (ret) {
1077 			cmd->engine_error = cmd_q->cmd_error;
1078 			goto e_dst;
1079 		}
1080 
1081 		/* ...but we only need AES_BLOCK_SIZE bytes */
1082 		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
1083 		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1084 	}
1085 
1086 e_dst:
1087 	if (!in_place)
1088 		ccp_free_data(&dst, cmd_q);
1089 
1090 e_src:
1091 	ccp_free_data(&src, cmd_q);
1092 
1093 e_ctx:
1094 	ccp_dm_free(&ctx);
1095 
1096 e_key:
1097 	ccp_dm_free(&key);
1098 
1099 	return ret;
1100 }
1101 
1102 static noinline_for_stack int
1103 ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1104 {
1105 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1106 	struct ccp_dm_workarea key, ctx;
1107 	struct ccp_data src, dst;
1108 	struct ccp_op op;
1109 	unsigned int unit_size, dm_offset;
1110 	bool in_place = false;
1111 	unsigned int sb_count;
1112 	enum ccp_aes_type aestype;
1113 	int ret;
1114 
1115 	switch (xts->unit_size) {
1116 	case CCP_XTS_AES_UNIT_SIZE_16:
1117 		unit_size = 16;
1118 		break;
1119 	case CCP_XTS_AES_UNIT_SIZE_512:
1120 		unit_size = 512;
1121 		break;
1122 	case CCP_XTS_AES_UNIT_SIZE_1024:
1123 		unit_size = 1024;
1124 		break;
1125 	case CCP_XTS_AES_UNIT_SIZE_2048:
1126 		unit_size = 2048;
1127 		break;
1128 	case CCP_XTS_AES_UNIT_SIZE_4096:
1129 		unit_size = 4096;
1130 		break;
1131 
1132 	default:
1133 		return -EINVAL;
1134 	}
1135 
1136 	if (xts->key_len == AES_KEYSIZE_128)
1137 		aestype = CCP_AES_TYPE_128;
1138 	else if (xts->key_len == AES_KEYSIZE_256)
1139 		aestype = CCP_AES_TYPE_256;
1140 	else
1141 		return -EINVAL;
1142 
1143 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1144 		return -EINVAL;
1145 
1146 	if (xts->iv_len != AES_BLOCK_SIZE)
1147 		return -EINVAL;
1148 
1149 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1150 		return -EINVAL;
1151 
1152 	BUILD_BUG_ON(CCP_XTS_AES_KEY_SB_COUNT != 1);
1153 	BUILD_BUG_ON(CCP_XTS_AES_CTX_SB_COUNT != 1);
1154 
1155 	ret = -EIO;
1156 	memset(&op, 0, sizeof(op));
1157 	op.cmd_q = cmd_q;
1158 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1159 	op.sb_key = cmd_q->sb_key;
1160 	op.sb_ctx = cmd_q->sb_ctx;
1161 	op.init = 1;
1162 	op.u.xts.type = aestype;
1163 	op.u.xts.action = xts->action;
1164 	op.u.xts.unit_size = xts->unit_size;
1165 
1166 	/* A version 3 device only supports 128-bit keys, which fits into a
1167 	 * single SB entry. A version 5 device uses a 512-bit vector, so two
1168 	 * SB entries.
1169 	 */
1170 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
1171 		sb_count = CCP_XTS_AES_KEY_SB_COUNT;
1172 	else
1173 		sb_count = CCP5_XTS_AES_KEY_SB_COUNT;
1174 	ret = ccp_init_dm_workarea(&key, cmd_q,
1175 				   sb_count * CCP_SB_BYTES,
1176 				   DMA_TO_DEVICE);
1177 	if (ret)
1178 		return ret;
1179 
1180 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
1181 		/* All supported key sizes must be in little endian format.
1182 		 * Use the 256-bit byte swap passthru option to convert from
1183 		 * big endian to little endian.
1184 		 */
1185 		dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
1186 		ret = ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1187 		if (ret)
1188 			goto e_key;
1189 		ret = ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len);
1190 		if (ret)
1191 			goto e_key;
1192 	} else {
1193 		/* Version 5 CCPs use a 512-bit space for the key: each portion
1194 		 * occupies 256 bits, or one entire slot, and is zero-padded.
1195 		 */
1196 		unsigned int pad;
1197 
1198 		dm_offset = CCP_SB_BYTES;
1199 		pad = dm_offset - xts->key_len;
1200 		ret = ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len);
1201 		if (ret)
1202 			goto e_key;
1203 		ret = ccp_set_dm_area(&key, dm_offset + pad, xts->key,
1204 				      xts->key_len, xts->key_len);
1205 		if (ret)
1206 			goto e_key;
1207 	}
1208 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
1209 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1210 	if (ret) {
1211 		cmd->engine_error = cmd_q->cmd_error;
1212 		goto e_key;
1213 	}
1214 
1215 	/* The AES context fits in a single (32-byte) SB entry and
1216 	 * for XTS is already in little endian format so no byte swapping
1217 	 * is needed.
1218 	 */
1219 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1220 				   CCP_XTS_AES_CTX_SB_COUNT * CCP_SB_BYTES,
1221 				   DMA_BIDIRECTIONAL);
1222 	if (ret)
1223 		goto e_key;
1224 
1225 	ret = ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1226 	if (ret)
1227 		goto e_ctx;
1228 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1229 			     CCP_PASSTHRU_BYTESWAP_NOOP);
1230 	if (ret) {
1231 		cmd->engine_error = cmd_q->cmd_error;
1232 		goto e_ctx;
1233 	}
1234 
1235 	/* Prepare the input and output data workareas. For in-place
1236 	 * operations we need to set the dma direction to BIDIRECTIONAL
1237 	 * and copy the src workarea to the dst workarea.
1238 	 */
1239 	if (sg_virt(xts->src) == sg_virt(xts->dst))
1240 		in_place = true;
1241 
1242 	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1243 			    unit_size,
1244 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1245 	if (ret)
1246 		goto e_ctx;
1247 
1248 	if (in_place) {
1249 		dst = src;
1250 	} else {
1251 		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1252 				    unit_size, DMA_FROM_DEVICE);
1253 		if (ret)
1254 			goto e_src;
1255 	}
1256 
1257 	/* Send data to the CCP AES engine */
1258 	while (src.sg_wa.bytes_left) {
1259 		ccp_prepare_data(&src, &dst, &op, unit_size, true);
1260 		if (!src.sg_wa.bytes_left)
1261 			op.eom = 1;
1262 
1263 		ret = cmd_q->ccp->vdata->perform->xts_aes(&op);
1264 		if (ret) {
1265 			cmd->engine_error = cmd_q->cmd_error;
1266 			goto e_dst;
1267 		}
1268 
1269 		ccp_process_data(&src, &dst, &op);
1270 	}
1271 
1272 	/* Retrieve the AES context - convert from LE to BE using
1273 	 * 32-byte (256-bit) byteswapping
1274 	 */
1275 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1276 			       CCP_PASSTHRU_BYTESWAP_256BIT);
1277 	if (ret) {
1278 		cmd->engine_error = cmd_q->cmd_error;
1279 		goto e_dst;
1280 	}
1281 
1282 	/* ...but we only need AES_BLOCK_SIZE bytes */
1283 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
1284 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1285 
1286 e_dst:
1287 	if (!in_place)
1288 		ccp_free_data(&dst, cmd_q);
1289 
1290 e_src:
1291 	ccp_free_data(&src, cmd_q);
1292 
1293 e_ctx:
1294 	ccp_dm_free(&ctx);
1295 
1296 e_key:
1297 	ccp_dm_free(&key);
1298 
1299 	return ret;
1300 }
1301 
1302 static noinline_for_stack int
1303 ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1304 {
1305 	struct ccp_des3_engine *des3 = &cmd->u.des3;
1306 
1307 	struct ccp_dm_workarea key, ctx;
1308 	struct ccp_data src, dst;
1309 	struct ccp_op op;
1310 	unsigned int dm_offset;
1311 	unsigned int len_singlekey;
1312 	bool in_place = false;
1313 	int ret;
1314 
1315 	/* Error checks */
1316 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0))
1317 		return -EINVAL;
1318 
1319 	if (!cmd_q->ccp->vdata->perform->des3)
1320 		return -EINVAL;
1321 
1322 	if (des3->key_len != DES3_EDE_KEY_SIZE)
1323 		return -EINVAL;
1324 
1325 	if (((des3->mode == CCP_DES3_MODE_ECB) ||
1326 		(des3->mode == CCP_DES3_MODE_CBC)) &&
1327 		(des3->src_len & (DES3_EDE_BLOCK_SIZE - 1)))
1328 		return -EINVAL;
1329 
1330 	if (!des3->key || !des3->src || !des3->dst)
1331 		return -EINVAL;
1332 
1333 	if (des3->mode != CCP_DES3_MODE_ECB) {
1334 		if (des3->iv_len != DES3_EDE_BLOCK_SIZE)
1335 			return -EINVAL;
1336 
1337 		if (!des3->iv)
1338 			return -EINVAL;
1339 	}
1340 
1341 	/* Zero out all the fields of the command desc */
1342 	memset(&op, 0, sizeof(op));
1343 
1344 	/* Set up the Function field */
1345 	op.cmd_q = cmd_q;
1346 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1347 	op.sb_key = cmd_q->sb_key;
1348 
1349 	op.init = (des3->mode == CCP_DES3_MODE_ECB) ? 0 : 1;
1350 	op.u.des3.type = des3->type;
1351 	op.u.des3.mode = des3->mode;
1352 	op.u.des3.action = des3->action;
1353 
1354 	/*
1355 	 * All supported key sizes fit in a single (32-byte) KSB entry and
1356 	 * (like AES) must be in little endian format. Use the 256-bit byte
1357 	 * swap passthru option to convert from big endian to little endian.
1358 	 */
1359 	ret = ccp_init_dm_workarea(&key, cmd_q,
1360 				   CCP_DES3_KEY_SB_COUNT * CCP_SB_BYTES,
1361 				   DMA_TO_DEVICE);
1362 	if (ret)
1363 		return ret;
1364 
1365 	/*
1366 	 * The contents of the key triplet are in the reverse order of what
1367 	 * is required by the engine. Copy the 3 pieces individually to put
1368 	 * them where they belong.
1369 	 */
1370 	dm_offset = CCP_SB_BYTES - des3->key_len; /* Basic offset */
1371 
1372 	len_singlekey = des3->key_len / 3;
1373 	ret = ccp_set_dm_area(&key, dm_offset + 2 * len_singlekey,
1374 			      des3->key, 0, len_singlekey);
1375 	if (ret)
1376 		goto e_key;
1377 	ret = ccp_set_dm_area(&key, dm_offset + len_singlekey,
1378 			      des3->key, len_singlekey, len_singlekey);
1379 	if (ret)
1380 		goto e_key;
1381 	ret = ccp_set_dm_area(&key, dm_offset,
1382 			      des3->key, 2 * len_singlekey, len_singlekey);
1383 	if (ret)
1384 		goto e_key;
1385 
1386 	/* Copy the key to the SB */
1387 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
1388 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1389 	if (ret) {
1390 		cmd->engine_error = cmd_q->cmd_error;
1391 		goto e_key;
1392 	}
1393 
1394 	/*
1395 	 * The DES3 context fits in a single (32-byte) KSB entry and
1396 	 * must be in little endian format. Use the 256-bit byte swap
1397 	 * passthru option to convert from big endian to little endian.
1398 	 */
1399 	if (des3->mode != CCP_DES3_MODE_ECB) {
1400 		op.sb_ctx = cmd_q->sb_ctx;
1401 
1402 		ret = ccp_init_dm_workarea(&ctx, cmd_q,
1403 					   CCP_DES3_CTX_SB_COUNT * CCP_SB_BYTES,
1404 					   DMA_BIDIRECTIONAL);
1405 		if (ret)
1406 			goto e_key;
1407 
1408 		/* Load the context into the LSB */
1409 		dm_offset = CCP_SB_BYTES - des3->iv_len;
1410 		ret = ccp_set_dm_area(&ctx, dm_offset, des3->iv, 0,
1411 				      des3->iv_len);
1412 		if (ret)
1413 			goto e_ctx;
1414 
1415 		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1416 				     CCP_PASSTHRU_BYTESWAP_256BIT);
1417 		if (ret) {
1418 			cmd->engine_error = cmd_q->cmd_error;
1419 			goto e_ctx;
1420 		}
1421 	}
1422 
1423 	/*
1424 	 * Prepare the input and output data workareas. For in-place
1425 	 * operations we need to set the dma direction to BIDIRECTIONAL
1426 	 * and copy the src workarea to the dst workarea.
1427 	 */
1428 	if (sg_virt(des3->src) == sg_virt(des3->dst))
1429 		in_place = true;
1430 
1431 	ret = ccp_init_data(&src, cmd_q, des3->src, des3->src_len,
1432 			DES3_EDE_BLOCK_SIZE,
1433 			in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1434 	if (ret)
1435 		goto e_ctx;
1436 
1437 	if (in_place)
1438 		dst = src;
1439 	else {
1440 		ret = ccp_init_data(&dst, cmd_q, des3->dst, des3->src_len,
1441 				DES3_EDE_BLOCK_SIZE, DMA_FROM_DEVICE);
1442 		if (ret)
1443 			goto e_src;
1444 	}
1445 
1446 	/* Send data to the CCP DES3 engine */
1447 	while (src.sg_wa.bytes_left) {
1448 		ccp_prepare_data(&src, &dst, &op, DES3_EDE_BLOCK_SIZE, true);
1449 		if (!src.sg_wa.bytes_left) {
1450 			op.eom = 1;
1451 
1452 			/* Since we don't retrieve the context in ECB mode
1453 			 * we have to wait for the operation to complete
1454 			 * on the last piece of data
1455 			 */
1456 			op.soc = 0;
1457 		}
1458 
1459 		ret = cmd_q->ccp->vdata->perform->des3(&op);
1460 		if (ret) {
1461 			cmd->engine_error = cmd_q->cmd_error;
1462 			goto e_dst;
1463 		}
1464 
1465 		ccp_process_data(&src, &dst, &op);
1466 	}
1467 
1468 	if (des3->mode != CCP_DES3_MODE_ECB) {
1469 		/* Retrieve the context and make BE */
1470 		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1471 				       CCP_PASSTHRU_BYTESWAP_256BIT);
1472 		if (ret) {
1473 			cmd->engine_error = cmd_q->cmd_error;
1474 			goto e_dst;
1475 		}
1476 
1477 		/* ...but we only need the last DES3_EDE_BLOCK_SIZE bytes */
1478 		ccp_get_dm_area(&ctx, dm_offset, des3->iv, 0,
1479 				DES3_EDE_BLOCK_SIZE);
1480 	}
1481 e_dst:
1482 	if (!in_place)
1483 		ccp_free_data(&dst, cmd_q);
1484 
1485 e_src:
1486 	ccp_free_data(&src, cmd_q);
1487 
1488 e_ctx:
1489 	if (des3->mode != CCP_DES3_MODE_ECB)
1490 		ccp_dm_free(&ctx);
1491 
1492 e_key:
1493 	ccp_dm_free(&key);
1494 
1495 	return ret;
1496 }
1497 
1498 static noinline_for_stack int
1499 ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1500 {
1501 	struct ccp_sha_engine *sha = &cmd->u.sha;
1502 	struct ccp_dm_workarea ctx;
1503 	struct ccp_data src;
1504 	struct ccp_op op;
1505 	unsigned int ioffset, ooffset;
1506 	unsigned int digest_size;
1507 	int sb_count;
1508 	const void *init;
1509 	u64 block_size;
1510 	int ctx_size;
1511 	int ret;
1512 
1513 	switch (sha->type) {
1514 	case CCP_SHA_TYPE_1:
1515 		if (sha->ctx_len < SHA1_DIGEST_SIZE)
1516 			return -EINVAL;
1517 		block_size = SHA1_BLOCK_SIZE;
1518 		break;
1519 	case CCP_SHA_TYPE_224:
1520 		if (sha->ctx_len < SHA224_DIGEST_SIZE)
1521 			return -EINVAL;
1522 		block_size = SHA224_BLOCK_SIZE;
1523 		break;
1524 	case CCP_SHA_TYPE_256:
1525 		if (sha->ctx_len < SHA256_DIGEST_SIZE)
1526 			return -EINVAL;
1527 		block_size = SHA256_BLOCK_SIZE;
1528 		break;
1529 	case CCP_SHA_TYPE_384:
1530 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
1531 		    || sha->ctx_len < SHA384_DIGEST_SIZE)
1532 			return -EINVAL;
1533 		block_size = SHA384_BLOCK_SIZE;
1534 		break;
1535 	case CCP_SHA_TYPE_512:
1536 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
1537 		    || sha->ctx_len < SHA512_DIGEST_SIZE)
1538 			return -EINVAL;
1539 		block_size = SHA512_BLOCK_SIZE;
1540 		break;
1541 	default:
1542 		return -EINVAL;
1543 	}
1544 
1545 	if (!sha->ctx)
1546 		return -EINVAL;
1547 
1548 	if (!sha->final && (sha->src_len & (block_size - 1)))
1549 		return -EINVAL;
1550 
1551 	/* The version 3 device can't handle zero-length input */
1552 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
1553 
1554 		if (!sha->src_len) {
1555 			unsigned int digest_len;
1556 			const u8 *sha_zero;
1557 
1558 			/* Not final, just return */
1559 			if (!sha->final)
1560 				return 0;
1561 
1562 			/* CCP can't do a zero length sha operation so the
1563 			 * caller must buffer the data.
1564 			 */
1565 			if (sha->msg_bits)
1566 				return -EINVAL;
1567 
1568 			/* The CCP cannot perform zero-length sha operations
1569 			 * so the caller is required to buffer data for the
1570 			 * final operation. However, a sha operation for a
1571 			 * message with a total length of zero is valid so
1572 			 * known values are required to supply the result.
1573 			 */
1574 			switch (sha->type) {
1575 			case CCP_SHA_TYPE_1:
1576 				sha_zero = sha1_zero_message_hash;
1577 				digest_len = SHA1_DIGEST_SIZE;
1578 				break;
1579 			case CCP_SHA_TYPE_224:
1580 				sha_zero = sha224_zero_message_hash;
1581 				digest_len = SHA224_DIGEST_SIZE;
1582 				break;
1583 			case CCP_SHA_TYPE_256:
1584 				sha_zero = sha256_zero_message_hash;
1585 				digest_len = SHA256_DIGEST_SIZE;
1586 				break;
1587 			default:
1588 				return -EINVAL;
1589 			}
1590 
1591 			scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1592 						 digest_len, 1);
1593 
1594 			return 0;
1595 		}
1596 	}
1597 
1598 	/* Set variables used throughout */
1599 	switch (sha->type) {
1600 	case CCP_SHA_TYPE_1:
1601 		digest_size = SHA1_DIGEST_SIZE;
1602 		init = (void *) ccp_sha1_init;
1603 		ctx_size = SHA1_DIGEST_SIZE;
1604 		sb_count = 1;
1605 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1606 			ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE;
1607 		else
1608 			ooffset = ioffset = 0;
1609 		break;
1610 	case CCP_SHA_TYPE_224:
1611 		digest_size = SHA224_DIGEST_SIZE;
1612 		init = (void *) ccp_sha224_init;
1613 		ctx_size = SHA256_DIGEST_SIZE;
1614 		sb_count = 1;
1615 		ioffset = 0;
1616 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1617 			ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE;
1618 		else
1619 			ooffset = 0;
1620 		break;
1621 	case CCP_SHA_TYPE_256:
1622 		digest_size = SHA256_DIGEST_SIZE;
1623 		init = (void *) ccp_sha256_init;
1624 		ctx_size = SHA256_DIGEST_SIZE;
1625 		sb_count = 1;
1626 		ooffset = ioffset = 0;
1627 		break;
1628 	case CCP_SHA_TYPE_384:
1629 		digest_size = SHA384_DIGEST_SIZE;
1630 		init = (void *) ccp_sha384_init;
1631 		ctx_size = SHA512_DIGEST_SIZE;
1632 		sb_count = 2;
1633 		ioffset = 0;
1634 		ooffset = 2 * CCP_SB_BYTES - SHA384_DIGEST_SIZE;
1635 		break;
1636 	case CCP_SHA_TYPE_512:
1637 		digest_size = SHA512_DIGEST_SIZE;
1638 		init = (void *) ccp_sha512_init;
1639 		ctx_size = SHA512_DIGEST_SIZE;
1640 		sb_count = 2;
1641 		ooffset = ioffset = 0;
1642 		break;
1643 	default:
1644 		ret = -EINVAL;
1645 		goto e_data;
1646 	}
1647 
1648 	/* For zero-length plaintext the src pointer is ignored;
1649 	 * otherwise both parts must be valid
1650 	 */
1651 	if (sha->src_len && !sha->src)
1652 		return -EINVAL;
1653 
1654 	memset(&op, 0, sizeof(op));
1655 	op.cmd_q = cmd_q;
1656 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1657 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
1658 	op.u.sha.type = sha->type;
1659 	op.u.sha.msg_bits = sha->msg_bits;
1660 
1661 	/* For SHA1/224/256 the context fits in a single (32-byte) SB entry;
1662 	 * SHA384/512 require 2 adjacent SB slots, with the right half in the
1663 	 * first slot, and the left half in the second. Each portion must then
1664 	 * be in little endian format: use the 256-bit byte swap option.
1665 	 */
1666 	ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES,
1667 				   DMA_BIDIRECTIONAL);
1668 	if (ret)
1669 		return ret;
1670 	if (sha->first) {
1671 		switch (sha->type) {
1672 		case CCP_SHA_TYPE_1:
1673 		case CCP_SHA_TYPE_224:
1674 		case CCP_SHA_TYPE_256:
1675 			memcpy(ctx.address + ioffset, init, ctx_size);
1676 			break;
1677 		case CCP_SHA_TYPE_384:
1678 		case CCP_SHA_TYPE_512:
1679 			memcpy(ctx.address + ctx_size / 2, init,
1680 			       ctx_size / 2);
1681 			memcpy(ctx.address, init + ctx_size / 2,
1682 			       ctx_size / 2);
1683 			break;
1684 		default:
1685 			ret = -EINVAL;
1686 			goto e_ctx;
1687 		}
1688 	} else {
1689 		/* Restore the context */
1690 		ret = ccp_set_dm_area(&ctx, 0, sha->ctx, 0,
1691 				      sb_count * CCP_SB_BYTES);
1692 		if (ret)
1693 			goto e_ctx;
1694 	}
1695 
1696 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1697 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1698 	if (ret) {
1699 		cmd->engine_error = cmd_q->cmd_error;
1700 		goto e_ctx;
1701 	}
1702 
1703 	if (sha->src) {
1704 		/* Send data to the CCP SHA engine; block_size is set above */
1705 		ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1706 				    block_size, DMA_TO_DEVICE);
1707 		if (ret)
1708 			goto e_ctx;
1709 
1710 		while (src.sg_wa.bytes_left) {
1711 			ccp_prepare_data(&src, NULL, &op, block_size, false);
1712 			if (sha->final && !src.sg_wa.bytes_left)
1713 				op.eom = 1;
1714 
1715 			ret = cmd_q->ccp->vdata->perform->sha(&op);
1716 			if (ret) {
1717 				cmd->engine_error = cmd_q->cmd_error;
1718 				goto e_data;
1719 			}
1720 
1721 			ccp_process_data(&src, NULL, &op);
1722 		}
1723 	} else {
1724 		op.eom = 1;
1725 		ret = cmd_q->ccp->vdata->perform->sha(&op);
1726 		if (ret) {
1727 			cmd->engine_error = cmd_q->cmd_error;
1728 			goto e_data;
1729 		}
1730 	}
1731 
1732 	/* Retrieve the SHA context - convert from LE to BE using
1733 	 * 32-byte (256-bit) byteswapping to BE
1734 	 */
1735 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1736 			       CCP_PASSTHRU_BYTESWAP_256BIT);
1737 	if (ret) {
1738 		cmd->engine_error = cmd_q->cmd_error;
1739 		goto e_data;
1740 	}
1741 
1742 	if (sha->final) {
1743 		/* Finishing up, so get the digest */
1744 		switch (sha->type) {
1745 		case CCP_SHA_TYPE_1:
1746 		case CCP_SHA_TYPE_224:
1747 		case CCP_SHA_TYPE_256:
1748 			ccp_get_dm_area(&ctx, ooffset,
1749 					sha->ctx, 0,
1750 					digest_size);
1751 			break;
1752 		case CCP_SHA_TYPE_384:
1753 		case CCP_SHA_TYPE_512:
1754 			ccp_get_dm_area(&ctx, 0,
1755 					sha->ctx, LSB_ITEM_SIZE - ooffset,
1756 					LSB_ITEM_SIZE);
1757 			ccp_get_dm_area(&ctx, LSB_ITEM_SIZE + ooffset,
1758 					sha->ctx, 0,
1759 					LSB_ITEM_SIZE - ooffset);
1760 			break;
1761 		default:
1762 			ret = -EINVAL;
1763 			goto e_data;
1764 		}
1765 	} else {
1766 		/* Stash the context */
1767 		ccp_get_dm_area(&ctx, 0, sha->ctx, 0,
1768 				sb_count * CCP_SB_BYTES);
1769 	}
1770 
1771 	if (sha->final && sha->opad) {
1772 		/* HMAC operation, recursively perform final SHA */
1773 		struct ccp_cmd hmac_cmd;
1774 		struct scatterlist sg;
1775 		u8 *hmac_buf;
1776 
1777 		if (sha->opad_len != block_size) {
1778 			ret = -EINVAL;
1779 			goto e_data;
1780 		}
1781 
1782 		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1783 		if (!hmac_buf) {
1784 			ret = -ENOMEM;
1785 			goto e_data;
1786 		}
1787 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1788 
1789 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1790 		switch (sha->type) {
1791 		case CCP_SHA_TYPE_1:
1792 		case CCP_SHA_TYPE_224:
1793 		case CCP_SHA_TYPE_256:
1794 			memcpy(hmac_buf + block_size,
1795 			       ctx.address + ooffset,
1796 			       digest_size);
1797 			break;
1798 		case CCP_SHA_TYPE_384:
1799 		case CCP_SHA_TYPE_512:
1800 			memcpy(hmac_buf + block_size,
1801 			       ctx.address + LSB_ITEM_SIZE + ooffset,
1802 			       LSB_ITEM_SIZE);
1803 			memcpy(hmac_buf + block_size +
1804 			       (LSB_ITEM_SIZE - ooffset),
1805 			       ctx.address,
1806 			       LSB_ITEM_SIZE);
1807 			break;
1808 		default:
1809 			kfree(hmac_buf);
1810 			ret = -EINVAL;
1811 			goto e_data;
1812 		}
1813 
1814 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1815 		hmac_cmd.engine = CCP_ENGINE_SHA;
1816 		hmac_cmd.u.sha.type = sha->type;
1817 		hmac_cmd.u.sha.ctx = sha->ctx;
1818 		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1819 		hmac_cmd.u.sha.src = &sg;
1820 		hmac_cmd.u.sha.src_len = block_size + digest_size;
1821 		hmac_cmd.u.sha.opad = NULL;
1822 		hmac_cmd.u.sha.opad_len = 0;
1823 		hmac_cmd.u.sha.first = 1;
1824 		hmac_cmd.u.sha.final = 1;
1825 		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1826 
1827 		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1828 		if (ret)
1829 			cmd->engine_error = hmac_cmd.engine_error;
1830 
1831 		kfree(hmac_buf);
1832 	}
1833 
1834 e_data:
1835 	if (sha->src)
1836 		ccp_free_data(&src, cmd_q);
1837 
1838 e_ctx:
1839 	ccp_dm_free(&ctx);
1840 
1841 	return ret;
1842 }
1843 
1844 static noinline_for_stack int
1845 ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1846 {
1847 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1848 	struct ccp_dm_workarea exp, src, dst;
1849 	struct ccp_op op;
1850 	unsigned int sb_count, i_len, o_len;
1851 	int ret;
1852 
1853 	/* Check against the maximum allowable size, in bits */
1854 	if (rsa->key_size > cmd_q->ccp->vdata->rsamax)
1855 		return -EINVAL;
1856 
1857 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1858 		return -EINVAL;
1859 
1860 	memset(&op, 0, sizeof(op));
1861 	op.cmd_q = cmd_q;
1862 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1863 
1864 	/* The RSA modulus must precede the message being acted upon, so
1865 	 * it must be copied to a DMA area where the message and the
1866 	 * modulus can be concatenated.  Therefore the input buffer
1867 	 * length required is twice the output buffer length (which
1868 	 * must be a multiple of 256-bits).  Compute o_len, i_len in bytes.
1869 	 * Buffer sizes must be a multiple of 32 bytes; rounding up may be
1870 	 * required.
1871 	 */
1872 	o_len = 32 * ((rsa->key_size + 255) / 256);
1873 	i_len = o_len * 2;
1874 
1875 	sb_count = 0;
1876 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
1877 		/* sb_count is the number of storage block slots required
1878 		 * for the modulus.
1879 		 */
1880 		sb_count = o_len / CCP_SB_BYTES;
1881 		op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q,
1882 								sb_count);
1883 		if (!op.sb_key)
1884 			return -EIO;
1885 	} else {
1886 		/* A version 5 device allows a modulus size that will not fit
1887 		 * in the LSB, so the command will transfer it from memory.
1888 		 * Set the sb key to the default, even though it's not used.
1889 		 */
1890 		op.sb_key = cmd_q->sb_key;
1891 	}
1892 
1893 	/* The RSA exponent must be in little endian format. Reverse its
1894 	 * byte order.
1895 	 */
1896 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1897 	if (ret)
1898 		goto e_sb;
1899 
1900 	ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len);
1901 	if (ret)
1902 		goto e_exp;
1903 
1904 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
1905 		/* Copy the exponent to the local storage block, using
1906 		 * as many 32-byte blocks as were allocated above. It's
1907 		 * already little endian, so no further change is required.
1908 		 */
1909 		ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
1910 				     CCP_PASSTHRU_BYTESWAP_NOOP);
1911 		if (ret) {
1912 			cmd->engine_error = cmd_q->cmd_error;
1913 			goto e_exp;
1914 		}
1915 	} else {
1916 		/* The exponent can be retrieved from memory via DMA. */
1917 		op.exp.u.dma.address = exp.dma.address;
1918 		op.exp.u.dma.offset = 0;
1919 	}
1920 
1921 	/* Concatenate the modulus and the message. Both the modulus and
1922 	 * the operands must be in little endian format.  Since the input
1923 	 * is in big endian format it must be converted.
1924 	 */
1925 	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1926 	if (ret)
1927 		goto e_exp;
1928 
1929 	ret = ccp_reverse_set_dm_area(&src, 0, rsa->mod, 0, rsa->mod_len);
1930 	if (ret)
1931 		goto e_src;
1932 	ret = ccp_reverse_set_dm_area(&src, o_len, rsa->src, 0, rsa->src_len);
1933 	if (ret)
1934 		goto e_src;
1935 
1936 	/* Prepare the output area for the operation */
1937 	ret = ccp_init_dm_workarea(&dst, cmd_q, o_len, DMA_FROM_DEVICE);
1938 	if (ret)
1939 		goto e_src;
1940 
1941 	op.soc = 1;
1942 	op.src.u.dma.address = src.dma.address;
1943 	op.src.u.dma.offset = 0;
1944 	op.src.u.dma.length = i_len;
1945 	op.dst.u.dma.address = dst.dma.address;
1946 	op.dst.u.dma.offset = 0;
1947 	op.dst.u.dma.length = o_len;
1948 
1949 	op.u.rsa.mod_size = rsa->key_size;
1950 	op.u.rsa.input_len = i_len;
1951 
1952 	ret = cmd_q->ccp->vdata->perform->rsa(&op);
1953 	if (ret) {
1954 		cmd->engine_error = cmd_q->cmd_error;
1955 		goto e_dst;
1956 	}
1957 
1958 	ccp_reverse_get_dm_area(&dst, 0, rsa->dst, 0, rsa->mod_len);
1959 
1960 e_dst:
1961 	ccp_dm_free(&dst);
1962 
1963 e_src:
1964 	ccp_dm_free(&src);
1965 
1966 e_exp:
1967 	ccp_dm_free(&exp);
1968 
1969 e_sb:
1970 	if (sb_count)
1971 		cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
1972 
1973 	return ret;
1974 }
1975 
1976 static noinline_for_stack int
1977 ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1978 {
1979 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1980 	struct ccp_dm_workarea mask;
1981 	struct ccp_data src, dst;
1982 	struct ccp_op op;
1983 	bool in_place = false;
1984 	unsigned int i;
1985 	int ret = 0;
1986 
1987 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1988 		return -EINVAL;
1989 
1990 	if (!pt->src || !pt->dst)
1991 		return -EINVAL;
1992 
1993 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1994 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1995 			return -EINVAL;
1996 		if (!pt->mask)
1997 			return -EINVAL;
1998 	}
1999 
2000 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
2001 
2002 	memset(&op, 0, sizeof(op));
2003 	op.cmd_q = cmd_q;
2004 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2005 
2006 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
2007 		/* Load the mask */
2008 		op.sb_key = cmd_q->sb_key;
2009 
2010 		ret = ccp_init_dm_workarea(&mask, cmd_q,
2011 					   CCP_PASSTHRU_SB_COUNT *
2012 					   CCP_SB_BYTES,
2013 					   DMA_TO_DEVICE);
2014 		if (ret)
2015 			return ret;
2016 
2017 		ret = ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
2018 		if (ret)
2019 			goto e_mask;
2020 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
2021 				     CCP_PASSTHRU_BYTESWAP_NOOP);
2022 		if (ret) {
2023 			cmd->engine_error = cmd_q->cmd_error;
2024 			goto e_mask;
2025 		}
2026 	}
2027 
2028 	/* Prepare the input and output data workareas. For in-place
2029 	 * operations we need to set the dma direction to BIDIRECTIONAL
2030 	 * and copy the src workarea to the dst workarea.
2031 	 */
2032 	if (sg_virt(pt->src) == sg_virt(pt->dst))
2033 		in_place = true;
2034 
2035 	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
2036 			    CCP_PASSTHRU_MASKSIZE,
2037 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
2038 	if (ret)
2039 		goto e_mask;
2040 
2041 	if (in_place) {
2042 		dst = src;
2043 	} else {
2044 		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
2045 				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
2046 		if (ret)
2047 			goto e_src;
2048 	}
2049 
2050 	/* Send data to the CCP Passthru engine
2051 	 *   Because the CCP engine works on a single source and destination
2052 	 *   dma address at a time, each entry in the source scatterlist
2053 	 *   (after the dma_map_sg call) must be less than or equal to the
2054 	 *   (remaining) length in the destination scatterlist entry and the
2055 	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
2056 	 */
2057 	dst.sg_wa.sg_used = 0;
2058 	for (i = 1; i <= src.sg_wa.dma_count; i++) {
2059 		if (!dst.sg_wa.sg ||
2060 		    (sg_dma_len(dst.sg_wa.sg) < sg_dma_len(src.sg_wa.sg))) {
2061 			ret = -EINVAL;
2062 			goto e_dst;
2063 		}
2064 
2065 		if (i == src.sg_wa.dma_count) {
2066 			op.eom = 1;
2067 			op.soc = 1;
2068 		}
2069 
2070 		op.src.type = CCP_MEMTYPE_SYSTEM;
2071 		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
2072 		op.src.u.dma.offset = 0;
2073 		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
2074 
2075 		op.dst.type = CCP_MEMTYPE_SYSTEM;
2076 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
2077 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
2078 		op.dst.u.dma.length = op.src.u.dma.length;
2079 
2080 		ret = cmd_q->ccp->vdata->perform->passthru(&op);
2081 		if (ret) {
2082 			cmd->engine_error = cmd_q->cmd_error;
2083 			goto e_dst;
2084 		}
2085 
2086 		dst.sg_wa.sg_used += sg_dma_len(src.sg_wa.sg);
2087 		if (dst.sg_wa.sg_used == sg_dma_len(dst.sg_wa.sg)) {
2088 			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
2089 			dst.sg_wa.sg_used = 0;
2090 		}
2091 		src.sg_wa.sg = sg_next(src.sg_wa.sg);
2092 	}
2093 
2094 e_dst:
2095 	if (!in_place)
2096 		ccp_free_data(&dst, cmd_q);
2097 
2098 e_src:
2099 	ccp_free_data(&src, cmd_q);
2100 
2101 e_mask:
2102 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
2103 		ccp_dm_free(&mask);
2104 
2105 	return ret;
2106 }
2107 
2108 static noinline_for_stack int
2109 ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
2110 				      struct ccp_cmd *cmd)
2111 {
2112 	struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap;
2113 	struct ccp_dm_workarea mask;
2114 	struct ccp_op op;
2115 	int ret;
2116 
2117 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
2118 		return -EINVAL;
2119 
2120 	if (!pt->src_dma || !pt->dst_dma)
2121 		return -EINVAL;
2122 
2123 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
2124 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
2125 			return -EINVAL;
2126 		if (!pt->mask)
2127 			return -EINVAL;
2128 	}
2129 
2130 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
2131 
2132 	memset(&op, 0, sizeof(op));
2133 	op.cmd_q = cmd_q;
2134 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2135 
2136 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
2137 		/* Load the mask */
2138 		op.sb_key = cmd_q->sb_key;
2139 
2140 		mask.length = pt->mask_len;
2141 		mask.dma.address = pt->mask;
2142 		mask.dma.length = pt->mask_len;
2143 
2144 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
2145 				     CCP_PASSTHRU_BYTESWAP_NOOP);
2146 		if (ret) {
2147 			cmd->engine_error = cmd_q->cmd_error;
2148 			return ret;
2149 		}
2150 	}
2151 
2152 	/* Send data to the CCP Passthru engine */
2153 	op.eom = 1;
2154 	op.soc = 1;
2155 
2156 	op.src.type = CCP_MEMTYPE_SYSTEM;
2157 	op.src.u.dma.address = pt->src_dma;
2158 	op.src.u.dma.offset = 0;
2159 	op.src.u.dma.length = pt->src_len;
2160 
2161 	op.dst.type = CCP_MEMTYPE_SYSTEM;
2162 	op.dst.u.dma.address = pt->dst_dma;
2163 	op.dst.u.dma.offset = 0;
2164 	op.dst.u.dma.length = pt->src_len;
2165 
2166 	ret = cmd_q->ccp->vdata->perform->passthru(&op);
2167 	if (ret)
2168 		cmd->engine_error = cmd_q->cmd_error;
2169 
2170 	return ret;
2171 }
2172 
2173 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2174 {
2175 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2176 	struct ccp_dm_workarea src, dst;
2177 	struct ccp_op op;
2178 	int ret;
2179 	u8 *save;
2180 
2181 	if (!ecc->u.mm.operand_1 ||
2182 	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
2183 		return -EINVAL;
2184 
2185 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
2186 		if (!ecc->u.mm.operand_2 ||
2187 		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
2188 			return -EINVAL;
2189 
2190 	if (!ecc->u.mm.result ||
2191 	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
2192 		return -EINVAL;
2193 
2194 	memset(&op, 0, sizeof(op));
2195 	op.cmd_q = cmd_q;
2196 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2197 
2198 	/* Concatenate the modulus and the operands. Both the modulus and
2199 	 * the operands must be in little endian format.  Since the input
2200 	 * is in big endian format it must be converted and placed in a
2201 	 * fixed length buffer.
2202 	 */
2203 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
2204 				   DMA_TO_DEVICE);
2205 	if (ret)
2206 		return ret;
2207 
2208 	/* Save the workarea address since it is updated in order to perform
2209 	 * the concatenation
2210 	 */
2211 	save = src.address;
2212 
2213 	/* Copy the ECC modulus */
2214 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
2215 	if (ret)
2216 		goto e_src;
2217 	src.address += CCP_ECC_OPERAND_SIZE;
2218 
2219 	/* Copy the first operand */
2220 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_1, 0,
2221 				      ecc->u.mm.operand_1_len);
2222 	if (ret)
2223 		goto e_src;
2224 	src.address += CCP_ECC_OPERAND_SIZE;
2225 
2226 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
2227 		/* Copy the second operand */
2228 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_2, 0,
2229 					      ecc->u.mm.operand_2_len);
2230 		if (ret)
2231 			goto e_src;
2232 		src.address += CCP_ECC_OPERAND_SIZE;
2233 	}
2234 
2235 	/* Restore the workarea address */
2236 	src.address = save;
2237 
2238 	/* Prepare the output area for the operation */
2239 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2240 				   DMA_FROM_DEVICE);
2241 	if (ret)
2242 		goto e_src;
2243 
2244 	op.soc = 1;
2245 	op.src.u.dma.address = src.dma.address;
2246 	op.src.u.dma.offset = 0;
2247 	op.src.u.dma.length = src.length;
2248 	op.dst.u.dma.address = dst.dma.address;
2249 	op.dst.u.dma.offset = 0;
2250 	op.dst.u.dma.length = dst.length;
2251 
2252 	op.u.ecc.function = cmd->u.ecc.function;
2253 
2254 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
2255 	if (ret) {
2256 		cmd->engine_error = cmd_q->cmd_error;
2257 		goto e_dst;
2258 	}
2259 
2260 	ecc->ecc_result = le16_to_cpup(
2261 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2262 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2263 		ret = -EIO;
2264 		goto e_dst;
2265 	}
2266 
2267 	/* Save the ECC result */
2268 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.mm.result, 0,
2269 				CCP_ECC_MODULUS_BYTES);
2270 
2271 e_dst:
2272 	ccp_dm_free(&dst);
2273 
2274 e_src:
2275 	ccp_dm_free(&src);
2276 
2277 	return ret;
2278 }
2279 
2280 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2281 {
2282 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2283 	struct ccp_dm_workarea src, dst;
2284 	struct ccp_op op;
2285 	int ret;
2286 	u8 *save;
2287 
2288 	if (!ecc->u.pm.point_1.x ||
2289 	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
2290 	    !ecc->u.pm.point_1.y ||
2291 	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
2292 		return -EINVAL;
2293 
2294 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
2295 		if (!ecc->u.pm.point_2.x ||
2296 		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
2297 		    !ecc->u.pm.point_2.y ||
2298 		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
2299 			return -EINVAL;
2300 	} else {
2301 		if (!ecc->u.pm.domain_a ||
2302 		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
2303 			return -EINVAL;
2304 
2305 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
2306 			if (!ecc->u.pm.scalar ||
2307 			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
2308 				return -EINVAL;
2309 	}
2310 
2311 	if (!ecc->u.pm.result.x ||
2312 	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
2313 	    !ecc->u.pm.result.y ||
2314 	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
2315 		return -EINVAL;
2316 
2317 	memset(&op, 0, sizeof(op));
2318 	op.cmd_q = cmd_q;
2319 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2320 
2321 	/* Concatenate the modulus and the operands. Both the modulus and
2322 	 * the operands must be in little endian format.  Since the input
2323 	 * is in big endian format it must be converted and placed in a
2324 	 * fixed length buffer.
2325 	 */
2326 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
2327 				   DMA_TO_DEVICE);
2328 	if (ret)
2329 		return ret;
2330 
2331 	/* Save the workarea address since it is updated in order to perform
2332 	 * the concatenation
2333 	 */
2334 	save = src.address;
2335 
2336 	/* Copy the ECC modulus */
2337 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
2338 	if (ret)
2339 		goto e_src;
2340 	src.address += CCP_ECC_OPERAND_SIZE;
2341 
2342 	/* Copy the first point X and Y coordinate */
2343 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.x, 0,
2344 				      ecc->u.pm.point_1.x_len);
2345 	if (ret)
2346 		goto e_src;
2347 	src.address += CCP_ECC_OPERAND_SIZE;
2348 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.y, 0,
2349 				      ecc->u.pm.point_1.y_len);
2350 	if (ret)
2351 		goto e_src;
2352 	src.address += CCP_ECC_OPERAND_SIZE;
2353 
2354 	/* Set the first point Z coordinate to 1 */
2355 	*src.address = 0x01;
2356 	src.address += CCP_ECC_OPERAND_SIZE;
2357 
2358 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
2359 		/* Copy the second point X and Y coordinate */
2360 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.x, 0,
2361 					      ecc->u.pm.point_2.x_len);
2362 		if (ret)
2363 			goto e_src;
2364 		src.address += CCP_ECC_OPERAND_SIZE;
2365 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.y, 0,
2366 					      ecc->u.pm.point_2.y_len);
2367 		if (ret)
2368 			goto e_src;
2369 		src.address += CCP_ECC_OPERAND_SIZE;
2370 
2371 		/* Set the second point Z coordinate to 1 */
2372 		*src.address = 0x01;
2373 		src.address += CCP_ECC_OPERAND_SIZE;
2374 	} else {
2375 		/* Copy the Domain "a" parameter */
2376 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.domain_a, 0,
2377 					      ecc->u.pm.domain_a_len);
2378 		if (ret)
2379 			goto e_src;
2380 		src.address += CCP_ECC_OPERAND_SIZE;
2381 
2382 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
2383 			/* Copy the scalar value */
2384 			ret = ccp_reverse_set_dm_area(&src, 0,
2385 						      ecc->u.pm.scalar, 0,
2386 						      ecc->u.pm.scalar_len);
2387 			if (ret)
2388 				goto e_src;
2389 			src.address += CCP_ECC_OPERAND_SIZE;
2390 		}
2391 	}
2392 
2393 	/* Restore the workarea address */
2394 	src.address = save;
2395 
2396 	/* Prepare the output area for the operation */
2397 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2398 				   DMA_FROM_DEVICE);
2399 	if (ret)
2400 		goto e_src;
2401 
2402 	op.soc = 1;
2403 	op.src.u.dma.address = src.dma.address;
2404 	op.src.u.dma.offset = 0;
2405 	op.src.u.dma.length = src.length;
2406 	op.dst.u.dma.address = dst.dma.address;
2407 	op.dst.u.dma.offset = 0;
2408 	op.dst.u.dma.length = dst.length;
2409 
2410 	op.u.ecc.function = cmd->u.ecc.function;
2411 
2412 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
2413 	if (ret) {
2414 		cmd->engine_error = cmd_q->cmd_error;
2415 		goto e_dst;
2416 	}
2417 
2418 	ecc->ecc_result = le16_to_cpup(
2419 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2420 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2421 		ret = -EIO;
2422 		goto e_dst;
2423 	}
2424 
2425 	/* Save the workarea address since it is updated as we walk through
2426 	 * to copy the point math result
2427 	 */
2428 	save = dst.address;
2429 
2430 	/* Save the ECC result X and Y coordinates */
2431 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.x, 0,
2432 				CCP_ECC_MODULUS_BYTES);
2433 	dst.address += CCP_ECC_OUTPUT_SIZE;
2434 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.y, 0,
2435 				CCP_ECC_MODULUS_BYTES);
2436 
2437 	/* Restore the workarea address */
2438 	dst.address = save;
2439 
2440 e_dst:
2441 	ccp_dm_free(&dst);
2442 
2443 e_src:
2444 	ccp_dm_free(&src);
2445 
2446 	return ret;
2447 }
2448 
2449 static noinline_for_stack int
2450 ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2451 {
2452 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2453 
2454 	ecc->ecc_result = 0;
2455 
2456 	if (!ecc->mod ||
2457 	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
2458 		return -EINVAL;
2459 
2460 	switch (ecc->function) {
2461 	case CCP_ECC_FUNCTION_MMUL_384BIT:
2462 	case CCP_ECC_FUNCTION_MADD_384BIT:
2463 	case CCP_ECC_FUNCTION_MINV_384BIT:
2464 		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
2465 
2466 	case CCP_ECC_FUNCTION_PADD_384BIT:
2467 	case CCP_ECC_FUNCTION_PMUL_384BIT:
2468 	case CCP_ECC_FUNCTION_PDBL_384BIT:
2469 		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
2470 
2471 	default:
2472 		return -EINVAL;
2473 	}
2474 }
2475 
2476 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2477 {
2478 	int ret;
2479 
2480 	cmd->engine_error = 0;
2481 	cmd_q->cmd_error = 0;
2482 	cmd_q->int_rcvd = 0;
2483 	cmd_q->free_slots = cmd_q->ccp->vdata->perform->get_free_slots(cmd_q);
2484 
2485 	switch (cmd->engine) {
2486 	case CCP_ENGINE_AES:
2487 		switch (cmd->u.aes.mode) {
2488 		case CCP_AES_MODE_CMAC:
2489 			ret = ccp_run_aes_cmac_cmd(cmd_q, cmd);
2490 			break;
2491 		case CCP_AES_MODE_GCM:
2492 			ret = ccp_run_aes_gcm_cmd(cmd_q, cmd);
2493 			break;
2494 		default:
2495 			ret = ccp_run_aes_cmd(cmd_q, cmd);
2496 			break;
2497 		}
2498 		break;
2499 	case CCP_ENGINE_XTS_AES_128:
2500 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2501 		break;
2502 	case CCP_ENGINE_DES3:
2503 		ret = ccp_run_des3_cmd(cmd_q, cmd);
2504 		break;
2505 	case CCP_ENGINE_SHA:
2506 		ret = ccp_run_sha_cmd(cmd_q, cmd);
2507 		break;
2508 	case CCP_ENGINE_RSA:
2509 		ret = ccp_run_rsa_cmd(cmd_q, cmd);
2510 		break;
2511 	case CCP_ENGINE_PASSTHRU:
2512 		if (cmd->flags & CCP_CMD_PASSTHRU_NO_DMA_MAP)
2513 			ret = ccp_run_passthru_nomap_cmd(cmd_q, cmd);
2514 		else
2515 			ret = ccp_run_passthru_cmd(cmd_q, cmd);
2516 		break;
2517 	case CCP_ENGINE_ECC:
2518 		ret = ccp_run_ecc_cmd(cmd_q, cmd);
2519 		break;
2520 	default:
2521 		ret = -EINVAL;
2522 	}
2523 
2524 	return ret;
2525 }
2526