xref: /linux/drivers/crypto/ccp/ccp-ops.c (revision ccebcf3f224a44ec8e9c5bfca9d8e5d29298a5a8)
1 /*
2  * AMD Cryptographic Coprocessor (CCP) driver
3  *
4  * Copyright (C) 2013,2016 Advanced Micro Devices, Inc.
5  *
6  * Author: Tom Lendacky <thomas.lendacky@amd.com>
7  * Author: Gary R Hook <gary.hook@amd.com>
8  *
9  * This program is free software; you can redistribute it and/or modify
10  * it under the terms of the GNU General Public License version 2 as
11  * published by the Free Software Foundation.
12  */
13 
14 #include <linux/module.h>
15 #include <linux/kernel.h>
16 #include <linux/pci.h>
17 #include <linux/interrupt.h>
18 #include <crypto/scatterwalk.h>
19 #include <linux/ccp.h>
20 
21 #include "ccp-dev.h"
22 
23 /* SHA initial context values */
24 static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = {
25 	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
26 	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
27 	cpu_to_be32(SHA1_H4),
28 };
29 
30 static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
31 	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
32 	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
33 	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
34 	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
35 };
36 
37 static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
38 	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
39 	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
40 	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
41 	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
42 };
43 
44 static const __be64 ccp_sha384_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
45 	cpu_to_be64(SHA384_H0), cpu_to_be64(SHA384_H1),
46 	cpu_to_be64(SHA384_H2), cpu_to_be64(SHA384_H3),
47 	cpu_to_be64(SHA384_H4), cpu_to_be64(SHA384_H5),
48 	cpu_to_be64(SHA384_H6), cpu_to_be64(SHA384_H7),
49 };
50 
51 static const __be64 ccp_sha512_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
52 	cpu_to_be64(SHA512_H0), cpu_to_be64(SHA512_H1),
53 	cpu_to_be64(SHA512_H2), cpu_to_be64(SHA512_H3),
54 	cpu_to_be64(SHA512_H4), cpu_to_be64(SHA512_H5),
55 	cpu_to_be64(SHA512_H6), cpu_to_be64(SHA512_H7),
56 };
57 
/* Pick the job id for a new operation: version 3.0 devices get a freshly
 * generated id from ccp_gen_jobid(), every other device version uses 0.
 *
 * The argument is parenthesized so that any pointer expression can be
 * passed safely. Note that it is still evaluated twice on version 3.0
 * devices, so it must not have side effects.
 */
#define	CCP_NEW_JOBID(ccp)	(((ccp)->vdata->version == CCP_VERSION(3, 0)) ? \
					ccp_gen_jobid(ccp) : 0)
60 
61 static u32 ccp_gen_jobid(struct ccp_device *ccp)
62 {
63 	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
64 }
65 
66 static void ccp_sg_free(struct ccp_sg_workarea *wa)
67 {
68 	if (wa->dma_count)
69 		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
70 
71 	wa->dma_count = 0;
72 }
73 
74 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
75 				struct scatterlist *sg, u64 len,
76 				enum dma_data_direction dma_dir)
77 {
78 	memset(wa, 0, sizeof(*wa));
79 
80 	wa->sg = sg;
81 	if (!sg)
82 		return 0;
83 
84 	wa->nents = sg_nents_for_len(sg, len);
85 	if (wa->nents < 0)
86 		return wa->nents;
87 
88 	wa->bytes_left = len;
89 	wa->sg_used = 0;
90 
91 	if (len == 0)
92 		return 0;
93 
94 	if (dma_dir == DMA_NONE)
95 		return 0;
96 
97 	wa->dma_sg = sg;
98 	wa->dma_dev = dev;
99 	wa->dma_dir = dma_dir;
100 	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
101 	if (!wa->dma_count)
102 		return -ENOMEM;
103 
104 	return 0;
105 }
106 
107 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
108 {
109 	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
110 
111 	if (!wa->sg)
112 		return;
113 
114 	wa->sg_used += nbytes;
115 	wa->bytes_left -= nbytes;
116 	if (wa->sg_used == wa->sg->length) {
117 		wa->sg = sg_next(wa->sg);
118 		wa->sg_used = 0;
119 	}
120 }
121 
122 static void ccp_dm_free(struct ccp_dm_workarea *wa)
123 {
124 	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
125 		if (wa->address)
126 			dma_pool_free(wa->dma_pool, wa->address,
127 				      wa->dma.address);
128 	} else {
129 		if (wa->dma.address)
130 			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
131 					 wa->dma.dir);
132 		kfree(wa->address);
133 	}
134 
135 	wa->address = NULL;
136 	wa->dma.address = 0;
137 }
138 
139 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
140 				struct ccp_cmd_queue *cmd_q,
141 				unsigned int len,
142 				enum dma_data_direction dir)
143 {
144 	memset(wa, 0, sizeof(*wa));
145 
146 	if (!len)
147 		return 0;
148 
149 	wa->dev = cmd_q->ccp->dev;
150 	wa->length = len;
151 
152 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
153 		wa->dma_pool = cmd_q->dma_pool;
154 
155 		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
156 					     &wa->dma.address);
157 		if (!wa->address)
158 			return -ENOMEM;
159 
160 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
161 
162 		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
163 	} else {
164 		wa->address = kzalloc(len, GFP_KERNEL);
165 		if (!wa->address)
166 			return -ENOMEM;
167 
168 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
169 						 dir);
170 		if (!wa->dma.address)
171 			return -ENOMEM;
172 
173 		wa->dma.length = len;
174 	}
175 	wa->dma.dir = dir;
176 
177 	return 0;
178 }
179 
180 static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
181 			    struct scatterlist *sg, unsigned int sg_offset,
182 			    unsigned int len)
183 {
184 	WARN_ON(!wa->address);
185 
186 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
187 				 0);
188 }
189 
190 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
191 			    struct scatterlist *sg, unsigned int sg_offset,
192 			    unsigned int len)
193 {
194 	WARN_ON(!wa->address);
195 
196 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
197 				 1);
198 }
199 
200 static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
201 				   unsigned int wa_offset,
202 				   struct scatterlist *sg,
203 				   unsigned int sg_offset,
204 				   unsigned int len)
205 {
206 	u8 *p, *q;
207 
208 	ccp_set_dm_area(wa, wa_offset, sg, sg_offset, len);
209 
210 	p = wa->address + wa_offset;
211 	q = p + len - 1;
212 	while (p < q) {
213 		*p = *p ^ *q;
214 		*q = *p ^ *q;
215 		*p = *p ^ *q;
216 		p++;
217 		q--;
218 	}
219 	return 0;
220 }
221 
222 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
223 				    unsigned int wa_offset,
224 				    struct scatterlist *sg,
225 				    unsigned int sg_offset,
226 				    unsigned int len)
227 {
228 	u8 *p, *q;
229 
230 	p = wa->address + wa_offset;
231 	q = p + len - 1;
232 	while (p < q) {
233 		*p = *p ^ *q;
234 		*q = *p ^ *q;
235 		*p = *p ^ *q;
236 		p++;
237 		q--;
238 	}
239 
240 	ccp_get_dm_area(wa, wa_offset, sg, sg_offset, len);
241 }
242 
243 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
244 {
245 	ccp_dm_free(&data->dm_wa);
246 	ccp_sg_free(&data->sg_wa);
247 }
248 
249 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
250 			 struct scatterlist *sg, u64 sg_len,
251 			 unsigned int dm_len,
252 			 enum dma_data_direction dir)
253 {
254 	int ret;
255 
256 	memset(data, 0, sizeof(*data));
257 
258 	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
259 				   dir);
260 	if (ret)
261 		goto e_err;
262 
263 	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
264 	if (ret)
265 		goto e_err;
266 
267 	return 0;
268 
269 e_err:
270 	ccp_free_data(data, cmd_q);
271 
272 	return ret;
273 }
274 
275 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
276 {
277 	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
278 	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
279 	unsigned int buf_count, nbytes;
280 
281 	/* Clear the buffer if setting it */
282 	if (!from)
283 		memset(dm_wa->address, 0, dm_wa->length);
284 
285 	if (!sg_wa->sg)
286 		return 0;
287 
288 	/* Perform the copy operation
289 	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
290 	 *   an unsigned int
291 	 */
292 	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
293 	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
294 				 nbytes, from);
295 
296 	/* Update the structures and generate the count */
297 	buf_count = 0;
298 	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
299 		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
300 			     dm_wa->length - buf_count);
301 		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
302 
303 		buf_count += nbytes;
304 		ccp_update_sg_workarea(sg_wa, nbytes);
305 	}
306 
307 	return buf_count;
308 }
309 
/* Fill the bounce buffer of @data from its scatterlist; returns the
 * number of bytes reported by ccp_queue_buf().
 */
static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
{
	const unsigned int from_buf = 0;

	return ccp_queue_buf(data, from_buf);
}
314 
/* Write the bounce buffer of @data out to its scatterlist; returns the
 * number of bytes reported by ccp_queue_buf().
 */
static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
{
	const unsigned int from_buf = 1;

	return ccp_queue_buf(data, from_buf);
}
319 
320 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
321 			     struct ccp_op *op, unsigned int block_size,
322 			     bool blocksize_op)
323 {
324 	unsigned int sg_src_len, sg_dst_len, op_len;
325 
326 	/* The CCP can only DMA from/to one address each per operation. This
327 	 * requires that we find the smallest DMA area between the source
328 	 * and destination. The resulting len values will always be <= UINT_MAX
329 	 * because the dma length is an unsigned int.
330 	 */
331 	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
332 	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
333 
334 	if (dst) {
335 		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
336 		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
337 		op_len = min(sg_src_len, sg_dst_len);
338 	} else {
339 		op_len = sg_src_len;
340 	}
341 
342 	/* The data operation length will be at least block_size in length
343 	 * or the smaller of available sg room remaining for the source or
344 	 * the destination
345 	 */
346 	op_len = max(op_len, block_size);
347 
348 	/* Unless we have to buffer data, there's no reason to wait */
349 	op->soc = 0;
350 
351 	if (sg_src_len < block_size) {
352 		/* Not enough data in the sg element, so it
353 		 * needs to be buffered into a blocksize chunk
354 		 */
355 		int cp_len = ccp_fill_queue_buf(src);
356 
357 		op->soc = 1;
358 		op->src.u.dma.address = src->dm_wa.dma.address;
359 		op->src.u.dma.offset = 0;
360 		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
361 	} else {
362 		/* Enough data in the sg element, but we need to
363 		 * adjust for any previously copied data
364 		 */
365 		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
366 		op->src.u.dma.offset = src->sg_wa.sg_used;
367 		op->src.u.dma.length = op_len & ~(block_size - 1);
368 
369 		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
370 	}
371 
372 	if (dst) {
373 		if (sg_dst_len < block_size) {
374 			/* Not enough room in the sg element or we're on the
375 			 * last piece of data (when using padding), so the
376 			 * output needs to be buffered into a blocksize chunk
377 			 */
378 			op->soc = 1;
379 			op->dst.u.dma.address = dst->dm_wa.dma.address;
380 			op->dst.u.dma.offset = 0;
381 			op->dst.u.dma.length = op->src.u.dma.length;
382 		} else {
383 			/* Enough room in the sg element, but we need to
384 			 * adjust for any previously used area
385 			 */
386 			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
387 			op->dst.u.dma.offset = dst->sg_wa.sg_used;
388 			op->dst.u.dma.length = op->src.u.dma.length;
389 		}
390 	}
391 }
392 
393 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
394 			     struct ccp_op *op)
395 {
396 	op->init = 0;
397 
398 	if (dst) {
399 		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
400 			ccp_empty_queue_buf(dst);
401 		else
402 			ccp_update_sg_workarea(&dst->sg_wa,
403 					       op->dst.u.dma.length);
404 	}
405 }
406 
407 static int ccp_copy_to_from_sb(struct ccp_cmd_queue *cmd_q,
408 			       struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
409 			       u32 byte_swap, bool from)
410 {
411 	struct ccp_op op;
412 
413 	memset(&op, 0, sizeof(op));
414 
415 	op.cmd_q = cmd_q;
416 	op.jobid = jobid;
417 	op.eom = 1;
418 
419 	if (from) {
420 		op.soc = 1;
421 		op.src.type = CCP_MEMTYPE_SB;
422 		op.src.u.sb = sb;
423 		op.dst.type = CCP_MEMTYPE_SYSTEM;
424 		op.dst.u.dma.address = wa->dma.address;
425 		op.dst.u.dma.length = wa->length;
426 	} else {
427 		op.src.type = CCP_MEMTYPE_SYSTEM;
428 		op.src.u.dma.address = wa->dma.address;
429 		op.src.u.dma.length = wa->length;
430 		op.dst.type = CCP_MEMTYPE_SB;
431 		op.dst.u.sb = sb;
432 	}
433 
434 	op.u.passthru.byte_swap = byte_swap;
435 
436 	return cmd_q->ccp->vdata->perform->passthru(&op);
437 }
438 
439 static int ccp_copy_to_sb(struct ccp_cmd_queue *cmd_q,
440 			  struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
441 			  u32 byte_swap)
442 {
443 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, false);
444 }
445 
446 static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q,
447 			    struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
448 			    u32 byte_swap)
449 {
450 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true);
451 }
452 
453 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
454 				struct ccp_cmd *cmd)
455 {
456 	struct ccp_aes_engine *aes = &cmd->u.aes;
457 	struct ccp_dm_workarea key, ctx;
458 	struct ccp_data src;
459 	struct ccp_op op;
460 	unsigned int dm_offset;
461 	int ret;
462 
463 	if (!((aes->key_len == AES_KEYSIZE_128) ||
464 	      (aes->key_len == AES_KEYSIZE_192) ||
465 	      (aes->key_len == AES_KEYSIZE_256)))
466 		return -EINVAL;
467 
468 	if (aes->src_len & (AES_BLOCK_SIZE - 1))
469 		return -EINVAL;
470 
471 	if (aes->iv_len != AES_BLOCK_SIZE)
472 		return -EINVAL;
473 
474 	if (!aes->key || !aes->iv || !aes->src)
475 		return -EINVAL;
476 
477 	if (aes->cmac_final) {
478 		if (aes->cmac_key_len != AES_BLOCK_SIZE)
479 			return -EINVAL;
480 
481 		if (!aes->cmac_key)
482 			return -EINVAL;
483 	}
484 
485 	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
486 	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
487 
488 	ret = -EIO;
489 	memset(&op, 0, sizeof(op));
490 	op.cmd_q = cmd_q;
491 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
492 	op.sb_key = cmd_q->sb_key;
493 	op.sb_ctx = cmd_q->sb_ctx;
494 	op.init = 1;
495 	op.u.aes.type = aes->type;
496 	op.u.aes.mode = aes->mode;
497 	op.u.aes.action = aes->action;
498 
499 	/* All supported key sizes fit in a single (32-byte) SB entry
500 	 * and must be in little endian format. Use the 256-bit byte
501 	 * swap passthru option to convert from big endian to little
502 	 * endian.
503 	 */
504 	ret = ccp_init_dm_workarea(&key, cmd_q,
505 				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
506 				   DMA_TO_DEVICE);
507 	if (ret)
508 		return ret;
509 
510 	dm_offset = CCP_SB_BYTES - aes->key_len;
511 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
512 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
513 			     CCP_PASSTHRU_BYTESWAP_256BIT);
514 	if (ret) {
515 		cmd->engine_error = cmd_q->cmd_error;
516 		goto e_key;
517 	}
518 
519 	/* The AES context fits in a single (32-byte) SB entry and
520 	 * must be in little endian format. Use the 256-bit byte swap
521 	 * passthru option to convert from big endian to little endian.
522 	 */
523 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
524 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
525 				   DMA_BIDIRECTIONAL);
526 	if (ret)
527 		goto e_key;
528 
529 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
530 	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
531 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
532 			     CCP_PASSTHRU_BYTESWAP_256BIT);
533 	if (ret) {
534 		cmd->engine_error = cmd_q->cmd_error;
535 		goto e_ctx;
536 	}
537 
538 	/* Send data to the CCP AES engine */
539 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
540 			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
541 	if (ret)
542 		goto e_ctx;
543 
544 	while (src.sg_wa.bytes_left) {
545 		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
546 		if (aes->cmac_final && !src.sg_wa.bytes_left) {
547 			op.eom = 1;
548 
549 			/* Push the K1/K2 key to the CCP now */
550 			ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid,
551 					       op.sb_ctx,
552 					       CCP_PASSTHRU_BYTESWAP_256BIT);
553 			if (ret) {
554 				cmd->engine_error = cmd_q->cmd_error;
555 				goto e_src;
556 			}
557 
558 			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
559 					aes->cmac_key_len);
560 			ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
561 					     CCP_PASSTHRU_BYTESWAP_256BIT);
562 			if (ret) {
563 				cmd->engine_error = cmd_q->cmd_error;
564 				goto e_src;
565 			}
566 		}
567 
568 		ret = cmd_q->ccp->vdata->perform->aes(&op);
569 		if (ret) {
570 			cmd->engine_error = cmd_q->cmd_error;
571 			goto e_src;
572 		}
573 
574 		ccp_process_data(&src, NULL, &op);
575 	}
576 
577 	/* Retrieve the AES context - convert from LE to BE using
578 	 * 32-byte (256-bit) byteswapping
579 	 */
580 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
581 			       CCP_PASSTHRU_BYTESWAP_256BIT);
582 	if (ret) {
583 		cmd->engine_error = cmd_q->cmd_error;
584 		goto e_src;
585 	}
586 
587 	/* ...but we only need AES_BLOCK_SIZE bytes */
588 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
589 	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
590 
591 e_src:
592 	ccp_free_data(&src, cmd_q);
593 
594 e_ctx:
595 	ccp_dm_free(&ctx);
596 
597 e_key:
598 	ccp_dm_free(&key);
599 
600 	return ret;
601 }
602 
603 static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
604 {
605 	struct ccp_aes_engine *aes = &cmd->u.aes;
606 	struct ccp_dm_workarea key, ctx;
607 	struct ccp_data src, dst;
608 	struct ccp_op op;
609 	unsigned int dm_offset;
610 	bool in_place = false;
611 	int ret;
612 
613 	if (aes->mode == CCP_AES_MODE_CMAC)
614 		return ccp_run_aes_cmac_cmd(cmd_q, cmd);
615 
616 	if (!((aes->key_len == AES_KEYSIZE_128) ||
617 	      (aes->key_len == AES_KEYSIZE_192) ||
618 	      (aes->key_len == AES_KEYSIZE_256)))
619 		return -EINVAL;
620 
621 	if (((aes->mode == CCP_AES_MODE_ECB) ||
622 	     (aes->mode == CCP_AES_MODE_CBC) ||
623 	     (aes->mode == CCP_AES_MODE_CFB)) &&
624 	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
625 		return -EINVAL;
626 
627 	if (!aes->key || !aes->src || !aes->dst)
628 		return -EINVAL;
629 
630 	if (aes->mode != CCP_AES_MODE_ECB) {
631 		if (aes->iv_len != AES_BLOCK_SIZE)
632 			return -EINVAL;
633 
634 		if (!aes->iv)
635 			return -EINVAL;
636 	}
637 
638 	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
639 	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);
640 
641 	ret = -EIO;
642 	memset(&op, 0, sizeof(op));
643 	op.cmd_q = cmd_q;
644 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
645 	op.sb_key = cmd_q->sb_key;
646 	op.sb_ctx = cmd_q->sb_ctx;
647 	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
648 	op.u.aes.type = aes->type;
649 	op.u.aes.mode = aes->mode;
650 	op.u.aes.action = aes->action;
651 
652 	/* All supported key sizes fit in a single (32-byte) SB entry
653 	 * and must be in little endian format. Use the 256-bit byte
654 	 * swap passthru option to convert from big endian to little
655 	 * endian.
656 	 */
657 	ret = ccp_init_dm_workarea(&key, cmd_q,
658 				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
659 				   DMA_TO_DEVICE);
660 	if (ret)
661 		return ret;
662 
663 	dm_offset = CCP_SB_BYTES - aes->key_len;
664 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
665 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
666 			     CCP_PASSTHRU_BYTESWAP_256BIT);
667 	if (ret) {
668 		cmd->engine_error = cmd_q->cmd_error;
669 		goto e_key;
670 	}
671 
672 	/* The AES context fits in a single (32-byte) SB entry and
673 	 * must be in little endian format. Use the 256-bit byte swap
674 	 * passthru option to convert from big endian to little endian.
675 	 */
676 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
677 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
678 				   DMA_BIDIRECTIONAL);
679 	if (ret)
680 		goto e_key;
681 
682 	if (aes->mode != CCP_AES_MODE_ECB) {
683 		/* Load the AES context - convert to LE */
684 		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
685 		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
686 		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
687 				     CCP_PASSTHRU_BYTESWAP_256BIT);
688 		if (ret) {
689 			cmd->engine_error = cmd_q->cmd_error;
690 			goto e_ctx;
691 		}
692 	}
693 	switch (aes->mode) {
694 	case CCP_AES_MODE_CFB: /* CFB128 only */
695 	case CCP_AES_MODE_CTR:
696 		op.u.aes.size = AES_BLOCK_SIZE * BITS_PER_BYTE - 1;
697 		break;
698 	default:
699 		op.u.aes.size = 0;
700 	}
701 
702 	/* Prepare the input and output data workareas. For in-place
703 	 * operations we need to set the dma direction to BIDIRECTIONAL
704 	 * and copy the src workarea to the dst workarea.
705 	 */
706 	if (sg_virt(aes->src) == sg_virt(aes->dst))
707 		in_place = true;
708 
709 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
710 			    AES_BLOCK_SIZE,
711 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
712 	if (ret)
713 		goto e_ctx;
714 
715 	if (in_place) {
716 		dst = src;
717 	} else {
718 		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
719 				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
720 		if (ret)
721 			goto e_src;
722 	}
723 
724 	/* Send data to the CCP AES engine */
725 	while (src.sg_wa.bytes_left) {
726 		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
727 		if (!src.sg_wa.bytes_left) {
728 			op.eom = 1;
729 
730 			/* Since we don't retrieve the AES context in ECB
731 			 * mode we have to wait for the operation to complete
732 			 * on the last piece of data
733 			 */
734 			if (aes->mode == CCP_AES_MODE_ECB)
735 				op.soc = 1;
736 		}
737 
738 		ret = cmd_q->ccp->vdata->perform->aes(&op);
739 		if (ret) {
740 			cmd->engine_error = cmd_q->cmd_error;
741 			goto e_dst;
742 		}
743 
744 		ccp_process_data(&src, &dst, &op);
745 	}
746 
747 	if (aes->mode != CCP_AES_MODE_ECB) {
748 		/* Retrieve the AES context - convert from LE to BE using
749 		 * 32-byte (256-bit) byteswapping
750 		 */
751 		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
752 				       CCP_PASSTHRU_BYTESWAP_256BIT);
753 		if (ret) {
754 			cmd->engine_error = cmd_q->cmd_error;
755 			goto e_dst;
756 		}
757 
758 		/* ...but we only need AES_BLOCK_SIZE bytes */
759 		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
760 		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
761 	}
762 
763 e_dst:
764 	if (!in_place)
765 		ccp_free_data(&dst, cmd_q);
766 
767 e_src:
768 	ccp_free_data(&src, cmd_q);
769 
770 e_ctx:
771 	ccp_dm_free(&ctx);
772 
773 e_key:
774 	ccp_dm_free(&key);
775 
776 	return ret;
777 }
778 
779 static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
780 			       struct ccp_cmd *cmd)
781 {
782 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
783 	struct ccp_dm_workarea key, ctx;
784 	struct ccp_data src, dst;
785 	struct ccp_op op;
786 	unsigned int unit_size, dm_offset;
787 	bool in_place = false;
788 	int ret;
789 
790 	switch (xts->unit_size) {
791 	case CCP_XTS_AES_UNIT_SIZE_16:
792 		unit_size = 16;
793 		break;
794 	case CCP_XTS_AES_UNIT_SIZE_512:
795 		unit_size = 512;
796 		break;
797 	case CCP_XTS_AES_UNIT_SIZE_1024:
798 		unit_size = 1024;
799 		break;
800 	case CCP_XTS_AES_UNIT_SIZE_2048:
801 		unit_size = 2048;
802 		break;
803 	case CCP_XTS_AES_UNIT_SIZE_4096:
804 		unit_size = 4096;
805 		break;
806 
807 	default:
808 		return -EINVAL;
809 	}
810 
811 	if (xts->key_len != AES_KEYSIZE_128)
812 		return -EINVAL;
813 
814 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
815 		return -EINVAL;
816 
817 	if (xts->iv_len != AES_BLOCK_SIZE)
818 		return -EINVAL;
819 
820 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
821 		return -EINVAL;
822 
823 	BUILD_BUG_ON(CCP_XTS_AES_KEY_SB_COUNT != 1);
824 	BUILD_BUG_ON(CCP_XTS_AES_CTX_SB_COUNT != 1);
825 
826 	ret = -EIO;
827 	memset(&op, 0, sizeof(op));
828 	op.cmd_q = cmd_q;
829 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
830 	op.sb_key = cmd_q->sb_key;
831 	op.sb_ctx = cmd_q->sb_ctx;
832 	op.init = 1;
833 	op.u.xts.action = xts->action;
834 	op.u.xts.unit_size = xts->unit_size;
835 
836 	/* All supported key sizes fit in a single (32-byte) SB entry
837 	 * and must be in little endian format. Use the 256-bit byte
838 	 * swap passthru option to convert from big endian to little
839 	 * endian.
840 	 */
841 	ret = ccp_init_dm_workarea(&key, cmd_q,
842 				   CCP_XTS_AES_KEY_SB_COUNT * CCP_SB_BYTES,
843 				   DMA_TO_DEVICE);
844 	if (ret)
845 		return ret;
846 
847 	dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
848 	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
849 	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
850 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
851 			     CCP_PASSTHRU_BYTESWAP_256BIT);
852 	if (ret) {
853 		cmd->engine_error = cmd_q->cmd_error;
854 		goto e_key;
855 	}
856 
857 	/* The AES context fits in a single (32-byte) SB entry and
858 	 * for XTS is already in little endian format so no byte swapping
859 	 * is needed.
860 	 */
861 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
862 				   CCP_XTS_AES_CTX_SB_COUNT * CCP_SB_BYTES,
863 				   DMA_BIDIRECTIONAL);
864 	if (ret)
865 		goto e_key;
866 
867 	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
868 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
869 			     CCP_PASSTHRU_BYTESWAP_NOOP);
870 	if (ret) {
871 		cmd->engine_error = cmd_q->cmd_error;
872 		goto e_ctx;
873 	}
874 
875 	/* Prepare the input and output data workareas. For in-place
876 	 * operations we need to set the dma direction to BIDIRECTIONAL
877 	 * and copy the src workarea to the dst workarea.
878 	 */
879 	if (sg_virt(xts->src) == sg_virt(xts->dst))
880 		in_place = true;
881 
882 	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
883 			    unit_size,
884 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
885 	if (ret)
886 		goto e_ctx;
887 
888 	if (in_place) {
889 		dst = src;
890 	} else {
891 		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
892 				    unit_size, DMA_FROM_DEVICE);
893 		if (ret)
894 			goto e_src;
895 	}
896 
897 	/* Send data to the CCP AES engine */
898 	while (src.sg_wa.bytes_left) {
899 		ccp_prepare_data(&src, &dst, &op, unit_size, true);
900 		if (!src.sg_wa.bytes_left)
901 			op.eom = 1;
902 
903 		ret = cmd_q->ccp->vdata->perform->xts_aes(&op);
904 		if (ret) {
905 			cmd->engine_error = cmd_q->cmd_error;
906 			goto e_dst;
907 		}
908 
909 		ccp_process_data(&src, &dst, &op);
910 	}
911 
912 	/* Retrieve the AES context - convert from LE to BE using
913 	 * 32-byte (256-bit) byteswapping
914 	 */
915 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
916 			       CCP_PASSTHRU_BYTESWAP_256BIT);
917 	if (ret) {
918 		cmd->engine_error = cmd_q->cmd_error;
919 		goto e_dst;
920 	}
921 
922 	/* ...but we only need AES_BLOCK_SIZE bytes */
923 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
924 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
925 
926 e_dst:
927 	if (!in_place)
928 		ccp_free_data(&dst, cmd_q);
929 
930 e_src:
931 	ccp_free_data(&src, cmd_q);
932 
933 e_ctx:
934 	ccp_dm_free(&ctx);
935 
936 e_key:
937 	ccp_dm_free(&key);
938 
939 	return ret;
940 }
941 
942 static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
943 {
944 	struct ccp_sha_engine *sha = &cmd->u.sha;
945 	struct ccp_dm_workarea ctx;
946 	struct ccp_data src;
947 	struct ccp_op op;
948 	unsigned int ioffset, ooffset;
949 	unsigned int digest_size;
950 	int sb_count;
951 	const void *init;
952 	u64 block_size;
953 	int ctx_size;
954 	int ret;
955 
956 	switch (sha->type) {
957 	case CCP_SHA_TYPE_1:
958 		if (sha->ctx_len < SHA1_DIGEST_SIZE)
959 			return -EINVAL;
960 		block_size = SHA1_BLOCK_SIZE;
961 		break;
962 	case CCP_SHA_TYPE_224:
963 		if (sha->ctx_len < SHA224_DIGEST_SIZE)
964 			return -EINVAL;
965 		block_size = SHA224_BLOCK_SIZE;
966 		break;
967 	case CCP_SHA_TYPE_256:
968 		if (sha->ctx_len < SHA256_DIGEST_SIZE)
969 			return -EINVAL;
970 		block_size = SHA256_BLOCK_SIZE;
971 		break;
972 	case CCP_SHA_TYPE_384:
973 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
974 		    || sha->ctx_len < SHA384_DIGEST_SIZE)
975 			return -EINVAL;
976 		block_size = SHA384_BLOCK_SIZE;
977 		break;
978 	case CCP_SHA_TYPE_512:
979 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
980 		    || sha->ctx_len < SHA512_DIGEST_SIZE)
981 			return -EINVAL;
982 		block_size = SHA512_BLOCK_SIZE;
983 		break;
984 	default:
985 		return -EINVAL;
986 	}
987 
988 	if (!sha->ctx)
989 		return -EINVAL;
990 
991 	if (!sha->final && (sha->src_len & (block_size - 1)))
992 		return -EINVAL;
993 
994 	/* The version 3 device can't handle zero-length input */
995 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
996 
997 		if (!sha->src_len) {
998 			unsigned int digest_len;
999 			const u8 *sha_zero;
1000 
1001 			/* Not final, just return */
1002 			if (!sha->final)
1003 				return 0;
1004 
1005 			/* CCP can't do a zero length sha operation so the
1006 			 * caller must buffer the data.
1007 			 */
1008 			if (sha->msg_bits)
1009 				return -EINVAL;
1010 
1011 			/* The CCP cannot perform zero-length sha operations
1012 			 * so the caller is required to buffer data for the
1013 			 * final operation. However, a sha operation for a
1014 			 * message with a total length of zero is valid so
1015 			 * known values are required to supply the result.
1016 			 */
1017 			switch (sha->type) {
1018 			case CCP_SHA_TYPE_1:
1019 				sha_zero = sha1_zero_message_hash;
1020 				digest_len = SHA1_DIGEST_SIZE;
1021 				break;
1022 			case CCP_SHA_TYPE_224:
1023 				sha_zero = sha224_zero_message_hash;
1024 				digest_len = SHA224_DIGEST_SIZE;
1025 				break;
1026 			case CCP_SHA_TYPE_256:
1027 				sha_zero = sha256_zero_message_hash;
1028 				digest_len = SHA256_DIGEST_SIZE;
1029 				break;
1030 			default:
1031 				return -EINVAL;
1032 			}
1033 
1034 			scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1035 						 digest_len, 1);
1036 
1037 			return 0;
1038 		}
1039 	}
1040 
1041 	/* Set variables used throughout */
1042 	switch (sha->type) {
1043 	case CCP_SHA_TYPE_1:
1044 		digest_size = SHA1_DIGEST_SIZE;
1045 		init = (void *) ccp_sha1_init;
1046 		ctx_size = SHA1_DIGEST_SIZE;
1047 		sb_count = 1;
1048 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1049 			ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE;
1050 		else
1051 			ooffset = ioffset = 0;
1052 		break;
1053 	case CCP_SHA_TYPE_224:
1054 		digest_size = SHA224_DIGEST_SIZE;
1055 		init = (void *) ccp_sha224_init;
1056 		ctx_size = SHA256_DIGEST_SIZE;
1057 		sb_count = 1;
1058 		ioffset = 0;
1059 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1060 			ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE;
1061 		else
1062 			ooffset = 0;
1063 		break;
1064 	case CCP_SHA_TYPE_256:
1065 		digest_size = SHA256_DIGEST_SIZE;
1066 		init = (void *) ccp_sha256_init;
1067 		ctx_size = SHA256_DIGEST_SIZE;
1068 		sb_count = 1;
1069 		ooffset = ioffset = 0;
1070 		break;
1071 	case CCP_SHA_TYPE_384:
1072 		digest_size = SHA384_DIGEST_SIZE;
1073 		init = (void *) ccp_sha384_init;
1074 		ctx_size = SHA512_DIGEST_SIZE;
1075 		sb_count = 2;
1076 		ioffset = 0;
1077 		ooffset = 2 * CCP_SB_BYTES - SHA384_DIGEST_SIZE;
1078 		break;
1079 	case CCP_SHA_TYPE_512:
1080 		digest_size = SHA512_DIGEST_SIZE;
1081 		init = (void *) ccp_sha512_init;
1082 		ctx_size = SHA512_DIGEST_SIZE;
1083 		sb_count = 2;
1084 		ooffset = ioffset = 0;
1085 		break;
1086 	default:
1087 		ret = -EINVAL;
1088 		goto e_data;
1089 	}
1090 
1091 	/* For zero-length plaintext the src pointer is ignored;
1092 	 * otherwise both parts must be valid
1093 	 */
1094 	if (sha->src_len && !sha->src)
1095 		return -EINVAL;
1096 
1097 	memset(&op, 0, sizeof(op));
1098 	op.cmd_q = cmd_q;
1099 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1100 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
1101 	op.u.sha.type = sha->type;
1102 	op.u.sha.msg_bits = sha->msg_bits;
1103 
1104 	/* For SHA1/224/256 the context fits in a single (32-byte) SB entry;
1105 	 * SHA384/512 require 2 adjacent SB slots, with the right half in the
1106 	 * first slot, and the left half in the second. Each portion must then
1107 	 * be in little endian format: use the 256-bit byte swap option.
1108 	 */
1109 	ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES,
1110 				   DMA_BIDIRECTIONAL);
1111 	if (ret)
1112 		return ret;
1113 	if (sha->first) {
1114 		switch (sha->type) {
1115 		case CCP_SHA_TYPE_1:
1116 		case CCP_SHA_TYPE_224:
1117 		case CCP_SHA_TYPE_256:
1118 			memcpy(ctx.address + ioffset, init, ctx_size);
1119 			break;
1120 		case CCP_SHA_TYPE_384:
1121 		case CCP_SHA_TYPE_512:
1122 			memcpy(ctx.address + ctx_size / 2, init,
1123 			       ctx_size / 2);
1124 			memcpy(ctx.address, init + ctx_size / 2,
1125 			       ctx_size / 2);
1126 			break;
1127 		default:
1128 			ret = -EINVAL;
1129 			goto e_ctx;
1130 		}
1131 	} else {
1132 		/* Restore the context */
1133 		ccp_set_dm_area(&ctx, 0, sha->ctx, 0,
1134 				sb_count * CCP_SB_BYTES);
1135 	}
1136 
1137 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1138 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1139 	if (ret) {
1140 		cmd->engine_error = cmd_q->cmd_error;
1141 		goto e_ctx;
1142 	}
1143 
1144 	if (sha->src) {
1145 		/* Send data to the CCP SHA engine; block_size is set above */
1146 		ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1147 				    block_size, DMA_TO_DEVICE);
1148 		if (ret)
1149 			goto e_ctx;
1150 
1151 		while (src.sg_wa.bytes_left) {
1152 			ccp_prepare_data(&src, NULL, &op, block_size, false);
1153 			if (sha->final && !src.sg_wa.bytes_left)
1154 				op.eom = 1;
1155 
1156 			ret = cmd_q->ccp->vdata->perform->sha(&op);
1157 			if (ret) {
1158 				cmd->engine_error = cmd_q->cmd_error;
1159 				goto e_data;
1160 			}
1161 
1162 			ccp_process_data(&src, NULL, &op);
1163 		}
1164 	} else {
1165 		op.eom = 1;
1166 		ret = cmd_q->ccp->vdata->perform->sha(&op);
1167 		if (ret) {
1168 			cmd->engine_error = cmd_q->cmd_error;
1169 			goto e_data;
1170 		}
1171 	}
1172 
1173 	/* Retrieve the SHA context - convert from LE to BE using
1174 	 * 32-byte (256-bit) byteswapping to BE
1175 	 */
1176 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1177 			       CCP_PASSTHRU_BYTESWAP_256BIT);
1178 	if (ret) {
1179 		cmd->engine_error = cmd_q->cmd_error;
1180 		goto e_data;
1181 	}
1182 
1183 	if (sha->final) {
1184 		/* Finishing up, so get the digest */
1185 		switch (sha->type) {
1186 		case CCP_SHA_TYPE_1:
1187 		case CCP_SHA_TYPE_224:
1188 		case CCP_SHA_TYPE_256:
1189 			ccp_get_dm_area(&ctx, ooffset,
1190 					sha->ctx, 0,
1191 					digest_size);
1192 			break;
1193 		case CCP_SHA_TYPE_384:
1194 		case CCP_SHA_TYPE_512:
1195 			ccp_get_dm_area(&ctx, 0,
1196 					sha->ctx, LSB_ITEM_SIZE - ooffset,
1197 					LSB_ITEM_SIZE);
1198 			ccp_get_dm_area(&ctx, LSB_ITEM_SIZE + ooffset,
1199 					sha->ctx, 0,
1200 					LSB_ITEM_SIZE - ooffset);
1201 			break;
1202 		default:
1203 			ret = -EINVAL;
1204 			goto e_ctx;
1205 		}
1206 	} else {
1207 		/* Stash the context */
1208 		ccp_get_dm_area(&ctx, 0, sha->ctx, 0,
1209 				sb_count * CCP_SB_BYTES);
1210 	}
1211 
1212 	if (sha->final && sha->opad) {
1213 		/* HMAC operation, recursively perform final SHA */
1214 		struct ccp_cmd hmac_cmd;
1215 		struct scatterlist sg;
1216 		u8 *hmac_buf;
1217 
1218 		if (sha->opad_len != block_size) {
1219 			ret = -EINVAL;
1220 			goto e_data;
1221 		}
1222 
1223 		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1224 		if (!hmac_buf) {
1225 			ret = -ENOMEM;
1226 			goto e_data;
1227 		}
1228 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1229 
1230 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1231 		switch (sha->type) {
1232 		case CCP_SHA_TYPE_1:
1233 		case CCP_SHA_TYPE_224:
1234 		case CCP_SHA_TYPE_256:
1235 			memcpy(hmac_buf + block_size,
1236 			       ctx.address + ooffset,
1237 			       digest_size);
1238 			break;
1239 		case CCP_SHA_TYPE_384:
1240 		case CCP_SHA_TYPE_512:
1241 			memcpy(hmac_buf + block_size,
1242 			       ctx.address + LSB_ITEM_SIZE + ooffset,
1243 			       LSB_ITEM_SIZE);
1244 			memcpy(hmac_buf + block_size +
1245 			       (LSB_ITEM_SIZE - ooffset),
1246 			       ctx.address,
1247 			       LSB_ITEM_SIZE);
1248 			break;
1249 		default:
1250 			ret = -EINVAL;
1251 			goto e_ctx;
1252 		}
1253 
1254 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1255 		hmac_cmd.engine = CCP_ENGINE_SHA;
1256 		hmac_cmd.u.sha.type = sha->type;
1257 		hmac_cmd.u.sha.ctx = sha->ctx;
1258 		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1259 		hmac_cmd.u.sha.src = &sg;
1260 		hmac_cmd.u.sha.src_len = block_size + digest_size;
1261 		hmac_cmd.u.sha.opad = NULL;
1262 		hmac_cmd.u.sha.opad_len = 0;
1263 		hmac_cmd.u.sha.first = 1;
1264 		hmac_cmd.u.sha.final = 1;
1265 		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1266 
1267 		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1268 		if (ret)
1269 			cmd->engine_error = hmac_cmd.engine_error;
1270 
1271 		kfree(hmac_buf);
1272 	}
1273 
1274 e_data:
1275 	if (sha->src)
1276 		ccp_free_data(&src, cmd_q);
1277 
1278 e_ctx:
1279 	ccp_dm_free(&ctx);
1280 
1281 	return ret;
1282 }
1283 
1284 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1285 {
1286 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1287 	struct ccp_dm_workarea exp, src;
1288 	struct ccp_data dst;
1289 	struct ccp_op op;
1290 	unsigned int sb_count, i_len, o_len;
1291 	int ret;
1292 
1293 	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
1294 		return -EINVAL;
1295 
1296 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1297 		return -EINVAL;
1298 
1299 	/* The RSA modulus must precede the message being acted upon, so
1300 	 * it must be copied to a DMA area where the message and the
1301 	 * modulus can be concatenated.  Therefore the input buffer
1302 	 * length required is twice the output buffer length (which
1303 	 * must be a multiple of 256-bits).
1304 	 */
1305 	o_len = ((rsa->key_size + 255) / 256) * 32;
1306 	i_len = o_len * 2;
1307 
1308 	sb_count = o_len / CCP_SB_BYTES;
1309 
1310 	memset(&op, 0, sizeof(op));
1311 	op.cmd_q = cmd_q;
1312 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1313 	op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q, sb_count);
1314 
1315 	if (!op.sb_key)
1316 		return -EIO;
1317 
1318 	/* The RSA exponent may span multiple (32-byte) SB entries and must
1319 	 * be in little endian format. Reverse copy each 32-byte chunk
1320 	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
1321 	 * and each byte within that chunk and do not perform any byte swap
1322 	 * operations on the passthru operation.
1323 	 */
1324 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1325 	if (ret)
1326 		goto e_sb;
1327 
1328 	ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len);
1329 	if (ret)
1330 		goto e_exp;
1331 	ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
1332 			     CCP_PASSTHRU_BYTESWAP_NOOP);
1333 	if (ret) {
1334 		cmd->engine_error = cmd_q->cmd_error;
1335 		goto e_exp;
1336 	}
1337 
1338 	/* Concatenate the modulus and the message. Both the modulus and
1339 	 * the operands must be in little endian format.  Since the input
1340 	 * is in big endian format it must be converted.
1341 	 */
1342 	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1343 	if (ret)
1344 		goto e_exp;
1345 
1346 	ret = ccp_reverse_set_dm_area(&src, 0, rsa->mod, 0, rsa->mod_len);
1347 	if (ret)
1348 		goto e_src;
1349 	ret = ccp_reverse_set_dm_area(&src, o_len, rsa->src, 0, rsa->src_len);
1350 	if (ret)
1351 		goto e_src;
1352 
1353 	/* Prepare the output area for the operation */
1354 	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
1355 			    o_len, DMA_FROM_DEVICE);
1356 	if (ret)
1357 		goto e_src;
1358 
1359 	op.soc = 1;
1360 	op.src.u.dma.address = src.dma.address;
1361 	op.src.u.dma.offset = 0;
1362 	op.src.u.dma.length = i_len;
1363 	op.dst.u.dma.address = dst.dm_wa.dma.address;
1364 	op.dst.u.dma.offset = 0;
1365 	op.dst.u.dma.length = o_len;
1366 
1367 	op.u.rsa.mod_size = rsa->key_size;
1368 	op.u.rsa.input_len = i_len;
1369 
1370 	ret = cmd_q->ccp->vdata->perform->rsa(&op);
1371 	if (ret) {
1372 		cmd->engine_error = cmd_q->cmd_error;
1373 		goto e_dst;
1374 	}
1375 
1376 	ccp_reverse_get_dm_area(&dst.dm_wa, 0, rsa->dst, 0, rsa->mod_len);
1377 
1378 e_dst:
1379 	ccp_free_data(&dst, cmd_q);
1380 
1381 e_src:
1382 	ccp_dm_free(&src);
1383 
1384 e_exp:
1385 	ccp_dm_free(&exp);
1386 
1387 e_sb:
1388 	cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
1389 
1390 	return ret;
1391 }
1392 
1393 static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1394 				struct ccp_cmd *cmd)
1395 {
1396 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1397 	struct ccp_dm_workarea mask;
1398 	struct ccp_data src, dst;
1399 	struct ccp_op op;
1400 	bool in_place = false;
1401 	unsigned int i;
1402 	int ret = 0;
1403 
1404 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1405 		return -EINVAL;
1406 
1407 	if (!pt->src || !pt->dst)
1408 		return -EINVAL;
1409 
1410 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1411 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1412 			return -EINVAL;
1413 		if (!pt->mask)
1414 			return -EINVAL;
1415 	}
1416 
1417 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
1418 
1419 	memset(&op, 0, sizeof(op));
1420 	op.cmd_q = cmd_q;
1421 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1422 
1423 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1424 		/* Load the mask */
1425 		op.sb_key = cmd_q->sb_key;
1426 
1427 		ret = ccp_init_dm_workarea(&mask, cmd_q,
1428 					   CCP_PASSTHRU_SB_COUNT *
1429 					   CCP_SB_BYTES,
1430 					   DMA_TO_DEVICE);
1431 		if (ret)
1432 			return ret;
1433 
1434 		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1435 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
1436 				     CCP_PASSTHRU_BYTESWAP_NOOP);
1437 		if (ret) {
1438 			cmd->engine_error = cmd_q->cmd_error;
1439 			goto e_mask;
1440 		}
1441 	}
1442 
1443 	/* Prepare the input and output data workareas. For in-place
1444 	 * operations we need to set the dma direction to BIDIRECTIONAL
1445 	 * and copy the src workarea to the dst workarea.
1446 	 */
1447 	if (sg_virt(pt->src) == sg_virt(pt->dst))
1448 		in_place = true;
1449 
1450 	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1451 			    CCP_PASSTHRU_MASKSIZE,
1452 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1453 	if (ret)
1454 		goto e_mask;
1455 
1456 	if (in_place) {
1457 		dst = src;
1458 	} else {
1459 		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1460 				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1461 		if (ret)
1462 			goto e_src;
1463 	}
1464 
1465 	/* Send data to the CCP Passthru engine
1466 	 *   Because the CCP engine works on a single source and destination
1467 	 *   dma address at a time, each entry in the source scatterlist
1468 	 *   (after the dma_map_sg call) must be less than or equal to the
1469 	 *   (remaining) length in the destination scatterlist entry and the
1470 	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1471 	 */
1472 	dst.sg_wa.sg_used = 0;
1473 	for (i = 1; i <= src.sg_wa.dma_count; i++) {
1474 		if (!dst.sg_wa.sg ||
1475 		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
1476 			ret = -EINVAL;
1477 			goto e_dst;
1478 		}
1479 
1480 		if (i == src.sg_wa.dma_count) {
1481 			op.eom = 1;
1482 			op.soc = 1;
1483 		}
1484 
1485 		op.src.type = CCP_MEMTYPE_SYSTEM;
1486 		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
1487 		op.src.u.dma.offset = 0;
1488 		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
1489 
1490 		op.dst.type = CCP_MEMTYPE_SYSTEM;
1491 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
1492 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
1493 		op.dst.u.dma.length = op.src.u.dma.length;
1494 
1495 		ret = cmd_q->ccp->vdata->perform->passthru(&op);
1496 		if (ret) {
1497 			cmd->engine_error = cmd_q->cmd_error;
1498 			goto e_dst;
1499 		}
1500 
1501 		dst.sg_wa.sg_used += src.sg_wa.sg->length;
1502 		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
1503 			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
1504 			dst.sg_wa.sg_used = 0;
1505 		}
1506 		src.sg_wa.sg = sg_next(src.sg_wa.sg);
1507 	}
1508 
1509 e_dst:
1510 	if (!in_place)
1511 		ccp_free_data(&dst, cmd_q);
1512 
1513 e_src:
1514 	ccp_free_data(&src, cmd_q);
1515 
1516 e_mask:
1517 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
1518 		ccp_dm_free(&mask);
1519 
1520 	return ret;
1521 }
1522 
1523 static int ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
1524 				      struct ccp_cmd *cmd)
1525 {
1526 	struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap;
1527 	struct ccp_dm_workarea mask;
1528 	struct ccp_op op;
1529 	int ret;
1530 
1531 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1532 		return -EINVAL;
1533 
1534 	if (!pt->src_dma || !pt->dst_dma)
1535 		return -EINVAL;
1536 
1537 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1538 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1539 			return -EINVAL;
1540 		if (!pt->mask)
1541 			return -EINVAL;
1542 	}
1543 
1544 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
1545 
1546 	memset(&op, 0, sizeof(op));
1547 	op.cmd_q = cmd_q;
1548 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1549 
1550 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1551 		/* Load the mask */
1552 		op.sb_key = cmd_q->sb_key;
1553 
1554 		mask.length = pt->mask_len;
1555 		mask.dma.address = pt->mask;
1556 		mask.dma.length = pt->mask_len;
1557 
1558 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
1559 				     CCP_PASSTHRU_BYTESWAP_NOOP);
1560 		if (ret) {
1561 			cmd->engine_error = cmd_q->cmd_error;
1562 			return ret;
1563 		}
1564 	}
1565 
1566 	/* Send data to the CCP Passthru engine */
1567 	op.eom = 1;
1568 	op.soc = 1;
1569 
1570 	op.src.type = CCP_MEMTYPE_SYSTEM;
1571 	op.src.u.dma.address = pt->src_dma;
1572 	op.src.u.dma.offset = 0;
1573 	op.src.u.dma.length = pt->src_len;
1574 
1575 	op.dst.type = CCP_MEMTYPE_SYSTEM;
1576 	op.dst.u.dma.address = pt->dst_dma;
1577 	op.dst.u.dma.offset = 0;
1578 	op.dst.u.dma.length = pt->src_len;
1579 
1580 	ret = cmd_q->ccp->vdata->perform->passthru(&op);
1581 	if (ret)
1582 		cmd->engine_error = cmd_q->cmd_error;
1583 
1584 	return ret;
1585 }
1586 
1587 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1588 {
1589 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1590 	struct ccp_dm_workarea src, dst;
1591 	struct ccp_op op;
1592 	int ret;
1593 	u8 *save;
1594 
1595 	if (!ecc->u.mm.operand_1 ||
1596 	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
1597 		return -EINVAL;
1598 
1599 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
1600 		if (!ecc->u.mm.operand_2 ||
1601 		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
1602 			return -EINVAL;
1603 
1604 	if (!ecc->u.mm.result ||
1605 	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
1606 		return -EINVAL;
1607 
1608 	memset(&op, 0, sizeof(op));
1609 	op.cmd_q = cmd_q;
1610 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1611 
1612 	/* Concatenate the modulus and the operands. Both the modulus and
1613 	 * the operands must be in little endian format.  Since the input
1614 	 * is in big endian format it must be converted and placed in a
1615 	 * fixed length buffer.
1616 	 */
1617 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1618 				   DMA_TO_DEVICE);
1619 	if (ret)
1620 		return ret;
1621 
1622 	/* Save the workarea address since it is updated in order to perform
1623 	 * the concatenation
1624 	 */
1625 	save = src.address;
1626 
1627 	/* Copy the ECC modulus */
1628 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
1629 	if (ret)
1630 		goto e_src;
1631 	src.address += CCP_ECC_OPERAND_SIZE;
1632 
1633 	/* Copy the first operand */
1634 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_1, 0,
1635 				      ecc->u.mm.operand_1_len);
1636 	if (ret)
1637 		goto e_src;
1638 	src.address += CCP_ECC_OPERAND_SIZE;
1639 
1640 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
1641 		/* Copy the second operand */
1642 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_2, 0,
1643 					      ecc->u.mm.operand_2_len);
1644 		if (ret)
1645 			goto e_src;
1646 		src.address += CCP_ECC_OPERAND_SIZE;
1647 	}
1648 
1649 	/* Restore the workarea address */
1650 	src.address = save;
1651 
1652 	/* Prepare the output area for the operation */
1653 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1654 				   DMA_FROM_DEVICE);
1655 	if (ret)
1656 		goto e_src;
1657 
1658 	op.soc = 1;
1659 	op.src.u.dma.address = src.dma.address;
1660 	op.src.u.dma.offset = 0;
1661 	op.src.u.dma.length = src.length;
1662 	op.dst.u.dma.address = dst.dma.address;
1663 	op.dst.u.dma.offset = 0;
1664 	op.dst.u.dma.length = dst.length;
1665 
1666 	op.u.ecc.function = cmd->u.ecc.function;
1667 
1668 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
1669 	if (ret) {
1670 		cmd->engine_error = cmd_q->cmd_error;
1671 		goto e_dst;
1672 	}
1673 
1674 	ecc->ecc_result = le16_to_cpup(
1675 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1676 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1677 		ret = -EIO;
1678 		goto e_dst;
1679 	}
1680 
1681 	/* Save the ECC result */
1682 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.mm.result, 0,
1683 				CCP_ECC_MODULUS_BYTES);
1684 
1685 e_dst:
1686 	ccp_dm_free(&dst);
1687 
1688 e_src:
1689 	ccp_dm_free(&src);
1690 
1691 	return ret;
1692 }
1693 
1694 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1695 {
1696 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1697 	struct ccp_dm_workarea src, dst;
1698 	struct ccp_op op;
1699 	int ret;
1700 	u8 *save;
1701 
1702 	if (!ecc->u.pm.point_1.x ||
1703 	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
1704 	    !ecc->u.pm.point_1.y ||
1705 	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
1706 		return -EINVAL;
1707 
1708 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1709 		if (!ecc->u.pm.point_2.x ||
1710 		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
1711 		    !ecc->u.pm.point_2.y ||
1712 		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
1713 			return -EINVAL;
1714 	} else {
1715 		if (!ecc->u.pm.domain_a ||
1716 		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
1717 			return -EINVAL;
1718 
1719 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
1720 			if (!ecc->u.pm.scalar ||
1721 			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
1722 				return -EINVAL;
1723 	}
1724 
1725 	if (!ecc->u.pm.result.x ||
1726 	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
1727 	    !ecc->u.pm.result.y ||
1728 	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
1729 		return -EINVAL;
1730 
1731 	memset(&op, 0, sizeof(op));
1732 	op.cmd_q = cmd_q;
1733 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1734 
1735 	/* Concatenate the modulus and the operands. Both the modulus and
1736 	 * the operands must be in little endian format.  Since the input
1737 	 * is in big endian format it must be converted and placed in a
1738 	 * fixed length buffer.
1739 	 */
1740 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1741 				   DMA_TO_DEVICE);
1742 	if (ret)
1743 		return ret;
1744 
1745 	/* Save the workarea address since it is updated in order to perform
1746 	 * the concatenation
1747 	 */
1748 	save = src.address;
1749 
1750 	/* Copy the ECC modulus */
1751 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
1752 	if (ret)
1753 		goto e_src;
1754 	src.address += CCP_ECC_OPERAND_SIZE;
1755 
1756 	/* Copy the first point X and Y coordinate */
1757 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.x, 0,
1758 				      ecc->u.pm.point_1.x_len);
1759 	if (ret)
1760 		goto e_src;
1761 	src.address += CCP_ECC_OPERAND_SIZE;
1762 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.y, 0,
1763 				      ecc->u.pm.point_1.y_len);
1764 	if (ret)
1765 		goto e_src;
1766 	src.address += CCP_ECC_OPERAND_SIZE;
1767 
1768 	/* Set the first point Z coordinate to 1 */
1769 	*src.address = 0x01;
1770 	src.address += CCP_ECC_OPERAND_SIZE;
1771 
1772 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1773 		/* Copy the second point X and Y coordinate */
1774 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.x, 0,
1775 					      ecc->u.pm.point_2.x_len);
1776 		if (ret)
1777 			goto e_src;
1778 		src.address += CCP_ECC_OPERAND_SIZE;
1779 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.y, 0,
1780 					      ecc->u.pm.point_2.y_len);
1781 		if (ret)
1782 			goto e_src;
1783 		src.address += CCP_ECC_OPERAND_SIZE;
1784 
1785 		/* Set the second point Z coordinate to 1 */
1786 		*src.address = 0x01;
1787 		src.address += CCP_ECC_OPERAND_SIZE;
1788 	} else {
1789 		/* Copy the Domain "a" parameter */
1790 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.domain_a, 0,
1791 					      ecc->u.pm.domain_a_len);
1792 		if (ret)
1793 			goto e_src;
1794 		src.address += CCP_ECC_OPERAND_SIZE;
1795 
1796 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
1797 			/* Copy the scalar value */
1798 			ret = ccp_reverse_set_dm_area(&src, 0,
1799 						      ecc->u.pm.scalar, 0,
1800 						      ecc->u.pm.scalar_len);
1801 			if (ret)
1802 				goto e_src;
1803 			src.address += CCP_ECC_OPERAND_SIZE;
1804 		}
1805 	}
1806 
1807 	/* Restore the workarea address */
1808 	src.address = save;
1809 
1810 	/* Prepare the output area for the operation */
1811 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1812 				   DMA_FROM_DEVICE);
1813 	if (ret)
1814 		goto e_src;
1815 
1816 	op.soc = 1;
1817 	op.src.u.dma.address = src.dma.address;
1818 	op.src.u.dma.offset = 0;
1819 	op.src.u.dma.length = src.length;
1820 	op.dst.u.dma.address = dst.dma.address;
1821 	op.dst.u.dma.offset = 0;
1822 	op.dst.u.dma.length = dst.length;
1823 
1824 	op.u.ecc.function = cmd->u.ecc.function;
1825 
1826 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
1827 	if (ret) {
1828 		cmd->engine_error = cmd_q->cmd_error;
1829 		goto e_dst;
1830 	}
1831 
1832 	ecc->ecc_result = le16_to_cpup(
1833 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1834 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1835 		ret = -EIO;
1836 		goto e_dst;
1837 	}
1838 
1839 	/* Save the workarea address since it is updated as we walk through
1840 	 * to copy the point math result
1841 	 */
1842 	save = dst.address;
1843 
1844 	/* Save the ECC result X and Y coordinates */
1845 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.x, 0,
1846 				CCP_ECC_MODULUS_BYTES);
1847 	dst.address += CCP_ECC_OUTPUT_SIZE;
1848 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.y, 0,
1849 				CCP_ECC_MODULUS_BYTES);
1850 	dst.address += CCP_ECC_OUTPUT_SIZE;
1851 
1852 	/* Restore the workarea address */
1853 	dst.address = save;
1854 
1855 e_dst:
1856 	ccp_dm_free(&dst);
1857 
1858 e_src:
1859 	ccp_dm_free(&src);
1860 
1861 	return ret;
1862 }
1863 
1864 static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1865 {
1866 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1867 
1868 	ecc->ecc_result = 0;
1869 
1870 	if (!ecc->mod ||
1871 	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
1872 		return -EINVAL;
1873 
1874 	switch (ecc->function) {
1875 	case CCP_ECC_FUNCTION_MMUL_384BIT:
1876 	case CCP_ECC_FUNCTION_MADD_384BIT:
1877 	case CCP_ECC_FUNCTION_MINV_384BIT:
1878 		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
1879 
1880 	case CCP_ECC_FUNCTION_PADD_384BIT:
1881 	case CCP_ECC_FUNCTION_PMUL_384BIT:
1882 	case CCP_ECC_FUNCTION_PDBL_384BIT:
1883 		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
1884 
1885 	default:
1886 		return -EINVAL;
1887 	}
1888 }
1889 
1890 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1891 {
1892 	int ret;
1893 
1894 	cmd->engine_error = 0;
1895 	cmd_q->cmd_error = 0;
1896 	cmd_q->int_rcvd = 0;
1897 	cmd_q->free_slots = cmd_q->ccp->vdata->perform->get_free_slots(cmd_q);
1898 
1899 	switch (cmd->engine) {
1900 	case CCP_ENGINE_AES:
1901 		ret = ccp_run_aes_cmd(cmd_q, cmd);
1902 		break;
1903 	case CCP_ENGINE_XTS_AES_128:
1904 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
1905 		break;
1906 	case CCP_ENGINE_SHA:
1907 		ret = ccp_run_sha_cmd(cmd_q, cmd);
1908 		break;
1909 	case CCP_ENGINE_RSA:
1910 		ret = ccp_run_rsa_cmd(cmd_q, cmd);
1911 		break;
1912 	case CCP_ENGINE_PASSTHRU:
1913 		if (cmd->flags & CCP_CMD_PASSTHRU_NO_DMA_MAP)
1914 			ret = ccp_run_passthru_nomap_cmd(cmd_q, cmd);
1915 		else
1916 			ret = ccp_run_passthru_cmd(cmd_q, cmd);
1917 		break;
1918 	case CCP_ENGINE_ECC:
1919 		ret = ccp_run_ecc_cmd(cmd_q, cmd);
1920 		break;
1921 	default:
1922 		ret = -EINVAL;
1923 	}
1924 
1925 	return ret;
1926 }
1927