xref: /linux/drivers/crypto/ccp/ccp-ops.c (revision 186779c036468038b0d077ec5333a51512f867e5)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * AMD Cryptographic Coprocessor (CCP) driver
4  *
5  * Copyright (C) 2013-2019 Advanced Micro Devices, Inc.
6  *
7  * Author: Tom Lendacky <thomas.lendacky@amd.com>
8  * Author: Gary R Hook <gary.hook@amd.com>
9  */
10 
11 #include <crypto/des.h>
12 #include <crypto/scatterwalk.h>
13 #include <crypto/utils.h>
14 #include <linux/ccp.h>
15 #include <linux/dma-mapping.h>
16 #include <linux/errno.h>
17 #include <linux/kernel.h>
18 #include <linux/module.h>
19 
20 #include "ccp-dev.h"
21 
/* SHA initial context values
 *
 * Each table lists an algorithm's initial hash words (H0, H1, ...) with
 * every word converted to big endian by cpu_to_be32()/cpu_to_be64().
 */
static const __be32 ccp_sha1_init[SHA1_DIGEST_SIZE / sizeof(__be32)] = {
	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
	cpu_to_be32(SHA1_H4),
};

/* Sized with SHA256_DIGEST_SIZE: the table carries eight 32-bit words */
static const __be32 ccp_sha224_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
};

static const __be32 ccp_sha256_init[SHA256_DIGEST_SIZE / sizeof(__be32)] = {
	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
};

/* Sized with SHA512_DIGEST_SIZE: the table carries eight 64-bit words */
static const __be64 ccp_sha384_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
	cpu_to_be64(SHA384_H0), cpu_to_be64(SHA384_H1),
	cpu_to_be64(SHA384_H2), cpu_to_be64(SHA384_H3),
	cpu_to_be64(SHA384_H4), cpu_to_be64(SHA384_H5),
	cpu_to_be64(SHA384_H6), cpu_to_be64(SHA384_H7),
};

static const __be64 ccp_sha512_init[SHA512_DIGEST_SIZE / sizeof(__be64)] = {
	cpu_to_be64(SHA512_H0), cpu_to_be64(SHA512_H1),
	cpu_to_be64(SHA512_H2), cpu_to_be64(SHA512_H3),
	cpu_to_be64(SHA512_H4), cpu_to_be64(SHA512_H5),
	cpu_to_be64(SHA512_H6), cpu_to_be64(SHA512_H7),
};
56 
/* Produce a job id for a new command: a freshly generated id on version 3
 * devices, 0 on every other version. The argument is parenthesized so that
 * any pointer-valued expression may be passed.
 */
#define	CCP_NEW_JOBID(ccp)	(((ccp)->vdata->version == CCP_VERSION(3, 0)) ? \
					ccp_gen_jobid(ccp) : 0)
59 
60 static u32 ccp_gen_jobid(struct ccp_device *ccp)
61 {
62 	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
63 }
64 
65 static void ccp_sg_free(struct ccp_sg_workarea *wa)
66 {
67 	if (wa->dma_count)
68 		dma_unmap_sg(wa->dma_dev, wa->dma_sg_head, wa->nents, wa->dma_dir);
69 
70 	wa->dma_count = 0;
71 }
72 
73 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
74 				struct scatterlist *sg, u64 len,
75 				enum dma_data_direction dma_dir)
76 {
77 	memset(wa, 0, sizeof(*wa));
78 
79 	wa->sg = sg;
80 	if (!sg)
81 		return 0;
82 
83 	wa->nents = sg_nents_for_len(sg, len);
84 	if (wa->nents < 0)
85 		return wa->nents;
86 
87 	wa->bytes_left = len;
88 	wa->sg_used = 0;
89 
90 	if (len == 0)
91 		return 0;
92 
93 	if (dma_dir == DMA_NONE)
94 		return 0;
95 
96 	wa->dma_sg = sg;
97 	wa->dma_sg_head = sg;
98 	wa->dma_dev = dev;
99 	wa->dma_dir = dma_dir;
100 	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
101 	if (!wa->dma_count)
102 		return -ENOMEM;
103 
104 	return 0;
105 }
106 
107 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
108 {
109 	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
110 	unsigned int sg_combined_len = 0;
111 
112 	if (!wa->sg)
113 		return;
114 
115 	wa->sg_used += nbytes;
116 	wa->bytes_left -= nbytes;
117 	if (wa->sg_used == sg_dma_len(wa->dma_sg)) {
118 		/* Advance to the next DMA scatterlist entry */
119 		wa->dma_sg = sg_next(wa->dma_sg);
120 
121 		/* In the case that the DMA mapped scatterlist has entries
122 		 * that have been merged, the non-DMA mapped scatterlist
123 		 * must be advanced multiple times for each merged entry.
124 		 * This ensures that the current non-DMA mapped entry
125 		 * corresponds to the current DMA mapped entry.
126 		 */
127 		do {
128 			sg_combined_len += wa->sg->length;
129 			wa->sg = sg_next(wa->sg);
130 		} while (wa->sg_used > sg_combined_len);
131 
132 		wa->sg_used = 0;
133 	}
134 }
135 
136 static void ccp_dm_free(struct ccp_dm_workarea *wa)
137 {
138 	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
139 		if (wa->address)
140 			dma_pool_free(wa->dma_pool, wa->address,
141 				      wa->dma.address);
142 	} else {
143 		if (wa->dma.address)
144 			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
145 					 wa->dma.dir);
146 		kfree(wa->address);
147 	}
148 
149 	wa->address = NULL;
150 	wa->dma.address = 0;
151 }
152 
153 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
154 				struct ccp_cmd_queue *cmd_q,
155 				unsigned int len,
156 				enum dma_data_direction dir)
157 {
158 	memset(wa, 0, sizeof(*wa));
159 
160 	if (!len)
161 		return 0;
162 
163 	wa->dev = cmd_q->ccp->dev;
164 	wa->length = len;
165 
166 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
167 		wa->dma_pool = cmd_q->dma_pool;
168 
169 		wa->address = dma_pool_zalloc(wa->dma_pool, GFP_KERNEL,
170 					     &wa->dma.address);
171 		if (!wa->address)
172 			return -ENOMEM;
173 
174 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
175 
176 	} else {
177 		wa->address = kzalloc(len, GFP_KERNEL);
178 		if (!wa->address)
179 			return -ENOMEM;
180 
181 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
182 						 dir);
183 		if (dma_mapping_error(wa->dev, wa->dma.address)) {
184 			kfree(wa->address);
185 			wa->address = NULL;
186 			return -ENOMEM;
187 		}
188 
189 		wa->dma.length = len;
190 	}
191 	wa->dma.dir = dir;
192 
193 	return 0;
194 }
195 
196 static int ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
197 			   struct scatterlist *sg, unsigned int sg_offset,
198 			   unsigned int len)
199 {
200 	WARN_ON(!wa->address);
201 
202 	if (len > (wa->length - wa_offset))
203 		return -EINVAL;
204 
205 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
206 				 0);
207 	return 0;
208 }
209 
210 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
211 			    struct scatterlist *sg, unsigned int sg_offset,
212 			    unsigned int len)
213 {
214 	WARN_ON(!wa->address);
215 
216 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
217 				 1);
218 }
219 
220 static int ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
221 				   unsigned int wa_offset,
222 				   struct scatterlist *sg,
223 				   unsigned int sg_offset,
224 				   unsigned int len)
225 {
226 	u8 *p, *q;
227 	int	rc;
228 
229 	rc = ccp_set_dm_area(wa, wa_offset, sg, sg_offset, len);
230 	if (rc)
231 		return rc;
232 
233 	p = wa->address + wa_offset;
234 	q = p + len - 1;
235 	while (p < q) {
236 		*p = *p ^ *q;
237 		*q = *p ^ *q;
238 		*p = *p ^ *q;
239 		p++;
240 		q--;
241 	}
242 	return 0;
243 }
244 
245 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
246 				    unsigned int wa_offset,
247 				    struct scatterlist *sg,
248 				    unsigned int sg_offset,
249 				    unsigned int len)
250 {
251 	u8 *p, *q;
252 
253 	p = wa->address + wa_offset;
254 	q = p + len - 1;
255 	while (p < q) {
256 		*p = *p ^ *q;
257 		*q = *p ^ *q;
258 		*p = *p ^ *q;
259 		p++;
260 		q--;
261 	}
262 
263 	ccp_get_dm_area(wa, wa_offset, sg, sg_offset, len);
264 }
265 
266 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
267 {
268 	ccp_dm_free(&data->dm_wa);
269 	ccp_sg_free(&data->sg_wa);
270 }
271 
272 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
273 			 struct scatterlist *sg, u64 sg_len,
274 			 unsigned int dm_len,
275 			 enum dma_data_direction dir)
276 {
277 	int ret;
278 
279 	memset(data, 0, sizeof(*data));
280 
281 	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
282 				   dir);
283 	if (ret)
284 		goto e_err;
285 
286 	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
287 	if (ret)
288 		goto e_err;
289 
290 	return 0;
291 
292 e_err:
293 	ccp_free_data(data, cmd_q);
294 
295 	return ret;
296 }
297 
298 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
299 {
300 	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
301 	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
302 	unsigned int buf_count, nbytes;
303 
304 	/* Clear the buffer if setting it */
305 	if (!from)
306 		memset(dm_wa->address, 0, dm_wa->length);
307 
308 	if (!sg_wa->sg)
309 		return 0;
310 
311 	/* Perform the copy operation
312 	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
313 	 *   an unsigned int
314 	 */
315 	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
316 	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
317 				 nbytes, from);
318 
319 	/* Update the structures and generate the count */
320 	buf_count = 0;
321 	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
322 		nbytes = min(sg_dma_len(sg_wa->dma_sg) - sg_wa->sg_used,
323 			     dm_wa->length - buf_count);
324 		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
325 
326 		buf_count += nbytes;
327 		ccp_update_sg_workarea(sg_wa, nbytes);
328 	}
329 
330 	return buf_count;
331 }
332 
/* Fill the bounce buffer of @data from its scatterlist; returns the byte
 * count reported by ccp_queue_buf().
 */
static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
{
	unsigned int filled = ccp_queue_buf(data, 0);

	return filled;
}
337 
/* Write the bounce buffer of @data out to its scatterlist; returns the
 * byte count reported by ccp_queue_buf().
 */
static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
{
	unsigned int drained = ccp_queue_buf(data, 1);

	return drained;
}
342 
/* Fill in the source (and, if @dst is given, destination) DMA descriptors
 * of @op for the next chunk of an operation.
 *
 * @src:          source data; its scatterlist position is advanced here
 * @dst:          destination data, or NULL if the operation has no output
 * @op:           operation whose src/dst DMA fields and soc flag are set
 * @block_size:   minimum chunk size; direct chunks are rounded down to a
 *                multiple of it using a mask, so it is presumably a power
 *                of two - TODO confirm against callers
 * @blocksize_op: when the source had to be buffered, use @block_size as
 *                the source length instead of the number of bytes copied
 *
 * When an sg entry holds less than @block_size bytes the bounce buffer
 * (dm_wa) is used in its place and op->soc is set to 1.
 */
static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
			     struct ccp_op *op, unsigned int block_size,
			     bool blocksize_op)
{
	unsigned int sg_src_len, sg_dst_len, op_len;

	/* The CCP can only DMA from/to one address each per operation. This
	 * requires that we find the smallest DMA area between the source
	 * and destination. The resulting len values will always be <= UINT_MAX
	 * because the dma length is an unsigned int.
	 */
	sg_src_len = sg_dma_len(src->sg_wa.dma_sg) - src->sg_wa.sg_used;
	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);

	if (dst) {
		sg_dst_len = sg_dma_len(dst->sg_wa.dma_sg) - dst->sg_wa.sg_used;
		/* NOTE(review): the destination room is clamped with the
		 * source's bytes_left, not the destination's - presumably
		 * intentional since output tracks input; confirm.
		 */
		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
		op_len = min(sg_src_len, sg_dst_len);
	} else {
		op_len = sg_src_len;
	}

	/* The data operation length will be at least block_size in length
	 * or the smaller of available sg room remaining for the source or
	 * the destination
	 */
	op_len = max(op_len, block_size);

	/* Unless we have to buffer data, there's no reason to wait */
	op->soc = 0;

	if (sg_src_len < block_size) {
		/* Not enough data in the sg element, so it
		 * needs to be buffered into a blocksize chunk
		 */
		int cp_len = ccp_fill_queue_buf(src);

		op->soc = 1;
		op->src.u.dma.address = src->dm_wa.dma.address;
		op->src.u.dma.offset = 0;
		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
	} else {
		/* Enough data in the sg element, but we need to
		 * adjust for any previously copied data
		 */
		op->src.u.dma.address = sg_dma_address(src->sg_wa.dma_sg);
		op->src.u.dma.offset = src->sg_wa.sg_used;
		/* Round down to a whole number of blocks */
		op->src.u.dma.length = op_len & ~(block_size - 1);

		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
	}

	if (dst) {
		if (sg_dst_len < block_size) {
			/* Not enough room in the sg element or we're on the
			 * last piece of data (when using padding), so the
			 * output needs to be buffered into a blocksize chunk
			 */
			op->soc = 1;
			op->dst.u.dma.address = dst->dm_wa.dma.address;
			op->dst.u.dma.offset = 0;
			op->dst.u.dma.length = op->src.u.dma.length;
		} else {
			/* Enough room in the sg element, but we need to
			 * adjust for any previously used area
			 */
			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.dma_sg);
			op->dst.u.dma.offset = dst->sg_wa.sg_used;
			op->dst.u.dma.length = op->src.u.dma.length;
		}
	}
}
415 
416 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
417 			     struct ccp_op *op)
418 {
419 	op->init = 0;
420 
421 	if (dst) {
422 		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
423 			ccp_empty_queue_buf(dst);
424 		else
425 			ccp_update_sg_workarea(&dst->sg_wa,
426 					       op->dst.u.dma.length);
427 	}
428 }
429 
430 static int ccp_copy_to_from_sb(struct ccp_cmd_queue *cmd_q,
431 			       struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
432 			       u32 byte_swap, bool from)
433 {
434 	struct ccp_op op;
435 
436 	memset(&op, 0, sizeof(op));
437 
438 	op.cmd_q = cmd_q;
439 	op.jobid = jobid;
440 	op.eom = 1;
441 
442 	if (from) {
443 		op.soc = 1;
444 		op.src.type = CCP_MEMTYPE_SB;
445 		op.src.u.sb = sb;
446 		op.dst.type = CCP_MEMTYPE_SYSTEM;
447 		op.dst.u.dma.address = wa->dma.address;
448 		op.dst.u.dma.length = wa->length;
449 	} else {
450 		op.src.type = CCP_MEMTYPE_SYSTEM;
451 		op.src.u.dma.address = wa->dma.address;
452 		op.src.u.dma.length = wa->length;
453 		op.dst.type = CCP_MEMTYPE_SB;
454 		op.dst.u.sb = sb;
455 	}
456 
457 	op.u.passthru.byte_swap = byte_swap;
458 
459 	return cmd_q->ccp->vdata->perform->passthru(&op);
460 }
461 
462 static int ccp_copy_to_sb(struct ccp_cmd_queue *cmd_q,
463 			  struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
464 			  u32 byte_swap)
465 {
466 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, false);
467 }
468 
469 static int ccp_copy_from_sb(struct ccp_cmd_queue *cmd_q,
470 			    struct ccp_dm_workarea *wa, u32 jobid, u32 sb,
471 			    u32 byte_swap)
472 {
473 	return ccp_copy_to_from_sb(cmd_q, wa, jobid, sb, byte_swap, true);
474 }
475 
/* Run an AES CMAC command on @cmd_q.
 *
 * The key and the IV (running context) are loaded into the queue's key and
 * context storage block entries, the source data is fed to the AES engine
 * chunk by chunk, and the resulting context is copied back into aes->iv.
 * When aes->cmac_final is set, the caller supplied cmac_key is written
 * into the context entry right before the last chunk is processed.
 *
 * Returns 0 on success, -EINVAL for an unsupported key length, a source
 * length that is not a multiple of AES_BLOCK_SIZE, a wrong IV/CMAC key
 * length or a missing buffer, or the error from a failing helper or
 * engine call. On engine failures cmd->engine_error is set from
 * cmd_q->cmd_error.
 */
static noinline_for_stack int
ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_aes_engine *aes = &cmd->u.aes;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src;
	struct ccp_op op;
	unsigned int dm_offset;
	int ret;

	if (!((aes->key_len == AES_KEYSIZE_128) ||
	      (aes->key_len == AES_KEYSIZE_192) ||
	      (aes->key_len == AES_KEYSIZE_256)))
		return -EINVAL;

	/* Only whole blocks are accepted */
	if (aes->src_len & (AES_BLOCK_SIZE - 1))
		return -EINVAL;

	if (aes->iv_len != AES_BLOCK_SIZE)
		return -EINVAL;

	if (!aes->key || !aes->iv || !aes->src)
		return -EINVAL;

	if (aes->cmac_final) {
		if (aes->cmac_key_len != AES_BLOCK_SIZE)
			return -EINVAL;

		if (!aes->cmac_key)
			return -EINVAL;
	}

	/* The code below relies on key and context each using one SB entry */
	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);

	/* This value is overwritten by ccp_init_dm_workarea() below */
	ret = -EIO;
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
	op.sb_key = cmd_q->sb_key;
	op.sb_ctx = cmd_q->sb_ctx;
	op.init = 1;
	op.u.aes.type = aes->type;
	op.u.aes.mode = aes->mode;
	op.u.aes.action = aes->action;

	/* All supported key sizes fit in a single (32-byte) SB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Place the key at the end of the SB-sized buffer */
	dm_offset = CCP_SB_BYTES - aes->key_len;
	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
	if (ret)
		goto e_key;
	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
			     CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) SB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	/* Place the IV at the end of the SB-sized buffer */
	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
	if (ret)
		goto e_ctx;
	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
			     CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_ctx;
	}

	/* Send data to the CCP AES engine */
	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
		/* Last chunk of a final request: mark end of message and
		 * add the CMAC key to the context first.
		 */
		if (aes->cmac_final && !src.sg_wa.bytes_left) {
			op.eom = 1;

			/* Push the K1/K2 key to the CCP now */
			ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid,
					       op.sb_ctx,
					       CCP_PASSTHRU_BYTESWAP_256BIT);
			if (ret) {
				cmd->engine_error = cmd_q->cmd_error;
				goto e_src;
			}

			ret = ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
					      aes->cmac_key_len);
			if (ret)
				goto e_src;
			ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
					     CCP_PASSTHRU_BYTESWAP_256BIT);
			if (ret) {
				cmd->engine_error = cmd_q->cmd_error;
				goto e_src;
			}
		}

		ret = cmd_q->ccp->vdata->perform->aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_src;
		}

		ccp_process_data(&src, NULL, &op);
	}

	/* Retrieve the AES context - convert from LE to BE using
	 * 32-byte (256-bit) byteswapping
	 */
	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
			       CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_src;
	}

	/* ...but we only need AES_BLOCK_SIZE bytes */
	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);

	/* Cleanup labels release resources in reverse order of setup */
e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}
631 
632 static noinline_for_stack int
633 ccp_run_aes_gcm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
634 {
635 	struct ccp_aes_engine *aes = &cmd->u.aes;
636 	struct ccp_dm_workarea key, ctx, final_wa, tag;
637 	struct ccp_data src, dst;
638 	struct ccp_data aad;
639 	struct ccp_op op;
640 	unsigned int dm_offset;
641 	unsigned int authsize;
642 	unsigned int jobid;
643 	unsigned int ilen;
644 	bool in_place = true; /* Default value */
645 	__be64 *final;
646 	int ret;
647 
648 	struct scatterlist *p_inp, sg_inp[2];
649 	struct scatterlist *p_tag, sg_tag[2];
650 	struct scatterlist *p_outp, sg_outp[2];
651 	struct scatterlist *p_aad;
652 
653 	if (!aes->iv)
654 		return -EINVAL;
655 
656 	if (!((aes->key_len == AES_KEYSIZE_128) ||
657 		(aes->key_len == AES_KEYSIZE_192) ||
658 		(aes->key_len == AES_KEYSIZE_256)))
659 		return -EINVAL;
660 
661 	if (!aes->key) /* Gotta have a key SGL */
662 		return -EINVAL;
663 
664 	/* Zero defaults to 16 bytes, the maximum size */
665 	authsize = aes->authsize ? aes->authsize : AES_BLOCK_SIZE;
666 	switch (authsize) {
667 	case 16:
668 	case 15:
669 	case 14:
670 	case 13:
671 	case 12:
672 	case 8:
673 	case 4:
674 		break;
675 	default:
676 		return -EINVAL;
677 	}
678 
679 	/* First, decompose the source buffer into AAD & PT,
680 	 * and the destination buffer into AAD, CT & tag, or
681 	 * the input into CT & tag.
682 	 * It is expected that the input and output SGs will
683 	 * be valid, even if the AAD and input lengths are 0.
684 	 */
685 	p_aad = aes->src;
686 	p_inp = scatterwalk_ffwd(sg_inp, aes->src, aes->aad_len);
687 	p_outp = scatterwalk_ffwd(sg_outp, aes->dst, aes->aad_len);
688 	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
689 		ilen = aes->src_len;
690 		p_tag = scatterwalk_ffwd(sg_tag, p_outp, ilen);
691 	} else {
692 		/* Input length for decryption includes tag */
693 		ilen = aes->src_len - authsize;
694 		p_tag = scatterwalk_ffwd(sg_tag, p_inp, ilen);
695 	}
696 
697 	jobid = CCP_NEW_JOBID(cmd_q->ccp);
698 
699 	memset(&op, 0, sizeof(op));
700 	op.cmd_q = cmd_q;
701 	op.jobid = jobid;
702 	op.sb_key = cmd_q->sb_key; /* Pre-allocated */
703 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
704 	op.init = 1;
705 	op.u.aes.type = aes->type;
706 
707 	/* Copy the key to the LSB */
708 	ret = ccp_init_dm_workarea(&key, cmd_q,
709 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
710 				   DMA_TO_DEVICE);
711 	if (ret)
712 		return ret;
713 
714 	dm_offset = CCP_SB_BYTES - aes->key_len;
715 	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
716 	if (ret)
717 		goto e_key;
718 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
719 			     CCP_PASSTHRU_BYTESWAP_256BIT);
720 	if (ret) {
721 		cmd->engine_error = cmd_q->cmd_error;
722 		goto e_key;
723 	}
724 
725 	/* Copy the context (IV) to the LSB.
726 	 * There is an assumption here that the IV is 96 bits in length, plus
727 	 * a nonce of 32 bits. If no IV is present, use a zeroed buffer.
728 	 */
729 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
730 				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
731 				   DMA_BIDIRECTIONAL);
732 	if (ret)
733 		goto e_key;
734 
735 	dm_offset = CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES - aes->iv_len;
736 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
737 	if (ret)
738 		goto e_ctx;
739 
740 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
741 			     CCP_PASSTHRU_BYTESWAP_256BIT);
742 	if (ret) {
743 		cmd->engine_error = cmd_q->cmd_error;
744 		goto e_ctx;
745 	}
746 
747 	op.init = 1;
748 	if (aes->aad_len > 0) {
749 		/* Step 1: Run a GHASH over the Additional Authenticated Data */
750 		ret = ccp_init_data(&aad, cmd_q, p_aad, aes->aad_len,
751 				    AES_BLOCK_SIZE,
752 				    DMA_TO_DEVICE);
753 		if (ret)
754 			goto e_ctx;
755 
756 		op.u.aes.mode = CCP_AES_MODE_GHASH;
757 		op.u.aes.action = CCP_AES_GHASHAAD;
758 
759 		while (aad.sg_wa.bytes_left) {
760 			ccp_prepare_data(&aad, NULL, &op, AES_BLOCK_SIZE, true);
761 
762 			ret = cmd_q->ccp->vdata->perform->aes(&op);
763 			if (ret) {
764 				cmd->engine_error = cmd_q->cmd_error;
765 				goto e_aad;
766 			}
767 
768 			ccp_process_data(&aad, NULL, &op);
769 			op.init = 0;
770 		}
771 	}
772 
773 	op.u.aes.mode = CCP_AES_MODE_GCTR;
774 	op.u.aes.action = aes->action;
775 
776 	if (ilen > 0) {
777 		/* Step 2: Run a GCTR over the plaintext */
778 		in_place = (sg_virt(p_inp) == sg_virt(p_outp)) ? true : false;
779 
780 		ret = ccp_init_data(&src, cmd_q, p_inp, ilen,
781 				    AES_BLOCK_SIZE,
782 				    in_place ? DMA_BIDIRECTIONAL
783 					     : DMA_TO_DEVICE);
784 		if (ret)
785 			goto e_aad;
786 
787 		if (in_place) {
788 			dst = src;
789 		} else {
790 			ret = ccp_init_data(&dst, cmd_q, p_outp, ilen,
791 					    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
792 			if (ret)
793 				goto e_src;
794 		}
795 
796 		op.soc = 0;
797 		op.eom = 0;
798 		op.init = 1;
799 		while (src.sg_wa.bytes_left) {
800 			ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
801 			if (!src.sg_wa.bytes_left) {
802 				unsigned int nbytes = ilen % AES_BLOCK_SIZE;
803 
804 				if (nbytes) {
805 					op.eom = 1;
806 					op.u.aes.size = (nbytes * 8) - 1;
807 				}
808 			}
809 
810 			ret = cmd_q->ccp->vdata->perform->aes(&op);
811 			if (ret) {
812 				cmd->engine_error = cmd_q->cmd_error;
813 				goto e_dst;
814 			}
815 
816 			ccp_process_data(&src, &dst, &op);
817 			op.init = 0;
818 		}
819 	}
820 
821 	/* Step 3: Update the IV portion of the context with the original IV */
822 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
823 			       CCP_PASSTHRU_BYTESWAP_256BIT);
824 	if (ret) {
825 		cmd->engine_error = cmd_q->cmd_error;
826 		goto e_dst;
827 	}
828 
829 	ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
830 	if (ret)
831 		goto e_dst;
832 
833 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
834 			     CCP_PASSTHRU_BYTESWAP_256BIT);
835 	if (ret) {
836 		cmd->engine_error = cmd_q->cmd_error;
837 		goto e_dst;
838 	}
839 
840 	/* Step 4: Concatenate the lengths of the AAD and source, and
841 	 * hash that 16 byte buffer.
842 	 */
843 	ret = ccp_init_dm_workarea(&final_wa, cmd_q, AES_BLOCK_SIZE,
844 				   DMA_BIDIRECTIONAL);
845 	if (ret)
846 		goto e_dst;
847 	final = (__be64 *)final_wa.address;
848 	final[0] = cpu_to_be64(aes->aad_len * 8);
849 	final[1] = cpu_to_be64(ilen * 8);
850 
851 	memset(&op, 0, sizeof(op));
852 	op.cmd_q = cmd_q;
853 	op.jobid = jobid;
854 	op.sb_key = cmd_q->sb_key; /* Pre-allocated */
855 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
856 	op.init = 1;
857 	op.u.aes.type = aes->type;
858 	op.u.aes.mode = CCP_AES_MODE_GHASH;
859 	op.u.aes.action = CCP_AES_GHASHFINAL;
860 	op.src.type = CCP_MEMTYPE_SYSTEM;
861 	op.src.u.dma.address = final_wa.dma.address;
862 	op.src.u.dma.length = AES_BLOCK_SIZE;
863 	op.dst.type = CCP_MEMTYPE_SYSTEM;
864 	op.dst.u.dma.address = final_wa.dma.address;
865 	op.dst.u.dma.length = AES_BLOCK_SIZE;
866 	op.eom = 1;
867 	op.u.aes.size = 0;
868 	ret = cmd_q->ccp->vdata->perform->aes(&op);
869 	if (ret)
870 		goto e_final_wa;
871 
872 	if (aes->action == CCP_AES_ACTION_ENCRYPT) {
873 		/* Put the ciphered tag after the ciphertext. */
874 		ccp_get_dm_area(&final_wa, 0, p_tag, 0, authsize);
875 	} else {
876 		/* Does this ciphered tag match the input? */
877 		ret = ccp_init_dm_workarea(&tag, cmd_q, authsize,
878 					   DMA_BIDIRECTIONAL);
879 		if (ret)
880 			goto e_final_wa;
881 		ret = ccp_set_dm_area(&tag, 0, p_tag, 0, authsize);
882 		if (ret) {
883 			ccp_dm_free(&tag);
884 			goto e_final_wa;
885 		}
886 
887 		ret = crypto_memneq(tag.address, final_wa.address,
888 				    authsize) ? -EBADMSG : 0;
889 		ccp_dm_free(&tag);
890 	}
891 
892 e_final_wa:
893 	ccp_dm_free(&final_wa);
894 
895 e_dst:
896 	if (ilen > 0 && !in_place)
897 		ccp_free_data(&dst, cmd_q);
898 
899 e_src:
900 	if (ilen > 0)
901 		ccp_free_data(&src, cmd_q);
902 
903 e_aad:
904 	if (aes->aad_len)
905 		ccp_free_data(&aad, cmd_q);
906 
907 e_ctx:
908 	ccp_dm_free(&ctx);
909 
910 e_key:
911 	ccp_dm_free(&key);
912 
913 	return ret;
914 }
915 
/* Run a plain AES command (the mode is taken from aes->mode) on @cmd_q.
 *
 * The key is loaded into the queue's key storage block entry and, for
 * every mode except ECB, the IV into the context entry. The source is
 * then processed chunk by chunk into the destination, and for non-ECB
 * modes the resulting context is copied back into aes->iv.
 *
 * Returns 0 on success, -EINVAL for an unsupported key length, an ECB/CBC
 * source length that is not a multiple of AES_BLOCK_SIZE, a missing
 * buffer or (non-ECB) a wrong IV length, or the error from a failing
 * helper or engine call. On engine failures cmd->engine_error is set from
 * cmd_q->cmd_error.
 */
static noinline_for_stack int
ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
{
	struct ccp_aes_engine *aes = &cmd->u.aes;
	struct ccp_dm_workarea key, ctx;
	struct ccp_data src, dst;
	struct ccp_op op;
	unsigned int dm_offset;
	bool in_place = false;
	int ret;

	if (!((aes->key_len == AES_KEYSIZE_128) ||
	      (aes->key_len == AES_KEYSIZE_192) ||
	      (aes->key_len == AES_KEYSIZE_256)))
		return -EINVAL;

	/* ECB and CBC only accept whole blocks */
	if (((aes->mode == CCP_AES_MODE_ECB) ||
	     (aes->mode == CCP_AES_MODE_CBC)) &&
	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
		return -EINVAL;

	if (!aes->key || !aes->src || !aes->dst)
		return -EINVAL;

	/* Every mode other than ECB needs a block-sized IV */
	if (aes->mode != CCP_AES_MODE_ECB) {
		if (aes->iv_len != AES_BLOCK_SIZE)
			return -EINVAL;

		if (!aes->iv)
			return -EINVAL;
	}

	/* The code below relies on key and context each using one SB entry */
	BUILD_BUG_ON(CCP_AES_KEY_SB_COUNT != 1);
	BUILD_BUG_ON(CCP_AES_CTX_SB_COUNT != 1);

	/* This value is overwritten by ccp_init_dm_workarea() below */
	ret = -EIO;
	memset(&op, 0, sizeof(op));
	op.cmd_q = cmd_q;
	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
	op.sb_key = cmd_q->sb_key;
	op.sb_ctx = cmd_q->sb_ctx;
	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
	op.u.aes.type = aes->type;
	op.u.aes.mode = aes->mode;
	op.u.aes.action = aes->action;

	/* All supported key sizes fit in a single (32-byte) SB entry
	 * and must be in little endian format. Use the 256-bit byte
	 * swap passthru option to convert from big endian to little
	 * endian.
	 */
	ret = ccp_init_dm_workarea(&key, cmd_q,
				   CCP_AES_KEY_SB_COUNT * CCP_SB_BYTES,
				   DMA_TO_DEVICE);
	if (ret)
		return ret;

	/* Place the key at the end of the SB-sized buffer */
	dm_offset = CCP_SB_BYTES - aes->key_len;
	ret = ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
	if (ret)
		goto e_key;
	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
			     CCP_PASSTHRU_BYTESWAP_256BIT);
	if (ret) {
		cmd->engine_error = cmd_q->cmd_error;
		goto e_key;
	}

	/* The AES context fits in a single (32-byte) SB entry and
	 * must be in little endian format. Use the 256-bit byte swap
	 * passthru option to convert from big endian to little endian.
	 */
	ret = ccp_init_dm_workarea(&ctx, cmd_q,
				   CCP_AES_CTX_SB_COUNT * CCP_SB_BYTES,
				   DMA_BIDIRECTIONAL);
	if (ret)
		goto e_key;

	if (aes->mode != CCP_AES_MODE_ECB) {
		/* Load the AES context - convert to LE */
		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
		ret = ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
		if (ret)
			goto e_ctx;
		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
				     CCP_PASSTHRU_BYTESWAP_256BIT);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_ctx;
		}
	}
	/* CFB and CTR pass the block size in bits, minus one, as the size
	 * field; all other modes pass 0.
	 */
	switch (aes->mode) {
	case CCP_AES_MODE_CFB: /* CFB128 only */
	case CCP_AES_MODE_CTR:
		op.u.aes.size = AES_BLOCK_SIZE * BITS_PER_BYTE - 1;
		break;
	default:
		op.u.aes.size = 0;
	}

	/* Prepare the input and output data workareas. For in-place
	 * operations we need to set the dma direction to BIDIRECTIONAL
	 * and copy the src workarea to the dst workarea.
	 */
	if (sg_virt(aes->src) == sg_virt(aes->dst))
		in_place = true;

	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
			    AES_BLOCK_SIZE,
			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
	if (ret)
		goto e_ctx;

	if (in_place) {
		/* dst shares src's resources; only src is freed below */
		dst = src;
	} else {
		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
		if (ret)
			goto e_src;
	}

	/* Send data to the CCP AES engine */
	while (src.sg_wa.bytes_left) {
		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
		if (!src.sg_wa.bytes_left) {
			op.eom = 1;

			/* Since we don't retrieve the AES context in ECB
			 * mode we have to wait for the operation to complete
			 * on the last piece of data
			 */
			if (aes->mode == CCP_AES_MODE_ECB)
				op.soc = 1;
		}

		ret = cmd_q->ccp->vdata->perform->aes(&op);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		ccp_process_data(&src, &dst, &op);
	}

	if (aes->mode != CCP_AES_MODE_ECB) {
		/* Retrieve the AES context - convert from LE to BE using
		 * 32-byte (256-bit) byteswapping
		 */
		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
				       CCP_PASSTHRU_BYTESWAP_256BIT);
		if (ret) {
			cmd->engine_error = cmd_q->cmd_error;
			goto e_dst;
		}

		/* ...but we only need AES_BLOCK_SIZE bytes */
		dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
	}

	/* Cleanup labels release resources in reverse order of setup */
e_dst:
	if (!in_place)
		ccp_free_data(&dst, cmd_q);

e_src:
	ccp_free_data(&src, cmd_q);

e_ctx:
	ccp_dm_free(&ctx);

e_key:
	ccp_dm_free(&key);

	return ret;
}
1092 
1093 static noinline_for_stack int
1094 ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1095 {
1096 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1097 	struct ccp_dm_workarea key, ctx;
1098 	struct ccp_data src, dst;
1099 	struct ccp_op op;
1100 	unsigned int unit_size, dm_offset;
1101 	bool in_place = false;
1102 	unsigned int sb_count;
1103 	enum ccp_aes_type aestype;
1104 	int ret;
1105 
1106 	switch (xts->unit_size) {
1107 	case CCP_XTS_AES_UNIT_SIZE_16:
1108 		unit_size = 16;
1109 		break;
1110 	case CCP_XTS_AES_UNIT_SIZE_512:
1111 		unit_size = 512;
1112 		break;
1113 	case CCP_XTS_AES_UNIT_SIZE_1024:
1114 		unit_size = 1024;
1115 		break;
1116 	case CCP_XTS_AES_UNIT_SIZE_2048:
1117 		unit_size = 2048;
1118 		break;
1119 	case CCP_XTS_AES_UNIT_SIZE_4096:
1120 		unit_size = 4096;
1121 		break;
1122 
1123 	default:
1124 		return -EINVAL;
1125 	}
1126 
1127 	if (xts->key_len == AES_KEYSIZE_128)
1128 		aestype = CCP_AES_TYPE_128;
1129 	else if (xts->key_len == AES_KEYSIZE_256)
1130 		aestype = CCP_AES_TYPE_256;
1131 	else
1132 		return -EINVAL;
1133 
1134 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1135 		return -EINVAL;
1136 
1137 	if (xts->iv_len != AES_BLOCK_SIZE)
1138 		return -EINVAL;
1139 
1140 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1141 		return -EINVAL;
1142 
1143 	BUILD_BUG_ON(CCP_XTS_AES_KEY_SB_COUNT != 1);
1144 	BUILD_BUG_ON(CCP_XTS_AES_CTX_SB_COUNT != 1);
1145 
1146 	ret = -EIO;
1147 	memset(&op, 0, sizeof(op));
1148 	op.cmd_q = cmd_q;
1149 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1150 	op.sb_key = cmd_q->sb_key;
1151 	op.sb_ctx = cmd_q->sb_ctx;
1152 	op.init = 1;
1153 	op.u.xts.type = aestype;
1154 	op.u.xts.action = xts->action;
1155 	op.u.xts.unit_size = xts->unit_size;
1156 
1157 	/* A version 3 device only supports 128-bit keys, which fits into a
1158 	 * single SB entry. A version 5 device uses a 512-bit vector, so two
1159 	 * SB entries.
1160 	 */
1161 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0))
1162 		sb_count = CCP_XTS_AES_KEY_SB_COUNT;
1163 	else
1164 		sb_count = CCP5_XTS_AES_KEY_SB_COUNT;
1165 	ret = ccp_init_dm_workarea(&key, cmd_q,
1166 				   sb_count * CCP_SB_BYTES,
1167 				   DMA_TO_DEVICE);
1168 	if (ret)
1169 		return ret;
1170 
1171 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
1172 		/* All supported key sizes must be in little endian format.
1173 		 * Use the 256-bit byte swap passthru option to convert from
1174 		 * big endian to little endian.
1175 		 */
1176 		dm_offset = CCP_SB_BYTES - AES_KEYSIZE_128;
1177 		ret = ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1178 		if (ret)
1179 			goto e_key;
1180 		ret = ccp_set_dm_area(&key, 0, xts->key, xts->key_len, xts->key_len);
1181 		if (ret)
1182 			goto e_key;
1183 	} else {
1184 		/* Version 5 CCPs use a 512-bit space for the key: each portion
1185 		 * occupies 256 bits, or one entire slot, and is zero-padded.
1186 		 */
1187 		unsigned int pad;
1188 
1189 		dm_offset = CCP_SB_BYTES;
1190 		pad = dm_offset - xts->key_len;
1191 		ret = ccp_set_dm_area(&key, pad, xts->key, 0, xts->key_len);
1192 		if (ret)
1193 			goto e_key;
1194 		ret = ccp_set_dm_area(&key, dm_offset + pad, xts->key,
1195 				      xts->key_len, xts->key_len);
1196 		if (ret)
1197 			goto e_key;
1198 	}
1199 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
1200 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1201 	if (ret) {
1202 		cmd->engine_error = cmd_q->cmd_error;
1203 		goto e_key;
1204 	}
1205 
1206 	/* The AES context fits in a single (32-byte) SB entry and
1207 	 * for XTS is already in little endian format so no byte swapping
1208 	 * is needed.
1209 	 */
1210 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1211 				   CCP_XTS_AES_CTX_SB_COUNT * CCP_SB_BYTES,
1212 				   DMA_BIDIRECTIONAL);
1213 	if (ret)
1214 		goto e_key;
1215 
1216 	ret = ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1217 	if (ret)
1218 		goto e_ctx;
1219 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1220 			     CCP_PASSTHRU_BYTESWAP_NOOP);
1221 	if (ret) {
1222 		cmd->engine_error = cmd_q->cmd_error;
1223 		goto e_ctx;
1224 	}
1225 
1226 	/* Prepare the input and output data workareas. For in-place
1227 	 * operations we need to set the dma direction to BIDIRECTIONAL
1228 	 * and copy the src workarea to the dst workarea.
1229 	 */
1230 	if (sg_virt(xts->src) == sg_virt(xts->dst))
1231 		in_place = true;
1232 
1233 	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1234 			    unit_size,
1235 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1236 	if (ret)
1237 		goto e_ctx;
1238 
1239 	if (in_place) {
1240 		dst = src;
1241 	} else {
1242 		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1243 				    unit_size, DMA_FROM_DEVICE);
1244 		if (ret)
1245 			goto e_src;
1246 	}
1247 
1248 	/* Send data to the CCP AES engine */
1249 	while (src.sg_wa.bytes_left) {
1250 		ccp_prepare_data(&src, &dst, &op, unit_size, true);
1251 		if (!src.sg_wa.bytes_left)
1252 			op.eom = 1;
1253 
1254 		ret = cmd_q->ccp->vdata->perform->xts_aes(&op);
1255 		if (ret) {
1256 			cmd->engine_error = cmd_q->cmd_error;
1257 			goto e_dst;
1258 		}
1259 
1260 		ccp_process_data(&src, &dst, &op);
1261 	}
1262 
1263 	/* Retrieve the AES context - convert from LE to BE using
1264 	 * 32-byte (256-bit) byteswapping
1265 	 */
1266 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1267 			       CCP_PASSTHRU_BYTESWAP_256BIT);
1268 	if (ret) {
1269 		cmd->engine_error = cmd_q->cmd_error;
1270 		goto e_dst;
1271 	}
1272 
1273 	/* ...but we only need AES_BLOCK_SIZE bytes */
1274 	dm_offset = CCP_SB_BYTES - AES_BLOCK_SIZE;
1275 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1276 
1277 e_dst:
1278 	if (!in_place)
1279 		ccp_free_data(&dst, cmd_q);
1280 
1281 e_src:
1282 	ccp_free_data(&src, cmd_q);
1283 
1284 e_ctx:
1285 	ccp_dm_free(&ctx);
1286 
1287 e_key:
1288 	ccp_dm_free(&key);
1289 
1290 	return ret;
1291 }
1292 
1293 static noinline_for_stack int
1294 ccp_run_des3_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1295 {
1296 	struct ccp_des3_engine *des3 = &cmd->u.des3;
1297 
1298 	struct ccp_dm_workarea key, ctx;
1299 	struct ccp_data src, dst;
1300 	struct ccp_op op;
1301 	unsigned int dm_offset;
1302 	unsigned int len_singlekey;
1303 	bool in_place = false;
1304 	int ret;
1305 
1306 	/* Error checks */
1307 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0))
1308 		return -EINVAL;
1309 
1310 	if (!cmd_q->ccp->vdata->perform->des3)
1311 		return -EINVAL;
1312 
1313 	if (des3->key_len != DES3_EDE_KEY_SIZE)
1314 		return -EINVAL;
1315 
1316 	if (((des3->mode == CCP_DES3_MODE_ECB) ||
1317 		(des3->mode == CCP_DES3_MODE_CBC)) &&
1318 		(des3->src_len & (DES3_EDE_BLOCK_SIZE - 1)))
1319 		return -EINVAL;
1320 
1321 	if (!des3->key || !des3->src || !des3->dst)
1322 		return -EINVAL;
1323 
1324 	if (des3->mode != CCP_DES3_MODE_ECB) {
1325 		if (des3->iv_len != DES3_EDE_BLOCK_SIZE)
1326 			return -EINVAL;
1327 
1328 		if (!des3->iv)
1329 			return -EINVAL;
1330 	}
1331 
1332 	/* Zero out all the fields of the command desc */
1333 	memset(&op, 0, sizeof(op));
1334 
1335 	/* Set up the Function field */
1336 	op.cmd_q = cmd_q;
1337 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1338 	op.sb_key = cmd_q->sb_key;
1339 
1340 	op.init = (des3->mode == CCP_DES3_MODE_ECB) ? 0 : 1;
1341 	op.u.des3.type = des3->type;
1342 	op.u.des3.mode = des3->mode;
1343 	op.u.des3.action = des3->action;
1344 
1345 	/*
1346 	 * All supported key sizes fit in a single (32-byte) KSB entry and
1347 	 * (like AES) must be in little endian format. Use the 256-bit byte
1348 	 * swap passthru option to convert from big endian to little endian.
1349 	 */
1350 	ret = ccp_init_dm_workarea(&key, cmd_q,
1351 				   CCP_DES3_KEY_SB_COUNT * CCP_SB_BYTES,
1352 				   DMA_TO_DEVICE);
1353 	if (ret)
1354 		return ret;
1355 
1356 	/*
1357 	 * The contents of the key triplet are in the reverse order of what
1358 	 * is required by the engine. Copy the 3 pieces individually to put
1359 	 * them where they belong.
1360 	 */
1361 	dm_offset = CCP_SB_BYTES - des3->key_len; /* Basic offset */
1362 
1363 	len_singlekey = des3->key_len / 3;
1364 	ret = ccp_set_dm_area(&key, dm_offset + 2 * len_singlekey,
1365 			      des3->key, 0, len_singlekey);
1366 	if (ret)
1367 		goto e_key;
1368 	ret = ccp_set_dm_area(&key, dm_offset + len_singlekey,
1369 			      des3->key, len_singlekey, len_singlekey);
1370 	if (ret)
1371 		goto e_key;
1372 	ret = ccp_set_dm_area(&key, dm_offset,
1373 			      des3->key, 2 * len_singlekey, len_singlekey);
1374 	if (ret)
1375 		goto e_key;
1376 
1377 	/* Copy the key to the SB */
1378 	ret = ccp_copy_to_sb(cmd_q, &key, op.jobid, op.sb_key,
1379 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1380 	if (ret) {
1381 		cmd->engine_error = cmd_q->cmd_error;
1382 		goto e_key;
1383 	}
1384 
1385 	/*
1386 	 * The DES3 context fits in a single (32-byte) KSB entry and
1387 	 * must be in little endian format. Use the 256-bit byte swap
1388 	 * passthru option to convert from big endian to little endian.
1389 	 */
1390 	if (des3->mode != CCP_DES3_MODE_ECB) {
1391 		op.sb_ctx = cmd_q->sb_ctx;
1392 
1393 		ret = ccp_init_dm_workarea(&ctx, cmd_q,
1394 					   CCP_DES3_CTX_SB_COUNT * CCP_SB_BYTES,
1395 					   DMA_BIDIRECTIONAL);
1396 		if (ret)
1397 			goto e_key;
1398 
1399 		/* Load the context into the LSB */
1400 		dm_offset = CCP_SB_BYTES - des3->iv_len;
1401 		ret = ccp_set_dm_area(&ctx, dm_offset, des3->iv, 0,
1402 				      des3->iv_len);
1403 		if (ret)
1404 			goto e_ctx;
1405 
1406 		ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1407 				     CCP_PASSTHRU_BYTESWAP_256BIT);
1408 		if (ret) {
1409 			cmd->engine_error = cmd_q->cmd_error;
1410 			goto e_ctx;
1411 		}
1412 	}
1413 
1414 	/*
1415 	 * Prepare the input and output data workareas. For in-place
1416 	 * operations we need to set the dma direction to BIDIRECTIONAL
1417 	 * and copy the src workarea to the dst workarea.
1418 	 */
1419 	if (sg_virt(des3->src) == sg_virt(des3->dst))
1420 		in_place = true;
1421 
1422 	ret = ccp_init_data(&src, cmd_q, des3->src, des3->src_len,
1423 			DES3_EDE_BLOCK_SIZE,
1424 			in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1425 	if (ret)
1426 		goto e_ctx;
1427 
1428 	if (in_place)
1429 		dst = src;
1430 	else {
1431 		ret = ccp_init_data(&dst, cmd_q, des3->dst, des3->src_len,
1432 				DES3_EDE_BLOCK_SIZE, DMA_FROM_DEVICE);
1433 		if (ret)
1434 			goto e_src;
1435 	}
1436 
1437 	/* Send data to the CCP DES3 engine */
1438 	while (src.sg_wa.bytes_left) {
1439 		ccp_prepare_data(&src, &dst, &op, DES3_EDE_BLOCK_SIZE, true);
1440 		if (!src.sg_wa.bytes_left) {
1441 			op.eom = 1;
1442 
1443 			/* Since we don't retrieve the context in ECB mode
1444 			 * we have to wait for the operation to complete
1445 			 * on the last piece of data
1446 			 */
1447 			op.soc = 0;
1448 		}
1449 
1450 		ret = cmd_q->ccp->vdata->perform->des3(&op);
1451 		if (ret) {
1452 			cmd->engine_error = cmd_q->cmd_error;
1453 			goto e_dst;
1454 		}
1455 
1456 		ccp_process_data(&src, &dst, &op);
1457 	}
1458 
1459 	if (des3->mode != CCP_DES3_MODE_ECB) {
1460 		/* Retrieve the context and make BE */
1461 		ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1462 				       CCP_PASSTHRU_BYTESWAP_256BIT);
1463 		if (ret) {
1464 			cmd->engine_error = cmd_q->cmd_error;
1465 			goto e_dst;
1466 		}
1467 
1468 		/* ...but we only need the last DES3_EDE_BLOCK_SIZE bytes */
1469 		ccp_get_dm_area(&ctx, dm_offset, des3->iv, 0,
1470 				DES3_EDE_BLOCK_SIZE);
1471 	}
1472 e_dst:
1473 	if (!in_place)
1474 		ccp_free_data(&dst, cmd_q);
1475 
1476 e_src:
1477 	ccp_free_data(&src, cmd_q);
1478 
1479 e_ctx:
1480 	if (des3->mode != CCP_DES3_MODE_ECB)
1481 		ccp_dm_free(&ctx);
1482 
1483 e_key:
1484 	ccp_dm_free(&key);
1485 
1486 	return ret;
1487 }
1488 
1489 static noinline_for_stack int
1490 ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1491 {
1492 	struct ccp_sha_engine *sha = &cmd->u.sha;
1493 	struct ccp_dm_workarea ctx;
1494 	struct ccp_data src;
1495 	struct ccp_op op;
1496 	unsigned int ioffset, ooffset;
1497 	unsigned int digest_size;
1498 	int sb_count;
1499 	const void *init;
1500 	u64 block_size;
1501 	int ctx_size;
1502 	int ret;
1503 
1504 	switch (sha->type) {
1505 	case CCP_SHA_TYPE_1:
1506 		if (sha->ctx_len < SHA1_DIGEST_SIZE)
1507 			return -EINVAL;
1508 		block_size = SHA1_BLOCK_SIZE;
1509 		break;
1510 	case CCP_SHA_TYPE_224:
1511 		if (sha->ctx_len < SHA224_DIGEST_SIZE)
1512 			return -EINVAL;
1513 		block_size = SHA224_BLOCK_SIZE;
1514 		break;
1515 	case CCP_SHA_TYPE_256:
1516 		if (sha->ctx_len < SHA256_DIGEST_SIZE)
1517 			return -EINVAL;
1518 		block_size = SHA256_BLOCK_SIZE;
1519 		break;
1520 	case CCP_SHA_TYPE_384:
1521 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
1522 		    || sha->ctx_len < SHA384_DIGEST_SIZE)
1523 			return -EINVAL;
1524 		block_size = SHA384_BLOCK_SIZE;
1525 		break;
1526 	case CCP_SHA_TYPE_512:
1527 		if (cmd_q->ccp->vdata->version < CCP_VERSION(4, 0)
1528 		    || sha->ctx_len < SHA512_DIGEST_SIZE)
1529 			return -EINVAL;
1530 		block_size = SHA512_BLOCK_SIZE;
1531 		break;
1532 	default:
1533 		return -EINVAL;
1534 	}
1535 
1536 	if (!sha->ctx)
1537 		return -EINVAL;
1538 
1539 	if (!sha->final && (sha->src_len & (block_size - 1)))
1540 		return -EINVAL;
1541 
1542 	/* The version 3 device can't handle zero-length input */
1543 	if (cmd_q->ccp->vdata->version == CCP_VERSION(3, 0)) {
1544 
1545 		if (!sha->src_len) {
1546 			unsigned int digest_len;
1547 			const u8 *sha_zero;
1548 
1549 			/* Not final, just return */
1550 			if (!sha->final)
1551 				return 0;
1552 
1553 			/* CCP can't do a zero length sha operation so the
1554 			 * caller must buffer the data.
1555 			 */
1556 			if (sha->msg_bits)
1557 				return -EINVAL;
1558 
1559 			/* The CCP cannot perform zero-length sha operations
1560 			 * so the caller is required to buffer data for the
1561 			 * final operation. However, a sha operation for a
1562 			 * message with a total length of zero is valid so
1563 			 * known values are required to supply the result.
1564 			 */
1565 			switch (sha->type) {
1566 			case CCP_SHA_TYPE_1:
1567 				sha_zero = sha1_zero_message_hash;
1568 				digest_len = SHA1_DIGEST_SIZE;
1569 				break;
1570 			case CCP_SHA_TYPE_224:
1571 				sha_zero = sha224_zero_message_hash;
1572 				digest_len = SHA224_DIGEST_SIZE;
1573 				break;
1574 			case CCP_SHA_TYPE_256:
1575 				sha_zero = sha256_zero_message_hash;
1576 				digest_len = SHA256_DIGEST_SIZE;
1577 				break;
1578 			default:
1579 				return -EINVAL;
1580 			}
1581 
1582 			scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1583 						 digest_len, 1);
1584 
1585 			return 0;
1586 		}
1587 	}
1588 
1589 	/* Set variables used throughout */
1590 	switch (sha->type) {
1591 	case CCP_SHA_TYPE_1:
1592 		digest_size = SHA1_DIGEST_SIZE;
1593 		init = (void *) ccp_sha1_init;
1594 		ctx_size = SHA1_DIGEST_SIZE;
1595 		sb_count = 1;
1596 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1597 			ooffset = ioffset = CCP_SB_BYTES - SHA1_DIGEST_SIZE;
1598 		else
1599 			ooffset = ioffset = 0;
1600 		break;
1601 	case CCP_SHA_TYPE_224:
1602 		digest_size = SHA224_DIGEST_SIZE;
1603 		init = (void *) ccp_sha224_init;
1604 		ctx_size = SHA256_DIGEST_SIZE;
1605 		sb_count = 1;
1606 		ioffset = 0;
1607 		if (cmd_q->ccp->vdata->version != CCP_VERSION(3, 0))
1608 			ooffset = CCP_SB_BYTES - SHA224_DIGEST_SIZE;
1609 		else
1610 			ooffset = 0;
1611 		break;
1612 	case CCP_SHA_TYPE_256:
1613 		digest_size = SHA256_DIGEST_SIZE;
1614 		init = (void *) ccp_sha256_init;
1615 		ctx_size = SHA256_DIGEST_SIZE;
1616 		sb_count = 1;
1617 		ooffset = ioffset = 0;
1618 		break;
1619 	case CCP_SHA_TYPE_384:
1620 		digest_size = SHA384_DIGEST_SIZE;
1621 		init = (void *) ccp_sha384_init;
1622 		ctx_size = SHA512_DIGEST_SIZE;
1623 		sb_count = 2;
1624 		ioffset = 0;
1625 		ooffset = 2 * CCP_SB_BYTES - SHA384_DIGEST_SIZE;
1626 		break;
1627 	case CCP_SHA_TYPE_512:
1628 		digest_size = SHA512_DIGEST_SIZE;
1629 		init = (void *) ccp_sha512_init;
1630 		ctx_size = SHA512_DIGEST_SIZE;
1631 		sb_count = 2;
1632 		ooffset = ioffset = 0;
1633 		break;
1634 	default:
1635 		ret = -EINVAL;
1636 		goto e_data;
1637 	}
1638 
1639 	/* For zero-length plaintext the src pointer is ignored;
1640 	 * otherwise both parts must be valid
1641 	 */
1642 	if (sha->src_len && !sha->src)
1643 		return -EINVAL;
1644 
1645 	memset(&op, 0, sizeof(op));
1646 	op.cmd_q = cmd_q;
1647 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1648 	op.sb_ctx = cmd_q->sb_ctx; /* Pre-allocated */
1649 	op.u.sha.type = sha->type;
1650 	op.u.sha.msg_bits = sha->msg_bits;
1651 
1652 	/* For SHA1/224/256 the context fits in a single (32-byte) SB entry;
1653 	 * SHA384/512 require 2 adjacent SB slots, with the right half in the
1654 	 * first slot, and the left half in the second. Each portion must then
1655 	 * be in little endian format: use the 256-bit byte swap option.
1656 	 */
1657 	ret = ccp_init_dm_workarea(&ctx, cmd_q, sb_count * CCP_SB_BYTES,
1658 				   DMA_BIDIRECTIONAL);
1659 	if (ret)
1660 		return ret;
1661 	if (sha->first) {
1662 		switch (sha->type) {
1663 		case CCP_SHA_TYPE_1:
1664 		case CCP_SHA_TYPE_224:
1665 		case CCP_SHA_TYPE_256:
1666 			memcpy(ctx.address + ioffset, init, ctx_size);
1667 			break;
1668 		case CCP_SHA_TYPE_384:
1669 		case CCP_SHA_TYPE_512:
1670 			memcpy(ctx.address + ctx_size / 2, init,
1671 			       ctx_size / 2);
1672 			memcpy(ctx.address, init + ctx_size / 2,
1673 			       ctx_size / 2);
1674 			break;
1675 		default:
1676 			ret = -EINVAL;
1677 			goto e_ctx;
1678 		}
1679 	} else {
1680 		/* Restore the context */
1681 		ret = ccp_set_dm_area(&ctx, 0, sha->ctx, 0,
1682 				      sb_count * CCP_SB_BYTES);
1683 		if (ret)
1684 			goto e_ctx;
1685 	}
1686 
1687 	ret = ccp_copy_to_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1688 			     CCP_PASSTHRU_BYTESWAP_256BIT);
1689 	if (ret) {
1690 		cmd->engine_error = cmd_q->cmd_error;
1691 		goto e_ctx;
1692 	}
1693 
1694 	if (sha->src) {
1695 		/* Send data to the CCP SHA engine; block_size is set above */
1696 		ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1697 				    block_size, DMA_TO_DEVICE);
1698 		if (ret)
1699 			goto e_ctx;
1700 
1701 		while (src.sg_wa.bytes_left) {
1702 			ccp_prepare_data(&src, NULL, &op, block_size, false);
1703 			if (sha->final && !src.sg_wa.bytes_left)
1704 				op.eom = 1;
1705 
1706 			ret = cmd_q->ccp->vdata->perform->sha(&op);
1707 			if (ret) {
1708 				cmd->engine_error = cmd_q->cmd_error;
1709 				goto e_data;
1710 			}
1711 
1712 			ccp_process_data(&src, NULL, &op);
1713 		}
1714 	} else {
1715 		op.eom = 1;
1716 		ret = cmd_q->ccp->vdata->perform->sha(&op);
1717 		if (ret) {
1718 			cmd->engine_error = cmd_q->cmd_error;
1719 			goto e_data;
1720 		}
1721 	}
1722 
1723 	/* Retrieve the SHA context - convert from LE to BE using
1724 	 * 32-byte (256-bit) byteswapping to BE
1725 	 */
1726 	ret = ccp_copy_from_sb(cmd_q, &ctx, op.jobid, op.sb_ctx,
1727 			       CCP_PASSTHRU_BYTESWAP_256BIT);
1728 	if (ret) {
1729 		cmd->engine_error = cmd_q->cmd_error;
1730 		goto e_data;
1731 	}
1732 
1733 	if (sha->final) {
1734 		/* Finishing up, so get the digest */
1735 		switch (sha->type) {
1736 		case CCP_SHA_TYPE_1:
1737 		case CCP_SHA_TYPE_224:
1738 		case CCP_SHA_TYPE_256:
1739 			ccp_get_dm_area(&ctx, ooffset,
1740 					sha->ctx, 0,
1741 					digest_size);
1742 			break;
1743 		case CCP_SHA_TYPE_384:
1744 		case CCP_SHA_TYPE_512:
1745 			ccp_get_dm_area(&ctx, 0,
1746 					sha->ctx, LSB_ITEM_SIZE - ooffset,
1747 					LSB_ITEM_SIZE);
1748 			ccp_get_dm_area(&ctx, LSB_ITEM_SIZE + ooffset,
1749 					sha->ctx, 0,
1750 					LSB_ITEM_SIZE - ooffset);
1751 			break;
1752 		default:
1753 			ret = -EINVAL;
1754 			goto e_data;
1755 		}
1756 	} else {
1757 		/* Stash the context */
1758 		ccp_get_dm_area(&ctx, 0, sha->ctx, 0,
1759 				sb_count * CCP_SB_BYTES);
1760 	}
1761 
1762 	if (sha->final && sha->opad) {
1763 		/* HMAC operation, recursively perform final SHA */
1764 		struct ccp_cmd hmac_cmd;
1765 		struct scatterlist sg;
1766 		u8 *hmac_buf;
1767 
1768 		if (sha->opad_len != block_size) {
1769 			ret = -EINVAL;
1770 			goto e_data;
1771 		}
1772 
1773 		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1774 		if (!hmac_buf) {
1775 			ret = -ENOMEM;
1776 			goto e_data;
1777 		}
1778 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1779 
1780 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1781 		switch (sha->type) {
1782 		case CCP_SHA_TYPE_1:
1783 		case CCP_SHA_TYPE_224:
1784 		case CCP_SHA_TYPE_256:
1785 			memcpy(hmac_buf + block_size,
1786 			       ctx.address + ooffset,
1787 			       digest_size);
1788 			break;
1789 		case CCP_SHA_TYPE_384:
1790 		case CCP_SHA_TYPE_512:
1791 			memcpy(hmac_buf + block_size,
1792 			       ctx.address + LSB_ITEM_SIZE + ooffset,
1793 			       LSB_ITEM_SIZE);
1794 			memcpy(hmac_buf + block_size +
1795 			       (LSB_ITEM_SIZE - ooffset),
1796 			       ctx.address,
1797 			       LSB_ITEM_SIZE);
1798 			break;
1799 		default:
1800 			kfree(hmac_buf);
1801 			ret = -EINVAL;
1802 			goto e_data;
1803 		}
1804 
1805 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1806 		hmac_cmd.engine = CCP_ENGINE_SHA;
1807 		hmac_cmd.u.sha.type = sha->type;
1808 		hmac_cmd.u.sha.ctx = sha->ctx;
1809 		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1810 		hmac_cmd.u.sha.src = &sg;
1811 		hmac_cmd.u.sha.src_len = block_size + digest_size;
1812 		hmac_cmd.u.sha.opad = NULL;
1813 		hmac_cmd.u.sha.opad_len = 0;
1814 		hmac_cmd.u.sha.first = 1;
1815 		hmac_cmd.u.sha.final = 1;
1816 		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1817 
1818 		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1819 		if (ret)
1820 			cmd->engine_error = hmac_cmd.engine_error;
1821 
1822 		kfree(hmac_buf);
1823 	}
1824 
1825 e_data:
1826 	if (sha->src)
1827 		ccp_free_data(&src, cmd_q);
1828 
1829 e_ctx:
1830 	ccp_dm_free(&ctx);
1831 
1832 	return ret;
1833 }
1834 
1835 static noinline_for_stack int
1836 ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1837 {
1838 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1839 	struct ccp_dm_workarea exp, src, dst;
1840 	struct ccp_op op;
1841 	unsigned int sb_count, i_len, o_len;
1842 	int ret;
1843 
1844 	/* Check against the maximum allowable size, in bits */
1845 	if (rsa->key_size > cmd_q->ccp->vdata->rsamax)
1846 		return -EINVAL;
1847 
1848 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1849 		return -EINVAL;
1850 
1851 	memset(&op, 0, sizeof(op));
1852 	op.cmd_q = cmd_q;
1853 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1854 
1855 	/* The RSA modulus must precede the message being acted upon, so
1856 	 * it must be copied to a DMA area where the message and the
1857 	 * modulus can be concatenated.  Therefore the input buffer
1858 	 * length required is twice the output buffer length (which
1859 	 * must be a multiple of 256-bits).  Compute o_len, i_len in bytes.
1860 	 * Buffer sizes must be a multiple of 32 bytes; rounding up may be
1861 	 * required.
1862 	 */
1863 	o_len = 32 * ((rsa->key_size + 255) / 256);
1864 	i_len = o_len * 2;
1865 
1866 	sb_count = 0;
1867 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
1868 		/* sb_count is the number of storage block slots required
1869 		 * for the modulus.
1870 		 */
1871 		sb_count = o_len / CCP_SB_BYTES;
1872 		op.sb_key = cmd_q->ccp->vdata->perform->sballoc(cmd_q,
1873 								sb_count);
1874 		if (!op.sb_key)
1875 			return -EIO;
1876 	} else {
1877 		/* A version 5 device allows a modulus size that will not fit
1878 		 * in the LSB, so the command will transfer it from memory.
1879 		 * Set the sb key to the default, even though it's not used.
1880 		 */
1881 		op.sb_key = cmd_q->sb_key;
1882 	}
1883 
1884 	/* The RSA exponent must be in little endian format. Reverse its
1885 	 * byte order.
1886 	 */
1887 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1888 	if (ret)
1889 		goto e_sb;
1890 
1891 	ret = ccp_reverse_set_dm_area(&exp, 0, rsa->exp, 0, rsa->exp_len);
1892 	if (ret)
1893 		goto e_exp;
1894 
1895 	if (cmd_q->ccp->vdata->version < CCP_VERSION(5, 0)) {
1896 		/* Copy the exponent to the local storage block, using
1897 		 * as many 32-byte blocks as were allocated above. It's
1898 		 * already little endian, so no further change is required.
1899 		 */
1900 		ret = ccp_copy_to_sb(cmd_q, &exp, op.jobid, op.sb_key,
1901 				     CCP_PASSTHRU_BYTESWAP_NOOP);
1902 		if (ret) {
1903 			cmd->engine_error = cmd_q->cmd_error;
1904 			goto e_exp;
1905 		}
1906 	} else {
1907 		/* The exponent can be retrieved from memory via DMA. */
1908 		op.exp.u.dma.address = exp.dma.address;
1909 		op.exp.u.dma.offset = 0;
1910 	}
1911 
1912 	/* Concatenate the modulus and the message. Both the modulus and
1913 	 * the operands must be in little endian format.  Since the input
1914 	 * is in big endian format it must be converted.
1915 	 */
1916 	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1917 	if (ret)
1918 		goto e_exp;
1919 
1920 	ret = ccp_reverse_set_dm_area(&src, 0, rsa->mod, 0, rsa->mod_len);
1921 	if (ret)
1922 		goto e_src;
1923 	ret = ccp_reverse_set_dm_area(&src, o_len, rsa->src, 0, rsa->src_len);
1924 	if (ret)
1925 		goto e_src;
1926 
1927 	/* Prepare the output area for the operation */
1928 	ret = ccp_init_dm_workarea(&dst, cmd_q, o_len, DMA_FROM_DEVICE);
1929 	if (ret)
1930 		goto e_src;
1931 
1932 	op.soc = 1;
1933 	op.src.u.dma.address = src.dma.address;
1934 	op.src.u.dma.offset = 0;
1935 	op.src.u.dma.length = i_len;
1936 	op.dst.u.dma.address = dst.dma.address;
1937 	op.dst.u.dma.offset = 0;
1938 	op.dst.u.dma.length = o_len;
1939 
1940 	op.u.rsa.mod_size = rsa->key_size;
1941 	op.u.rsa.input_len = i_len;
1942 
1943 	ret = cmd_q->ccp->vdata->perform->rsa(&op);
1944 	if (ret) {
1945 		cmd->engine_error = cmd_q->cmd_error;
1946 		goto e_dst;
1947 	}
1948 
1949 	ccp_reverse_get_dm_area(&dst, 0, rsa->dst, 0, rsa->mod_len);
1950 
1951 e_dst:
1952 	ccp_dm_free(&dst);
1953 
1954 e_src:
1955 	ccp_dm_free(&src);
1956 
1957 e_exp:
1958 	ccp_dm_free(&exp);
1959 
1960 e_sb:
1961 	if (sb_count)
1962 		cmd_q->ccp->vdata->perform->sbfree(cmd_q, op.sb_key, sb_count);
1963 
1964 	return ret;
1965 }
1966 
1967 static noinline_for_stack int
1968 ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1969 {
1970 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1971 	struct ccp_dm_workarea mask;
1972 	struct ccp_data src, dst;
1973 	struct ccp_op op;
1974 	bool in_place = false;
1975 	unsigned int i;
1976 	int ret = 0;
1977 
1978 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1979 		return -EINVAL;
1980 
1981 	if (!pt->src || !pt->dst)
1982 		return -EINVAL;
1983 
1984 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1985 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1986 			return -EINVAL;
1987 		if (!pt->mask)
1988 			return -EINVAL;
1989 	}
1990 
1991 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
1992 
1993 	memset(&op, 0, sizeof(op));
1994 	op.cmd_q = cmd_q;
1995 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
1996 
1997 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1998 		/* Load the mask */
1999 		op.sb_key = cmd_q->sb_key;
2000 
2001 		ret = ccp_init_dm_workarea(&mask, cmd_q,
2002 					   CCP_PASSTHRU_SB_COUNT *
2003 					   CCP_SB_BYTES,
2004 					   DMA_TO_DEVICE);
2005 		if (ret)
2006 			return ret;
2007 
2008 		ret = ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
2009 		if (ret)
2010 			goto e_mask;
2011 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
2012 				     CCP_PASSTHRU_BYTESWAP_NOOP);
2013 		if (ret) {
2014 			cmd->engine_error = cmd_q->cmd_error;
2015 			goto e_mask;
2016 		}
2017 	}
2018 
2019 	/* Prepare the input and output data workareas. For in-place
2020 	 * operations we need to set the dma direction to BIDIRECTIONAL
2021 	 * and copy the src workarea to the dst workarea.
2022 	 */
2023 	if (sg_virt(pt->src) == sg_virt(pt->dst))
2024 		in_place = true;
2025 
2026 	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
2027 			    CCP_PASSTHRU_MASKSIZE,
2028 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
2029 	if (ret)
2030 		goto e_mask;
2031 
2032 	if (in_place) {
2033 		dst = src;
2034 	} else {
2035 		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
2036 				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
2037 		if (ret)
2038 			goto e_src;
2039 	}
2040 
2041 	/* Send data to the CCP Passthru engine
2042 	 *   Because the CCP engine works on a single source and destination
2043 	 *   dma address at a time, each entry in the source scatterlist
2044 	 *   (after the dma_map_sg call) must be less than or equal to the
2045 	 *   (remaining) length in the destination scatterlist entry and the
2046 	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
2047 	 */
2048 	dst.sg_wa.sg_used = 0;
2049 	for (i = 1; i <= src.sg_wa.dma_count; i++) {
2050 		if (!dst.sg_wa.sg ||
2051 		    (sg_dma_len(dst.sg_wa.sg) < sg_dma_len(src.sg_wa.sg))) {
2052 			ret = -EINVAL;
2053 			goto e_dst;
2054 		}
2055 
2056 		if (i == src.sg_wa.dma_count) {
2057 			op.eom = 1;
2058 			op.soc = 1;
2059 		}
2060 
2061 		op.src.type = CCP_MEMTYPE_SYSTEM;
2062 		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
2063 		op.src.u.dma.offset = 0;
2064 		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
2065 
2066 		op.dst.type = CCP_MEMTYPE_SYSTEM;
2067 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
2068 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
2069 		op.dst.u.dma.length = op.src.u.dma.length;
2070 
2071 		ret = cmd_q->ccp->vdata->perform->passthru(&op);
2072 		if (ret) {
2073 			cmd->engine_error = cmd_q->cmd_error;
2074 			goto e_dst;
2075 		}
2076 
2077 		dst.sg_wa.sg_used += sg_dma_len(src.sg_wa.sg);
2078 		if (dst.sg_wa.sg_used == sg_dma_len(dst.sg_wa.sg)) {
2079 			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
2080 			dst.sg_wa.sg_used = 0;
2081 		}
2082 		src.sg_wa.sg = sg_next(src.sg_wa.sg);
2083 	}
2084 
2085 e_dst:
2086 	if (!in_place)
2087 		ccp_free_data(&dst, cmd_q);
2088 
2089 e_src:
2090 	ccp_free_data(&src, cmd_q);
2091 
2092 e_mask:
2093 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
2094 		ccp_dm_free(&mask);
2095 
2096 	return ret;
2097 }
2098 
2099 static noinline_for_stack int
2100 ccp_run_passthru_nomap_cmd(struct ccp_cmd_queue *cmd_q,
2101 				      struct ccp_cmd *cmd)
2102 {
2103 	struct ccp_passthru_nomap_engine *pt = &cmd->u.passthru_nomap;
2104 	struct ccp_dm_workarea mask;
2105 	struct ccp_op op;
2106 	int ret;
2107 
2108 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
2109 		return -EINVAL;
2110 
2111 	if (!pt->src_dma || !pt->dst_dma)
2112 		return -EINVAL;
2113 
2114 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
2115 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
2116 			return -EINVAL;
2117 		if (!pt->mask)
2118 			return -EINVAL;
2119 	}
2120 
2121 	BUILD_BUG_ON(CCP_PASSTHRU_SB_COUNT != 1);
2122 
2123 	memset(&op, 0, sizeof(op));
2124 	op.cmd_q = cmd_q;
2125 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2126 
2127 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
2128 		/* Load the mask */
2129 		op.sb_key = cmd_q->sb_key;
2130 
2131 		mask.length = pt->mask_len;
2132 		mask.dma.address = pt->mask;
2133 		mask.dma.length = pt->mask_len;
2134 
2135 		ret = ccp_copy_to_sb(cmd_q, &mask, op.jobid, op.sb_key,
2136 				     CCP_PASSTHRU_BYTESWAP_NOOP);
2137 		if (ret) {
2138 			cmd->engine_error = cmd_q->cmd_error;
2139 			return ret;
2140 		}
2141 	}
2142 
2143 	/* Send data to the CCP Passthru engine */
2144 	op.eom = 1;
2145 	op.soc = 1;
2146 
2147 	op.src.type = CCP_MEMTYPE_SYSTEM;
2148 	op.src.u.dma.address = pt->src_dma;
2149 	op.src.u.dma.offset = 0;
2150 	op.src.u.dma.length = pt->src_len;
2151 
2152 	op.dst.type = CCP_MEMTYPE_SYSTEM;
2153 	op.dst.u.dma.address = pt->dst_dma;
2154 	op.dst.u.dma.offset = 0;
2155 	op.dst.u.dma.length = pt->src_len;
2156 
2157 	ret = cmd_q->ccp->vdata->perform->passthru(&op);
2158 	if (ret)
2159 		cmd->engine_error = cmd_q->cmd_error;
2160 
2161 	return ret;
2162 }
2163 
2164 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2165 {
2166 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2167 	struct ccp_dm_workarea src, dst;
2168 	struct ccp_op op;
2169 	int ret;
2170 	u8 *save;
2171 
2172 	if (!ecc->u.mm.operand_1 ||
2173 	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
2174 		return -EINVAL;
2175 
2176 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
2177 		if (!ecc->u.mm.operand_2 ||
2178 		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
2179 			return -EINVAL;
2180 
2181 	if (!ecc->u.mm.result ||
2182 	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
2183 		return -EINVAL;
2184 
2185 	memset(&op, 0, sizeof(op));
2186 	op.cmd_q = cmd_q;
2187 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2188 
2189 	/* Concatenate the modulus and the operands. Both the modulus and
2190 	 * the operands must be in little endian format.  Since the input
2191 	 * is in big endian format it must be converted and placed in a
2192 	 * fixed length buffer.
2193 	 */
2194 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
2195 				   DMA_TO_DEVICE);
2196 	if (ret)
2197 		return ret;
2198 
2199 	/* Save the workarea address since it is updated in order to perform
2200 	 * the concatenation
2201 	 */
2202 	save = src.address;
2203 
2204 	/* Copy the ECC modulus */
2205 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
2206 	if (ret)
2207 		goto e_src;
2208 	src.address += CCP_ECC_OPERAND_SIZE;
2209 
2210 	/* Copy the first operand */
2211 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_1, 0,
2212 				      ecc->u.mm.operand_1_len);
2213 	if (ret)
2214 		goto e_src;
2215 	src.address += CCP_ECC_OPERAND_SIZE;
2216 
2217 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
2218 		/* Copy the second operand */
2219 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.mm.operand_2, 0,
2220 					      ecc->u.mm.operand_2_len);
2221 		if (ret)
2222 			goto e_src;
2223 		src.address += CCP_ECC_OPERAND_SIZE;
2224 	}
2225 
2226 	/* Restore the workarea address */
2227 	src.address = save;
2228 
2229 	/* Prepare the output area for the operation */
2230 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2231 				   DMA_FROM_DEVICE);
2232 	if (ret)
2233 		goto e_src;
2234 
2235 	op.soc = 1;
2236 	op.src.u.dma.address = src.dma.address;
2237 	op.src.u.dma.offset = 0;
2238 	op.src.u.dma.length = src.length;
2239 	op.dst.u.dma.address = dst.dma.address;
2240 	op.dst.u.dma.offset = 0;
2241 	op.dst.u.dma.length = dst.length;
2242 
2243 	op.u.ecc.function = cmd->u.ecc.function;
2244 
2245 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
2246 	if (ret) {
2247 		cmd->engine_error = cmd_q->cmd_error;
2248 		goto e_dst;
2249 	}
2250 
2251 	ecc->ecc_result = le16_to_cpup(
2252 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2253 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2254 		ret = -EIO;
2255 		goto e_dst;
2256 	}
2257 
2258 	/* Save the ECC result */
2259 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.mm.result, 0,
2260 				CCP_ECC_MODULUS_BYTES);
2261 
2262 e_dst:
2263 	ccp_dm_free(&dst);
2264 
2265 e_src:
2266 	ccp_dm_free(&src);
2267 
2268 	return ret;
2269 }
2270 
2271 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2272 {
2273 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2274 	struct ccp_dm_workarea src, dst;
2275 	struct ccp_op op;
2276 	int ret;
2277 	u8 *save;
2278 
2279 	if (!ecc->u.pm.point_1.x ||
2280 	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
2281 	    !ecc->u.pm.point_1.y ||
2282 	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
2283 		return -EINVAL;
2284 
2285 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
2286 		if (!ecc->u.pm.point_2.x ||
2287 		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
2288 		    !ecc->u.pm.point_2.y ||
2289 		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
2290 			return -EINVAL;
2291 	} else {
2292 		if (!ecc->u.pm.domain_a ||
2293 		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
2294 			return -EINVAL;
2295 
2296 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
2297 			if (!ecc->u.pm.scalar ||
2298 			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
2299 				return -EINVAL;
2300 	}
2301 
2302 	if (!ecc->u.pm.result.x ||
2303 	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
2304 	    !ecc->u.pm.result.y ||
2305 	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
2306 		return -EINVAL;
2307 
2308 	memset(&op, 0, sizeof(op));
2309 	op.cmd_q = cmd_q;
2310 	op.jobid = CCP_NEW_JOBID(cmd_q->ccp);
2311 
2312 	/* Concatenate the modulus and the operands. Both the modulus and
2313 	 * the operands must be in little endian format.  Since the input
2314 	 * is in big endian format it must be converted and placed in a
2315 	 * fixed length buffer.
2316 	 */
2317 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
2318 				   DMA_TO_DEVICE);
2319 	if (ret)
2320 		return ret;
2321 
2322 	/* Save the workarea address since it is updated in order to perform
2323 	 * the concatenation
2324 	 */
2325 	save = src.address;
2326 
2327 	/* Copy the ECC modulus */
2328 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->mod, 0, ecc->mod_len);
2329 	if (ret)
2330 		goto e_src;
2331 	src.address += CCP_ECC_OPERAND_SIZE;
2332 
2333 	/* Copy the first point X and Y coordinate */
2334 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.x, 0,
2335 				      ecc->u.pm.point_1.x_len);
2336 	if (ret)
2337 		goto e_src;
2338 	src.address += CCP_ECC_OPERAND_SIZE;
2339 	ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_1.y, 0,
2340 				      ecc->u.pm.point_1.y_len);
2341 	if (ret)
2342 		goto e_src;
2343 	src.address += CCP_ECC_OPERAND_SIZE;
2344 
2345 	/* Set the first point Z coordinate to 1 */
2346 	*src.address = 0x01;
2347 	src.address += CCP_ECC_OPERAND_SIZE;
2348 
2349 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
2350 		/* Copy the second point X and Y coordinate */
2351 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.x, 0,
2352 					      ecc->u.pm.point_2.x_len);
2353 		if (ret)
2354 			goto e_src;
2355 		src.address += CCP_ECC_OPERAND_SIZE;
2356 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.point_2.y, 0,
2357 					      ecc->u.pm.point_2.y_len);
2358 		if (ret)
2359 			goto e_src;
2360 		src.address += CCP_ECC_OPERAND_SIZE;
2361 
2362 		/* Set the second point Z coordinate to 1 */
2363 		*src.address = 0x01;
2364 		src.address += CCP_ECC_OPERAND_SIZE;
2365 	} else {
2366 		/* Copy the Domain "a" parameter */
2367 		ret = ccp_reverse_set_dm_area(&src, 0, ecc->u.pm.domain_a, 0,
2368 					      ecc->u.pm.domain_a_len);
2369 		if (ret)
2370 			goto e_src;
2371 		src.address += CCP_ECC_OPERAND_SIZE;
2372 
2373 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
2374 			/* Copy the scalar value */
2375 			ret = ccp_reverse_set_dm_area(&src, 0,
2376 						      ecc->u.pm.scalar, 0,
2377 						      ecc->u.pm.scalar_len);
2378 			if (ret)
2379 				goto e_src;
2380 			src.address += CCP_ECC_OPERAND_SIZE;
2381 		}
2382 	}
2383 
2384 	/* Restore the workarea address */
2385 	src.address = save;
2386 
2387 	/* Prepare the output area for the operation */
2388 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2389 				   DMA_FROM_DEVICE);
2390 	if (ret)
2391 		goto e_src;
2392 
2393 	op.soc = 1;
2394 	op.src.u.dma.address = src.dma.address;
2395 	op.src.u.dma.offset = 0;
2396 	op.src.u.dma.length = src.length;
2397 	op.dst.u.dma.address = dst.dma.address;
2398 	op.dst.u.dma.offset = 0;
2399 	op.dst.u.dma.length = dst.length;
2400 
2401 	op.u.ecc.function = cmd->u.ecc.function;
2402 
2403 	ret = cmd_q->ccp->vdata->perform->ecc(&op);
2404 	if (ret) {
2405 		cmd->engine_error = cmd_q->cmd_error;
2406 		goto e_dst;
2407 	}
2408 
2409 	ecc->ecc_result = le16_to_cpup(
2410 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2411 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2412 		ret = -EIO;
2413 		goto e_dst;
2414 	}
2415 
2416 	/* Save the workarea address since it is updated as we walk through
2417 	 * to copy the point math result
2418 	 */
2419 	save = dst.address;
2420 
2421 	/* Save the ECC result X and Y coordinates */
2422 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.x, 0,
2423 				CCP_ECC_MODULUS_BYTES);
2424 	dst.address += CCP_ECC_OUTPUT_SIZE;
2425 	ccp_reverse_get_dm_area(&dst, 0, ecc->u.pm.result.y, 0,
2426 				CCP_ECC_MODULUS_BYTES);
2427 
2428 	/* Restore the workarea address */
2429 	dst.address = save;
2430 
2431 e_dst:
2432 	ccp_dm_free(&dst);
2433 
2434 e_src:
2435 	ccp_dm_free(&src);
2436 
2437 	return ret;
2438 }
2439 
2440 static noinline_for_stack int
2441 ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2442 {
2443 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2444 
2445 	ecc->ecc_result = 0;
2446 
2447 	if (!ecc->mod ||
2448 	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
2449 		return -EINVAL;
2450 
2451 	switch (ecc->function) {
2452 	case CCP_ECC_FUNCTION_MMUL_384BIT:
2453 	case CCP_ECC_FUNCTION_MADD_384BIT:
2454 	case CCP_ECC_FUNCTION_MINV_384BIT:
2455 		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
2456 
2457 	case CCP_ECC_FUNCTION_PADD_384BIT:
2458 	case CCP_ECC_FUNCTION_PMUL_384BIT:
2459 	case CCP_ECC_FUNCTION_PDBL_384BIT:
2460 		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
2461 
2462 	default:
2463 		return -EINVAL;
2464 	}
2465 }
2466 
2467 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2468 {
2469 	int ret;
2470 
2471 	cmd->engine_error = 0;
2472 	cmd_q->cmd_error = 0;
2473 	cmd_q->int_rcvd = 0;
2474 	cmd_q->free_slots = cmd_q->ccp->vdata->perform->get_free_slots(cmd_q);
2475 
2476 	switch (cmd->engine) {
2477 	case CCP_ENGINE_AES:
2478 		switch (cmd->u.aes.mode) {
2479 		case CCP_AES_MODE_CMAC:
2480 			ret = ccp_run_aes_cmac_cmd(cmd_q, cmd);
2481 			break;
2482 		case CCP_AES_MODE_GCM:
2483 			ret = ccp_run_aes_gcm_cmd(cmd_q, cmd);
2484 			break;
2485 		default:
2486 			ret = ccp_run_aes_cmd(cmd_q, cmd);
2487 			break;
2488 		}
2489 		break;
2490 	case CCP_ENGINE_XTS_AES_128:
2491 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2492 		break;
2493 	case CCP_ENGINE_DES3:
2494 		ret = ccp_run_des3_cmd(cmd_q, cmd);
2495 		break;
2496 	case CCP_ENGINE_SHA:
2497 		ret = ccp_run_sha_cmd(cmd_q, cmd);
2498 		break;
2499 	case CCP_ENGINE_RSA:
2500 		ret = ccp_run_rsa_cmd(cmd_q, cmd);
2501 		break;
2502 	case CCP_ENGINE_PASSTHRU:
2503 		if (cmd->flags & CCP_CMD_PASSTHRU_NO_DMA_MAP)
2504 			ret = ccp_run_passthru_nomap_cmd(cmd_q, cmd);
2505 		else
2506 			ret = ccp_run_passthru_cmd(cmd_q, cmd);
2507 		break;
2508 	case CCP_ENGINE_ECC:
2509 		ret = ccp_run_ecc_cmd(cmd_q, cmd);
2510 		break;
2511 	default:
2512 		ret = -EINVAL;
2513 	}
2514 
2515 	return ret;
2516 }
2517