xref: /linux/drivers/crypto/ccp/ccp-ops.c (revision d91517839e5d95adc0cf4b28caa7af62a71de526)
1 /*
2  * AMD Cryptographic Coprocessor (CCP) driver
3  *
4  * Copyright (C) 2013 Advanced Micro Devices, Inc.
5  *
6  * Author: Tom Lendacky <thomas.lendacky@amd.com>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/pci_ids.h>
17 #include <linux/kthread.h>
18 #include <linux/sched.h>
19 #include <linux/interrupt.h>
20 #include <linux/spinlock.h>
21 #include <linux/mutex.h>
22 #include <linux/delay.h>
23 #include <linux/ccp.h>
24 #include <linux/scatterlist.h>
25 #include <crypto/scatterwalk.h>
26 
27 #include "ccp-dev.h"
28 
29 
30 enum ccp_memtype {
31 	CCP_MEMTYPE_SYSTEM = 0,
32 	CCP_MEMTYPE_KSB,
33 	CCP_MEMTYPE_LOCAL,
34 	CCP_MEMTYPE__LAST,
35 };
36 
37 struct ccp_dma_info {
38 	dma_addr_t address;
39 	unsigned int offset;
40 	unsigned int length;
41 	enum dma_data_direction dir;
42 };
43 
44 struct ccp_dm_workarea {
45 	struct device *dev;
46 	struct dma_pool *dma_pool;
47 	unsigned int length;
48 
49 	u8 *address;
50 	struct ccp_dma_info dma;
51 };
52 
53 struct ccp_sg_workarea {
54 	struct scatterlist *sg;
55 	unsigned int nents;
56 	unsigned int length;
57 
58 	struct scatterlist *dma_sg;
59 	struct device *dma_dev;
60 	unsigned int dma_count;
61 	enum dma_data_direction dma_dir;
62 
63 	unsigned int sg_used;
64 
65 	u64 bytes_left;
66 };
67 
68 struct ccp_data {
69 	struct ccp_sg_workarea sg_wa;
70 	struct ccp_dm_workarea dm_wa;
71 };
72 
73 struct ccp_mem {
74 	enum ccp_memtype type;
75 	union {
76 		struct ccp_dma_info dma;
77 		u32 ksb;
78 	} u;
79 };
80 
81 struct ccp_aes_op {
82 	enum ccp_aes_type type;
83 	enum ccp_aes_mode mode;
84 	enum ccp_aes_action action;
85 };
86 
87 struct ccp_xts_aes_op {
88 	enum ccp_aes_action action;
89 	enum ccp_xts_aes_unit_size unit_size;
90 };
91 
92 struct ccp_sha_op {
93 	enum ccp_sha_type type;
94 	u64 msg_bits;
95 };
96 
97 struct ccp_rsa_op {
98 	u32 mod_size;
99 	u32 input_len;
100 };
101 
102 struct ccp_passthru_op {
103 	enum ccp_passthru_bitwise bit_mod;
104 	enum ccp_passthru_byteswap byte_swap;
105 };
106 
107 struct ccp_ecc_op {
108 	enum ccp_ecc_function function;
109 };
110 
111 struct ccp_op {
112 	struct ccp_cmd_queue *cmd_q;
113 
114 	u32 jobid;
115 	u32 ioc;
116 	u32 soc;
117 	u32 ksb_key;
118 	u32 ksb_ctx;
119 	u32 init;
120 	u32 eom;
121 
122 	struct ccp_mem src;
123 	struct ccp_mem dst;
124 
125 	union {
126 		struct ccp_aes_op aes;
127 		struct ccp_xts_aes_op xts;
128 		struct ccp_sha_op sha;
129 		struct ccp_rsa_op rsa;
130 		struct ccp_passthru_op passthru;
131 		struct ccp_ecc_op ecc;
132 	} u;
133 };
134 
135 /* The CCP cannot perform zero-length sha operations, so the caller
136  * is required to buffer data for the final operation.  However, a
137  * sha operation for a message with a total length of zero is valid,
138  * so known values are required to supply the result.
139  */
140 static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = {
141 	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
142 	0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
143 	0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
144 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
145 };
146 
147 static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = {
148 	0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
149 	0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
150 	0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
151 	0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
152 };
153 
154 static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = {
155 	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
156 	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
157 	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
158 	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
159 };
160 
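/* For reference, these are the standard digests of the empty message:
 * ccp_sha1_zero is SHA-1(""), ccp_sha224_zero is SHA-224("") and
 * ccp_sha256_zero is SHA-256(""), zero-padded out to CCP_SHA_CTXSIZE where
 * the digest is shorter. ccp_run_sha_cmd() below copies the matching
 * constant straight into the caller's context when a final command arrives
 * with src_len == 0 and msg_bits == 0.
 */
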
161 static u32 ccp_addr_lo(struct ccp_dma_info *info)
162 {
163 	return lower_32_bits(info->address + info->offset);
164 }
165 
166 static u32 ccp_addr_hi(struct ccp_dma_info *info)
167 {
168 	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
169 }
170 
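/* A note on addressing: ccp_addr_hi() keeps only 16 bits of the upper
 * dword and those bits are OR'd into the REQ4/REQ6 memtype words below,
 * so the CCP is handed at most a 48-bit DMA address. For example, an
 * address + offset of 0x0001_2345_6789 splits into lo = 0x23456789 and
 * hi = 0x0001.
 */
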
171 static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
172 {
173 	struct ccp_cmd_queue *cmd_q = op->cmd_q;
174 	struct ccp_device *ccp = cmd_q->ccp;
175 	void __iomem *cr_addr;
176 	u32 cr0, cmd;
177 	unsigned int i;
178 	int ret = 0;
179 
180 	/* We could read a status register to see how many free slots
181 	 * are actually available, but reading that register resets it
182 	 * and you could lose some error information.
183 	 */
184 	cmd_q->free_slots--;
185 
186 	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
187 	      | (op->jobid << REQ0_JOBID_SHIFT)
188 	      | REQ0_WAIT_FOR_WRITE;
189 
190 	if (op->soc)
191 		cr0 |= REQ0_STOP_ON_COMPLETE
192 		       | REQ0_INT_ON_COMPLETE;
193 
194 	if (op->ioc || !cmd_q->free_slots)
195 		cr0 |= REQ0_INT_ON_COMPLETE;
196 
197 	/* Start at CMD_REQ1 */
198 	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
199 
200 	mutex_lock(&ccp->req_mutex);
201 
202 	/* Write CMD_REQ1 through CMD_REQx first */
203 	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
204 		iowrite32(*(cr + i), cr_addr);
205 
206 	/* Tell the CCP to start */
207 	wmb();
208 	iowrite32(cr0, ccp->io_regs + CMD_REQ0);
209 
210 	mutex_unlock(&ccp->req_mutex);
211 
212 	if (cr0 & REQ0_INT_ON_COMPLETE) {
213 		/* Wait for the job to complete */
214 		ret = wait_event_interruptible(cmd_q->int_queue,
215 					       cmd_q->int_rcvd);
216 		if (ret || cmd_q->cmd_error) {
217 			/* On error delete all related jobs from the queue */
218 			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
219 			      | op->jobid;
220 
221 			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
222 
223 			if (!ret)
224 				ret = -EIO;
225 		} else if (op->soc) {
226 			/* Delete just head job from the queue on SoC */
227 			cmd = DEL_Q_ACTIVE
228 			      | (cmd_q->id << DEL_Q_ID_SHIFT)
229 			      | op->jobid;
230 
231 			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
232 		}
233 
234 		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
235 
236 		cmd_q->int_rcvd = 0;
237 	}
238 
239 	return ret;
240 }
241 
242 static int ccp_perform_aes(struct ccp_op *op)
243 {
244 	u32 cr[6];
245 
246 	/* Fill out the register contents for REQ1 through REQ6 */
247 	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
248 		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
249 		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
250 		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
251 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
252 	cr[1] = op->src.u.dma.length - 1;
253 	cr[2] = ccp_addr_lo(&op->src.u.dma);
254 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
255 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
256 		| ccp_addr_hi(&op->src.u.dma);
257 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
258 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
259 		| ccp_addr_hi(&op->dst.u.dma);
260 
261 	if (op->u.aes.mode == CCP_AES_MODE_CFB)
262 		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
263 
264 	if (op->eom)
265 		cr[0] |= REQ1_EOM;
266 
267 	if (op->init)
268 		cr[0] |= REQ1_INIT;
269 
270 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
271 }
272 
273 static int ccp_perform_xts_aes(struct ccp_op *op)
274 {
275 	u32 cr[6];
276 
277 	/* Fill out the register contents for REQ1 through REQ6 */
278 	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
279 		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
280 		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
281 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
282 	cr[1] = op->src.u.dma.length - 1;
283 	cr[2] = ccp_addr_lo(&op->src.u.dma);
284 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
285 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
286 		| ccp_addr_hi(&op->src.u.dma);
287 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
288 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
289 		| ccp_addr_hi(&op->dst.u.dma);
290 
291 	if (op->eom)
292 		cr[0] |= REQ1_EOM;
293 
294 	if (op->init)
295 		cr[0] |= REQ1_INIT;
296 
297 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
298 }
299 
300 static int ccp_perform_sha(struct ccp_op *op)
301 {
302 	u32 cr[6];
303 
304 	/* Fill out the register contents for REQ1 through REQ6 */
305 	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
306 		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
307 		| REQ1_INIT;
308 	cr[1] = op->src.u.dma.length - 1;
309 	cr[2] = ccp_addr_lo(&op->src.u.dma);
310 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
311 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
312 		| ccp_addr_hi(&op->src.u.dma);
313 
314 	if (op->eom) {
315 		cr[0] |= REQ1_EOM;
316 		cr[4] = lower_32_bits(op->u.sha.msg_bits);
317 		cr[5] = upper_32_bits(op->u.sha.msg_bits);
318 	} else {
319 		cr[4] = 0;
320 		cr[5] = 0;
321 	}
322 
323 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
324 }
325 
326 static int ccp_perform_rsa(struct ccp_op *op)
327 {
328 	u32 cr[6];
329 
330 	/* Fill out the register contents for REQ1 through REQ6 */
331 	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
332 		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
333 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT)
334 		| REQ1_EOM;
335 	cr[1] = op->u.rsa.input_len - 1;
336 	cr[2] = ccp_addr_lo(&op->src.u.dma);
337 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
338 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
339 		| ccp_addr_hi(&op->src.u.dma);
340 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
341 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
342 		| ccp_addr_hi(&op->dst.u.dma);
343 
344 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
345 }
346 
347 static int ccp_perform_passthru(struct ccp_op *op)
348 {
349 	u32 cr[6];
350 
351 	/* Fill out the register contents for REQ1 through REQ6 */
352 	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
353 		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
354 		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
355 
356 	if (op->src.type == CCP_MEMTYPE_SYSTEM)
357 		cr[1] = op->src.u.dma.length - 1;
358 	else
359 		cr[1] = op->dst.u.dma.length - 1;
360 
361 	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
362 		cr[2] = ccp_addr_lo(&op->src.u.dma);
363 		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
364 			| ccp_addr_hi(&op->src.u.dma);
365 
366 		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
367 			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
368 	} else {
369 		cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
370 		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
371 	}
372 
373 	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
374 		cr[4] = ccp_addr_lo(&op->dst.u.dma);
375 		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
376 			| ccp_addr_hi(&op->dst.u.dma);
377 	} else {
378 		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
379 		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
380 	}
381 
382 	if (op->eom)
383 		cr[0] |= REQ1_EOM;
384 
385 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
386 }
387 
388 static int ccp_perform_ecc(struct ccp_op *op)
389 {
390 	u32 cr[6];
391 
392 	/* Fill out the register contents for REQ1 through REQ6 */
393 	cr[0] = REQ1_ECC_AFFINE_CONVERT
394 		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
395 		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
396 		| REQ1_EOM;
397 	cr[1] = op->src.u.dma.length - 1;
398 	cr[2] = ccp_addr_lo(&op->src.u.dma);
399 	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
400 		| ccp_addr_hi(&op->src.u.dma);
401 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
402 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
403 		| ccp_addr_hi(&op->dst.u.dma);
404 
405 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
406 }
407 
408 static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
409 {
410 	int start;
411 
412 	for (;;) {
413 		mutex_lock(&ccp->ksb_mutex);
414 
415 		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
416 							ccp->ksb_count,
417 							ccp->ksb_start,
418 							count, 0);
419 		if (start <= ccp->ksb_count) {
420 			bitmap_set(ccp->ksb, start, count);
421 
422 			mutex_unlock(&ccp->ksb_mutex);
423 			break;
424 		}
425 
426 		ccp->ksb_avail = 0;
427 
428 		mutex_unlock(&ccp->ksb_mutex);
429 
430 		/* Wait for KSB entries to become available */
431 		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
432 			return 0;
433 	}
434 
435 	return KSB_START + start;
436 }
437 
438 static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
439 			 unsigned int count)
440 {
441 	if (!start)
442 		return;
443 
444 	mutex_lock(&ccp->ksb_mutex);
445 
446 	bitmap_clear(ccp->ksb, start - KSB_START, count);
447 
448 	ccp->ksb_avail = 1;
449 
450 	mutex_unlock(&ccp->ksb_mutex);
451 
452 	wake_up_interruptible_all(&ccp->ksb_queue);
453 }
454 
455 static u32 ccp_gen_jobid(struct ccp_device *ccp)
456 {
457 	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
458 }
459 
460 static void ccp_sg_free(struct ccp_sg_workarea *wa)
461 {
462 	if (wa->dma_count)
463 		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
464 
465 	wa->dma_count = 0;
466 }
467 
468 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
469 				struct scatterlist *sg, u64 len,
470 				enum dma_data_direction dma_dir)
471 {
472 	memset(wa, 0, sizeof(*wa));
473 
474 	wa->sg = sg;
475 	if (!sg)
476 		return 0;
477 
478 	wa->nents = sg_nents(sg);
479 	wa->length = sg->length;
480 	wa->bytes_left = len;
481 	wa->sg_used = 0;
482 
483 	if (len == 0)
484 		return 0;
485 
486 	if (dma_dir == DMA_NONE)
487 		return 0;
488 
489 	wa->dma_sg = sg;
490 	wa->dma_dev = dev;
491 	wa->dma_dir = dma_dir;
492 	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
493 	if (!wa->dma_count)
494 		return -ENOMEM;
495 
496 
497 	return 0;
498 }
499 
500 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
501 {
502 	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
503 
504 	if (!wa->sg)
505 		return;
506 
507 	wa->sg_used += nbytes;
508 	wa->bytes_left -= nbytes;
509 	if (wa->sg_used == wa->sg->length) {
510 		wa->sg = sg_next(wa->sg);
511 		wa->sg_used = 0;
512 	}
513 }
514 
515 static void ccp_dm_free(struct ccp_dm_workarea *wa)
516 {
517 	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
518 		if (wa->address)
519 			dma_pool_free(wa->dma_pool, wa->address,
520 				      wa->dma.address);
521 	} else {
522 		if (wa->dma.address)
523 			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
524 					 wa->dma.dir);
525 		kfree(wa->address);
526 	}
527 
528 	wa->address = NULL;
529 	wa->dma.address = 0;
530 }
531 
532 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
533 				struct ccp_cmd_queue *cmd_q,
534 				unsigned int len,
535 				enum dma_data_direction dir)
536 {
537 	memset(wa, 0, sizeof(*wa));
538 
539 	if (!len)
540 		return 0;
541 
542 	wa->dev = cmd_q->ccp->dev;
543 	wa->length = len;
544 
545 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
546 		wa->dma_pool = cmd_q->dma_pool;
547 
548 		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
549 					     &wa->dma.address);
550 		if (!wa->address)
551 			return -ENOMEM;
552 
553 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
554 
555 		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
556 	} else {
557 		wa->address = kzalloc(len, GFP_KERNEL);
558 		if (!wa->address)
559 			return -ENOMEM;
560 
561 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
562 						 dir);
563 		if (!wa->dma.address)
564 			return -ENOMEM;
565 
566 		wa->dma.length = len;
567 	}
568 	wa->dma.dir = dir;
569 
570 	return 0;
571 }
572 
573 static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
574 			    struct scatterlist *sg, unsigned int sg_offset,
575 			    unsigned int len)
576 {
577 	WARN_ON(!wa->address);
578 
579 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
580 				 0);
581 }
582 
583 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
584 			    struct scatterlist *sg, unsigned int sg_offset,
585 			    unsigned int len)
586 {
587 	WARN_ON(!wa->address);
588 
589 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
590 				 1);
591 }
592 
593 static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
594 				    struct scatterlist *sg,
595 				    unsigned int len, unsigned int se_len,
596 				    bool sign_extend)
597 {
598 	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
599 	u8 buffer[CCP_REVERSE_BUF_SIZE];
600 
601 	BUG_ON(se_len > sizeof(buffer));
602 
603 	sg_offset = len;
604 	dm_offset = 0;
605 	nbytes = len;
606 	while (nbytes) {
607 		ksb_len = min_t(unsigned int, nbytes, se_len);
608 		sg_offset -= ksb_len;
609 
610 		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
611 		for (i = 0; i < ksb_len; i++)
612 			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
613 
614 		dm_offset += ksb_len;
615 		nbytes -= ksb_len;
616 
617 		if ((ksb_len != se_len) && sign_extend) {
618 			/* Must sign-extend to nearest sign-extend length */
619 			if (wa->address[dm_offset - 1] & 0x80)
620 				memset(wa->address + dm_offset, 0xff,
621 				       se_len - ksb_len);
622 		}
623 	}
624 }
625 
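/* A short worked example of the reversal above: with len = 3, se_len = 32
 * and a big-endian input of { 0x01, 0x02, 0x03 }, the single 3-byte chunk
 * is copied into the start of the (pre-zeroed) workarea byte-reversed,
 * giving { 0x03, 0x02, 0x01, 0x00, ... }, i.e. the value in the
 * little-endian form the KSB expects.
 */
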
626 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
627 				    struct scatterlist *sg,
628 				    unsigned int len)
629 {
630 	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
631 	u8 buffer[CCP_REVERSE_BUF_SIZE];
632 
633 	sg_offset = 0;
634 	dm_offset = len;
635 	nbytes = len;
636 	while (nbytes) {
637 		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
638 		dm_offset -= ksb_len;
639 
640 		for (i = 0; i < ksb_len; i++)
641 			buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
642 		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
643 
644 		sg_offset += ksb_len;
645 		nbytes -= ksb_len;
646 	}
647 }
648 
649 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
650 {
651 	ccp_dm_free(&data->dm_wa);
652 	ccp_sg_free(&data->sg_wa);
653 }
654 
655 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
656 			 struct scatterlist *sg, u64 sg_len,
657 			 unsigned int dm_len,
658 			 enum dma_data_direction dir)
659 {
660 	int ret;
661 
662 	memset(data, 0, sizeof(*data));
663 
664 	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
665 				   dir);
666 	if (ret)
667 		goto e_err;
668 
669 	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
670 	if (ret)
671 		goto e_err;
672 
673 	return 0;
674 
675 e_err:
676 	ccp_free_data(data, cmd_q);
677 
678 	return ret;
679 }
680 
681 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
682 {
683 	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
684 	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
685 	unsigned int buf_count, nbytes;
686 
687 	/* Clear the buffer if setting it */
688 	if (!from)
689 		memset(dm_wa->address, 0, dm_wa->length);
690 
691 	if (!sg_wa->sg)
692 		return 0;
693 
694 	/* Perform the copy operation
695 	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
696 	 *   an unsigned int
697 	 */
698 	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
699 	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
700 				 nbytes, from);
701 
702 	/* Update the structures and generate the count */
703 	buf_count = 0;
704 	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
705 		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
706 			     dm_wa->length - buf_count);
707 		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
708 
709 		buf_count += nbytes;
710 		ccp_update_sg_workarea(sg_wa, nbytes);
711 	}
712 
713 	return buf_count;
714 }
715 
716 static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
717 {
718 	return ccp_queue_buf(data, 0);
719 }
720 
721 static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
722 {
723 	return ccp_queue_buf(data, 1);
724 }
725 
726 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
727 			     struct ccp_op *op, unsigned int block_size,
728 			     bool blocksize_op)
729 {
730 	unsigned int sg_src_len, sg_dst_len, op_len;
731 
732 	/* The CCP can only DMA from/to one source and one destination
733 	 * address per operation. This requires that we find the smallest
734 	 * DMA area common to both. The resulting len values will always be
735 	 * <= UINT_MAX because the dma length is an unsigned int.
736 	 */
737 	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
738 	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
739 
740 	if (dst) {
741 		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
742 		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
743 		op_len = min(sg_src_len, sg_dst_len);
744 	} else
745 		op_len = sg_src_len;
746 
747 	/* The data operation length will be the smaller of the sg room
748 	 * remaining for the source and the destination, but never less
749 	 * than block_size
750 	 */
751 	op_len = max(op_len, block_size);
752 
753 	/* Unless we have to buffer data, there's no reason to wait */
754 	op->soc = 0;
755 
756 	if (sg_src_len < block_size) {
757 		/* Not enough data in the sg element, so it
758 		 * needs to be buffered into a blocksize chunk
759 		 */
760 		int cp_len = ccp_fill_queue_buf(src);
761 
762 		op->soc = 1;
763 		op->src.u.dma.address = src->dm_wa.dma.address;
764 		op->src.u.dma.offset = 0;
765 		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
766 	} else {
767 		/* Enough data in the sg element, but we need to
768 		 * adjust for any previously copied data
769 		 */
770 		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
771 		op->src.u.dma.offset = src->sg_wa.sg_used;
772 		op->src.u.dma.length = op_len & ~(block_size - 1);
773 
774 		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
775 	}
776 
777 	if (dst) {
778 		if (sg_dst_len < block_size) {
779 			/* Not enough room in the sg element or we're on the
780 			 * last piece of data (when using padding), so the
781 			 * output needs to be buffered into a blocksize chunk
782 			 */
783 			op->soc = 1;
784 			op->dst.u.dma.address = dst->dm_wa.dma.address;
785 			op->dst.u.dma.offset = 0;
786 			op->dst.u.dma.length = op->src.u.dma.length;
787 		} else {
788 			/* Enough room in the sg element, but we need to
789 			 * adjust for any previously used area
790 			 */
791 			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
792 			op->dst.u.dma.offset = dst->sg_wa.sg_used;
793 			op->dst.u.dma.length = op->src.u.dma.length;
794 		}
795 	}
796 }
797 
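/* A short worked example of the chunking above: for an AES operation
 * (block_size = 16, blocksize_op = true) where the current source sg
 * entry only has 8 bytes left, those bytes are staged into src->dm_wa,
 * the source DMA is pointed at the workarea and the operation length is
 * forced to the 16-byte block; op->soc is set so the queue waits for
 * completion before the workarea is reused. When the sg entry has, say,
 * 40 bytes available, the DMA is pointed straight at the sg entry and the
 * length is trimmed down to 32, the nearest multiple of the block size.
 */
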
798 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
799 			     struct ccp_op *op)
800 {
801 	op->init = 0;
802 
803 	if (dst) {
804 		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
805 			ccp_empty_queue_buf(dst);
806 		else
807 			ccp_update_sg_workarea(&dst->sg_wa,
808 					       op->dst.u.dma.length);
809 	}
810 }
811 
812 static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
813 				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
814 				u32 byte_swap, bool from)
815 {
816 	struct ccp_op op;
817 
818 	memset(&op, 0, sizeof(op));
819 
820 	op.cmd_q = cmd_q;
821 	op.jobid = jobid;
822 	op.eom = 1;
823 
824 	if (from) {
825 		op.soc = 1;
826 		op.src.type = CCP_MEMTYPE_KSB;
827 		op.src.u.ksb = ksb;
828 		op.dst.type = CCP_MEMTYPE_SYSTEM;
829 		op.dst.u.dma.address = wa->dma.address;
830 		op.dst.u.dma.length = wa->length;
831 	} else {
832 		op.src.type = CCP_MEMTYPE_SYSTEM;
833 		op.src.u.dma.address = wa->dma.address;
834 		op.src.u.dma.length = wa->length;
835 		op.dst.type = CCP_MEMTYPE_KSB;
836 		op.dst.u.ksb = ksb;
837 	}
838 
839 	op.u.passthru.byte_swap = byte_swap;
840 
841 	return ccp_perform_passthru(&op);
842 }
843 
844 static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
845 			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
846 			   u32 byte_swap)
847 {
848 	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
849 }
850 
851 static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
852 			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
853 			     u32 byte_swap)
854 {
855 	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
856 }
857 
858 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
859 				struct ccp_cmd *cmd)
860 {
861 	struct ccp_aes_engine *aes = &cmd->u.aes;
862 	struct ccp_dm_workarea key, ctx;
863 	struct ccp_data src;
864 	struct ccp_op op;
865 	unsigned int dm_offset;
866 	int ret;
867 
868 	if (!((aes->key_len == AES_KEYSIZE_128) ||
869 	      (aes->key_len == AES_KEYSIZE_192) ||
870 	      (aes->key_len == AES_KEYSIZE_256)))
871 		return -EINVAL;
872 
873 	if (aes->src_len & (AES_BLOCK_SIZE - 1))
874 		return -EINVAL;
875 
876 	if (aes->iv_len != AES_BLOCK_SIZE)
877 		return -EINVAL;
878 
879 	if (!aes->key || !aes->iv || !aes->src)
880 		return -EINVAL;
881 
882 	if (aes->cmac_final) {
883 		if (aes->cmac_key_len != AES_BLOCK_SIZE)
884 			return -EINVAL;
885 
886 		if (!aes->cmac_key)
887 			return -EINVAL;
888 	}
889 
890 	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
891 	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
892 
893 	ret = -EIO;
894 	memset(&op, 0, sizeof(op));
895 	op.cmd_q = cmd_q;
896 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
897 	op.ksb_key = cmd_q->ksb_key;
898 	op.ksb_ctx = cmd_q->ksb_ctx;
899 	op.init = 1;
900 	op.u.aes.type = aes->type;
901 	op.u.aes.mode = aes->mode;
902 	op.u.aes.action = aes->action;
903 
904 	/* All supported key sizes fit in a single (32-byte) KSB entry
905 	 * and must be in little endian format. Use the 256-bit byte
906 	 * swap passthru option to convert from big endian to little
907 	 * endian.
908 	 */
909 	ret = ccp_init_dm_workarea(&key, cmd_q,
910 				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
911 				   DMA_TO_DEVICE);
912 	if (ret)
913 		return ret;
914 
915 	dm_offset = CCP_KSB_BYTES - aes->key_len;
916 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
917 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
918 			      CCP_PASSTHRU_BYTESWAP_256BIT);
919 	if (ret) {
920 		cmd->engine_error = cmd_q->cmd_error;
921 		goto e_key;
922 	}
923 
924 	/* The AES context fits in a single (32-byte) KSB entry and
925 	 * must be in little endian format. Use the 256-bit byte swap
926 	 * passthru option to convert from big endian to little endian.
927 	 */
928 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
929 				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
930 				   DMA_BIDIRECTIONAL);
931 	if (ret)
932 		goto e_key;
933 
934 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
935 	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
936 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
937 			      CCP_PASSTHRU_BYTESWAP_256BIT);
938 	if (ret) {
939 		cmd->engine_error = cmd_q->cmd_error;
940 		goto e_ctx;
941 	}
942 
943 	/* Send data to the CCP AES engine */
944 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
945 			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
946 	if (ret)
947 		goto e_ctx;
948 
949 	while (src.sg_wa.bytes_left) {
950 		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
951 		if (aes->cmac_final && !src.sg_wa.bytes_left) {
952 			op.eom = 1;
953 
954 			/* Push the K1/K2 key to the CCP now */
955 			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
956 						op.ksb_ctx,
957 						CCP_PASSTHRU_BYTESWAP_256BIT);
958 			if (ret) {
959 				cmd->engine_error = cmd_q->cmd_error;
960 				goto e_src;
961 			}
962 
963 			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
964 					aes->cmac_key_len);
965 			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
966 					      CCP_PASSTHRU_BYTESWAP_256BIT);
967 			if (ret) {
968 				cmd->engine_error = cmd_q->cmd_error;
969 				goto e_src;
970 			}
971 		}
972 
973 		ret = ccp_perform_aes(&op);
974 		if (ret) {
975 			cmd->engine_error = cmd_q->cmd_error;
976 			goto e_src;
977 		}
978 
979 		ccp_process_data(&src, NULL, &op);
980 	}
981 
982 	/* Retrieve the AES context - convert from LE to BE using
983 	 * 32-byte (256-bit) byteswapping
984 	 */
985 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
986 				CCP_PASSTHRU_BYTESWAP_256BIT);
987 	if (ret) {
988 		cmd->engine_error = cmd_q->cmd_error;
989 		goto e_src;
990 	}
991 
992 	/* ...but we only need AES_BLOCK_SIZE bytes */
993 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
994 	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
995 
996 e_src:
997 	ccp_free_data(&src, cmd_q);
998 
999 e_ctx:
1000 	ccp_dm_free(&ctx);
1001 
1002 e_key:
1003 	ccp_dm_free(&key);
1004 
1005 	return ret;
1006 }
1007 
1008 static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1009 {
1010 	struct ccp_aes_engine *aes = &cmd->u.aes;
1011 	struct ccp_dm_workarea key, ctx;
1012 	struct ccp_data src, dst;
1013 	struct ccp_op op;
1014 	unsigned int dm_offset;
1015 	bool in_place = false;
1016 	int ret;
1017 
1018 	if (aes->mode == CCP_AES_MODE_CMAC)
1019 		return ccp_run_aes_cmac_cmd(cmd_q, cmd);
1020 
1021 	if (!((aes->key_len == AES_KEYSIZE_128) ||
1022 	      (aes->key_len == AES_KEYSIZE_192) ||
1023 	      (aes->key_len == AES_KEYSIZE_256)))
1024 		return -EINVAL;
1025 
1026 	if (((aes->mode == CCP_AES_MODE_ECB) ||
1027 	     (aes->mode == CCP_AES_MODE_CBC) ||
1028 	     (aes->mode == CCP_AES_MODE_CFB)) &&
1029 	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
1030 		return -EINVAL;
1031 
1032 	if (!aes->key || !aes->src || !aes->dst)
1033 		return -EINVAL;
1034 
1035 	if (aes->mode != CCP_AES_MODE_ECB) {
1036 		if (aes->iv_len != AES_BLOCK_SIZE)
1037 			return -EINVAL;
1038 
1039 		if (!aes->iv)
1040 			return -EINVAL;
1041 	}
1042 
1043 	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
1044 	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
1045 
1046 	ret = -EIO;
1047 	memset(&op, 0, sizeof(op));
1048 	op.cmd_q = cmd_q;
1049 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1050 	op.ksb_key = cmd_q->ksb_key;
1051 	op.ksb_ctx = cmd_q->ksb_ctx;
1052 	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
1053 	op.u.aes.type = aes->type;
1054 	op.u.aes.mode = aes->mode;
1055 	op.u.aes.action = aes->action;
1056 
1057 	/* All supported key sizes fit in a single (32-byte) KSB entry
1058 	 * and must be in little endian format. Use the 256-bit byte
1059 	 * swap passthru option to convert from big endian to little
1060 	 * endian.
1061 	 */
1062 	ret = ccp_init_dm_workarea(&key, cmd_q,
1063 				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1064 				   DMA_TO_DEVICE);
1065 	if (ret)
1066 		return ret;
1067 
1068 	dm_offset = CCP_KSB_BYTES - aes->key_len;
1069 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
1070 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1071 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1072 	if (ret) {
1073 		cmd->engine_error = cmd_q->cmd_error;
1074 		goto e_key;
1075 	}
1076 
1077 	/* The AES context fits in a single (32-byte) KSB entry and
1078 	 * must be in little endian format. Use the 256-bit byte swap
1079 	 * passthru option to convert from big endian to little endian.
1080 	 */
1081 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1082 				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1083 				   DMA_BIDIRECTIONAL);
1084 	if (ret)
1085 		goto e_key;
1086 
1087 	if (aes->mode != CCP_AES_MODE_ECB) {
1088 		/* Load the AES context - convert to LE */
1089 		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1090 		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1091 		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1092 				      CCP_PASSTHRU_BYTESWAP_256BIT);
1093 		if (ret) {
1094 			cmd->engine_error = cmd_q->cmd_error;
1095 			goto e_ctx;
1096 		}
1097 	}
1098 
1099 	/* Prepare the input and output data workareas. For in-place
1100 	 * operations we need to set the dma direction to BIDIRECTIONAL
1101 	 * and copy the src workarea to the dst workarea.
1102 	 */
1103 	if (sg_virt(aes->src) == sg_virt(aes->dst))
1104 		in_place = true;
1105 
1106 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
1107 			    AES_BLOCK_SIZE,
1108 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1109 	if (ret)
1110 		goto e_ctx;
1111 
1112 	if (in_place)
1113 		dst = src;
1114 	else {
1115 		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
1116 				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
1117 		if (ret)
1118 			goto e_src;
1119 	}
1120 
1121 	/* Send data to the CCP AES engine */
1122 	while (src.sg_wa.bytes_left) {
1123 		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
1124 		if (!src.sg_wa.bytes_left) {
1125 			op.eom = 1;
1126 
1127 			/* Since we don't retrieve the AES context in ECB
1128 			 * mode we have to wait for the operation to complete
1129 			 * on the last piece of data
1130 			 */
1131 			if (aes->mode == CCP_AES_MODE_ECB)
1132 				op.soc = 1;
1133 		}
1134 
1135 		ret = ccp_perform_aes(&op);
1136 		if (ret) {
1137 			cmd->engine_error = cmd_q->cmd_error;
1138 			goto e_dst;
1139 		}
1140 
1141 		ccp_process_data(&src, &dst, &op);
1142 	}
1143 
1144 	if (aes->mode != CCP_AES_MODE_ECB) {
1145 		/* Retrieve the AES context - convert from LE to BE using
1146 		 * 32-byte (256-bit) byteswapping
1147 		 */
1148 		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1149 					CCP_PASSTHRU_BYTESWAP_256BIT);
1150 		if (ret) {
1151 			cmd->engine_error = cmd_q->cmd_error;
1152 			goto e_dst;
1153 		}
1154 
1155 		/* ...but we only need AES_BLOCK_SIZE bytes */
1156 		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1157 		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1158 	}
1159 
1160 e_dst:
1161 	if (!in_place)
1162 		ccp_free_data(&dst, cmd_q);
1163 
1164 e_src:
1165 	ccp_free_data(&src, cmd_q);
1166 
1167 e_ctx:
1168 	ccp_dm_free(&ctx);
1169 
1170 e_key:
1171 	ccp_dm_free(&key);
1172 
1173 	return ret;
1174 }
1175 
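/* Illustrative only: a minimal sketch, not part of the driver, of how a
 * client might exercise the AES path above. It assumes the
 * ccp_enqueue_cmd() entry point declared in <linux/ccp.h>; the helper name
 * and buffers are hypothetical, and a real caller supplies cmd.callback /
 * cmd.data, handles the asynchronous completion and keeps cmd and the
 * scatterlists alive until that completion runs.
 */
static int __maybe_unused ccp_example_aes_cbc_encrypt(u8 *key, u8 *iv,
						      u8 *buf,
						      unsigned int len)
{
	struct scatterlist sg_key, sg_iv, sg_buf;
	struct ccp_cmd cmd;

	/* For CBC, len must be a multiple of AES_BLOCK_SIZE */
	sg_init_one(&sg_key, key, AES_KEYSIZE_128);
	sg_init_one(&sg_iv, iv, AES_BLOCK_SIZE);
	sg_init_one(&sg_buf, buf, len);

	memset(&cmd, 0, sizeof(cmd));
	cmd.engine = CCP_ENGINE_AES;
	cmd.u.aes.type = CCP_AES_TYPE_128;
	cmd.u.aes.mode = CCP_AES_MODE_CBC;
	cmd.u.aes.action = CCP_AES_ACTION_ENCRYPT;
	cmd.u.aes.key = &sg_key;
	cmd.u.aes.key_len = AES_KEYSIZE_128;
	cmd.u.aes.iv = &sg_iv;
	cmd.u.aes.iv_len = AES_BLOCK_SIZE;
	cmd.u.aes.src = &sg_buf;
	cmd.u.aes.src_len = len;
	cmd.u.aes.dst = &sg_buf;	/* in-place */

	return ccp_enqueue_cmd(&cmd);
}
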
1176 static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
1177 			       struct ccp_cmd *cmd)
1178 {
1179 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1180 	struct ccp_dm_workarea key, ctx;
1181 	struct ccp_data src, dst;
1182 	struct ccp_op op;
1183 	unsigned int unit_size, dm_offset;
1184 	bool in_place = false;
1185 	int ret;
1186 
1187 	switch (xts->unit_size) {
1188 	case CCP_XTS_AES_UNIT_SIZE_16:
1189 		unit_size = 16;
1190 		break;
1191 	case CCP_XTS_AES_UNIT_SIZE_512:
1192 		unit_size = 512;
1193 		break;
1194 	case CCP_XTS_AES_UNIT_SIZE_1024:
1195 		unit_size = 1024;
1196 		break;
1197 	case CCP_XTS_AES_UNIT_SIZE_2048:
1198 		unit_size = 2048;
1199 		break;
1200 	case CCP_XTS_AES_UNIT_SIZE_4096:
1201 		unit_size = 4096;
1202 		break;
1203 
1204 	default:
1205 		return -EINVAL;
1206 	}
1207 
1208 	if (xts->key_len != AES_KEYSIZE_128)
1209 		return -EINVAL;
1210 
1211 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1212 		return -EINVAL;
1213 
1214 	if (xts->iv_len != AES_BLOCK_SIZE)
1215 		return -EINVAL;
1216 
1217 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1218 		return -EINVAL;
1219 
1220 	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
1221 	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
1222 
1223 	ret = -EIO;
1224 	memset(&op, 0, sizeof(op));
1225 	op.cmd_q = cmd_q;
1226 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1227 	op.ksb_key = cmd_q->ksb_key;
1228 	op.ksb_ctx = cmd_q->ksb_ctx;
1229 	op.init = 1;
1230 	op.u.xts.action = xts->action;
1231 	op.u.xts.unit_size = xts->unit_size;
1232 
1233 	/* All supported key sizes fit in a single (32-byte) KSB entry
1234 	 * and must be in little endian format. Use the 256-bit byte
1235 	 * swap passthru option to convert from big endian to little
1236 	 * endian.
1237 	 */
1238 	ret = ccp_init_dm_workarea(&key, cmd_q,
1239 				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1240 				   DMA_TO_DEVICE);
1241 	if (ret)
1242 		return ret;
1243 
1244 	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
1245 	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1246 	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
1247 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1248 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1249 	if (ret) {
1250 		cmd->engine_error = cmd_q->cmd_error;
1251 		goto e_key;
1252 	}
1253 
1254 	/* The AES context fits in a single (32-byte) KSB entry and
1255 	 * for XTS is already in little endian format so no byte swapping
1256 	 * is needed.
1257 	 */
1258 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1259 				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1260 				   DMA_BIDIRECTIONAL);
1261 	if (ret)
1262 		goto e_key;
1263 
1264 	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1265 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1266 			      CCP_PASSTHRU_BYTESWAP_NOOP);
1267 	if (ret) {
1268 		cmd->engine_error = cmd_q->cmd_error;
1269 		goto e_ctx;
1270 	}
1271 
1272 	/* Prepare the input and output data workareas. For in-place
1273 	 * operations we need to set the dma direction to BIDIRECTIONAL
1274 	 * and copy the src workarea to the dst workarea.
1275 	 */
1276 	if (sg_virt(xts->src) == sg_virt(xts->dst))
1277 		in_place = true;
1278 
1279 	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1280 			    unit_size,
1281 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1282 	if (ret)
1283 		goto e_ctx;
1284 
1285 	if (in_place)
1286 		dst = src;
1287 	else {
1288 		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1289 				    unit_size, DMA_FROM_DEVICE);
1290 		if (ret)
1291 			goto e_src;
1292 	}
1293 
1294 	/* Send data to the CCP AES engine */
1295 	while (src.sg_wa.bytes_left) {
1296 		ccp_prepare_data(&src, &dst, &op, unit_size, true);
1297 		if (!src.sg_wa.bytes_left)
1298 			op.eom = 1;
1299 
1300 		ret = ccp_perform_xts_aes(&op);
1301 		if (ret) {
1302 			cmd->engine_error = cmd_q->cmd_error;
1303 			goto e_dst;
1304 		}
1305 
1306 		ccp_process_data(&src, &dst, &op);
1307 	}
1308 
1309 	/* Retrieve the AES context - convert from LE to BE using
1310 	 * 32-byte (256-bit) byteswapping
1311 	 */
1312 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1313 				CCP_PASSTHRU_BYTESWAP_256BIT);
1314 	if (ret) {
1315 		cmd->engine_error = cmd_q->cmd_error;
1316 		goto e_dst;
1317 	}
1318 
1319 	/* ...but we only need AES_BLOCK_SIZE bytes */
1320 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1321 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1322 
1323 e_dst:
1324 	if (!in_place)
1325 		ccp_free_data(&dst, cmd_q);
1326 
1327 e_src:
1328 	ccp_free_data(&src, cmd_q);
1329 
1330 e_ctx:
1331 	ccp_dm_free(&ctx);
1332 
1333 e_key:
1334 	ccp_dm_free(&key);
1335 
1336 	return ret;
1337 }
1338 
1339 static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1340 {
1341 	struct ccp_sha_engine *sha = &cmd->u.sha;
1342 	struct ccp_dm_workarea ctx;
1343 	struct ccp_data src;
1344 	struct ccp_op op;
1345 	int ret;
1346 
1347 	if (sha->ctx_len != CCP_SHA_CTXSIZE)
1348 		return -EINVAL;
1349 
1350 	if (!sha->ctx)
1351 		return -EINVAL;
1352 
1353 	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
1354 		return -EINVAL;
1355 
1356 	if (!sha->src_len) {
1357 		const u8 *sha_zero;
1358 
1359 		/* Not final, just return */
1360 		if (!sha->final)
1361 			return 0;
1362 
1363 		/* CCP can't do a zero length sha operation so the caller
1364 		 * must buffer the data.
1365 		 */
1366 		if (sha->msg_bits)
1367 			return -EINVAL;
1368 
1369 		/* This is a sha operation for a message with a total length
1370 		 * of zero, so return the known result.
1371 		 */
1372 		switch (sha->type) {
1373 		case CCP_SHA_TYPE_1:
1374 			sha_zero = ccp_sha1_zero;
1375 			break;
1376 		case CCP_SHA_TYPE_224:
1377 			sha_zero = ccp_sha224_zero;
1378 			break;
1379 		case CCP_SHA_TYPE_256:
1380 			sha_zero = ccp_sha256_zero;
1381 			break;
1382 		default:
1383 			return -EINVAL;
1384 		}
1385 
1386 		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1387 					 sha->ctx_len, 1);
1388 
1389 		return 0;
1390 	}
1391 
1392 	if (!sha->src)
1393 		return -EINVAL;
1394 
1395 	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
1396 
1397 	memset(&op, 0, sizeof(op));
1398 	op.cmd_q = cmd_q;
1399 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1400 	op.ksb_ctx = cmd_q->ksb_ctx;
1401 	op.u.sha.type = sha->type;
1402 	op.u.sha.msg_bits = sha->msg_bits;
1403 
1404 	/* The SHA context fits in a single (32-byte) KSB entry and
1405 	 * must be in little endian format. Use the 256-bit byte swap
1406 	 * passthru option to convert from big endian to little endian.
1407 	 */
1408 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1409 				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
1410 				   DMA_BIDIRECTIONAL);
1411 	if (ret)
1412 		return ret;
1413 
1414 	ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1415 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1416 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1417 	if (ret) {
1418 		cmd->engine_error = cmd_q->cmd_error;
1419 		goto e_ctx;
1420 	}
1421 
1422 	/* Send data to the CCP SHA engine */
1423 	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1424 			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
1425 	if (ret)
1426 		goto e_ctx;
1427 
1428 	while (src.sg_wa.bytes_left) {
1429 		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
1430 		if (sha->final && !src.sg_wa.bytes_left)
1431 			op.eom = 1;
1432 
1433 		ret = ccp_perform_sha(&op);
1434 		if (ret) {
1435 			cmd->engine_error = cmd_q->cmd_error;
1436 			goto e_data;
1437 		}
1438 
1439 		ccp_process_data(&src, NULL, &op);
1440 	}
1441 
1442 	/* Retrieve the SHA context - convert from LE to BE using
1443 	 * 32-byte (256-bit) byteswapping
1444 	 */
1445 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1446 				CCP_PASSTHRU_BYTESWAP_256BIT);
1447 	if (ret) {
1448 		cmd->engine_error = cmd_q->cmd_error;
1449 		goto e_data;
1450 	}
1451 
1452 	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1453 
1454 e_data:
1455 	ccp_free_data(&src, cmd_q);
1456 
1457 e_ctx:
1458 	ccp_dm_free(&ctx);
1459 
1460 	return ret;
1461 }
1462 
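/* Illustrative only: a minimal sketch, not part of the driver, of a
 * single-shot SHA-256 through the engine above. It assumes the
 * ccp_enqueue_cmd() entry point from <linux/ccp.h> and that the caller has
 * seeded ctx (CCP_SHA_CTXSIZE bytes) with the standard big-endian initial
 * hash values, as the ccp-crypto layer does; the helper name is
 * hypothetical and callback/lifetime handling is elided as in the AES
 * sketch earlier. On completion the digest is written back into ctx; a
 * zero-length final request with msg_bits == 0 is served from the
 * precomputed constants near the top of this file.
 */
static int __maybe_unused ccp_example_sha256_digest(u8 *msg, unsigned int len,
						    u8 *ctx)
{
	struct scatterlist sg_msg, sg_ctx;
	struct ccp_cmd cmd;

	sg_init_one(&sg_msg, msg, len);
	sg_init_one(&sg_ctx, ctx, CCP_SHA_CTXSIZE);

	memset(&cmd, 0, sizeof(cmd));
	cmd.engine = CCP_ENGINE_SHA;
	cmd.u.sha.type = CCP_SHA_TYPE_256;
	cmd.u.sha.ctx = &sg_ctx;
	cmd.u.sha.ctx_len = CCP_SHA_CTXSIZE;
	cmd.u.sha.src = &sg_msg;
	cmd.u.sha.src_len = len;
	cmd.u.sha.final = 1;
	cmd.u.sha.msg_bits = (u64)len << 3;	/* total message length in bits */

	return ccp_enqueue_cmd(&cmd);
}
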
1463 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1464 {
1465 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1466 	struct ccp_dm_workarea exp, src;
1467 	struct ccp_data dst;
1468 	struct ccp_op op;
1469 	unsigned int ksb_count, i_len, o_len;
1470 	int ret;
1471 
1472 	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
1473 		return -EINVAL;
1474 
1475 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1476 		return -EINVAL;
1477 
1478 	/* The RSA modulus must precede the message being acted upon, so
1479 	 * it must be copied to a DMA area where the message and the
1480 	 * modulus can be concatenated.  Therefore the input buffer
1481 	 * length required is twice the output buffer length (which
1482 	 * must be a multiple of 256 bits).
1483 	 */
1484 	o_len = ((rsa->key_size + 255) / 256) * 32;
1485 	i_len = o_len * 2;
1486 
1487 	ksb_count = o_len / CCP_KSB_BYTES;
1488 
1489 	memset(&op, 0, sizeof(op));
1490 	op.cmd_q = cmd_q;
1491 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1492 	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
1493 	if (!op.ksb_key)
1494 		return -EIO;
1495 
1496 	/* The RSA exponent may span multiple (32-byte) KSB entries and must
1497 	 * be in little endian format. Reverse copy each 32-byte chunk
1498 	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
1499 	 * and each byte within that chunk, and do not perform any byte swap
1500 	 * operations on the passthru operation.
1501 	 */
1502 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1503 	if (ret)
1504 		goto e_ksb;
1505 
1506 	ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
1507 				true);
1508 	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
1509 			      CCP_PASSTHRU_BYTESWAP_NOOP);
1510 	if (ret) {
1511 		cmd->engine_error = cmd_q->cmd_error;
1512 		goto e_exp;
1513 	}
1514 
1515 	/* Concatenate the modulus and the message. Both the modulus and
1516 	 * the operands must be in little endian format.  Since the input
1517 	 * is in big endian format it must be converted.
1518 	 */
1519 	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1520 	if (ret)
1521 		goto e_exp;
1522 
1523 	ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
1524 				true);
1525 	src.address += o_len;	/* Adjust the address for the copy operation */
1526 	ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
1527 				true);
1528 	src.address -= o_len;	/* Reset the address to original value */
1529 
1530 	/* Prepare the output area for the operation */
1531 	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
1532 			    o_len, DMA_FROM_DEVICE);
1533 	if (ret)
1534 		goto e_src;
1535 
1536 	op.soc = 1;
1537 	op.src.u.dma.address = src.dma.address;
1538 	op.src.u.dma.offset = 0;
1539 	op.src.u.dma.length = i_len;
1540 	op.dst.u.dma.address = dst.dm_wa.dma.address;
1541 	op.dst.u.dma.offset = 0;
1542 	op.dst.u.dma.length = o_len;
1543 
1544 	op.u.rsa.mod_size = rsa->key_size;
1545 	op.u.rsa.input_len = i_len;
1546 
1547 	ret = ccp_perform_rsa(&op);
1548 	if (ret) {
1549 		cmd->engine_error = cmd_q->cmd_error;
1550 		goto e_dst;
1551 	}
1552 
1553 	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);
1554 
1555 e_dst:
1556 	ccp_free_data(&dst, cmd_q);
1557 
1558 e_src:
1559 	ccp_dm_free(&src);
1560 
1561 e_exp:
1562 	ccp_dm_free(&exp);
1563 
1564 e_ksb:
1565 	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
1566 
1567 	return ret;
1568 }
1569 
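/* A short worked example of the sizing above: for a 2048-bit key,
 * o_len = ((2048 + 255) / 256) * 32 = 256 bytes, i_len = 2 * o_len = 512
 * bytes, and ksb_count = o_len / CCP_KSB_BYTES = 8 KSB entries are
 * reserved to hold the reversed exponent.
 */
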
1570 static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1571 				struct ccp_cmd *cmd)
1572 {
1573 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1574 	struct ccp_dm_workarea mask;
1575 	struct ccp_data src, dst;
1576 	struct ccp_op op;
1577 	bool in_place = false;
1578 	unsigned int i;
1579 	int ret;
1580 
1581 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1582 		return -EINVAL;
1583 
1584 	if (!pt->src || !pt->dst)
1585 		return -EINVAL;
1586 
1587 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1588 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1589 			return -EINVAL;
1590 		if (!pt->mask)
1591 			return -EINVAL;
1592 	}
1593 
1594 	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
1595 
1596 	memset(&op, 0, sizeof(op));
1597 	op.cmd_q = cmd_q;
1598 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1599 
1600 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1601 		/* Load the mask */
1602 		op.ksb_key = cmd_q->ksb_key;
1603 
1604 		ret = ccp_init_dm_workarea(&mask, cmd_q,
1605 					   CCP_PASSTHRU_KSB_COUNT *
1606 					   CCP_KSB_BYTES,
1607 					   DMA_TO_DEVICE);
1608 		if (ret)
1609 			return ret;
1610 
1611 		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1612 		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
1613 				      CCP_PASSTHRU_BYTESWAP_NOOP);
1614 		if (ret) {
1615 			cmd->engine_error = cmd_q->cmd_error;
1616 			goto e_mask;
1617 		}
1618 	}
1619 
1620 	/* Prepare the input and output data workareas. For in-place
1621 	 * operations we need to set the dma direction to BIDIRECTIONAL
1622 	 * and copy the src workarea to the dst workarea.
1623 	 */
1624 	if (sg_virt(pt->src) == sg_virt(pt->dst))
1625 		in_place = true;
1626 
1627 	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1628 			    CCP_PASSTHRU_MASKSIZE,
1629 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1630 	if (ret)
1631 		goto e_mask;
1632 
1633 	if (in_place)
1634 		dst = src;
1635 	else {
1636 		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1637 				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1638 		if (ret)
1639 			goto e_src;
1640 	}
1641 
1642 	/* Send data to the CCP Passthru engine
1643 	 *   Because the CCP engine works on a single source and destination
1644 	 *   dma address at a time, each entry in the source scatterlist
1645 	 *   (after the dma_map_sg call) must be less than or equal to the
1646 	 *   (remaining) length in the destination scatterlist entry and the
1647 	 *   length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1648 	 */
1649 	dst.sg_wa.sg_used = 0;
1650 	for (i = 1; i <= src.sg_wa.dma_count; i++) {
1651 		if (!dst.sg_wa.sg ||
1652 		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
1653 			ret = -EINVAL;
1654 			goto e_dst;
1655 		}
1656 
1657 		if (i == src.sg_wa.dma_count) {
1658 			op.eom = 1;
1659 			op.soc = 1;
1660 		}
1661 
1662 		op.src.type = CCP_MEMTYPE_SYSTEM;
1663 		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
1664 		op.src.u.dma.offset = 0;
1665 		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
1666 
1667 		op.dst.type = CCP_MEMTYPE_SYSTEM;
1668 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
1669 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
1670 		op.dst.u.dma.length = op.src.u.dma.length;
1671 
1672 		ret = ccp_perform_passthru(&op);
1673 		if (ret) {
1674 			cmd->engine_error = cmd_q->cmd_error;
1675 			goto e_dst;
1676 		}
1677 
1678 		dst.sg_wa.sg_used += src.sg_wa.sg->length;
1679 		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
1680 			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
1681 			dst.sg_wa.sg_used = 0;
1682 		}
1683 		src.sg_wa.sg = sg_next(src.sg_wa.sg);
1684 	}
1685 
1686 e_dst:
1687 	if (!in_place)
1688 		ccp_free_data(&dst, cmd_q);
1689 
1690 e_src:
1691 	ccp_free_data(&src, cmd_q);
1692 
1693 e_mask:
1694 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
1695 		ccp_dm_free(&mask);
1696 
1697 	return ret;
1698 }
1699 
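/* Illustrative only: a minimal sketch, not part of the driver, of using
 * the passthru engine above as a plain DMA copy (no bitwise mask, no byte
 * swap). It assumes the ccp_enqueue_cmd() entry point from <linux/ccp.h>;
 * the helper name is hypothetical and callback/lifetime handling is elided
 * as in the AES sketch earlier. bit_mod is set explicitly for clarity; the
 * zeroed cmd already leaves the bitwise and byteswap modes at their no-op
 * defaults, and final is set so src_len need not be a multiple of
 * CCP_PASSTHRU_BLOCKSIZE.
 */
static int __maybe_unused ccp_example_passthru_copy(u8 *dst, u8 *src,
						    unsigned int len)
{
	struct scatterlist sg_src, sg_dst;
	struct ccp_cmd cmd;

	sg_init_one(&sg_src, src, len);
	sg_init_one(&sg_dst, dst, len);

	memset(&cmd, 0, sizeof(cmd));
	cmd.engine = CCP_ENGINE_PASSTHRU;
	cmd.u.passthru.bit_mod = CCP_PASSTHRU_BITWISE_NOOP;
	cmd.u.passthru.src = &sg_src;
	cmd.u.passthru.src_len = len;
	cmd.u.passthru.dst = &sg_dst;
	cmd.u.passthru.final = 1;

	return ccp_enqueue_cmd(&cmd);
}
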
1700 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1701 {
1702 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1703 	struct ccp_dm_workarea src, dst;
1704 	struct ccp_op op;
1705 	int ret;
1706 	u8 *save;
1707 
1708 	if (!ecc->u.mm.operand_1 ||
1709 	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
1710 		return -EINVAL;
1711 
1712 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
1713 		if (!ecc->u.mm.operand_2 ||
1714 		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
1715 			return -EINVAL;
1716 
1717 	if (!ecc->u.mm.result ||
1718 	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
1719 		return -EINVAL;
1720 
1721 	memset(&op, 0, sizeof(op));
1722 	op.cmd_q = cmd_q;
1723 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1724 
1725 	/* Concatenate the modulus and the operands. Both the modulus and
1726 	 * the operands must be in little endian format.  Since the input
1727 	 * is in big endian format it must be converted and placed in a
1728 	 * fixed length buffer.
1729 	 */
1730 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1731 				   DMA_TO_DEVICE);
1732 	if (ret)
1733 		return ret;
1734 
1735 	/* Save the workarea address since it is updated in order to perform
1736 	 * the concatenation
1737 	 */
1738 	save = src.address;
1739 
1740 	/* Copy the ECC modulus */
1741 	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1742 				CCP_ECC_OPERAND_SIZE, true);
1743 	src.address += CCP_ECC_OPERAND_SIZE;
1744 
1745 	/* Copy the first operand */
1746 	ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
1747 				ecc->u.mm.operand_1_len,
1748 				CCP_ECC_OPERAND_SIZE, true);
1749 	src.address += CCP_ECC_OPERAND_SIZE;
1750 
1751 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
1752 		/* Copy the second operand */
1753 		ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
1754 					ecc->u.mm.operand_2_len,
1755 					CCP_ECC_OPERAND_SIZE, true);
1756 		src.address += CCP_ECC_OPERAND_SIZE;
1757 	}
1758 
1759 	/* Restore the workarea address */
1760 	src.address = save;
1761 
1762 	/* Prepare the output area for the operation */
1763 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1764 				   DMA_FROM_DEVICE);
1765 	if (ret)
1766 		goto e_src;
1767 
1768 	op.soc = 1;
1769 	op.src.u.dma.address = src.dma.address;
1770 	op.src.u.dma.offset = 0;
1771 	op.src.u.dma.length = src.length;
1772 	op.dst.u.dma.address = dst.dma.address;
1773 	op.dst.u.dma.offset = 0;
1774 	op.dst.u.dma.length = dst.length;
1775 
1776 	op.u.ecc.function = cmd->u.ecc.function;
1777 
1778 	ret = ccp_perform_ecc(&op);
1779 	if (ret) {
1780 		cmd->engine_error = cmd_q->cmd_error;
1781 		goto e_dst;
1782 	}
1783 
1784 	ecc->ecc_result = le16_to_cpup(
1785 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1786 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1787 		ret = -EIO;
1788 		goto e_dst;
1789 	}
1790 
1791 	/* Save the ECC result */
1792 	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);
1793 
1794 e_dst:
1795 	ccp_dm_free(&dst);
1796 
1797 e_src:
1798 	ccp_dm_free(&src);
1799 
1800 	return ret;
1801 }
1802 
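/* A short worked example of the source layout built above, as handed to
 * the ECC engine: bytes [0, CCP_ECC_OPERAND_SIZE) hold the byte-reversed
 * modulus, the next CCP_ECC_OPERAND_SIZE bytes hold operand 1, and (except
 * for the MINV function) the following CCP_ECC_OPERAND_SIZE bytes hold
 * operand 2, each value zero-padded to its full operand slot.
 */
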
1803 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1804 {
1805 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1806 	struct ccp_dm_workarea src, dst;
1807 	struct ccp_op op;
1808 	int ret;
1809 	u8 *save;
1810 
1811 	if (!ecc->u.pm.point_1.x ||
1812 	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
1813 	    !ecc->u.pm.point_1.y ||
1814 	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
1815 		return -EINVAL;
1816 
1817 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1818 		if (!ecc->u.pm.point_2.x ||
1819 		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
1820 		    !ecc->u.pm.point_2.y ||
1821 		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
1822 			return -EINVAL;
1823 	} else {
1824 		if (!ecc->u.pm.domain_a ||
1825 		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
1826 			return -EINVAL;
1827 
1828 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
1829 			if (!ecc->u.pm.scalar ||
1830 			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
1831 				return -EINVAL;
1832 	}
1833 
1834 	if (!ecc->u.pm.result.x ||
1835 	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
1836 	    !ecc->u.pm.result.y ||
1837 	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
1838 		return -EINVAL;
1839 
1840 	memset(&op, 0, sizeof(op));
1841 	op.cmd_q = cmd_q;
1842 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1843 
1844 	/* Concatenate the modulus and the operands. Both the modulus and
1845 	 * the operands must be in little endian format.  Since the input
1846 	 * is in big endian format it must be converted and placed in a
1847 	 * fixed length buffer.
1848 	 */
1849 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1850 				   DMA_TO_DEVICE);
1851 	if (ret)
1852 		return ret;
1853 
1854 	/* Save the workarea address since it is updated in order to perform
1855 	 * the concatenation
1856 	 */
1857 	save = src.address;
1858 
1859 	/* Copy the ECC modulus */
1860 	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1861 				CCP_ECC_OPERAND_SIZE, true);
1862 	src.address += CCP_ECC_OPERAND_SIZE;
1863 
1864 	/* Copy the first point X and Y coordinate */
1865 	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
1866 				ecc->u.pm.point_1.x_len,
1867 				CCP_ECC_OPERAND_SIZE, true);
1868 	src.address += CCP_ECC_OPERAND_SIZE;
1869 	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
1870 				ecc->u.pm.point_1.y_len,
1871 				CCP_ECC_OPERAND_SIZE, true);
1872 	src.address += CCP_ECC_OPERAND_SIZE;
1873 
1874 	/* Set the first point Z coordinate to 1 */
1875 	*(src.address) = 0x01;
1876 	src.address += CCP_ECC_OPERAND_SIZE;
1877 
1878 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1879 		/* Copy the second point X and Y coordinate */
1880 		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
1881 					ecc->u.pm.point_2.x_len,
1882 					CCP_ECC_OPERAND_SIZE, true);
1883 		src.address += CCP_ECC_OPERAND_SIZE;
1884 		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
1885 					ecc->u.pm.point_2.y_len,
1886 					CCP_ECC_OPERAND_SIZE, true);
1887 		src.address += CCP_ECC_OPERAND_SIZE;
1888 
1889 		/* Set the second point Z coordinate to 1 */
1890 		*(src.address) = 0x01;
1891 		src.address += CCP_ECC_OPERAND_SIZE;
1892 	} else {
1893 		/* Copy the Domain "a" parameter */
1894 		ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
1895 					ecc->u.pm.domain_a_len,
1896 					CCP_ECC_OPERAND_SIZE, true);
1897 		src.address += CCP_ECC_OPERAND_SIZE;
1898 
1899 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
1900 			/* Copy the scalar value */
1901 			ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
1902 						ecc->u.pm.scalar_len,
1903 						CCP_ECC_OPERAND_SIZE, true);
1904 			src.address += CCP_ECC_OPERAND_SIZE;
1905 		}
1906 	}
1907 
1908 	/* Restore the workarea address */
1909 	src.address = save;
1910 
1911 	/* Prepare the output area for the operation */
1912 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1913 				   DMA_FROM_DEVICE);
1914 	if (ret)
1915 		goto e_src;
1916 
1917 	op.soc = 1;
1918 	op.src.u.dma.address = src.dma.address;
1919 	op.src.u.dma.offset = 0;
1920 	op.src.u.dma.length = src.length;
1921 	op.dst.u.dma.address = dst.dma.address;
1922 	op.dst.u.dma.offset = 0;
1923 	op.dst.u.dma.length = dst.length;
1924 
1925 	op.u.ecc.function = cmd->u.ecc.function;
1926 
1927 	ret = ccp_perform_ecc(&op);
1928 	if (ret) {
1929 		cmd->engine_error = cmd_q->cmd_error;
1930 		goto e_dst;
1931 	}
1932 
1933 	ecc->ecc_result = le16_to_cpup(
1934 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1935 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1936 		ret = -EIO;
1937 		goto e_dst;
1938 	}
1939 
1940 	/* Save the workarea address since it is updated as we walk through
1941 	 * to copy the point math result
1942 	 */
1943 	save = dst.address;
1944 
1945 	/* Save the ECC result X and Y coordinates */
1946 	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
1947 				CCP_ECC_MODULUS_BYTES);
1948 	dst.address += CCP_ECC_OUTPUT_SIZE;
1949 	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
1950 				CCP_ECC_MODULUS_BYTES);
1951 	dst.address += CCP_ECC_OUTPUT_SIZE;
1952 
1953 	/* Restore the workarea address */
1954 	dst.address = save;
1955 
1956 e_dst:
1957 	ccp_dm_free(&dst);
1958 
1959 e_src:
1960 	ccp_dm_free(&src);
1961 
1962 	return ret;
1963 }
1964 
1965 static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1966 {
1967 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1968 
1969 	ecc->ecc_result = 0;
1970 
1971 	if (!ecc->mod ||
1972 	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
1973 		return -EINVAL;
1974 
1975 	switch (ecc->function) {
1976 	case CCP_ECC_FUNCTION_MMUL_384BIT:
1977 	case CCP_ECC_FUNCTION_MADD_384BIT:
1978 	case CCP_ECC_FUNCTION_MINV_384BIT:
1979 		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
1980 
1981 	case CCP_ECC_FUNCTION_PADD_384BIT:
1982 	case CCP_ECC_FUNCTION_PMUL_384BIT:
1983 	case CCP_ECC_FUNCTION_PDBL_384BIT:
1984 		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
1985 
1986 	default:
1987 		return -EINVAL;
1988 	}
1989 }
1990 
1991 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1992 {
1993 	int ret;
1994 
1995 	cmd->engine_error = 0;
1996 	cmd_q->cmd_error = 0;
1997 	cmd_q->int_rcvd = 0;
1998 	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
1999 
2000 	switch (cmd->engine) {
2001 	case CCP_ENGINE_AES:
2002 		ret = ccp_run_aes_cmd(cmd_q, cmd);
2003 		break;
2004 	case CCP_ENGINE_XTS_AES_128:
2005 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2006 		break;
2007 	case CCP_ENGINE_SHA:
2008 		ret = ccp_run_sha_cmd(cmd_q, cmd);
2009 		break;
2010 	case CCP_ENGINE_RSA:
2011 		ret = ccp_run_rsa_cmd(cmd_q, cmd);
2012 		break;
2013 	case CCP_ENGINE_PASSTHRU:
2014 		ret = ccp_run_passthru_cmd(cmd_q, cmd);
2015 		break;
2016 	case CCP_ENGINE_ECC:
2017 		ret = ccp_run_ecc_cmd(cmd_q, cmd);
2018 		break;
2019 	default:
2020 		ret = -EINVAL;
2021 	}
2022 
2023 	return ret;
2024 }
2025