xref: /linux/drivers/crypto/ccp/ccp-ops.c (revision b85d45947951d23cb22d90caecf4c1eb81342c96)
1 /*
2  * AMD Cryptographic Coprocessor (CCP) driver
3  *
4  * Copyright (C) 2013 Advanced Micro Devices, Inc.
5  *
6  * Author: Tom Lendacky <thomas.lendacky@amd.com>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/pci_ids.h>
17 #include <linux/kthread.h>
18 #include <linux/sched.h>
19 #include <linux/interrupt.h>
20 #include <linux/spinlock.h>
21 #include <linux/mutex.h>
22 #include <linux/delay.h>
23 #include <linux/ccp.h>
24 #include <linux/scatterlist.h>
25 #include <crypto/scatterwalk.h>
26 #include <crypto/sha.h>
27 
28 #include "ccp-dev.h"
29 
30 enum ccp_memtype {
31 	CCP_MEMTYPE_SYSTEM = 0,
32 	CCP_MEMTYPE_KSB,
33 	CCP_MEMTYPE_LOCAL,
34 	CCP_MEMTYPE__LAST,
35 };
36 
37 struct ccp_dma_info {
38 	dma_addr_t address;
39 	unsigned int offset;
40 	unsigned int length;
41 	enum dma_data_direction dir;
42 };
43 
44 struct ccp_dm_workarea {
45 	struct device *dev;
46 	struct dma_pool *dma_pool;
47 	unsigned int length;
48 
49 	u8 *address;
50 	struct ccp_dma_info dma;
51 };
52 
53 struct ccp_sg_workarea {
54 	struct scatterlist *sg;
55 	int nents;
56 
57 	struct scatterlist *dma_sg;
58 	struct device *dma_dev;
59 	unsigned int dma_count;
60 	enum dma_data_direction dma_dir;
61 
62 	unsigned int sg_used;
63 
64 	u64 bytes_left;
65 };
66 
67 struct ccp_data {
68 	struct ccp_sg_workarea sg_wa;
69 	struct ccp_dm_workarea dm_wa;
70 };
71 
72 struct ccp_mem {
73 	enum ccp_memtype type;
74 	union {
75 		struct ccp_dma_info dma;
76 		u32 ksb;
77 	} u;
78 };
79 
80 struct ccp_aes_op {
81 	enum ccp_aes_type type;
82 	enum ccp_aes_mode mode;
83 	enum ccp_aes_action action;
84 };
85 
86 struct ccp_xts_aes_op {
87 	enum ccp_aes_action action;
88 	enum ccp_xts_aes_unit_size unit_size;
89 };
90 
91 struct ccp_sha_op {
92 	enum ccp_sha_type type;
93 	u64 msg_bits;
94 };
95 
96 struct ccp_rsa_op {
97 	u32 mod_size;
98 	u32 input_len;
99 };
100 
101 struct ccp_passthru_op {
102 	enum ccp_passthru_bitwise bit_mod;
103 	enum ccp_passthru_byteswap byte_swap;
104 };
105 
106 struct ccp_ecc_op {
107 	enum ccp_ecc_function function;
108 };
109 
110 struct ccp_op {
111 	struct ccp_cmd_queue *cmd_q;
112 
113 	u32 jobid;
114 	u32 ioc;
115 	u32 soc;
116 	u32 ksb_key;
117 	u32 ksb_ctx;
118 	u32 init;
119 	u32 eom;
120 
121 	struct ccp_mem src;
122 	struct ccp_mem dst;
123 
124 	union {
125 		struct ccp_aes_op aes;
126 		struct ccp_xts_aes_op xts;
127 		struct ccp_sha_op sha;
128 		struct ccp_rsa_op rsa;
129 		struct ccp_passthru_op passthru;
130 		struct ccp_ecc_op ecc;
131 	} u;
132 };
133 
134 /* SHA initial context values */
135 static const __be32 ccp_sha1_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
136 	cpu_to_be32(SHA1_H0), cpu_to_be32(SHA1_H1),
137 	cpu_to_be32(SHA1_H2), cpu_to_be32(SHA1_H3),
138 	cpu_to_be32(SHA1_H4), 0, 0, 0,
139 };
140 
141 static const __be32 ccp_sha224_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
142 	cpu_to_be32(SHA224_H0), cpu_to_be32(SHA224_H1),
143 	cpu_to_be32(SHA224_H2), cpu_to_be32(SHA224_H3),
144 	cpu_to_be32(SHA224_H4), cpu_to_be32(SHA224_H5),
145 	cpu_to_be32(SHA224_H6), cpu_to_be32(SHA224_H7),
146 };
147 
148 static const __be32 ccp_sha256_init[CCP_SHA_CTXSIZE / sizeof(__be32)] = {
149 	cpu_to_be32(SHA256_H0), cpu_to_be32(SHA256_H1),
150 	cpu_to_be32(SHA256_H2), cpu_to_be32(SHA256_H3),
151 	cpu_to_be32(SHA256_H4), cpu_to_be32(SHA256_H5),
152 	cpu_to_be32(SHA256_H6), cpu_to_be32(SHA256_H7),
153 };
154 
155 /* The CCP cannot perform zero-length sha operations, so the caller
156  * is required to buffer data for the final operation.  However, a
157  * sha operation for a message with a total length of zero is valid,
158  * so known values are required to supply the result.
159  */
160 static const u8 ccp_sha1_zero[CCP_SHA_CTXSIZE] = {
161 	0xda, 0x39, 0xa3, 0xee, 0x5e, 0x6b, 0x4b, 0x0d,
162 	0x32, 0x55, 0xbf, 0xef, 0x95, 0x60, 0x18, 0x90,
163 	0xaf, 0xd8, 0x07, 0x09, 0x00, 0x00, 0x00, 0x00,
164 	0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
165 };
166 
167 static const u8 ccp_sha224_zero[CCP_SHA_CTXSIZE] = {
168 	0xd1, 0x4a, 0x02, 0x8c, 0x2a, 0x3a, 0x2b, 0xc9,
169 	0x47, 0x61, 0x02, 0xbb, 0x28, 0x82, 0x34, 0xc4,
170 	0x15, 0xa2, 0xb0, 0x1f, 0x82, 0x8e, 0xa6, 0x2a,
171 	0xc5, 0xb3, 0xe4, 0x2f, 0x00, 0x00, 0x00, 0x00,
172 };
173 
174 static const u8 ccp_sha256_zero[CCP_SHA_CTXSIZE] = {
175 	0xe3, 0xb0, 0xc4, 0x42, 0x98, 0xfc, 0x1c, 0x14,
176 	0x9a, 0xfb, 0xf4, 0xc8, 0x99, 0x6f, 0xb9, 0x24,
177 	0x27, 0xae, 0x41, 0xe4, 0x64, 0x9b, 0x93, 0x4c,
178 	0xa4, 0x95, 0x99, 0x1b, 0x78, 0x52, 0xb8, 0x55,
179 };
180 
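/* Split a DMA address into the low and high halves used by the command
 * registers; only 16 high-order bits are kept, which appears to reflect
 * a 48-bit DMA addressing limit on the CCP.
 */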
181 static u32 ccp_addr_lo(struct ccp_dma_info *info)
182 {
183 	return lower_32_bits(info->address + info->offset);
184 }
185 
186 static u32 ccp_addr_hi(struct ccp_dma_info *info)
187 {
188 	return upper_32_bits(info->address + info->offset) & 0x0000ffff;
189 }
190 
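/* Submit a command to the CCP: the caller-built REQ1 through REQx values
 * are written first, then writing REQ0 starts the operation.  If an
 * interrupt on completion was requested, wait for the queue interrupt and
 * clean up the job(s) on error or on stop-on-complete.
 */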
191 static int ccp_do_cmd(struct ccp_op *op, u32 *cr, unsigned int cr_count)
192 {
193 	struct ccp_cmd_queue *cmd_q = op->cmd_q;
194 	struct ccp_device *ccp = cmd_q->ccp;
195 	void __iomem *cr_addr;
196 	u32 cr0, cmd;
197 	unsigned int i;
198 	int ret = 0;
199 
200 	/* We could read a status register to see how many free slots
201 	 * are actually available, but reading that register resets it
202 	 * and you could lose some error information.
203 	 */
204 	cmd_q->free_slots--;
205 
206 	cr0 = (cmd_q->id << REQ0_CMD_Q_SHIFT)
207 	      | (op->jobid << REQ0_JOBID_SHIFT)
208 	      | REQ0_WAIT_FOR_WRITE;
209 
210 	if (op->soc)
211 		cr0 |= REQ0_STOP_ON_COMPLETE
212 		       | REQ0_INT_ON_COMPLETE;
213 
214 	if (op->ioc || !cmd_q->free_slots)
215 		cr0 |= REQ0_INT_ON_COMPLETE;
216 
217 	/* Start at CMD_REQ1 */
218 	cr_addr = ccp->io_regs + CMD_REQ0 + CMD_REQ_INCR;
219 
220 	mutex_lock(&ccp->req_mutex);
221 
222 	/* Write CMD_REQ1 through CMD_REQx first */
223 	for (i = 0; i < cr_count; i++, cr_addr += CMD_REQ_INCR)
224 		iowrite32(*(cr + i), cr_addr);
225 
226 	/* Tell the CCP to start */
227 	wmb();
228 	iowrite32(cr0, ccp->io_regs + CMD_REQ0);
229 
230 	mutex_unlock(&ccp->req_mutex);
231 
232 	if (cr0 & REQ0_INT_ON_COMPLETE) {
233 		/* Wait for the job to complete */
234 		ret = wait_event_interruptible(cmd_q->int_queue,
235 					       cmd_q->int_rcvd);
236 		if (ret || cmd_q->cmd_error) {
237 			/* On error delete all related jobs from the queue */
238 			cmd = (cmd_q->id << DEL_Q_ID_SHIFT)
239 			      | op->jobid;
240 
241 			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
242 
243 			if (!ret)
244 				ret = -EIO;
245 		} else if (op->soc) {
246 			/* Delete just head job from the queue on SoC */
247 			cmd = DEL_Q_ACTIVE
248 			      | (cmd_q->id << DEL_Q_ID_SHIFT)
249 			      | op->jobid;
250 
251 			iowrite32(cmd, ccp->io_regs + DEL_CMD_Q_JOB);
252 		}
253 
254 		cmd_q->free_slots = CMD_Q_DEPTH(cmd_q->q_status);
255 
256 		cmd_q->int_rcvd = 0;
257 	}
258 
259 	return ret;
260 }
261 
262 static int ccp_perform_aes(struct ccp_op *op)
263 {
264 	u32 cr[6];
265 
266 	/* Fill out the register contents for REQ1 through REQ6 */
267 	cr[0] = (CCP_ENGINE_AES << REQ1_ENGINE_SHIFT)
268 		| (op->u.aes.type << REQ1_AES_TYPE_SHIFT)
269 		| (op->u.aes.mode << REQ1_AES_MODE_SHIFT)
270 		| (op->u.aes.action << REQ1_AES_ACTION_SHIFT)
271 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
272 	cr[1] = op->src.u.dma.length - 1;
273 	cr[2] = ccp_addr_lo(&op->src.u.dma);
274 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
275 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
276 		| ccp_addr_hi(&op->src.u.dma);
277 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
278 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
279 		| ccp_addr_hi(&op->dst.u.dma);
280 
281 	if (op->u.aes.mode == CCP_AES_MODE_CFB)
282 		cr[0] |= ((0x7f) << REQ1_AES_CFB_SIZE_SHIFT);
283 
284 	if (op->eom)
285 		cr[0] |= REQ1_EOM;
286 
287 	if (op->init)
288 		cr[0] |= REQ1_INIT;
289 
290 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
291 }
292 
293 static int ccp_perform_xts_aes(struct ccp_op *op)
294 {
295 	u32 cr[6];
296 
297 	/* Fill out the register contents for REQ1 through REQ6 */
298 	cr[0] = (CCP_ENGINE_XTS_AES_128 << REQ1_ENGINE_SHIFT)
299 		| (op->u.xts.action << REQ1_AES_ACTION_SHIFT)
300 		| (op->u.xts.unit_size << REQ1_XTS_AES_SIZE_SHIFT)
301 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT);
302 	cr[1] = op->src.u.dma.length - 1;
303 	cr[2] = ccp_addr_lo(&op->src.u.dma);
304 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
305 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
306 		| ccp_addr_hi(&op->src.u.dma);
307 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
308 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
309 		| ccp_addr_hi(&op->dst.u.dma);
310 
311 	if (op->eom)
312 		cr[0] |= REQ1_EOM;
313 
314 	if (op->init)
315 		cr[0] |= REQ1_INIT;
316 
317 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
318 }
319 
320 static int ccp_perform_sha(struct ccp_op *op)
321 {
322 	u32 cr[6];
323 
324 	/* Fill out the register contents for REQ1 through REQ6 */
325 	cr[0] = (CCP_ENGINE_SHA << REQ1_ENGINE_SHIFT)
326 		| (op->u.sha.type << REQ1_SHA_TYPE_SHIFT)
327 		| REQ1_INIT;
328 	cr[1] = op->src.u.dma.length - 1;
329 	cr[2] = ccp_addr_lo(&op->src.u.dma);
330 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
331 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
332 		| ccp_addr_hi(&op->src.u.dma);
333 
334 	if (op->eom) {
335 		cr[0] |= REQ1_EOM;
336 		cr[4] = lower_32_bits(op->u.sha.msg_bits);
337 		cr[5] = upper_32_bits(op->u.sha.msg_bits);
338 	} else {
339 		cr[4] = 0;
340 		cr[5] = 0;
341 	}
342 
343 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
344 }
345 
346 static int ccp_perform_rsa(struct ccp_op *op)
347 {
348 	u32 cr[6];
349 
350 	/* Fill out the register contents for REQ1 through REQ6 */
351 	cr[0] = (CCP_ENGINE_RSA << REQ1_ENGINE_SHIFT)
352 		| (op->u.rsa.mod_size << REQ1_RSA_MOD_SIZE_SHIFT)
353 		| (op->ksb_key << REQ1_KEY_KSB_SHIFT)
354 		| REQ1_EOM;
355 	cr[1] = op->u.rsa.input_len - 1;
356 	cr[2] = ccp_addr_lo(&op->src.u.dma);
357 	cr[3] = (op->ksb_ctx << REQ4_KSB_SHIFT)
358 		| (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
359 		| ccp_addr_hi(&op->src.u.dma);
360 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
361 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
362 		| ccp_addr_hi(&op->dst.u.dma);
363 
364 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
365 }
366 
367 static int ccp_perform_passthru(struct ccp_op *op)
368 {
369 	u32 cr[6];
370 
371 	/* Fill out the register contents for REQ1 through REQ6 */
372 	cr[0] = (CCP_ENGINE_PASSTHRU << REQ1_ENGINE_SHIFT)
373 		| (op->u.passthru.bit_mod << REQ1_PT_BW_SHIFT)
374 		| (op->u.passthru.byte_swap << REQ1_PT_BS_SHIFT);
375 
376 	if (op->src.type == CCP_MEMTYPE_SYSTEM)
377 		cr[1] = op->src.u.dma.length - 1;
378 	else
379 		cr[1] = op->dst.u.dma.length - 1;
380 
381 	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
382 		cr[2] = ccp_addr_lo(&op->src.u.dma);
383 		cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
384 			| ccp_addr_hi(&op->src.u.dma);
385 
386 		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
387 			cr[3] |= (op->ksb_key << REQ4_KSB_SHIFT);
388 	} else {
389 		cr[2] = op->src.u.ksb * CCP_KSB_BYTES;
390 		cr[3] = (CCP_MEMTYPE_KSB << REQ4_MEMTYPE_SHIFT);
391 	}
392 
393 	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
394 		cr[4] = ccp_addr_lo(&op->dst.u.dma);
395 		cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
396 			| ccp_addr_hi(&op->dst.u.dma);
397 	} else {
398 		cr[4] = op->dst.u.ksb * CCP_KSB_BYTES;
399 		cr[5] = (CCP_MEMTYPE_KSB << REQ6_MEMTYPE_SHIFT);
400 	}
401 
402 	if (op->eom)
403 		cr[0] |= REQ1_EOM;
404 
405 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
406 }
407 
408 static int ccp_perform_ecc(struct ccp_op *op)
409 {
410 	u32 cr[6];
411 
412 	/* Fill out the register contents for REQ1 through REQ6 */
413 	cr[0] = REQ1_ECC_AFFINE_CONVERT
414 		| (CCP_ENGINE_ECC << REQ1_ENGINE_SHIFT)
415 		| (op->u.ecc.function << REQ1_ECC_FUNCTION_SHIFT)
416 		| REQ1_EOM;
417 	cr[1] = op->src.u.dma.length - 1;
418 	cr[2] = ccp_addr_lo(&op->src.u.dma);
419 	cr[3] = (CCP_MEMTYPE_SYSTEM << REQ4_MEMTYPE_SHIFT)
420 		| ccp_addr_hi(&op->src.u.dma);
421 	cr[4] = ccp_addr_lo(&op->dst.u.dma);
422 	cr[5] = (CCP_MEMTYPE_SYSTEM << REQ6_MEMTYPE_SHIFT)
423 		| ccp_addr_hi(&op->dst.u.dma);
424 
425 	return ccp_do_cmd(op, cr, ARRAY_SIZE(cr));
426 }
427 
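/* Allocate a contiguous range of KSB entries, sleeping until enough
 * entries are freed if necessary.  Returns the entry index biased by
 * KSB_START, or 0 if the wait was interrupted.
 */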
428 static u32 ccp_alloc_ksb(struct ccp_device *ccp, unsigned int count)
429 {
430 	int start;
431 
432 	for (;;) {
433 		mutex_lock(&ccp->ksb_mutex);
434 
435 		start = (u32)bitmap_find_next_zero_area(ccp->ksb,
436 							ccp->ksb_count,
437 							ccp->ksb_start,
438 							count, 0);
439 		if (start <= ccp->ksb_count) {
440 			bitmap_set(ccp->ksb, start, count);
441 
442 			mutex_unlock(&ccp->ksb_mutex);
443 			break;
444 		}
445 
446 		ccp->ksb_avail = 0;
447 
448 		mutex_unlock(&ccp->ksb_mutex);
449 
450 		/* Wait for KSB entries to become available */
451 		if (wait_event_interruptible(ccp->ksb_queue, ccp->ksb_avail))
452 			return 0;
453 	}
454 
455 	return KSB_START + start;
456 }
457 
458 static void ccp_free_ksb(struct ccp_device *ccp, unsigned int start,
459 			 unsigned int count)
460 {
461 	if (!start)
462 		return;
463 
464 	mutex_lock(&ccp->ksb_mutex);
465 
466 	bitmap_clear(ccp->ksb, start - KSB_START, count);
467 
468 	ccp->ksb_avail = 1;
469 
470 	mutex_unlock(&ccp->ksb_mutex);
471 
472 	wake_up_interruptible_all(&ccp->ksb_queue);
473 }
474 
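/* Generate a (wrapping) job id used to tag commands submitted to a queue */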
475 static u32 ccp_gen_jobid(struct ccp_device *ccp)
476 {
477 	return atomic_inc_return(&ccp->current_id) & CCP_JOBID_MASK;
478 }
479 
480 static void ccp_sg_free(struct ccp_sg_workarea *wa)
481 {
482 	if (wa->dma_count)
483 		dma_unmap_sg(wa->dma_dev, wa->dma_sg, wa->nents, wa->dma_dir);
484 
485 	wa->dma_count = 0;
486 }
487 
488 static int ccp_init_sg_workarea(struct ccp_sg_workarea *wa, struct device *dev,
489 				struct scatterlist *sg, u64 len,
490 				enum dma_data_direction dma_dir)
491 {
492 	memset(wa, 0, sizeof(*wa));
493 
494 	wa->sg = sg;
495 	if (!sg)
496 		return 0;
497 
498 	wa->nents = sg_nents_for_len(sg, len);
499 	if (wa->nents < 0)
500 		return wa->nents;
501 
502 	wa->bytes_left = len;
503 	wa->sg_used = 0;
504 
505 	if (len == 0)
506 		return 0;
507 
508 	if (dma_dir == DMA_NONE)
509 		return 0;
510 
511 	wa->dma_sg = sg;
512 	wa->dma_dev = dev;
513 	wa->dma_dir = dma_dir;
514 	wa->dma_count = dma_map_sg(dev, sg, wa->nents, dma_dir);
515 	if (!wa->dma_count)
516 		return -ENOMEM;
517 
518 	return 0;
519 }
520 
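/* Advance the scatterlist work area by the number of bytes consumed,
 * moving on to the next sg entry once the current one is used up.
 */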
521 static void ccp_update_sg_workarea(struct ccp_sg_workarea *wa, unsigned int len)
522 {
523 	unsigned int nbytes = min_t(u64, len, wa->bytes_left);
524 
525 	if (!wa->sg)
526 		return;
527 
528 	wa->sg_used += nbytes;
529 	wa->bytes_left -= nbytes;
530 	if (wa->sg_used == wa->sg->length) {
531 		wa->sg = sg_next(wa->sg);
532 		wa->sg_used = 0;
533 	}
534 }
535 
536 static void ccp_dm_free(struct ccp_dm_workarea *wa)
537 {
538 	if (wa->length <= CCP_DMAPOOL_MAX_SIZE) {
539 		if (wa->address)
540 			dma_pool_free(wa->dma_pool, wa->address,
541 				      wa->dma.address);
542 	} else {
543 		if (wa->dma.address)
544 			dma_unmap_single(wa->dev, wa->dma.address, wa->length,
545 					 wa->dma.dir);
546 		kfree(wa->address);
547 	}
548 
549 	wa->address = NULL;
550 	wa->dma.address = 0;
551 }
552 
553 static int ccp_init_dm_workarea(struct ccp_dm_workarea *wa,
554 				struct ccp_cmd_queue *cmd_q,
555 				unsigned int len,
556 				enum dma_data_direction dir)
557 {
558 	memset(wa, 0, sizeof(*wa));
559 
560 	if (!len)
561 		return 0;
562 
563 	wa->dev = cmd_q->ccp->dev;
564 	wa->length = len;
565 
566 	if (len <= CCP_DMAPOOL_MAX_SIZE) {
567 		wa->dma_pool = cmd_q->dma_pool;
568 
569 		wa->address = dma_pool_alloc(wa->dma_pool, GFP_KERNEL,
570 					     &wa->dma.address);
571 		if (!wa->address)
572 			return -ENOMEM;
573 
574 		wa->dma.length = CCP_DMAPOOL_MAX_SIZE;
575 
576 		memset(wa->address, 0, CCP_DMAPOOL_MAX_SIZE);
577 	} else {
578 		wa->address = kzalloc(len, GFP_KERNEL);
579 		if (!wa->address)
580 			return -ENOMEM;
581 
582 		wa->dma.address = dma_map_single(wa->dev, wa->address, len,
583 						 dir);
584 		if (!wa->dma.address)
585 			return -ENOMEM;
586 
587 		wa->dma.length = len;
588 	}
589 	wa->dma.dir = dir;
590 
591 	return 0;
592 }
593 
594 static void ccp_set_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
595 			    struct scatterlist *sg, unsigned int sg_offset,
596 			    unsigned int len)
597 {
598 	WARN_ON(!wa->address);
599 
600 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
601 				 0);
602 }
603 
604 static void ccp_get_dm_area(struct ccp_dm_workarea *wa, unsigned int wa_offset,
605 			    struct scatterlist *sg, unsigned int sg_offset,
606 			    unsigned int len)
607 {
608 	WARN_ON(!wa->address);
609 
610 	scatterwalk_map_and_copy(wa->address + wa_offset, sg, sg_offset, len,
611 				 1);
612 }
613 
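/* Copy data from a scatterlist into the work area while reversing the
 * byte order within each chunk (callers supply big endian values that
 * the CCP expects in little endian form), optionally sign-extending a
 * final partial chunk.
 */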
614 static void ccp_reverse_set_dm_area(struct ccp_dm_workarea *wa,
615 				    struct scatterlist *sg,
616 				    unsigned int len, unsigned int se_len,
617 				    bool sign_extend)
618 {
619 	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
620 	u8 buffer[CCP_REVERSE_BUF_SIZE];
621 
622 	BUG_ON(se_len > sizeof(buffer));
623 
624 	sg_offset = len;
625 	dm_offset = 0;
626 	nbytes = len;
627 	while (nbytes) {
628 		ksb_len = min_t(unsigned int, nbytes, se_len);
629 		sg_offset -= ksb_len;
630 
631 		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 0);
632 		for (i = 0; i < ksb_len; i++)
633 			wa->address[dm_offset + i] = buffer[ksb_len - i - 1];
634 
635 		dm_offset += ksb_len;
636 		nbytes -= ksb_len;
637 
638 		if ((ksb_len != se_len) && sign_extend) {
639 			/* Must sign-extend to nearest sign-extend length */
640 			if (wa->address[dm_offset - 1] & 0x80)
641 				memset(wa->address + dm_offset, 0xff,
642 				       se_len - ksb_len);
643 		}
644 	}
645 }
646 
647 static void ccp_reverse_get_dm_area(struct ccp_dm_workarea *wa,
648 				    struct scatterlist *sg,
649 				    unsigned int len)
650 {
651 	unsigned int nbytes, sg_offset, dm_offset, ksb_len, i;
652 	u8 buffer[CCP_REVERSE_BUF_SIZE];
653 
654 	sg_offset = 0;
655 	dm_offset = len;
656 	nbytes = len;
657 	while (nbytes) {
658 		ksb_len = min_t(unsigned int, nbytes, sizeof(buffer));
659 		dm_offset -= ksb_len;
660 
661 		for (i = 0; i < ksb_len; i++)
662 			buffer[ksb_len - i - 1] = wa->address[dm_offset + i];
663 		scatterwalk_map_and_copy(buffer, sg, sg_offset, ksb_len, 1);
664 
665 		sg_offset += ksb_len;
666 		nbytes -= ksb_len;
667 	}
668 }
669 
670 static void ccp_free_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q)
671 {
672 	ccp_dm_free(&data->dm_wa);
673 	ccp_sg_free(&data->sg_wa);
674 }
675 
676 static int ccp_init_data(struct ccp_data *data, struct ccp_cmd_queue *cmd_q,
677 			 struct scatterlist *sg, u64 sg_len,
678 			 unsigned int dm_len,
679 			 enum dma_data_direction dir)
680 {
681 	int ret;
682 
683 	memset(data, 0, sizeof(*data));
684 
685 	ret = ccp_init_sg_workarea(&data->sg_wa, cmd_q->ccp->dev, sg, sg_len,
686 				   dir);
687 	if (ret)
688 		goto e_err;
689 
690 	ret = ccp_init_dm_workarea(&data->dm_wa, cmd_q, dm_len, dir);
691 	if (ret)
692 		goto e_err;
693 
694 	return 0;
695 
696 e_err:
697 	ccp_free_data(data, cmd_q);
698 
699 	return ret;
700 }
701 
702 static unsigned int ccp_queue_buf(struct ccp_data *data, unsigned int from)
703 {
704 	struct ccp_sg_workarea *sg_wa = &data->sg_wa;
705 	struct ccp_dm_workarea *dm_wa = &data->dm_wa;
706 	unsigned int buf_count, nbytes;
707 
708 	/* Clear the buffer if setting it */
709 	if (!from)
710 		memset(dm_wa->address, 0, dm_wa->length);
711 
712 	if (!sg_wa->sg)
713 		return 0;
714 
715 	/* Perform the copy operation
716 	 *   nbytes will always be <= UINT_MAX because dm_wa->length is
717 	 *   an unsigned int
718 	 */
719 	nbytes = min_t(u64, sg_wa->bytes_left, dm_wa->length);
720 	scatterwalk_map_and_copy(dm_wa->address, sg_wa->sg, sg_wa->sg_used,
721 				 nbytes, from);
722 
723 	/* Update the structures and generate the count */
724 	buf_count = 0;
725 	while (sg_wa->bytes_left && (buf_count < dm_wa->length)) {
726 		nbytes = min(sg_wa->sg->length - sg_wa->sg_used,
727 			     dm_wa->length - buf_count);
728 		nbytes = min_t(u64, sg_wa->bytes_left, nbytes);
729 
730 		buf_count += nbytes;
731 		ccp_update_sg_workarea(sg_wa, nbytes);
732 	}
733 
734 	return buf_count;
735 }
736 
737 static unsigned int ccp_fill_queue_buf(struct ccp_data *data)
738 {
739 	return ccp_queue_buf(data, 0);
740 }
741 
742 static unsigned int ccp_empty_queue_buf(struct ccp_data *data)
743 {
744 	return ccp_queue_buf(data, 1);
745 }
746 
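/* Set up the source and destination DMA information for the next
 * operation.  Data is normally taken directly from the mapped
 * scatterlists; when an sg entry holds less than a full block the data
 * is staged through the dm_wa bounce buffer instead.
 */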
747 static void ccp_prepare_data(struct ccp_data *src, struct ccp_data *dst,
748 			     struct ccp_op *op, unsigned int block_size,
749 			     bool blocksize_op)
750 {
751 	unsigned int sg_src_len, sg_dst_len, op_len;
752 
753 	/* The CCP can only DMA from/to a single address per operation, so we
754 	 * must find the smallest contiguous DMA area available in both the
755 	 * source and the destination. The resulting len values will always be
756 	 * <= UINT_MAX because the dma length is an unsigned int.
757 	 */
758 	sg_src_len = sg_dma_len(src->sg_wa.sg) - src->sg_wa.sg_used;
759 	sg_src_len = min_t(u64, src->sg_wa.bytes_left, sg_src_len);
760 
761 	if (dst) {
762 		sg_dst_len = sg_dma_len(dst->sg_wa.sg) - dst->sg_wa.sg_used;
763 		sg_dst_len = min_t(u64, src->sg_wa.bytes_left, sg_dst_len);
764 		op_len = min(sg_src_len, sg_dst_len);
765 	} else {
766 		op_len = sg_src_len;
767 	}
768 
769 	/* The data operation length is the smaller of the sg room remaining
770 	 * for the source and the destination, but will never be less than
771 	 * block_size
772 	 */
773 	op_len = max(op_len, block_size);
774 
775 	/* Unless we have to buffer data, there's no reason to wait */
776 	op->soc = 0;
777 
778 	if (sg_src_len < block_size) {
779 		/* Not enough data in the sg element, so it
780 		 * needs to be buffered into a blocksize chunk
781 		 */
782 		int cp_len = ccp_fill_queue_buf(src);
783 
784 		op->soc = 1;
785 		op->src.u.dma.address = src->dm_wa.dma.address;
786 		op->src.u.dma.offset = 0;
787 		op->src.u.dma.length = (blocksize_op) ? block_size : cp_len;
788 	} else {
789 		/* Enough data in the sg element, but we need to
790 		 * adjust for any previously copied data
791 		 */
792 		op->src.u.dma.address = sg_dma_address(src->sg_wa.sg);
793 		op->src.u.dma.offset = src->sg_wa.sg_used;
794 		op->src.u.dma.length = op_len & ~(block_size - 1);
795 
796 		ccp_update_sg_workarea(&src->sg_wa, op->src.u.dma.length);
797 	}
798 
799 	if (dst) {
800 		if (sg_dst_len < block_size) {
801 			/* Not enough room in the sg element or we're on the
802 			 * last piece of data (when using padding), so the
803 			 * output needs to be buffered into a blocksize chunk
804 			 */
805 			op->soc = 1;
806 			op->dst.u.dma.address = dst->dm_wa.dma.address;
807 			op->dst.u.dma.offset = 0;
808 			op->dst.u.dma.length = op->src.u.dma.length;
809 		} else {
810 			/* Enough room in the sg element, but we need to
811 			 * adjust for any previously used area
812 			 */
813 			op->dst.u.dma.address = sg_dma_address(dst->sg_wa.sg);
814 			op->dst.u.dma.offset = dst->sg_wa.sg_used;
815 			op->dst.u.dma.length = op->src.u.dma.length;
816 		}
817 	}
818 }
819 
820 static void ccp_process_data(struct ccp_data *src, struct ccp_data *dst,
821 			     struct ccp_op *op)
822 {
823 	op->init = 0;
824 
825 	if (dst) {
826 		if (op->dst.u.dma.address == dst->dm_wa.dma.address)
827 			ccp_empty_queue_buf(dst);
828 		else
829 			ccp_update_sg_workarea(&dst->sg_wa,
830 					       op->dst.u.dma.length);
831 	}
832 }
833 
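/* Use a passthru operation to copy a work area buffer into or out of
 * KSB storage, applying the requested byte swap.
 */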
834 static int ccp_copy_to_from_ksb(struct ccp_cmd_queue *cmd_q,
835 				struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
836 				u32 byte_swap, bool from)
837 {
838 	struct ccp_op op;
839 
840 	memset(&op, 0, sizeof(op));
841 
842 	op.cmd_q = cmd_q;
843 	op.jobid = jobid;
844 	op.eom = 1;
845 
846 	if (from) {
847 		op.soc = 1;
848 		op.src.type = CCP_MEMTYPE_KSB;
849 		op.src.u.ksb = ksb;
850 		op.dst.type = CCP_MEMTYPE_SYSTEM;
851 		op.dst.u.dma.address = wa->dma.address;
852 		op.dst.u.dma.length = wa->length;
853 	} else {
854 		op.src.type = CCP_MEMTYPE_SYSTEM;
855 		op.src.u.dma.address = wa->dma.address;
856 		op.src.u.dma.length = wa->length;
857 		op.dst.type = CCP_MEMTYPE_KSB;
858 		op.dst.u.ksb = ksb;
859 	}
860 
861 	op.u.passthru.byte_swap = byte_swap;
862 
863 	return ccp_perform_passthru(&op);
864 }
865 
866 static int ccp_copy_to_ksb(struct ccp_cmd_queue *cmd_q,
867 			   struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
868 			   u32 byte_swap)
869 {
870 	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, false);
871 }
872 
873 static int ccp_copy_from_ksb(struct ccp_cmd_queue *cmd_q,
874 			     struct ccp_dm_workarea *wa, u32 jobid, u32 ksb,
875 			     u32 byte_swap)
876 {
877 	return ccp_copy_to_from_ksb(cmd_q, wa, jobid, ksb, byte_swap, true);
878 }
879 
880 static int ccp_run_aes_cmac_cmd(struct ccp_cmd_queue *cmd_q,
881 				struct ccp_cmd *cmd)
882 {
883 	struct ccp_aes_engine *aes = &cmd->u.aes;
884 	struct ccp_dm_workarea key, ctx;
885 	struct ccp_data src;
886 	struct ccp_op op;
887 	unsigned int dm_offset;
888 	int ret;
889 
890 	if (!((aes->key_len == AES_KEYSIZE_128) ||
891 	      (aes->key_len == AES_KEYSIZE_192) ||
892 	      (aes->key_len == AES_KEYSIZE_256)))
893 		return -EINVAL;
894 
895 	if (aes->src_len & (AES_BLOCK_SIZE - 1))
896 		return -EINVAL;
897 
898 	if (aes->iv_len != AES_BLOCK_SIZE)
899 		return -EINVAL;
900 
901 	if (!aes->key || !aes->iv || !aes->src)
902 		return -EINVAL;
903 
904 	if (aes->cmac_final) {
905 		if (aes->cmac_key_len != AES_BLOCK_SIZE)
906 			return -EINVAL;
907 
908 		if (!aes->cmac_key)
909 			return -EINVAL;
910 	}
911 
912 	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
913 	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
914 
915 	ret = -EIO;
916 	memset(&op, 0, sizeof(op));
917 	op.cmd_q = cmd_q;
918 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
919 	op.ksb_key = cmd_q->ksb_key;
920 	op.ksb_ctx = cmd_q->ksb_ctx;
921 	op.init = 1;
922 	op.u.aes.type = aes->type;
923 	op.u.aes.mode = aes->mode;
924 	op.u.aes.action = aes->action;
925 
926 	/* All supported key sizes fit in a single (32-byte) KSB entry
927 	 * and must be in little endian format. Use the 256-bit byte
928 	 * swap passthru option to convert from big endian to little
929 	 * endian.
930 	 */
931 	ret = ccp_init_dm_workarea(&key, cmd_q,
932 				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
933 				   DMA_TO_DEVICE);
934 	if (ret)
935 		return ret;
936 
937 	dm_offset = CCP_KSB_BYTES - aes->key_len;
938 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
939 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
940 			      CCP_PASSTHRU_BYTESWAP_256BIT);
941 	if (ret) {
942 		cmd->engine_error = cmd_q->cmd_error;
943 		goto e_key;
944 	}
945 
946 	/* The AES context fits in a single (32-byte) KSB entry and
947 	 * must be in little endian format. Use the 256-bit byte swap
948 	 * passthru option to convert from big endian to little endian.
949 	 */
950 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
951 				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
952 				   DMA_BIDIRECTIONAL);
953 	if (ret)
954 		goto e_key;
955 
956 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
957 	ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
958 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
959 			      CCP_PASSTHRU_BYTESWAP_256BIT);
960 	if (ret) {
961 		cmd->engine_error = cmd_q->cmd_error;
962 		goto e_ctx;
963 	}
964 
965 	/* Send data to the CCP AES engine */
966 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
967 			    AES_BLOCK_SIZE, DMA_TO_DEVICE);
968 	if (ret)
969 		goto e_ctx;
970 
971 	while (src.sg_wa.bytes_left) {
972 		ccp_prepare_data(&src, NULL, &op, AES_BLOCK_SIZE, true);
973 		if (aes->cmac_final && !src.sg_wa.bytes_left) {
974 			op.eom = 1;
975 
976 			/* Push the K1/K2 key to the CCP now */
977 			ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid,
978 						op.ksb_ctx,
979 						CCP_PASSTHRU_BYTESWAP_256BIT);
980 			if (ret) {
981 				cmd->engine_error = cmd_q->cmd_error;
982 				goto e_src;
983 			}
984 
985 			ccp_set_dm_area(&ctx, 0, aes->cmac_key, 0,
986 					aes->cmac_key_len);
987 			ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
988 					      CCP_PASSTHRU_BYTESWAP_256BIT);
989 			if (ret) {
990 				cmd->engine_error = cmd_q->cmd_error;
991 				goto e_src;
992 			}
993 		}
994 
995 		ret = ccp_perform_aes(&op);
996 		if (ret) {
997 			cmd->engine_error = cmd_q->cmd_error;
998 			goto e_src;
999 		}
1000 
1001 		ccp_process_data(&src, NULL, &op);
1002 	}
1003 
1004 	/* Retrieve the AES context - convert from LE to BE using
1005 	 * 32-byte (256-bit) byteswapping
1006 	 */
1007 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1008 				CCP_PASSTHRU_BYTESWAP_256BIT);
1009 	if (ret) {
1010 		cmd->engine_error = cmd_q->cmd_error;
1011 		goto e_src;
1012 	}
1013 
1014 	/* ...but we only need AES_BLOCK_SIZE bytes */
1015 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1016 	ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1017 
1018 e_src:
1019 	ccp_free_data(&src, cmd_q);
1020 
1021 e_ctx:
1022 	ccp_dm_free(&ctx);
1023 
1024 e_key:
1025 	ccp_dm_free(&key);
1026 
1027 	return ret;
1028 }
1029 
1030 static int ccp_run_aes_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1031 {
1032 	struct ccp_aes_engine *aes = &cmd->u.aes;
1033 	struct ccp_dm_workarea key, ctx;
1034 	struct ccp_data src, dst;
1035 	struct ccp_op op;
1036 	unsigned int dm_offset;
1037 	bool in_place = false;
1038 	int ret;
1039 
1040 	if (aes->mode == CCP_AES_MODE_CMAC)
1041 		return ccp_run_aes_cmac_cmd(cmd_q, cmd);
1042 
1043 	if (!((aes->key_len == AES_KEYSIZE_128) ||
1044 	      (aes->key_len == AES_KEYSIZE_192) ||
1045 	      (aes->key_len == AES_KEYSIZE_256)))
1046 		return -EINVAL;
1047 
1048 	if (((aes->mode == CCP_AES_MODE_ECB) ||
1049 	     (aes->mode == CCP_AES_MODE_CBC) ||
1050 	     (aes->mode == CCP_AES_MODE_CFB)) &&
1051 	    (aes->src_len & (AES_BLOCK_SIZE - 1)))
1052 		return -EINVAL;
1053 
1054 	if (!aes->key || !aes->src || !aes->dst)
1055 		return -EINVAL;
1056 
1057 	if (aes->mode != CCP_AES_MODE_ECB) {
1058 		if (aes->iv_len != AES_BLOCK_SIZE)
1059 			return -EINVAL;
1060 
1061 		if (!aes->iv)
1062 			return -EINVAL;
1063 	}
1064 
1065 	BUILD_BUG_ON(CCP_AES_KEY_KSB_COUNT != 1);
1066 	BUILD_BUG_ON(CCP_AES_CTX_KSB_COUNT != 1);
1067 
1068 	ret = -EIO;
1069 	memset(&op, 0, sizeof(op));
1070 	op.cmd_q = cmd_q;
1071 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1072 	op.ksb_key = cmd_q->ksb_key;
1073 	op.ksb_ctx = cmd_q->ksb_ctx;
1074 	op.init = (aes->mode == CCP_AES_MODE_ECB) ? 0 : 1;
1075 	op.u.aes.type = aes->type;
1076 	op.u.aes.mode = aes->mode;
1077 	op.u.aes.action = aes->action;
1078 
1079 	/* All supported key sizes fit in a single (32-byte) KSB entry
1080 	 * and must be in little endian format. Use the 256-bit byte
1081 	 * swap passthru option to convert from big endian to little
1082 	 * endian.
1083 	 */
1084 	ret = ccp_init_dm_workarea(&key, cmd_q,
1085 				   CCP_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1086 				   DMA_TO_DEVICE);
1087 	if (ret)
1088 		return ret;
1089 
1090 	dm_offset = CCP_KSB_BYTES - aes->key_len;
1091 	ccp_set_dm_area(&key, dm_offset, aes->key, 0, aes->key_len);
1092 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1093 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1094 	if (ret) {
1095 		cmd->engine_error = cmd_q->cmd_error;
1096 		goto e_key;
1097 	}
1098 
1099 	/* The AES context fits in a single (32-byte) KSB entry and
1100 	 * must be in little endian format. Use the 256-bit byte swap
1101 	 * passthru option to convert from big endian to little endian.
1102 	 */
1103 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1104 				   CCP_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1105 				   DMA_BIDIRECTIONAL);
1106 	if (ret)
1107 		goto e_key;
1108 
1109 	if (aes->mode != CCP_AES_MODE_ECB) {
1110 		/* Load the AES context - convert to LE */
1111 		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1112 		ccp_set_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1113 		ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1114 				      CCP_PASSTHRU_BYTESWAP_256BIT);
1115 		if (ret) {
1116 			cmd->engine_error = cmd_q->cmd_error;
1117 			goto e_ctx;
1118 		}
1119 	}
1120 
1121 	/* Prepare the input and output data workareas. For in-place
1122 	 * operations we need to set the dma direction to BIDIRECTIONAL
1123 	 * and copy the src workarea to the dst workarea.
1124 	 */
1125 	if (sg_virt(aes->src) == sg_virt(aes->dst))
1126 		in_place = true;
1127 
1128 	ret = ccp_init_data(&src, cmd_q, aes->src, aes->src_len,
1129 			    AES_BLOCK_SIZE,
1130 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1131 	if (ret)
1132 		goto e_ctx;
1133 
1134 	if (in_place) {
1135 		dst = src;
1136 	} else {
1137 		ret = ccp_init_data(&dst, cmd_q, aes->dst, aes->src_len,
1138 				    AES_BLOCK_SIZE, DMA_FROM_DEVICE);
1139 		if (ret)
1140 			goto e_src;
1141 	}
1142 
1143 	/* Send data to the CCP AES engine */
1144 	while (src.sg_wa.bytes_left) {
1145 		ccp_prepare_data(&src, &dst, &op, AES_BLOCK_SIZE, true);
1146 		if (!src.sg_wa.bytes_left) {
1147 			op.eom = 1;
1148 
1149 			/* Since we don't retrieve the AES context in ECB
1150 			 * mode we have to wait for the operation to complete
1151 			 * on the last piece of data
1152 			 */
1153 			if (aes->mode == CCP_AES_MODE_ECB)
1154 				op.soc = 1;
1155 		}
1156 
1157 		ret = ccp_perform_aes(&op);
1158 		if (ret) {
1159 			cmd->engine_error = cmd_q->cmd_error;
1160 			goto e_dst;
1161 		}
1162 
1163 		ccp_process_data(&src, &dst, &op);
1164 	}
1165 
1166 	if (aes->mode != CCP_AES_MODE_ECB) {
1167 		/* Retrieve the AES context - convert from LE to BE using
1168 		 * 32-byte (256-bit) byteswapping
1169 		 */
1170 		ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1171 					CCP_PASSTHRU_BYTESWAP_256BIT);
1172 		if (ret) {
1173 			cmd->engine_error = cmd_q->cmd_error;
1174 			goto e_dst;
1175 		}
1176 
1177 		/* ...but we only need AES_BLOCK_SIZE bytes */
1178 		dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1179 		ccp_get_dm_area(&ctx, dm_offset, aes->iv, 0, aes->iv_len);
1180 	}
1181 
1182 e_dst:
1183 	if (!in_place)
1184 		ccp_free_data(&dst, cmd_q);
1185 
1186 e_src:
1187 	ccp_free_data(&src, cmd_q);
1188 
1189 e_ctx:
1190 	ccp_dm_free(&ctx);
1191 
1192 e_key:
1193 	ccp_dm_free(&key);
1194 
1195 	return ret;
1196 }
1197 
1198 static int ccp_run_xts_aes_cmd(struct ccp_cmd_queue *cmd_q,
1199 			       struct ccp_cmd *cmd)
1200 {
1201 	struct ccp_xts_aes_engine *xts = &cmd->u.xts;
1202 	struct ccp_dm_workarea key, ctx;
1203 	struct ccp_data src, dst;
1204 	struct ccp_op op;
1205 	unsigned int unit_size, dm_offset;
1206 	bool in_place = false;
1207 	int ret;
1208 
1209 	switch (xts->unit_size) {
1210 	case CCP_XTS_AES_UNIT_SIZE_16:
1211 		unit_size = 16;
1212 		break;
1213 	case CCP_XTS_AES_UNIT_SIZE_512:
1214 		unit_size = 512;
1215 		break;
1216 	case CCP_XTS_AES_UNIT_SIZE_1024:
1217 		unit_size = 1024;
1218 		break;
1219 	case CCP_XTS_AES_UNIT_SIZE_2048:
1220 		unit_size = 2048;
1221 		break;
1222 	case CCP_XTS_AES_UNIT_SIZE_4096:
1223 		unit_size = 4096;
1224 		break;
1225 
1226 	default:
1227 		return -EINVAL;
1228 	}
1229 
1230 	if (xts->key_len != AES_KEYSIZE_128)
1231 		return -EINVAL;
1232 
1233 	if (!xts->final && (xts->src_len & (AES_BLOCK_SIZE - 1)))
1234 		return -EINVAL;
1235 
1236 	if (xts->iv_len != AES_BLOCK_SIZE)
1237 		return -EINVAL;
1238 
1239 	if (!xts->key || !xts->iv || !xts->src || !xts->dst)
1240 		return -EINVAL;
1241 
1242 	BUILD_BUG_ON(CCP_XTS_AES_KEY_KSB_COUNT != 1);
1243 	BUILD_BUG_ON(CCP_XTS_AES_CTX_KSB_COUNT != 1);
1244 
1245 	ret = -EIO;
1246 	memset(&op, 0, sizeof(op));
1247 	op.cmd_q = cmd_q;
1248 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1249 	op.ksb_key = cmd_q->ksb_key;
1250 	op.ksb_ctx = cmd_q->ksb_ctx;
1251 	op.init = 1;
1252 	op.u.xts.action = xts->action;
1253 	op.u.xts.unit_size = xts->unit_size;
1254 
1255 	/* All supported key sizes fit in a single (32-byte) KSB entry
1256 	 * and must be in little endian format. Use the 256-bit byte
1257 	 * swap passthru option to convert from big endian to little
1258 	 * endian.
1259 	 */
1260 	ret = ccp_init_dm_workarea(&key, cmd_q,
1261 				   CCP_XTS_AES_KEY_KSB_COUNT * CCP_KSB_BYTES,
1262 				   DMA_TO_DEVICE);
1263 	if (ret)
1264 		return ret;
1265 
1266 	dm_offset = CCP_KSB_BYTES - AES_KEYSIZE_128;
1267 	ccp_set_dm_area(&key, dm_offset, xts->key, 0, xts->key_len);
1268 	ccp_set_dm_area(&key, 0, xts->key, dm_offset, xts->key_len);
1269 	ret = ccp_copy_to_ksb(cmd_q, &key, op.jobid, op.ksb_key,
1270 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1271 	if (ret) {
1272 		cmd->engine_error = cmd_q->cmd_error;
1273 		goto e_key;
1274 	}
1275 
1276 	/* The AES context fits in a single (32-byte) KSB entry and
1277 	 * for XTS is already in little endian format so no byte swapping
1278 	 * is needed.
1279 	 */
1280 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1281 				   CCP_XTS_AES_CTX_KSB_COUNT * CCP_KSB_BYTES,
1282 				   DMA_BIDIRECTIONAL);
1283 	if (ret)
1284 		goto e_key;
1285 
1286 	ccp_set_dm_area(&ctx, 0, xts->iv, 0, xts->iv_len);
1287 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1288 			      CCP_PASSTHRU_BYTESWAP_NOOP);
1289 	if (ret) {
1290 		cmd->engine_error = cmd_q->cmd_error;
1291 		goto e_ctx;
1292 	}
1293 
1294 	/* Prepare the input and output data workareas. For in-place
1295 	 * operations we need to set the dma direction to BIDIRECTIONAL
1296 	 * and copy the src workarea to the dst workarea.
1297 	 */
1298 	if (sg_virt(xts->src) == sg_virt(xts->dst))
1299 		in_place = true;
1300 
1301 	ret = ccp_init_data(&src, cmd_q, xts->src, xts->src_len,
1302 			    unit_size,
1303 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1304 	if (ret)
1305 		goto e_ctx;
1306 
1307 	if (in_place) {
1308 		dst = src;
1309 	} else {
1310 		ret = ccp_init_data(&dst, cmd_q, xts->dst, xts->src_len,
1311 				    unit_size, DMA_FROM_DEVICE);
1312 		if (ret)
1313 			goto e_src;
1314 	}
1315 
1316 	/* Send data to the CCP AES engine */
1317 	while (src.sg_wa.bytes_left) {
1318 		ccp_prepare_data(&src, &dst, &op, unit_size, true);
1319 		if (!src.sg_wa.bytes_left)
1320 			op.eom = 1;
1321 
1322 		ret = ccp_perform_xts_aes(&op);
1323 		if (ret) {
1324 			cmd->engine_error = cmd_q->cmd_error;
1325 			goto e_dst;
1326 		}
1327 
1328 		ccp_process_data(&src, &dst, &op);
1329 	}
1330 
1331 	/* Retrieve the AES context - convert from LE to BE using
1332 	 * 32-byte (256-bit) byteswapping
1333 	 */
1334 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1335 				CCP_PASSTHRU_BYTESWAP_256BIT);
1336 	if (ret) {
1337 		cmd->engine_error = cmd_q->cmd_error;
1338 		goto e_dst;
1339 	}
1340 
1341 	/* ...but we only need AES_BLOCK_SIZE bytes */
1342 	dm_offset = CCP_KSB_BYTES - AES_BLOCK_SIZE;
1343 	ccp_get_dm_area(&ctx, dm_offset, xts->iv, 0, xts->iv_len);
1344 
1345 e_dst:
1346 	if (!in_place)
1347 		ccp_free_data(&dst, cmd_q);
1348 
1349 e_src:
1350 	ccp_free_data(&src, cmd_q);
1351 
1352 e_ctx:
1353 	ccp_dm_free(&ctx);
1354 
1355 e_key:
1356 	ccp_dm_free(&key);
1357 
1358 	return ret;
1359 }
1360 
1361 static int ccp_run_sha_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1362 {
1363 	struct ccp_sha_engine *sha = &cmd->u.sha;
1364 	struct ccp_dm_workarea ctx;
1365 	struct ccp_data src;
1366 	struct ccp_op op;
1367 	int ret;
1368 
1369 	if (sha->ctx_len != CCP_SHA_CTXSIZE)
1370 		return -EINVAL;
1371 
1372 	if (!sha->ctx)
1373 		return -EINVAL;
1374 
1375 	if (!sha->final && (sha->src_len & (CCP_SHA_BLOCKSIZE - 1)))
1376 		return -EINVAL;
1377 
1378 	if (!sha->src_len) {
1379 		const u8 *sha_zero;
1380 
1381 		/* Not final, just return */
1382 		if (!sha->final)
1383 			return 0;
1384 
1385 		/* CCP can't do a zero-length sha operation, so the caller
1386 		 * must buffer the data.
1387 		 */
1388 		if (sha->msg_bits)
1389 			return -EINVAL;
1390 
1391 		/* A sha operation for a message with a total length of zero
1392 		 * returns a known result.
1393 		 */
1394 		switch (sha->type) {
1395 		case CCP_SHA_TYPE_1:
1396 			sha_zero = ccp_sha1_zero;
1397 			break;
1398 		case CCP_SHA_TYPE_224:
1399 			sha_zero = ccp_sha224_zero;
1400 			break;
1401 		case CCP_SHA_TYPE_256:
1402 			sha_zero = ccp_sha256_zero;
1403 			break;
1404 		default:
1405 			return -EINVAL;
1406 		}
1407 
1408 		scatterwalk_map_and_copy((void *)sha_zero, sha->ctx, 0,
1409 					 sha->ctx_len, 1);
1410 
1411 		return 0;
1412 	}
1413 
1414 	if (!sha->src)
1415 		return -EINVAL;
1416 
1417 	BUILD_BUG_ON(CCP_SHA_KSB_COUNT != 1);
1418 
1419 	memset(&op, 0, sizeof(op));
1420 	op.cmd_q = cmd_q;
1421 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1422 	op.ksb_ctx = cmd_q->ksb_ctx;
1423 	op.u.sha.type = sha->type;
1424 	op.u.sha.msg_bits = sha->msg_bits;
1425 
1426 	/* The SHA context fits in a single (32-byte) KSB entry and
1427 	 * must be in little endian format. Use the 256-bit byte swap
1428 	 * passthru option to convert from big endian to little endian.
1429 	 */
1430 	ret = ccp_init_dm_workarea(&ctx, cmd_q,
1431 				   CCP_SHA_KSB_COUNT * CCP_KSB_BYTES,
1432 				   DMA_BIDIRECTIONAL);
1433 	if (ret)
1434 		return ret;
1435 
1436 	if (sha->first) {
1437 		const __be32 *init;
1438 
1439 		switch (sha->type) {
1440 		case CCP_SHA_TYPE_1:
1441 			init = ccp_sha1_init;
1442 			break;
1443 		case CCP_SHA_TYPE_224:
1444 			init = ccp_sha224_init;
1445 			break;
1446 		case CCP_SHA_TYPE_256:
1447 			init = ccp_sha256_init;
1448 			break;
1449 		default:
1450 			ret = -EINVAL;
1451 			goto e_ctx;
1452 		}
1453 		memcpy(ctx.address, init, CCP_SHA_CTXSIZE);
1454 	} else {
1455 		ccp_set_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1456 	}
1457 
1458 	ret = ccp_copy_to_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1459 			      CCP_PASSTHRU_BYTESWAP_256BIT);
1460 	if (ret) {
1461 		cmd->engine_error = cmd_q->cmd_error;
1462 		goto e_ctx;
1463 	}
1464 
1465 	/* Send data to the CCP SHA engine */
1466 	ret = ccp_init_data(&src, cmd_q, sha->src, sha->src_len,
1467 			    CCP_SHA_BLOCKSIZE, DMA_TO_DEVICE);
1468 	if (ret)
1469 		goto e_ctx;
1470 
1471 	while (src.sg_wa.bytes_left) {
1472 		ccp_prepare_data(&src, NULL, &op, CCP_SHA_BLOCKSIZE, false);
1473 		if (sha->final && !src.sg_wa.bytes_left)
1474 			op.eom = 1;
1475 
1476 		ret = ccp_perform_sha(&op);
1477 		if (ret) {
1478 			cmd->engine_error = cmd_q->cmd_error;
1479 			goto e_data;
1480 		}
1481 
1482 		ccp_process_data(&src, NULL, &op);
1483 	}
1484 
1485 	/* Retrieve the SHA context - convert from LE to BE using
1486 	 * 32-byte (256-bit) byteswapping
1487 	 */
1488 	ret = ccp_copy_from_ksb(cmd_q, &ctx, op.jobid, op.ksb_ctx,
1489 				CCP_PASSTHRU_BYTESWAP_256BIT);
1490 	if (ret) {
1491 		cmd->engine_error = cmd_q->cmd_error;
1492 		goto e_data;
1493 	}
1494 
1495 	ccp_get_dm_area(&ctx, 0, sha->ctx, 0, sha->ctx_len);
1496 
1497 	if (sha->final && sha->opad) {
1498 		/* HMAC operation, recursively perform final SHA */
1499 		struct ccp_cmd hmac_cmd;
1500 		struct scatterlist sg;
1501 		u64 block_size, digest_size;
1502 		u8 *hmac_buf;
1503 
1504 		switch (sha->type) {
1505 		case CCP_SHA_TYPE_1:
1506 			block_size = SHA1_BLOCK_SIZE;
1507 			digest_size = SHA1_DIGEST_SIZE;
1508 			break;
1509 		case CCP_SHA_TYPE_224:
1510 			block_size = SHA224_BLOCK_SIZE;
1511 			digest_size = SHA224_DIGEST_SIZE;
1512 			break;
1513 		case CCP_SHA_TYPE_256:
1514 			block_size = SHA256_BLOCK_SIZE;
1515 			digest_size = SHA256_DIGEST_SIZE;
1516 			break;
1517 		default:
1518 			ret = -EINVAL;
1519 			goto e_data;
1520 		}
1521 
1522 		if (sha->opad_len != block_size) {
1523 			ret = -EINVAL;
1524 			goto e_data;
1525 		}
1526 
1527 		hmac_buf = kmalloc(block_size + digest_size, GFP_KERNEL);
1528 		if (!hmac_buf) {
1529 			ret = -ENOMEM;
1530 			goto e_data;
1531 		}
1532 		sg_init_one(&sg, hmac_buf, block_size + digest_size);
1533 
1534 		scatterwalk_map_and_copy(hmac_buf, sha->opad, 0, block_size, 0);
1535 		memcpy(hmac_buf + block_size, ctx.address, digest_size);
1536 
1537 		memset(&hmac_cmd, 0, sizeof(hmac_cmd));
1538 		hmac_cmd.engine = CCP_ENGINE_SHA;
1539 		hmac_cmd.u.sha.type = sha->type;
1540 		hmac_cmd.u.sha.ctx = sha->ctx;
1541 		hmac_cmd.u.sha.ctx_len = sha->ctx_len;
1542 		hmac_cmd.u.sha.src = &sg;
1543 		hmac_cmd.u.sha.src_len = block_size + digest_size;
1544 		hmac_cmd.u.sha.opad = NULL;
1545 		hmac_cmd.u.sha.opad_len = 0;
1546 		hmac_cmd.u.sha.first = 1;
1547 		hmac_cmd.u.sha.final = 1;
1548 		hmac_cmd.u.sha.msg_bits = (block_size + digest_size) << 3;
1549 
1550 		ret = ccp_run_sha_cmd(cmd_q, &hmac_cmd);
1551 		if (ret)
1552 			cmd->engine_error = hmac_cmd.engine_error;
1553 
1554 		kfree(hmac_buf);
1555 	}
1556 
1557 e_data:
1558 	ccp_free_data(&src, cmd_q);
1559 
1560 e_ctx:
1561 	ccp_dm_free(&ctx);
1562 
1563 	return ret;
1564 }
1565 
1566 static int ccp_run_rsa_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1567 {
1568 	struct ccp_rsa_engine *rsa = &cmd->u.rsa;
1569 	struct ccp_dm_workarea exp, src;
1570 	struct ccp_data dst;
1571 	struct ccp_op op;
1572 	unsigned int ksb_count, i_len, o_len;
1573 	int ret;
1574 
1575 	if (rsa->key_size > CCP_RSA_MAX_WIDTH)
1576 		return -EINVAL;
1577 
1578 	if (!rsa->exp || !rsa->mod || !rsa->src || !rsa->dst)
1579 		return -EINVAL;
1580 
1581 	/* The RSA modulus must precede the message being acted upon, so
1582 	 * it must be copied to a DMA area where the message and the
1583 	 * modulus can be concatenated.  Therefore the input buffer
1584 	 * length required is twice the output buffer length (which
1585 	 * must be a multiple of 256 bits).
1586 	 */
1587 	o_len = ((rsa->key_size + 255) / 256) * 32;
1588 	i_len = o_len * 2;
1589 
1590 	ksb_count = o_len / CCP_KSB_BYTES;
1591 
1592 	memset(&op, 0, sizeof(op));
1593 	op.cmd_q = cmd_q;
1594 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1595 	op.ksb_key = ccp_alloc_ksb(cmd_q->ccp, ksb_count);
1596 	if (!op.ksb_key)
1597 		return -EIO;
1598 
1599 	/* The RSA exponent may span multiple (32-byte) KSB entries and must
1600 	 * be in little endian format. Reverse copy each 32-byte chunk
1601 	 * of the exponent (En chunk to E0 chunk, E(n-1) chunk to E1 chunk)
1602 	 * and each byte within that chunk, and do not perform any byte
1603 	 * swapping on the passthru operation.
1604 	 */
1605 	ret = ccp_init_dm_workarea(&exp, cmd_q, o_len, DMA_TO_DEVICE);
1606 	if (ret)
1607 		goto e_ksb;
1608 
1609 	ccp_reverse_set_dm_area(&exp, rsa->exp, rsa->exp_len, CCP_KSB_BYTES,
1610 				false);
1611 	ret = ccp_copy_to_ksb(cmd_q, &exp, op.jobid, op.ksb_key,
1612 			      CCP_PASSTHRU_BYTESWAP_NOOP);
1613 	if (ret) {
1614 		cmd->engine_error = cmd_q->cmd_error;
1615 		goto e_exp;
1616 	}
1617 
1618 	/* Concatenate the modulus and the message. Both the modulus and
1619 	 * the operands must be in little endian format.  Since the input
1620 	 * is in big endian format it must be converted.
1621 	 */
1622 	ret = ccp_init_dm_workarea(&src, cmd_q, i_len, DMA_TO_DEVICE);
1623 	if (ret)
1624 		goto e_exp;
1625 
1626 	ccp_reverse_set_dm_area(&src, rsa->mod, rsa->mod_len, CCP_KSB_BYTES,
1627 				false);
1628 	src.address += o_len;	/* Adjust the address for the copy operation */
1629 	ccp_reverse_set_dm_area(&src, rsa->src, rsa->src_len, CCP_KSB_BYTES,
1630 				false);
1631 	src.address -= o_len;	/* Reset the address to original value */
1632 
1633 	/* Prepare the output area for the operation */
1634 	ret = ccp_init_data(&dst, cmd_q, rsa->dst, rsa->mod_len,
1635 			    o_len, DMA_FROM_DEVICE);
1636 	if (ret)
1637 		goto e_src;
1638 
1639 	op.soc = 1;
1640 	op.src.u.dma.address = src.dma.address;
1641 	op.src.u.dma.offset = 0;
1642 	op.src.u.dma.length = i_len;
1643 	op.dst.u.dma.address = dst.dm_wa.dma.address;
1644 	op.dst.u.dma.offset = 0;
1645 	op.dst.u.dma.length = o_len;
1646 
1647 	op.u.rsa.mod_size = rsa->key_size;
1648 	op.u.rsa.input_len = i_len;
1649 
1650 	ret = ccp_perform_rsa(&op);
1651 	if (ret) {
1652 		cmd->engine_error = cmd_q->cmd_error;
1653 		goto e_dst;
1654 	}
1655 
1656 	ccp_reverse_get_dm_area(&dst.dm_wa, rsa->dst, rsa->mod_len);
1657 
1658 e_dst:
1659 	ccp_free_data(&dst, cmd_q);
1660 
1661 e_src:
1662 	ccp_dm_free(&src);
1663 
1664 e_exp:
1665 	ccp_dm_free(&exp);
1666 
1667 e_ksb:
1668 	ccp_free_ksb(cmd_q->ccp, op.ksb_key, ksb_count);
1669 
1670 	return ret;
1671 }
1672 
1673 static int ccp_run_passthru_cmd(struct ccp_cmd_queue *cmd_q,
1674 				struct ccp_cmd *cmd)
1675 {
1676 	struct ccp_passthru_engine *pt = &cmd->u.passthru;
1677 	struct ccp_dm_workarea mask;
1678 	struct ccp_data src, dst;
1679 	struct ccp_op op;
1680 	bool in_place = false;
1681 	unsigned int i;
1682 	int ret;
1683 
1684 	if (!pt->final && (pt->src_len & (CCP_PASSTHRU_BLOCKSIZE - 1)))
1685 		return -EINVAL;
1686 
1687 	if (!pt->src || !pt->dst)
1688 		return -EINVAL;
1689 
1690 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1691 		if (pt->mask_len != CCP_PASSTHRU_MASKSIZE)
1692 			return -EINVAL;
1693 		if (!pt->mask)
1694 			return -EINVAL;
1695 	}
1696 
1697 	BUILD_BUG_ON(CCP_PASSTHRU_KSB_COUNT != 1);
1698 
1699 	memset(&op, 0, sizeof(op));
1700 	op.cmd_q = cmd_q;
1701 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1702 
1703 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP) {
1704 		/* Load the mask */
1705 		op.ksb_key = cmd_q->ksb_key;
1706 
1707 		ret = ccp_init_dm_workarea(&mask, cmd_q,
1708 					   CCP_PASSTHRU_KSB_COUNT *
1709 					   CCP_KSB_BYTES,
1710 					   DMA_TO_DEVICE);
1711 		if (ret)
1712 			return ret;
1713 
1714 		ccp_set_dm_area(&mask, 0, pt->mask, 0, pt->mask_len);
1715 		ret = ccp_copy_to_ksb(cmd_q, &mask, op.jobid, op.ksb_key,
1716 				      CCP_PASSTHRU_BYTESWAP_NOOP);
1717 		if (ret) {
1718 			cmd->engine_error = cmd_q->cmd_error;
1719 			goto e_mask;
1720 		}
1721 	}
1722 
1723 	/* Prepare the input and output data workareas. For in-place
1724 	 * operations we need to set the dma direction to BIDIRECTIONAL
1725 	 * and copy the src workarea to the dst workarea.
1726 	 */
1727 	if (sg_virt(pt->src) == sg_virt(pt->dst))
1728 		in_place = true;
1729 
1730 	ret = ccp_init_data(&src, cmd_q, pt->src, pt->src_len,
1731 			    CCP_PASSTHRU_MASKSIZE,
1732 			    in_place ? DMA_BIDIRECTIONAL : DMA_TO_DEVICE);
1733 	if (ret)
1734 		goto e_mask;
1735 
1736 	if (in_place) {
1737 		dst = src;
1738 	} else {
1739 		ret = ccp_init_data(&dst, cmd_q, pt->dst, pt->src_len,
1740 				    CCP_PASSTHRU_MASKSIZE, DMA_FROM_DEVICE);
1741 		if (ret)
1742 			goto e_src;
1743 	}
1744 
1745 	/* Send data to the CCP Passthru engine
1746 	 *   Because the CCP engine works on a single source and destination
1747 	 *   dma address at a time, the length of each entry in the source
1748 	 *   scatterlist (after the dma_map_sg call) must be less than or
1749 	 *   equal to the (remaining) length of the destination scatterlist
1750 	 *   entry, and that length must be a multiple of CCP_PASSTHRU_BLOCKSIZE
1751 	 */
1752 	dst.sg_wa.sg_used = 0;
1753 	for (i = 1; i <= src.sg_wa.dma_count; i++) {
1754 		if (!dst.sg_wa.sg ||
1755 		    (dst.sg_wa.sg->length < src.sg_wa.sg->length)) {
1756 			ret = -EINVAL;
1757 			goto e_dst;
1758 		}
1759 
1760 		if (i == src.sg_wa.dma_count) {
1761 			op.eom = 1;
1762 			op.soc = 1;
1763 		}
1764 
1765 		op.src.type = CCP_MEMTYPE_SYSTEM;
1766 		op.src.u.dma.address = sg_dma_address(src.sg_wa.sg);
1767 		op.src.u.dma.offset = 0;
1768 		op.src.u.dma.length = sg_dma_len(src.sg_wa.sg);
1769 
1770 		op.dst.type = CCP_MEMTYPE_SYSTEM;
1771 		op.dst.u.dma.address = sg_dma_address(dst.sg_wa.sg);
1772 		op.dst.u.dma.offset = dst.sg_wa.sg_used;
1773 		op.dst.u.dma.length = op.src.u.dma.length;
1774 
1775 		ret = ccp_perform_passthru(&op);
1776 		if (ret) {
1777 			cmd->engine_error = cmd_q->cmd_error;
1778 			goto e_dst;
1779 		}
1780 
1781 		dst.sg_wa.sg_used += src.sg_wa.sg->length;
1782 		if (dst.sg_wa.sg_used == dst.sg_wa.sg->length) {
1783 			dst.sg_wa.sg = sg_next(dst.sg_wa.sg);
1784 			dst.sg_wa.sg_used = 0;
1785 		}
1786 		src.sg_wa.sg = sg_next(src.sg_wa.sg);
1787 	}
1788 
1789 e_dst:
1790 	if (!in_place)
1791 		ccp_free_data(&dst, cmd_q);
1792 
1793 e_src:
1794 	ccp_free_data(&src, cmd_q);
1795 
1796 e_mask:
1797 	if (pt->bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
1798 		ccp_dm_free(&mask);
1799 
1800 	return ret;
1801 }
1802 
1803 static int ccp_run_ecc_mm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1804 {
1805 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1806 	struct ccp_dm_workarea src, dst;
1807 	struct ccp_op op;
1808 	int ret;
1809 	u8 *save;
1810 
1811 	if (!ecc->u.mm.operand_1 ||
1812 	    (ecc->u.mm.operand_1_len > CCP_ECC_MODULUS_BYTES))
1813 		return -EINVAL;
1814 
1815 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT)
1816 		if (!ecc->u.mm.operand_2 ||
1817 		    (ecc->u.mm.operand_2_len > CCP_ECC_MODULUS_BYTES))
1818 			return -EINVAL;
1819 
1820 	if (!ecc->u.mm.result ||
1821 	    (ecc->u.mm.result_len < CCP_ECC_MODULUS_BYTES))
1822 		return -EINVAL;
1823 
1824 	memset(&op, 0, sizeof(op));
1825 	op.cmd_q = cmd_q;
1826 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1827 
1828 	/* Concatenate the modulus and the operands. Both the modulus and
1829 	 * the operands must be in little endian format.  Since the input
1830 	 * is in big endian format it must be converted and placed in a
1831 	 * fixed length buffer.
1832 	 */
1833 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1834 				   DMA_TO_DEVICE);
1835 	if (ret)
1836 		return ret;
1837 
1838 	/* Save the workarea address since it is updated in order to perform
1839 	 * the concatenation
1840 	 */
1841 	save = src.address;
1842 
1843 	/* Copy the ECC modulus */
1844 	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1845 				CCP_ECC_OPERAND_SIZE, false);
1846 	src.address += CCP_ECC_OPERAND_SIZE;
1847 
1848 	/* Copy the first operand */
1849 	ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_1,
1850 				ecc->u.mm.operand_1_len,
1851 				CCP_ECC_OPERAND_SIZE, false);
1852 	src.address += CCP_ECC_OPERAND_SIZE;
1853 
1854 	if (ecc->function != CCP_ECC_FUNCTION_MINV_384BIT) {
1855 		/* Copy the second operand */
1856 		ccp_reverse_set_dm_area(&src, ecc->u.mm.operand_2,
1857 					ecc->u.mm.operand_2_len,
1858 					CCP_ECC_OPERAND_SIZE, false);
1859 		src.address += CCP_ECC_OPERAND_SIZE;
1860 	}
1861 
1862 	/* Restore the workarea address */
1863 	src.address = save;
1864 
1865 	/* Prepare the output area for the operation */
1866 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
1867 				   DMA_FROM_DEVICE);
1868 	if (ret)
1869 		goto e_src;
1870 
1871 	op.soc = 1;
1872 	op.src.u.dma.address = src.dma.address;
1873 	op.src.u.dma.offset = 0;
1874 	op.src.u.dma.length = src.length;
1875 	op.dst.u.dma.address = dst.dma.address;
1876 	op.dst.u.dma.offset = 0;
1877 	op.dst.u.dma.length = dst.length;
1878 
1879 	op.u.ecc.function = cmd->u.ecc.function;
1880 
1881 	ret = ccp_perform_ecc(&op);
1882 	if (ret) {
1883 		cmd->engine_error = cmd_q->cmd_error;
1884 		goto e_dst;
1885 	}
1886 
1887 	ecc->ecc_result = le16_to_cpup(
1888 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
1889 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
1890 		ret = -EIO;
1891 		goto e_dst;
1892 	}
1893 
1894 	/* Save the ECC result */
1895 	ccp_reverse_get_dm_area(&dst, ecc->u.mm.result, CCP_ECC_MODULUS_BYTES);
1896 
1897 e_dst:
1898 	ccp_dm_free(&dst);
1899 
1900 e_src:
1901 	ccp_dm_free(&src);
1902 
1903 	return ret;
1904 }
1905 
1906 static int ccp_run_ecc_pm_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
1907 {
1908 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
1909 	struct ccp_dm_workarea src, dst;
1910 	struct ccp_op op;
1911 	int ret;
1912 	u8 *save;
1913 
1914 	if (!ecc->u.pm.point_1.x ||
1915 	    (ecc->u.pm.point_1.x_len > CCP_ECC_MODULUS_BYTES) ||
1916 	    !ecc->u.pm.point_1.y ||
1917 	    (ecc->u.pm.point_1.y_len > CCP_ECC_MODULUS_BYTES))
1918 		return -EINVAL;
1919 
1920 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1921 		if (!ecc->u.pm.point_2.x ||
1922 		    (ecc->u.pm.point_2.x_len > CCP_ECC_MODULUS_BYTES) ||
1923 		    !ecc->u.pm.point_2.y ||
1924 		    (ecc->u.pm.point_2.y_len > CCP_ECC_MODULUS_BYTES))
1925 			return -EINVAL;
1926 	} else {
1927 		if (!ecc->u.pm.domain_a ||
1928 		    (ecc->u.pm.domain_a_len > CCP_ECC_MODULUS_BYTES))
1929 			return -EINVAL;
1930 
1931 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT)
1932 			if (!ecc->u.pm.scalar ||
1933 			    (ecc->u.pm.scalar_len > CCP_ECC_MODULUS_BYTES))
1934 				return -EINVAL;
1935 	}
1936 
1937 	if (!ecc->u.pm.result.x ||
1938 	    (ecc->u.pm.result.x_len < CCP_ECC_MODULUS_BYTES) ||
1939 	    !ecc->u.pm.result.y ||
1940 	    (ecc->u.pm.result.y_len < CCP_ECC_MODULUS_BYTES))
1941 		return -EINVAL;
1942 
1943 	memset(&op, 0, sizeof(op));
1944 	op.cmd_q = cmd_q;
1945 	op.jobid = ccp_gen_jobid(cmd_q->ccp);
1946 
1947 	/* Concatenate the modulus and the operands. Both the modulus and
1948 	 * the operands must be in little endian format.  Since the input
1949 	 * is in big endian format it must be converted and placed in a
1950 	 * fixed length buffer.
1951 	 */
1952 	ret = ccp_init_dm_workarea(&src, cmd_q, CCP_ECC_SRC_BUF_SIZE,
1953 				   DMA_TO_DEVICE);
1954 	if (ret)
1955 		return ret;
1956 
1957 	/* Save the workarea address since it is updated in order to perform
1958 	 * the concatenation
1959 	 */
1960 	save = src.address;
1961 
1962 	/* Copy the ECC modulus */
1963 	ccp_reverse_set_dm_area(&src, ecc->mod, ecc->mod_len,
1964 				CCP_ECC_OPERAND_SIZE, false);
1965 	src.address += CCP_ECC_OPERAND_SIZE;
1966 
1967 	/* Copy the first point X and Y coordinate */
1968 	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.x,
1969 				ecc->u.pm.point_1.x_len,
1970 				CCP_ECC_OPERAND_SIZE, false);
1971 	src.address += CCP_ECC_OPERAND_SIZE;
1972 	ccp_reverse_set_dm_area(&src, ecc->u.pm.point_1.y,
1973 				ecc->u.pm.point_1.y_len,
1974 				CCP_ECC_OPERAND_SIZE, false);
1975 	src.address += CCP_ECC_OPERAND_SIZE;
1976 
1977 	/* Set the first point Z coordinate to 1 */
1978 	*src.address = 0x01;
1979 	src.address += CCP_ECC_OPERAND_SIZE;
1980 
1981 	if (ecc->function == CCP_ECC_FUNCTION_PADD_384BIT) {
1982 		/* Copy the second point X and Y coordinate */
1983 		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.x,
1984 					ecc->u.pm.point_2.x_len,
1985 					CCP_ECC_OPERAND_SIZE, false);
1986 		src.address += CCP_ECC_OPERAND_SIZE;
1987 		ccp_reverse_set_dm_area(&src, ecc->u.pm.point_2.y,
1988 					ecc->u.pm.point_2.y_len,
1989 					CCP_ECC_OPERAND_SIZE, false);
1990 		src.address += CCP_ECC_OPERAND_SIZE;
1991 
1992 		/* Set the second point Z coordinate to 1 */
1993 		*src.address = 0x01;
1994 		src.address += CCP_ECC_OPERAND_SIZE;
1995 	} else {
1996 		/* Copy the Domain "a" parameter */
1997 		ccp_reverse_set_dm_area(&src, ecc->u.pm.domain_a,
1998 					ecc->u.pm.domain_a_len,
1999 					CCP_ECC_OPERAND_SIZE, false);
2000 		src.address += CCP_ECC_OPERAND_SIZE;
2001 
2002 		if (ecc->function == CCP_ECC_FUNCTION_PMUL_384BIT) {
2003 			/* Copy the scalar value */
2004 			ccp_reverse_set_dm_area(&src, ecc->u.pm.scalar,
2005 						ecc->u.pm.scalar_len,
2006 						CCP_ECC_OPERAND_SIZE, false);
2007 			src.address += CCP_ECC_OPERAND_SIZE;
2008 		}
2009 	}
2010 
2011 	/* Restore the workarea address */
2012 	src.address = save;
2013 
2014 	/* Prepare the output area for the operation */
2015 	ret = ccp_init_dm_workarea(&dst, cmd_q, CCP_ECC_DST_BUF_SIZE,
2016 				   DMA_FROM_DEVICE);
2017 	if (ret)
2018 		goto e_src;
2019 
2020 	op.soc = 1;
2021 	op.src.u.dma.address = src.dma.address;
2022 	op.src.u.dma.offset = 0;
2023 	op.src.u.dma.length = src.length;
2024 	op.dst.u.dma.address = dst.dma.address;
2025 	op.dst.u.dma.offset = 0;
2026 	op.dst.u.dma.length = dst.length;
2027 
2028 	op.u.ecc.function = cmd->u.ecc.function;
2029 
2030 	ret = ccp_perform_ecc(&op);
2031 	if (ret) {
2032 		cmd->engine_error = cmd_q->cmd_error;
2033 		goto e_dst;
2034 	}
2035 
2036 	ecc->ecc_result = le16_to_cpup(
2037 		(const __le16 *)(dst.address + CCP_ECC_RESULT_OFFSET));
2038 	if (!(ecc->ecc_result & CCP_ECC_RESULT_SUCCESS)) {
2039 		ret = -EIO;
2040 		goto e_dst;
2041 	}
2042 
2043 	/* Save the workarea address since it is updated as we walk through
2044 	 * to copy the point math result
2045 	 */
2046 	save = dst.address;
2047 
2048 	/* Save the ECC result X and Y coordinates */
2049 	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.x,
2050 				CCP_ECC_MODULUS_BYTES);
2051 	dst.address += CCP_ECC_OUTPUT_SIZE;
2052 	ccp_reverse_get_dm_area(&dst, ecc->u.pm.result.y,
2053 				CCP_ECC_MODULUS_BYTES);
2054 	dst.address += CCP_ECC_OUTPUT_SIZE;
2055 
2056 	/* Restore the workarea address */
2057 	dst.address = save;
2058 
2059 e_dst:
2060 	ccp_dm_free(&dst);
2061 
2062 e_src:
2063 	ccp_dm_free(&src);
2064 
2065 	return ret;
2066 }
2067 
2068 static int ccp_run_ecc_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2069 {
2070 	struct ccp_ecc_engine *ecc = &cmd->u.ecc;
2071 
2072 	ecc->ecc_result = 0;
2073 
2074 	if (!ecc->mod ||
2075 	    (ecc->mod_len > CCP_ECC_MODULUS_BYTES))
2076 		return -EINVAL;
2077 
2078 	switch (ecc->function) {
2079 	case CCP_ECC_FUNCTION_MMUL_384BIT:
2080 	case CCP_ECC_FUNCTION_MADD_384BIT:
2081 	case CCP_ECC_FUNCTION_MINV_384BIT:
2082 		return ccp_run_ecc_mm_cmd(cmd_q, cmd);
2083 
2084 	case CCP_ECC_FUNCTION_PADD_384BIT:
2085 	case CCP_ECC_FUNCTION_PMUL_384BIT:
2086 	case CCP_ECC_FUNCTION_PDBL_384BIT:
2087 		return ccp_run_ecc_pm_cmd(cmd_q, cmd);
2088 
2089 	default:
2090 		return -EINVAL;
2091 	}
2092 }
2093 
2094 int ccp_run_cmd(struct ccp_cmd_queue *cmd_q, struct ccp_cmd *cmd)
2095 {
2096 	int ret;
2097 
2098 	cmd->engine_error = 0;
2099 	cmd_q->cmd_error = 0;
2100 	cmd_q->int_rcvd = 0;
2101 	cmd_q->free_slots = CMD_Q_DEPTH(ioread32(cmd_q->reg_status));
2102 
2103 	switch (cmd->engine) {
2104 	case CCP_ENGINE_AES:
2105 		ret = ccp_run_aes_cmd(cmd_q, cmd);
2106 		break;
2107 	case CCP_ENGINE_XTS_AES_128:
2108 		ret = ccp_run_xts_aes_cmd(cmd_q, cmd);
2109 		break;
2110 	case CCP_ENGINE_SHA:
2111 		ret = ccp_run_sha_cmd(cmd_q, cmd);
2112 		break;
2113 	case CCP_ENGINE_RSA:
2114 		ret = ccp_run_rsa_cmd(cmd_q, cmd);
2115 		break;
2116 	case CCP_ENGINE_PASSTHRU:
2117 		ret = ccp_run_passthru_cmd(cmd_q, cmd);
2118 		break;
2119 	case CCP_ENGINE_ECC:
2120 		ret = ccp_run_ecc_cmd(cmd_q, cmd);
2121 		break;
2122 	default:
2123 		ret = -EINVAL;
2124 	}
2125 
2126 	return ret;
2127 }
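
/* Illustrative sketch only (not part of the driver): a user of the CCP is
 * expected to build a struct ccp_cmd and submit it for processing.  The
 * example below shows one plausible way a 256-bit byte-swap passthru
 * request could be filled in.  It assumes the asynchronous
 * ccp_enqueue_cmd() interface declared in linux/ccp.h; the scatterlists
 * and the wrapper function itself are hypothetical.
 */
#if 0
static int ccp_passthru_byteswap_example(struct scatterlist *src_sg,
					 struct scatterlist *dst_sg,
					 unsigned int len)
{
	struct ccp_cmd cmd;

	memset(&cmd, 0, sizeof(cmd));
	cmd.engine = CCP_ENGINE_PASSTHRU;
	cmd.u.passthru.bit_mod = CCP_PASSTHRU_BITWISE_NOOP;
	cmd.u.passthru.byte_swap = CCP_PASSTHRU_BYTESWAP_256BIT;
	cmd.u.passthru.src = src_sg;
	cmd.u.passthru.dst = dst_sg;
	cmd.u.passthru.src_len = len;
	cmd.u.passthru.final = 1;

	/* Submission is normally asynchronous; a real caller would also set
	 * cmd.callback and cmd.data and handle an -EINPROGRESS return.
	 */
	return ccp_enqueue_cmd(&cmd);
}
#endif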
2128