xref: /linux/drivers/crypto/ccp/ccp-dev-v5.c (revision b70366e5d31788650b2a5cec5cd13ea80ac7e44a)
1 /*
2  * AMD Cryptographic Coprocessor (CCP) driver
3  *
4  * Copyright (C) 2016 Advanced Micro Devices, Inc.
5  *
6  * Author: Gary R Hook <gary.hook@amd.com>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/kthread.h>
17 #include <linux/dma-mapping.h>
18 #include <linux/interrupt.h>
19 #include <linux/compiler.h>
20 #include <linux/ccp.h>
21 
22 #include "ccp-dev.h"
23 
24 /* Allocate the requested number of contiguous LSB slots
25  * from the LSB bitmap. Look in the private range for this
26  * queue first; failing that, check the public area.
27  * If no space is available, wait around.
28  * Return: first slot number
29  */
30 static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count)
31 {
32 	struct ccp_device *ccp;
33 	int start;
34 
35 	/* First look at the map for the queue */
36 	if (cmd_q->lsb >= 0) {
37 		start = (u32)bitmap_find_next_zero_area(cmd_q->lsbmap,
38 							LSB_SIZE,
39 							0, count, 0);
40 		if (start < LSB_SIZE) {
41 			bitmap_set(cmd_q->lsbmap, start, count);
42 			return start + cmd_q->lsb * LSB_SIZE;
43 		}
44 	}
45 
46 	/* No joy; try to get an entry from the shared blocks */
47 	ccp = cmd_q->ccp;
48 	for (;;) {
49 		mutex_lock(&ccp->sb_mutex);
50 
51 		start = (u32)bitmap_find_next_zero_area(ccp->lsbmap,
52 							MAX_LSB_CNT * LSB_SIZE,
53 							0,
54 							count, 0);
55 		if (start <= MAX_LSB_CNT * LSB_SIZE) {
56 			bitmap_set(ccp->lsbmap, start, count);
57 
58 			mutex_unlock(&ccp->sb_mutex);
59 			return start;
60 		}
61 
62 		ccp->sb_avail = 0;
63 
64 		mutex_unlock(&ccp->sb_mutex);
65 
66 		/* Wait for KSB entries to become available */
67 		if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
68 			return 0;
69 	}
70 }
71 
72 /* Free a number of LSB slots from the bitmap, starting at
73  * the indicated starting slot number.
74  */
75 static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start,
76 			 unsigned int count)
77 {
78 	if (!start)
79 		return;
80 
81 	if (cmd_q->lsb == start) {
82 		/* An entry from the private LSB */
83 		bitmap_clear(cmd_q->lsbmap, start, count);
84 	} else {
85 		/* From the shared LSBs */
86 		struct ccp_device *ccp = cmd_q->ccp;
87 
88 		mutex_lock(&ccp->sb_mutex);
89 		bitmap_clear(ccp->lsbmap, start, count);
90 		ccp->sb_avail = 1;
91 		mutex_unlock(&ccp->sb_mutex);
92 		wake_up_interruptible_all(&ccp->sb_queue);
93 	}
94 }
95 
/* CCP version 5: Union to define the function field (cmd_reg1/dword0).
 *
 * Every member is an engine-specific bit-field view of the same value as
 * "raw". The command builders in this file clear "raw", fill in one view
 * and then copy "raw" into the descriptor via CCP5_CMD_FUNCTION().
 */
union ccp_function {
	/* View filled in by ccp5_perform_aes() */
	struct {
		u16 size:7;
		u16 encrypt:1;
		u16 mode:5;
		u16 type:2;
	} aes;
	/* View filled in by ccp5_perform_xts_aes() (size and encrypt only) */
	struct {
		u16 size:7;
		u16 encrypt:1;
		u16 rsvd:5;
		u16 type:2;
	} aes_xts;
	/* View filled in by ccp5_perform_sha() */
	struct {
		u16 rsvd1:10;
		u16 type:4;
		u16 rsvd2:1;
	} sha;
	/* View filled in by ccp5_perform_rsa() (size only) */
	struct {
		u16 mode:3;
		u16 size:12;
	} rsa;
	/* View filled in by ccp5_perform_passthru() (byteswap and bitwise) */
	struct {
		u16 byteswap:2;
		u16 bitwise:3;
		u16 reflect:2;
		u16 rsvd:8;
	} pt;
	/* Entirely reserved; no accessor is defined for it here */
	struct  {
		u16 rsvd:13;
	} zlib;
	/* View filled in by ccp5_perform_ecc() (mode only) */
	struct {
		u16 size:10;
		u16 type:2;
		u16 mode:3;
	} ecc;
	/* The whole function field as one value */
	u16 raw;
};
135 
/* Accessors for the engine-specific views of union ccp_function.
 * "p" is a pointer to a union ccp_function; each macro expands to an
 * lvalue, so it can be used on either side of an assignment.
 */
#define	CCP_AES_SIZE(p)		((p)->aes.size)
#define	CCP_AES_ENCRYPT(p)	((p)->aes.encrypt)
#define	CCP_AES_MODE(p)		((p)->aes.mode)
#define	CCP_AES_TYPE(p)		((p)->aes.type)
#define	CCP_XTS_SIZE(p)		((p)->aes_xts.size)
#define	CCP_XTS_ENCRYPT(p)	((p)->aes_xts.encrypt)
#define	CCP_SHA_TYPE(p)		((p)->sha.type)
#define	CCP_RSA_SIZE(p)		((p)->rsa.size)
#define	CCP_PT_BYTESWAP(p)	((p)->pt.byteswap)
#define	CCP_PT_BITWISE(p)	((p)->pt.bitwise)
#define	CCP_ECC_MODE(p)		((p)->ecc.mode)
/* NOTE(review): the ecc view of union ccp_function has no member named
 * "one", so this macro cannot compile if it is ever expanded - confirm the
 * intended field before using it.
 */
#define	CCP_ECC_AFFINE(p)	((p)->ecc.one)
148 
/* Accessors for the fields of a command descriptor, grouped by descriptor
 * word. "p" is a pointer to the descriptor (the command builders in this
 * file pass a struct ccp5_desc); each macro expands to an lvalue.
 */

/* Word 0 */
#define CCP5_CMD_DW0(p)		((p)->dw0)
#define CCP5_CMD_SOC(p)		(CCP5_CMD_DW0(p).soc)
#define CCP5_CMD_IOC(p)		(CCP5_CMD_DW0(p).ioc)
#define CCP5_CMD_INIT(p)	(CCP5_CMD_DW0(p).init)
#define CCP5_CMD_EOM(p)		(CCP5_CMD_DW0(p).eom)
#define CCP5_CMD_FUNCTION(p)	(CCP5_CMD_DW0(p).function)
#define CCP5_CMD_ENGINE(p)	(CCP5_CMD_DW0(p).engine)
#define CCP5_CMD_PROT(p)	(CCP5_CMD_DW0(p).prot)

/* Word 1 */
#define CCP5_CMD_DW1(p)		((p)->length)
#define CCP5_CMD_LEN(p)		(CCP5_CMD_DW1(p))

/* Word 2 */
#define CCP5_CMD_DW2(p)		((p)->src_lo)
#define CCP5_CMD_SRC_LO(p)	(CCP5_CMD_DW2(p))

/* Word 3 */
#define CCP5_CMD_DW3(p)		((p)->dw3)
#define CCP5_CMD_SRC_MEM(p)	((p)->dw3.src_mem)
#define CCP5_CMD_SRC_HI(p)	((p)->dw3.src_hi)
#define CCP5_CMD_LSB_ID(p)	((p)->dw3.lsb_cxt_id)
#define CCP5_CMD_FIX_SRC(p)	((p)->dw3.fixed)

/* Words 4/5: either a destination address or a SHA message length */
#define CCP5_CMD_DW4(p)		((p)->dw4)
#define CCP5_CMD_DST_LO(p)	(CCP5_CMD_DW4(p).dst_lo)
#define CCP5_CMD_DW5(p)		((p)->dw5.fields.dst_hi)
#define CCP5_CMD_DST_HI(p)	(CCP5_CMD_DW5(p))
#define CCP5_CMD_DST_MEM(p)	((p)->dw5.fields.dst_mem)
#define CCP5_CMD_FIX_DST(p)	((p)->dw5.fields.fixed)
#define CCP5_CMD_SHA_LO(p)	((p)->dw4.sha_len_lo)
#define CCP5_CMD_SHA_HI(p)	((p)->dw5.sha_len_hi)

/* Word 6/7 */
#define CCP5_CMD_DW6(p)		((p)->key_lo)
#define CCP5_CMD_KEY_LO(p)	(CCP5_CMD_DW6(p))
#define CCP5_CMD_DW7(p)		((p)->dw7)
#define CCP5_CMD_KEY_HI(p)	((p)->dw7.key_hi)
#define CCP5_CMD_KEY_MEM(p)	((p)->dw7.key_mem)
190 
191 static inline u32 low_address(unsigned long addr)
192 {
193 	return (u64)addr & 0x0ffffffff;
194 }
195 
196 static inline u32 high_address(unsigned long addr)
197 {
198 	return ((u64)addr >> 32) & 0x00000ffff;
199 }
200 
201 static unsigned int ccp5_get_free_slots(struct ccp_cmd_queue *cmd_q)
202 {
203 	unsigned int head_idx, n;
204 	u32 head_lo, queue_start;
205 
206 	queue_start = low_address(cmd_q->qdma_tail);
207 	head_lo = ioread32(cmd_q->reg_head_lo);
208 	head_idx = (head_lo - queue_start) / sizeof(struct ccp5_desc);
209 
210 	n = head_idx + COMMANDS_PER_QUEUE - cmd_q->qidx - 1;
211 
212 	return n % COMMANDS_PER_QUEUE; /* Always one unused spot */
213 }
214 
215 static int ccp5_do_cmd(struct ccp5_desc *desc,
216 		       struct ccp_cmd_queue *cmd_q)
217 {
218 	u32 *mP;
219 	__le32 *dP;
220 	u32 tail;
221 	int	i;
222 	int ret = 0;
223 
224 	if (CCP5_CMD_SOC(desc)) {
225 		CCP5_CMD_IOC(desc) = 1;
226 		CCP5_CMD_SOC(desc) = 0;
227 	}
228 	mutex_lock(&cmd_q->q_mutex);
229 
230 	mP = (u32 *) &cmd_q->qbase[cmd_q->qidx];
231 	dP = (__le32 *) desc;
232 	for (i = 0; i < 8; i++)
233 		mP[i] = cpu_to_le32(dP[i]); /* handle endianness */
234 
235 	cmd_q->qidx = (cmd_q->qidx + 1) % COMMANDS_PER_QUEUE;
236 
237 	/* The data used by this command must be flushed to memory */
238 	wmb();
239 
240 	/* Write the new tail address back to the queue register */
241 	tail = low_address(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
242 	iowrite32(tail, cmd_q->reg_tail_lo);
243 
244 	/* Turn the queue back on using our cached control register */
245 	iowrite32(cmd_q->qcontrol | CMD5_Q_RUN, cmd_q->reg_control);
246 	mutex_unlock(&cmd_q->q_mutex);
247 
248 	if (CCP5_CMD_IOC(desc)) {
249 		/* Wait for the job to complete */
250 		ret = wait_event_interruptible(cmd_q->int_queue,
251 					       cmd_q->int_rcvd);
252 		if (ret || cmd_q->cmd_error) {
253 			/* Log the error and flush the queue by
254 			 * moving the head pointer
255 			 */
256 			if (cmd_q->cmd_error)
257 				ccp_log_error(cmd_q->ccp,
258 					      cmd_q->cmd_error);
259 			iowrite32(tail, cmd_q->reg_head_lo);
260 			if (!ret)
261 				ret = -EIO;
262 		}
263 		cmd_q->int_rcvd = 0;
264 	}
265 
266 	return ret;
267 }
268 
269 static int ccp5_perform_aes(struct ccp_op *op)
270 {
271 	struct ccp5_desc desc;
272 	union ccp_function function;
273 	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
274 
275 	/* Zero out all the fields of the command desc */
276 	memset(&desc, 0, Q_DESC_SIZE);
277 
278 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_AES;
279 
280 	CCP5_CMD_SOC(&desc) = op->soc;
281 	CCP5_CMD_IOC(&desc) = 1;
282 	CCP5_CMD_INIT(&desc) = op->init;
283 	CCP5_CMD_EOM(&desc) = op->eom;
284 	CCP5_CMD_PROT(&desc) = 0;
285 
286 	function.raw = 0;
287 	CCP_AES_ENCRYPT(&function) = op->u.aes.action;
288 	CCP_AES_MODE(&function) = op->u.aes.mode;
289 	CCP_AES_TYPE(&function) = op->u.aes.type;
290 	CCP_AES_SIZE(&function) = op->u.aes.size;
291 
292 	CCP5_CMD_FUNCTION(&desc) = function.raw;
293 
294 	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
295 
296 	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
297 	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
298 	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
299 
300 	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
301 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
302 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
303 
304 	CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
305 	CCP5_CMD_KEY_HI(&desc) = 0;
306 	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
307 	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
308 
309 	return ccp5_do_cmd(&desc, op->cmd_q);
310 }
311 
312 static int ccp5_perform_xts_aes(struct ccp_op *op)
313 {
314 	struct ccp5_desc desc;
315 	union ccp_function function;
316 	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
317 
318 	/* Zero out all the fields of the command desc */
319 	memset(&desc, 0, Q_DESC_SIZE);
320 
321 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_XTS_AES_128;
322 
323 	CCP5_CMD_SOC(&desc) = op->soc;
324 	CCP5_CMD_IOC(&desc) = 1;
325 	CCP5_CMD_INIT(&desc) = op->init;
326 	CCP5_CMD_EOM(&desc) = op->eom;
327 	CCP5_CMD_PROT(&desc) = 0;
328 
329 	function.raw = 0;
330 	CCP_XTS_ENCRYPT(&function) = op->u.xts.action;
331 	CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
332 	CCP5_CMD_FUNCTION(&desc) = function.raw;
333 
334 	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
335 
336 	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
337 	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
338 	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
339 
340 	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
341 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
342 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
343 
344 	CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
345 	CCP5_CMD_KEY_HI(&desc) =  0;
346 	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
347 	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
348 
349 	return ccp5_do_cmd(&desc, op->cmd_q);
350 }
351 
352 static int ccp5_perform_sha(struct ccp_op *op)
353 {
354 	struct ccp5_desc desc;
355 	union ccp_function function;
356 
357 	/* Zero out all the fields of the command desc */
358 	memset(&desc, 0, Q_DESC_SIZE);
359 
360 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_SHA;
361 
362 	CCP5_CMD_SOC(&desc) = op->soc;
363 	CCP5_CMD_IOC(&desc) = 1;
364 	CCP5_CMD_INIT(&desc) = 1;
365 	CCP5_CMD_EOM(&desc) = op->eom;
366 	CCP5_CMD_PROT(&desc) = 0;
367 
368 	function.raw = 0;
369 	CCP_SHA_TYPE(&function) = op->u.sha.type;
370 	CCP5_CMD_FUNCTION(&desc) = function.raw;
371 
372 	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
373 
374 	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
375 	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
376 	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
377 
378 	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
379 
380 	if (op->eom) {
381 		CCP5_CMD_SHA_LO(&desc) = lower_32_bits(op->u.sha.msg_bits);
382 		CCP5_CMD_SHA_HI(&desc) = upper_32_bits(op->u.sha.msg_bits);
383 	} else {
384 		CCP5_CMD_SHA_LO(&desc) = 0;
385 		CCP5_CMD_SHA_HI(&desc) = 0;
386 	}
387 
388 	return ccp5_do_cmd(&desc, op->cmd_q);
389 }
390 
391 static int ccp5_perform_rsa(struct ccp_op *op)
392 {
393 	struct ccp5_desc desc;
394 	union ccp_function function;
395 
396 	/* Zero out all the fields of the command desc */
397 	memset(&desc, 0, Q_DESC_SIZE);
398 
399 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_RSA;
400 
401 	CCP5_CMD_SOC(&desc) = op->soc;
402 	CCP5_CMD_IOC(&desc) = 1;
403 	CCP5_CMD_INIT(&desc) = 0;
404 	CCP5_CMD_EOM(&desc) = 1;
405 	CCP5_CMD_PROT(&desc) = 0;
406 
407 	function.raw = 0;
408 	CCP_RSA_SIZE(&function) = op->u.rsa.mod_size >> 3;
409 	CCP5_CMD_FUNCTION(&desc) = function.raw;
410 
411 	CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;
412 
413 	/* Source is from external memory */
414 	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
415 	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
416 	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
417 
418 	/* Destination is in external memory */
419 	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
420 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
421 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
422 
423 	/* Exponent is in LSB memory */
424 	CCP5_CMD_KEY_LO(&desc) = op->sb_key * LSB_ITEM_SIZE;
425 	CCP5_CMD_KEY_HI(&desc) = 0;
426 	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
427 
428 	return ccp5_do_cmd(&desc, op->cmd_q);
429 }
430 
431 static int ccp5_perform_passthru(struct ccp_op *op)
432 {
433 	struct ccp5_desc desc;
434 	union ccp_function function;
435 	struct ccp_dma_info *saddr = &op->src.u.dma;
436 	struct ccp_dma_info *daddr = &op->dst.u.dma;
437 
438 	memset(&desc, 0, Q_DESC_SIZE);
439 
440 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU;
441 
442 	CCP5_CMD_SOC(&desc) = 0;
443 	CCP5_CMD_IOC(&desc) = 1;
444 	CCP5_CMD_INIT(&desc) = 0;
445 	CCP5_CMD_EOM(&desc) = op->eom;
446 	CCP5_CMD_PROT(&desc) = 0;
447 
448 	function.raw = 0;
449 	CCP_PT_BYTESWAP(&function) = op->u.passthru.byte_swap;
450 	CCP_PT_BITWISE(&function) = op->u.passthru.bit_mod;
451 	CCP5_CMD_FUNCTION(&desc) = function.raw;
452 
453 	/* Length of source data is always 256 bytes */
454 	if (op->src.type == CCP_MEMTYPE_SYSTEM)
455 		CCP5_CMD_LEN(&desc) = saddr->length;
456 	else
457 		CCP5_CMD_LEN(&desc) = daddr->length;
458 
459 	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
460 		CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
461 		CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
462 		CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
463 
464 		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
465 			CCP5_CMD_LSB_ID(&desc) = op->sb_key;
466 	} else {
467 		u32 key_addr = op->src.u.sb * CCP_SB_BYTES;
468 
469 		CCP5_CMD_SRC_LO(&desc) = lower_32_bits(key_addr);
470 		CCP5_CMD_SRC_HI(&desc) = 0;
471 		CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SB;
472 	}
473 
474 	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
475 		CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
476 		CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
477 		CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
478 	} else {
479 		u32 key_addr = op->dst.u.sb * CCP_SB_BYTES;
480 
481 		CCP5_CMD_DST_LO(&desc) = lower_32_bits(key_addr);
482 		CCP5_CMD_DST_HI(&desc) = 0;
483 		CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SB;
484 	}
485 
486 	return ccp5_do_cmd(&desc, op->cmd_q);
487 }
488 
489 static int ccp5_perform_ecc(struct ccp_op *op)
490 {
491 	struct ccp5_desc desc;
492 	union ccp_function function;
493 
494 	/* Zero out all the fields of the command desc */
495 	memset(&desc, 0, Q_DESC_SIZE);
496 
497 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_ECC;
498 
499 	CCP5_CMD_SOC(&desc) = 0;
500 	CCP5_CMD_IOC(&desc) = 1;
501 	CCP5_CMD_INIT(&desc) = 0;
502 	CCP5_CMD_EOM(&desc) = 1;
503 	CCP5_CMD_PROT(&desc) = 0;
504 
505 	function.raw = 0;
506 	function.ecc.mode = op->u.ecc.function;
507 	CCP5_CMD_FUNCTION(&desc) = function.raw;
508 
509 	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
510 
511 	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
512 	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
513 	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
514 
515 	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
516 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
517 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
518 
519 	return ccp5_do_cmd(&desc, op->cmd_q);
520 }
521 
522 static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status)
523 {
524 	int q_mask = 1 << cmd_q->id;
525 	int queues = 0;
526 	int j;
527 
528 	/* Build a bit mask to know which LSBs this queue has access to.
529 	 * Don't bother with segment 0 as it has special privileges.
530 	 */
531 	for (j = 1; j < MAX_LSB_CNT; j++) {
532 		if (status & q_mask)
533 			bitmap_set(cmd_q->lsbmask, j, 1);
534 		status >>= LSB_REGION_WIDTH;
535 	}
536 	queues = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
537 	dev_dbg(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n",
538 		 cmd_q->id, queues);
539 
540 	return queues ? 0 : -EINVAL;
541 }
542 
543 
544 static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
545 					int lsb_cnt, int n_lsbs,
546 					unsigned long *lsb_pub)
547 {
548 	DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
549 	int bitno;
550 	int qlsb_wgt;
551 	int i;
552 
553 	/* For each queue:
554 	 * If the count of potential LSBs available to a queue matches the
555 	 * ordinal given to us in lsb_cnt:
556 	 * Copy the mask of possible LSBs for this queue into "qlsb";
557 	 * For each bit in qlsb, see if the corresponding bit in the
558 	 * aggregation mask is set; if so, we have a match.
559 	 *     If we have a match, clear the bit in the aggregation to
560 	 *     mark it as no longer available.
561 	 *     If there is no match, clear the bit in qlsb and keep looking.
562 	 */
563 	for (i = 0; i < ccp->cmd_q_count; i++) {
564 		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
565 
566 		qlsb_wgt = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
567 
568 		if (qlsb_wgt == lsb_cnt) {
569 			bitmap_copy(qlsb, cmd_q->lsbmask, MAX_LSB_CNT);
570 
571 			bitno = find_first_bit(qlsb, MAX_LSB_CNT);
572 			while (bitno < MAX_LSB_CNT) {
573 				if (test_bit(bitno, lsb_pub)) {
574 					/* We found an available LSB
575 					 * that this queue can access
576 					 */
577 					cmd_q->lsb = bitno;
578 					bitmap_clear(lsb_pub, bitno, 1);
579 					dev_dbg(ccp->dev,
580 						 "Queue %d gets LSB %d\n",
581 						 i, bitno);
582 					break;
583 				}
584 				bitmap_clear(qlsb, bitno, 1);
585 				bitno = find_first_bit(qlsb, MAX_LSB_CNT);
586 			}
587 			if (bitno >= MAX_LSB_CNT)
588 				return -EINVAL;
589 			n_lsbs--;
590 		}
591 	}
592 	return n_lsbs;
593 }
594 
595 /* For each queue, from the most- to least-constrained:
596  * find an LSB that can be assigned to the queue. If there are N queues that
597  * can only use M LSBs, where N > M, fail; otherwise, every queue will get a
598  * dedicated LSB. Remaining LSB regions become a shared resource.
599  * If we have fewer LSBs than queues, all LSB regions become shared resources.
600  */
601 static int ccp_assign_lsbs(struct ccp_device *ccp)
602 {
603 	DECLARE_BITMAP(lsb_pub, MAX_LSB_CNT);
604 	DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
605 	int n_lsbs = 0;
606 	int bitno;
607 	int i, lsb_cnt;
608 	int rc = 0;
609 
610 	bitmap_zero(lsb_pub, MAX_LSB_CNT);
611 
612 	/* Create an aggregate bitmap to get a total count of available LSBs */
613 	for (i = 0; i < ccp->cmd_q_count; i++)
614 		bitmap_or(lsb_pub,
615 			  lsb_pub, ccp->cmd_q[i].lsbmask,
616 			  MAX_LSB_CNT);
617 
618 	n_lsbs = bitmap_weight(lsb_pub, MAX_LSB_CNT);
619 
620 	if (n_lsbs >= ccp->cmd_q_count) {
621 		/* We have enough LSBS to give every queue a private LSB.
622 		 * Brute force search to start with the queues that are more
623 		 * constrained in LSB choice. When an LSB is privately
624 		 * assigned, it is removed from the public mask.
625 		 * This is an ugly N squared algorithm with some optimization.
626 		 */
627 		for (lsb_cnt = 1;
628 		     n_lsbs && (lsb_cnt <= MAX_LSB_CNT);
629 		     lsb_cnt++) {
630 			rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt, n_lsbs,
631 							  lsb_pub);
632 			if (rc < 0)
633 				return -EINVAL;
634 			n_lsbs = rc;
635 		}
636 	}
637 
638 	rc = 0;
639 	/* What's left of the LSBs, according to the public mask, now become
640 	 * shared. Any zero bits in the lsb_pub mask represent an LSB region
641 	 * that can't be used as a shared resource, so mark the LSB slots for
642 	 * them as "in use".
643 	 */
644 	bitmap_copy(qlsb, lsb_pub, MAX_LSB_CNT);
645 
646 	bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
647 	while (bitno < MAX_LSB_CNT) {
648 		bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE);
649 		bitmap_set(qlsb, bitno, 1);
650 		bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
651 	}
652 
653 	return rc;
654 }
655 
656 static int ccp5_init(struct ccp_device *ccp)
657 {
658 	struct device *dev = ccp->dev;
659 	struct ccp_cmd_queue *cmd_q;
660 	struct dma_pool *dma_pool;
661 	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
662 	unsigned int qmr, qim, i;
663 	u64 status;
664 	u32 status_lo, status_hi;
665 	int ret;
666 
667 	/* Find available queues */
668 	qim = 0;
669 	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
670 	for (i = 0; i < MAX_HW_QUEUES; i++) {
671 
672 		if (!(qmr & (1 << i)))
673 			continue;
674 
675 		/* Allocate a dma pool for this queue */
676 		snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
677 			 ccp->name, i);
678 		dma_pool = dma_pool_create(dma_pool_name, dev,
679 					   CCP_DMAPOOL_MAX_SIZE,
680 					   CCP_DMAPOOL_ALIGN, 0);
681 		if (!dma_pool) {
682 			dev_err(dev, "unable to allocate dma pool\n");
683 			ret = -ENOMEM;
684 		}
685 
686 		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
687 		ccp->cmd_q_count++;
688 
689 		cmd_q->ccp = ccp;
690 		cmd_q->id = i;
691 		cmd_q->dma_pool = dma_pool;
692 		mutex_init(&cmd_q->q_mutex);
693 
694 		/* Page alignment satisfies our needs for N <= 128 */
695 		BUILD_BUG_ON(COMMANDS_PER_QUEUE > 128);
696 		cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
697 		cmd_q->qbase = dma_zalloc_coherent(dev, cmd_q->qsize,
698 						   &cmd_q->qbase_dma,
699 						   GFP_KERNEL);
700 		if (!cmd_q->qbase) {
701 			dev_err(dev, "unable to allocate command queue\n");
702 			ret = -ENOMEM;
703 			goto e_pool;
704 		}
705 
706 		cmd_q->qidx = 0;
707 		/* Preset some register values and masks that are queue
708 		 * number dependent
709 		 */
710 		cmd_q->reg_control = ccp->io_regs +
711 				     CMD5_Q_STATUS_INCR * (i + 1);
712 		cmd_q->reg_tail_lo = cmd_q->reg_control + CMD5_Q_TAIL_LO_BASE;
713 		cmd_q->reg_head_lo = cmd_q->reg_control + CMD5_Q_HEAD_LO_BASE;
714 		cmd_q->reg_int_enable = cmd_q->reg_control +
715 					CMD5_Q_INT_ENABLE_BASE;
716 		cmd_q->reg_interrupt_status = cmd_q->reg_control +
717 					      CMD5_Q_INTERRUPT_STATUS_BASE;
718 		cmd_q->reg_status = cmd_q->reg_control + CMD5_Q_STATUS_BASE;
719 		cmd_q->reg_int_status = cmd_q->reg_control +
720 					CMD5_Q_INT_STATUS_BASE;
721 		cmd_q->reg_dma_status = cmd_q->reg_control +
722 					CMD5_Q_DMA_STATUS_BASE;
723 		cmd_q->reg_dma_read_status = cmd_q->reg_control +
724 					     CMD5_Q_DMA_READ_STATUS_BASE;
725 		cmd_q->reg_dma_write_status = cmd_q->reg_control +
726 					      CMD5_Q_DMA_WRITE_STATUS_BASE;
727 
728 		init_waitqueue_head(&cmd_q->int_queue);
729 
730 		dev_dbg(dev, "queue #%u available\n", i);
731 	}
732 	if (ccp->cmd_q_count == 0) {
733 		dev_notice(dev, "no command queues available\n");
734 		ret = -EIO;
735 		goto e_pool;
736 	}
737 
738 	/* Turn off the queues and disable interrupts until ready */
739 	for (i = 0; i < ccp->cmd_q_count; i++) {
740 		cmd_q = &ccp->cmd_q[i];
741 
742 		cmd_q->qcontrol = 0; /* Start with nothing */
743 		iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
744 
745 		/* Disable the interrupts */
746 		iowrite32(0x00, cmd_q->reg_int_enable);
747 		ioread32(cmd_q->reg_int_status);
748 		ioread32(cmd_q->reg_status);
749 
750 		/* Clear the interrupts */
751 		iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
752 	}
753 
754 	dev_dbg(dev, "Requesting an IRQ...\n");
755 	/* Request an irq */
756 	ret = ccp->get_irq(ccp);
757 	if (ret) {
758 		dev_err(dev, "unable to allocate an IRQ\n");
759 		goto e_pool;
760 	}
761 
762 	dev_dbg(dev, "Loading LSB map...\n");
763 	/* Copy the private LSB mask to the public registers */
764 	status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
765 	status_hi = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
766 	iowrite32(status_lo, ccp->io_regs + LSB_PUBLIC_MASK_LO_OFFSET);
767 	iowrite32(status_hi, ccp->io_regs + LSB_PUBLIC_MASK_HI_OFFSET);
768 	status = ((u64)status_hi<<30) | (u64)status_lo;
769 
770 	dev_dbg(dev, "Configuring virtual queues...\n");
771 	/* Configure size of each virtual queue accessible to host */
772 	for (i = 0; i < ccp->cmd_q_count; i++) {
773 		u32 dma_addr_lo;
774 		u32 dma_addr_hi;
775 
776 		cmd_q = &ccp->cmd_q[i];
777 
778 		cmd_q->qcontrol &= ~(CMD5_Q_SIZE << CMD5_Q_SHIFT);
779 		cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD5_Q_SHIFT;
780 
781 		cmd_q->qdma_tail = cmd_q->qbase_dma;
782 		dma_addr_lo = low_address(cmd_q->qdma_tail);
783 		iowrite32((u32)dma_addr_lo, cmd_q->reg_tail_lo);
784 		iowrite32((u32)dma_addr_lo, cmd_q->reg_head_lo);
785 
786 		dma_addr_hi = high_address(cmd_q->qdma_tail);
787 		cmd_q->qcontrol |= (dma_addr_hi << 16);
788 		iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
789 
790 		/* Find the LSB regions accessible to the queue */
791 		ccp_find_lsb_regions(cmd_q, status);
792 		cmd_q->lsb = -1; /* Unassigned value */
793 	}
794 
795 	dev_dbg(dev, "Assigning LSBs...\n");
796 	ret = ccp_assign_lsbs(ccp);
797 	if (ret) {
798 		dev_err(dev, "Unable to assign LSBs (%d)\n", ret);
799 		goto e_irq;
800 	}
801 
802 	/* Optimization: pre-allocate LSB slots for each queue */
803 	for (i = 0; i < ccp->cmd_q_count; i++) {
804 		ccp->cmd_q[i].sb_key = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
805 		ccp->cmd_q[i].sb_ctx = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
806 	}
807 
808 	dev_dbg(dev, "Starting threads...\n");
809 	/* Create a kthread for each queue */
810 	for (i = 0; i < ccp->cmd_q_count; i++) {
811 		struct task_struct *kthread;
812 
813 		cmd_q = &ccp->cmd_q[i];
814 
815 		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
816 					 "%s-q%u", ccp->name, cmd_q->id);
817 		if (IS_ERR(kthread)) {
818 			dev_err(dev, "error creating queue thread (%ld)\n",
819 				PTR_ERR(kthread));
820 			ret = PTR_ERR(kthread);
821 			goto e_kthread;
822 		}
823 
824 		cmd_q->kthread = kthread;
825 		wake_up_process(kthread);
826 	}
827 
828 	dev_dbg(dev, "Enabling interrupts...\n");
829 	/* Enable interrupts */
830 	for (i = 0; i < ccp->cmd_q_count; i++) {
831 		cmd_q = &ccp->cmd_q[i];
832 		iowrite32(ALL_INTERRUPTS, cmd_q->reg_int_enable);
833 	}
834 
835 	dev_dbg(dev, "Registering device...\n");
836 	/* Put this on the unit list to make it available */
837 	ccp_add_device(ccp);
838 
839 	ret = ccp_register_rng(ccp);
840 	if (ret)
841 		goto e_kthread;
842 
843 	/* Register the DMA engine support */
844 	ret = ccp_dmaengine_register(ccp);
845 	if (ret)
846 		goto e_hwrng;
847 
848 	return 0;
849 
850 e_hwrng:
851 	ccp_unregister_rng(ccp);
852 
853 e_kthread:
854 	for (i = 0; i < ccp->cmd_q_count; i++)
855 		if (ccp->cmd_q[i].kthread)
856 			kthread_stop(ccp->cmd_q[i].kthread);
857 
858 e_irq:
859 	ccp->free_irq(ccp);
860 
861 e_pool:
862 	for (i = 0; i < ccp->cmd_q_count; i++)
863 		dma_pool_destroy(ccp->cmd_q[i].dma_pool);
864 
865 	return ret;
866 }
867 
868 static void ccp5_destroy(struct ccp_device *ccp)
869 {
870 	struct device *dev = ccp->dev;
871 	struct ccp_cmd_queue *cmd_q;
872 	struct ccp_cmd *cmd;
873 	unsigned int i;
874 
875 	/* Unregister the DMA engine */
876 	ccp_dmaengine_unregister(ccp);
877 
878 	/* Unregister the RNG */
879 	ccp_unregister_rng(ccp);
880 
881 	/* Remove this device from the list of available units first */
882 	ccp_del_device(ccp);
883 
884 	/* Disable and clear interrupts */
885 	for (i = 0; i < ccp->cmd_q_count; i++) {
886 		cmd_q = &ccp->cmd_q[i];
887 
888 		/* Turn off the run bit */
889 		iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control);
890 
891 		/* Disable the interrupts */
892 		iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
893 
894 		/* Clear the interrupt status */
895 		iowrite32(0x00, cmd_q->reg_int_enable);
896 		ioread32(cmd_q->reg_int_status);
897 		ioread32(cmd_q->reg_status);
898 	}
899 
900 	/* Stop the queue kthreads */
901 	for (i = 0; i < ccp->cmd_q_count; i++)
902 		if (ccp->cmd_q[i].kthread)
903 			kthread_stop(ccp->cmd_q[i].kthread);
904 
905 	ccp->free_irq(ccp);
906 
907 	for (i = 0; i < ccp->cmd_q_count; i++) {
908 		cmd_q = &ccp->cmd_q[i];
909 		dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
910 				  cmd_q->qbase_dma);
911 	}
912 
913 	/* Flush the cmd and backlog queue */
914 	while (!list_empty(&ccp->cmd)) {
915 		/* Invoke the callback directly with an error code */
916 		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
917 		list_del(&cmd->entry);
918 		cmd->callback(cmd->data, -ENODEV);
919 	}
920 	while (!list_empty(&ccp->backlog)) {
921 		/* Invoke the callback directly with an error code */
922 		cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
923 		list_del(&cmd->entry);
924 		cmd->callback(cmd->data, -ENODEV);
925 	}
926 }
927 
928 static irqreturn_t ccp5_irq_handler(int irq, void *data)
929 {
930 	struct device *dev = data;
931 	struct ccp_device *ccp = dev_get_drvdata(dev);
932 	u32 status;
933 	unsigned int i;
934 
935 	for (i = 0; i < ccp->cmd_q_count; i++) {
936 		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
937 
938 		status = ioread32(cmd_q->reg_interrupt_status);
939 
940 		if (status) {
941 			cmd_q->int_status = status;
942 			cmd_q->q_status = ioread32(cmd_q->reg_status);
943 			cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
944 
945 			/* On error, only save the first error value */
946 			if ((status & INT_ERROR) && !cmd_q->cmd_error)
947 				cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
948 
949 			cmd_q->int_rcvd = 1;
950 
951 			/* Acknowledge the interrupt and wake the kthread */
952 			iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
953 			wake_up_interruptible(&cmd_q->int_queue);
954 		}
955 	}
956 
957 	return IRQ_HANDLED;
958 }
959 
960 static void ccp5_config(struct ccp_device *ccp)
961 {
962 	/* Public side */
963 	iowrite32(0x0, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET);
964 }
965 
966 static void ccp5other_config(struct ccp_device *ccp)
967 {
968 	int i;
969 	u32 rnd;
970 
971 	/* We own all of the queues on the NTB CCP */
972 
973 	iowrite32(0x00012D57, ccp->io_regs + CMD5_TRNG_CTL_OFFSET);
974 	iowrite32(0x00000003, ccp->io_regs + CMD5_CONFIG_0_OFFSET);
975 	for (i = 0; i < 12; i++) {
976 		rnd = ioread32(ccp->io_regs + TRNG_OUT_REG);
977 		iowrite32(rnd, ccp->io_regs + CMD5_AES_MASK_OFFSET);
978 	}
979 
980 	iowrite32(0x0000001F, ccp->io_regs + CMD5_QUEUE_MASK_OFFSET);
981 	iowrite32(0x00005B6D, ccp->io_regs + CMD5_QUEUE_PRIO_OFFSET);
982 	iowrite32(0x00000000, ccp->io_regs + CMD5_CMD_TIMEOUT_OFFSET);
983 
984 	iowrite32(0x3FFFFFFF, ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
985 	iowrite32(0x000003FF, ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
986 
987 	iowrite32(0x00108823, ccp->io_regs + CMD5_CLK_GATE_CTL_OFFSET);
988 
989 	ccp5_config(ccp);
990 }
991 
992 /* Version 5 adds some function, but is essentially the same as v5 */
993 static const struct ccp_actions ccp5_actions = {
994 	.aes = ccp5_perform_aes,
995 	.xts_aes = ccp5_perform_xts_aes,
996 	.sha = ccp5_perform_sha,
997 	.rsa = ccp5_perform_rsa,
998 	.passthru = ccp5_perform_passthru,
999 	.ecc = ccp5_perform_ecc,
1000 	.sballoc = ccp_lsb_alloc,
1001 	.sbfree = ccp_lsb_free,
1002 	.init = ccp5_init,
1003 	.destroy = ccp5_destroy,
1004 	.get_free_slots = ccp5_get_free_slots,
1005 	.irqhandler = ccp5_irq_handler,
1006 };
1007 
1008 const struct ccp_vdata ccpv5a = {
1009 	.version = CCP_VERSION(5, 0),
1010 	.setup = ccp5_config,
1011 	.perform = &ccp5_actions,
1012 	.bar = 2,
1013 	.offset = 0x0,
1014 };
1015 
1016 const struct ccp_vdata ccpv5b = {
1017 	.version = CCP_VERSION(5, 0),
1018 	.setup = ccp5other_config,
1019 	.perform = &ccp5_actions,
1020 	.bar = 2,
1021 	.offset = 0x0,
1022 };
1023