xref: /linux/drivers/crypto/ccp/ccp-dev-v5.c (revision 32786fdc9506aeba98278c1844d4bfb766863832)
1 /*
2  * AMD Cryptographic Coprocessor (CCP) driver
3  *
4  * Copyright (C) 2016 Advanced Micro Devices, Inc.
5  *
6  * Author: Gary R Hook <gary.hook@amd.com>
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License version 2 as
10  * published by the Free Software Foundation.
11  */
12 
13 #include <linux/module.h>
14 #include <linux/kernel.h>
15 #include <linux/pci.h>
16 #include <linux/kthread.h>
17 #include <linux/dma-mapping.h>
18 #include <linux/interrupt.h>
19 #include <linux/compiler.h>
20 #include <linux/ccp.h>
21 
22 #include "ccp-dev.h"
23 
24 /* Allocate the requested number of contiguous LSB slots
25  * from the LSB bitmap. Look in the private range for this
26  * queue first; failing that, check the public area.
27  * If no space is available, wait around.
28  * Return: first slot number
29  */
30 static u32 ccp_lsb_alloc(struct ccp_cmd_queue *cmd_q, unsigned int count)
31 {
32 	struct ccp_device *ccp;
33 	int start;
34 
35 	/* First look at the map for the queue */
36 	if (cmd_q->lsb >= 0) {
37 		start = (u32)bitmap_find_next_zero_area(cmd_q->lsbmap,
38 							LSB_SIZE,
39 							0, count, 0);
40 		if (start < LSB_SIZE) {
41 			bitmap_set(cmd_q->lsbmap, start, count);
42 			return start + cmd_q->lsb * LSB_SIZE;
43 		}
44 	}
45 
46 	/* No joy; try to get an entry from the shared blocks */
47 	ccp = cmd_q->ccp;
48 	for (;;) {
49 		mutex_lock(&ccp->sb_mutex);
50 
51 		start = (u32)bitmap_find_next_zero_area(ccp->lsbmap,
52 							MAX_LSB_CNT * LSB_SIZE,
53 							0,
54 							count, 0);
55 		if (start <= MAX_LSB_CNT * LSB_SIZE) {
56 			bitmap_set(ccp->lsbmap, start, count);
57 
58 			mutex_unlock(&ccp->sb_mutex);
59 			return start;
60 		}
61 
62 		ccp->sb_avail = 0;
63 
64 		mutex_unlock(&ccp->sb_mutex);
65 
66 		/* Wait for KSB entries to become available */
67 		if (wait_event_interruptible(ccp->sb_queue, ccp->sb_avail))
68 			return 0;
69 	}
70 }
71 
72 /* Free a number of LSB slots from the bitmap, starting at
73  * the indicated starting slot number.
74  */
75 static void ccp_lsb_free(struct ccp_cmd_queue *cmd_q, unsigned int start,
76 			 unsigned int count)
77 {
78 	if (!start)
79 		return;
80 
81 	if (cmd_q->lsb == start) {
82 		/* An entry from the private LSB */
83 		bitmap_clear(cmd_q->lsbmap, start, count);
84 	} else {
85 		/* From the shared LSBs */
86 		struct ccp_device *ccp = cmd_q->ccp;
87 
88 		mutex_lock(&ccp->sb_mutex);
89 		bitmap_clear(ccp->lsbmap, start, count);
90 		ccp->sb_avail = 1;
91 		mutex_unlock(&ccp->sb_mutex);
92 		wake_up_interruptible_all(&ccp->sb_queue);
93 	}
94 }
95 
96 /* CCP version 5: Union to define the function field (cmd_reg1/dword0) */
97 union ccp_function {
98 	struct {
99 		u16 size:7;
100 		u16 encrypt:1;
101 		u16 mode:5;
102 		u16 type:2;
103 	} aes;
104 	struct {
105 		u16 size:7;
106 		u16 encrypt:1;
107 		u16 rsvd:5;
108 		u16 type:2;
109 	} aes_xts;
110 	struct {
111 		u16 rsvd1:10;
112 		u16 type:4;
113 		u16 rsvd2:1;
114 	} sha;
115 	struct {
116 		u16 mode:3;
117 		u16 size:12;
118 	} rsa;
119 	struct {
120 		u16 byteswap:2;
121 		u16 bitwise:3;
122 		u16 reflect:2;
123 		u16 rsvd:8;
124 	} pt;
125 	struct  {
126 		u16 rsvd:13;
127 	} zlib;
128 	struct {
129 		u16 size:10;
130 		u16 type:2;
131 		u16 mode:3;
132 	} ecc;
133 	u16 raw;
134 };
135 
/* Accessors for the per-engine views of union ccp_function.
 * Each expands to an lvalue, so it can be read or assigned.
 */

/* AES */
#define CCP_AES_SIZE(p)		((p)->aes.size)
#define CCP_AES_ENCRYPT(p)	((p)->aes.encrypt)
#define CCP_AES_MODE(p)		((p)->aes.mode)
#define CCP_AES_TYPE(p)		((p)->aes.type)

/* XTS-AES */
#define CCP_XTS_SIZE(p)		((p)->aes_xts.size)
#define CCP_XTS_ENCRYPT(p)	((p)->aes_xts.encrypt)

/* SHA */
#define CCP_SHA_TYPE(p)		((p)->sha.type)

/* RSA */
#define CCP_RSA_SIZE(p)		((p)->rsa.size)

/* Pass-through */
#define CCP_PT_BYTESWAP(p)	((p)->pt.byteswap)
#define CCP_PT_BITWISE(p)	((p)->pt.bitwise)

/* ECC */
#define CCP_ECC_MODE(p)		((p)->ecc.mode)
/* NOTE(review): the ecc view of union ccp_function has no member named
 * "one", so any use of this macro will not compile - confirm the
 * intended field before using it.
 */
#define CCP_ECC_AFFINE(p)	((p)->ecc.one)
148 
/* Accessors for the fields of a version 5 command descriptor
 * (struct ccp5_desc), grouped by descriptor word. Every macro expands
 * to an lvalue and takes a pointer to the descriptor.
 */

/* Word 0: control flags, function and engine */
#define CCP5_CMD_DW0(p)		((p)->dw0)
#define CCP5_CMD_SOC(p)		((p)->dw0.soc)
#define CCP5_CMD_IOC(p)		((p)->dw0.ioc)
#define CCP5_CMD_INIT(p)	((p)->dw0.init)
#define CCP5_CMD_EOM(p)		((p)->dw0.eom)
#define CCP5_CMD_FUNCTION(p)	((p)->dw0.function)
#define CCP5_CMD_ENGINE(p)	((p)->dw0.engine)
#define CCP5_CMD_PROT(p)	((p)->dw0.prot)

/* Word 1: length */
#define CCP5_CMD_DW1(p)		((p)->length)
#define CCP5_CMD_LEN(p)		((p)->length)

/* Word 2: low part of the source address */
#define CCP5_CMD_DW2(p)		((p)->src_lo)
#define CCP5_CMD_SRC_LO(p)	((p)->src_lo)

/* Word 3: high part of the source address, memory type, LSB context */
#define CCP5_CMD_DW3(p)		((p)->dw3)
#define CCP5_CMD_SRC_MEM(p)	(CCP5_CMD_DW3(p).src_mem)
#define CCP5_CMD_SRC_HI(p)	(CCP5_CMD_DW3(p).src_hi)
#define CCP5_CMD_LSB_ID(p)	(CCP5_CMD_DW3(p).lsb_cxt_id)
#define CCP5_CMD_FIX_SRC(p)	(CCP5_CMD_DW3(p).fixed)

/* Words 4/5: destination address, or the SHA message length */
#define CCP5_CMD_DW4(p)		((p)->dw4)
#define CCP5_CMD_DST_LO(p)	((p)->dw4.dst_lo)
#define CCP5_CMD_DW5(p)		((p)->dw5.fields.dst_hi)
#define CCP5_CMD_DST_HI(p)	((p)->dw5.fields.dst_hi)
#define CCP5_CMD_DST_MEM(p)	((p)->dw5.fields.dst_mem)
#define CCP5_CMD_FIX_DST(p)	((p)->dw5.fields.fixed)
#define CCP5_CMD_SHA_LO(p)	(CCP5_CMD_DW4(p).sha_len_lo)
#define CCP5_CMD_SHA_HI(p)	((p)->dw5.sha_len_hi)

/* Words 6/7: key address and key memory type */
#define CCP5_CMD_DW6(p)		((p)->key_lo)
#define CCP5_CMD_KEY_LO(p)	((p)->key_lo)
#define CCP5_CMD_DW7(p)		((p)->dw7)
#define CCP5_CMD_KEY_HI(p)	(CCP5_CMD_DW7(p).key_hi)
#define CCP5_CMD_KEY_MEM(p)	(CCP5_CMD_DW7(p).key_mem)
190 
191 static inline u32 low_address(unsigned long addr)
192 {
193 	return (u64)addr & 0x0ffffffff;
194 }
195 
196 static inline u32 high_address(unsigned long addr)
197 {
198 	return ((u64)addr >> 32) & 0x00000ffff;
199 }
200 
201 static unsigned int ccp5_get_free_slots(struct ccp_cmd_queue *cmd_q)
202 {
203 	unsigned int head_idx, n;
204 	u32 head_lo, queue_start;
205 
206 	queue_start = low_address(cmd_q->qdma_tail);
207 	head_lo = ioread32(cmd_q->reg_head_lo);
208 	head_idx = (head_lo - queue_start) / sizeof(struct ccp5_desc);
209 
210 	n = head_idx + COMMANDS_PER_QUEUE - cmd_q->qidx - 1;
211 
212 	return n % COMMANDS_PER_QUEUE; /* Always one unused spot */
213 }
214 
215 static int ccp5_do_cmd(struct ccp5_desc *desc,
216 		       struct ccp_cmd_queue *cmd_q)
217 {
218 	u32 *mP;
219 	__le32 *dP;
220 	u32 tail;
221 	int	i;
222 	int ret = 0;
223 
224 	if (CCP5_CMD_SOC(desc)) {
225 		CCP5_CMD_IOC(desc) = 1;
226 		CCP5_CMD_SOC(desc) = 0;
227 	}
228 	mutex_lock(&cmd_q->q_mutex);
229 
230 	mP = (u32 *) &cmd_q->qbase[cmd_q->qidx];
231 	dP = (__le32 *) desc;
232 	for (i = 0; i < 8; i++)
233 		mP[i] = cpu_to_le32(dP[i]); /* handle endianness */
234 
235 	cmd_q->qidx = (cmd_q->qidx + 1) % COMMANDS_PER_QUEUE;
236 
237 	/* The data used by this command must be flushed to memory */
238 	wmb();
239 
240 	/* Write the new tail address back to the queue register */
241 	tail = low_address(cmd_q->qdma_tail + cmd_q->qidx * Q_DESC_SIZE);
242 	iowrite32(tail, cmd_q->reg_tail_lo);
243 
244 	/* Turn the queue back on using our cached control register */
245 	iowrite32(cmd_q->qcontrol | CMD5_Q_RUN, cmd_q->reg_control);
246 	mutex_unlock(&cmd_q->q_mutex);
247 
248 	if (CCP5_CMD_IOC(desc)) {
249 		/* Wait for the job to complete */
250 		ret = wait_event_interruptible(cmd_q->int_queue,
251 					       cmd_q->int_rcvd);
252 		if (ret || cmd_q->cmd_error) {
253 			if (cmd_q->cmd_error)
254 				ccp_log_error(cmd_q->ccp,
255 					      cmd_q->cmd_error);
256 			/* A version 5 device doesn't use Job IDs... */
257 			if (!ret)
258 				ret = -EIO;
259 		}
260 		cmd_q->int_rcvd = 0;
261 	}
262 
263 	return 0;
264 }
265 
266 static int ccp5_perform_aes(struct ccp_op *op)
267 {
268 	struct ccp5_desc desc;
269 	union ccp_function function;
270 	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
271 
272 	/* Zero out all the fields of the command desc */
273 	memset(&desc, 0, Q_DESC_SIZE);
274 
275 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_AES;
276 
277 	CCP5_CMD_SOC(&desc) = op->soc;
278 	CCP5_CMD_IOC(&desc) = 1;
279 	CCP5_CMD_INIT(&desc) = op->init;
280 	CCP5_CMD_EOM(&desc) = op->eom;
281 	CCP5_CMD_PROT(&desc) = 0;
282 
283 	function.raw = 0;
284 	CCP_AES_ENCRYPT(&function) = op->u.aes.action;
285 	CCP_AES_MODE(&function) = op->u.aes.mode;
286 	CCP_AES_TYPE(&function) = op->u.aes.type;
287 	if (op->u.aes.mode == CCP_AES_MODE_CFB)
288 		CCP_AES_SIZE(&function) = 0x7f;
289 
290 	CCP5_CMD_FUNCTION(&desc) = function.raw;
291 
292 	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
293 
294 	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
295 	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
296 	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
297 
298 	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
299 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
300 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
301 
302 	CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
303 	CCP5_CMD_KEY_HI(&desc) = 0;
304 	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
305 	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
306 
307 	return ccp5_do_cmd(&desc, op->cmd_q);
308 }
309 
310 static int ccp5_perform_xts_aes(struct ccp_op *op)
311 {
312 	struct ccp5_desc desc;
313 	union ccp_function function;
314 	u32 key_addr = op->sb_key * LSB_ITEM_SIZE;
315 
316 	/* Zero out all the fields of the command desc */
317 	memset(&desc, 0, Q_DESC_SIZE);
318 
319 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_XTS_AES_128;
320 
321 	CCP5_CMD_SOC(&desc) = op->soc;
322 	CCP5_CMD_IOC(&desc) = 1;
323 	CCP5_CMD_INIT(&desc) = op->init;
324 	CCP5_CMD_EOM(&desc) = op->eom;
325 	CCP5_CMD_PROT(&desc) = 0;
326 
327 	function.raw = 0;
328 	CCP_XTS_ENCRYPT(&function) = op->u.xts.action;
329 	CCP_XTS_SIZE(&function) = op->u.xts.unit_size;
330 	CCP5_CMD_FUNCTION(&desc) = function.raw;
331 
332 	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
333 
334 	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
335 	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
336 	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
337 
338 	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
339 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
340 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
341 
342 	CCP5_CMD_KEY_LO(&desc) = lower_32_bits(key_addr);
343 	CCP5_CMD_KEY_HI(&desc) =  0;
344 	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
345 	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
346 
347 	return ccp5_do_cmd(&desc, op->cmd_q);
348 }
349 
350 static int ccp5_perform_sha(struct ccp_op *op)
351 {
352 	struct ccp5_desc desc;
353 	union ccp_function function;
354 
355 	/* Zero out all the fields of the command desc */
356 	memset(&desc, 0, Q_DESC_SIZE);
357 
358 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_SHA;
359 
360 	CCP5_CMD_SOC(&desc) = op->soc;
361 	CCP5_CMD_IOC(&desc) = 1;
362 	CCP5_CMD_INIT(&desc) = 1;
363 	CCP5_CMD_EOM(&desc) = op->eom;
364 	CCP5_CMD_PROT(&desc) = 0;
365 
366 	function.raw = 0;
367 	CCP_SHA_TYPE(&function) = op->u.sha.type;
368 	CCP5_CMD_FUNCTION(&desc) = function.raw;
369 
370 	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
371 
372 	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
373 	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
374 	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
375 
376 	CCP5_CMD_LSB_ID(&desc) = op->sb_ctx;
377 
378 	if (op->eom) {
379 		CCP5_CMD_SHA_LO(&desc) = lower_32_bits(op->u.sha.msg_bits);
380 		CCP5_CMD_SHA_HI(&desc) = upper_32_bits(op->u.sha.msg_bits);
381 	} else {
382 		CCP5_CMD_SHA_LO(&desc) = 0;
383 		CCP5_CMD_SHA_HI(&desc) = 0;
384 	}
385 
386 	return ccp5_do_cmd(&desc, op->cmd_q);
387 }
388 
389 static int ccp5_perform_rsa(struct ccp_op *op)
390 {
391 	struct ccp5_desc desc;
392 	union ccp_function function;
393 
394 	/* Zero out all the fields of the command desc */
395 	memset(&desc, 0, Q_DESC_SIZE);
396 
397 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_RSA;
398 
399 	CCP5_CMD_SOC(&desc) = op->soc;
400 	CCP5_CMD_IOC(&desc) = 1;
401 	CCP5_CMD_INIT(&desc) = 0;
402 	CCP5_CMD_EOM(&desc) = 1;
403 	CCP5_CMD_PROT(&desc) = 0;
404 
405 	function.raw = 0;
406 	CCP_RSA_SIZE(&function) = op->u.rsa.mod_size >> 3;
407 	CCP5_CMD_FUNCTION(&desc) = function.raw;
408 
409 	CCP5_CMD_LEN(&desc) = op->u.rsa.input_len;
410 
411 	/* Source is from external memory */
412 	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
413 	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
414 	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
415 
416 	/* Destination is in external memory */
417 	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
418 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
419 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
420 
421 	/* Exponent is in LSB memory */
422 	CCP5_CMD_KEY_LO(&desc) = op->sb_key * LSB_ITEM_SIZE;
423 	CCP5_CMD_KEY_HI(&desc) = 0;
424 	CCP5_CMD_KEY_MEM(&desc) = CCP_MEMTYPE_SB;
425 
426 	return ccp5_do_cmd(&desc, op->cmd_q);
427 }
428 
429 static int ccp5_perform_passthru(struct ccp_op *op)
430 {
431 	struct ccp5_desc desc;
432 	union ccp_function function;
433 	struct ccp_dma_info *saddr = &op->src.u.dma;
434 	struct ccp_dma_info *daddr = &op->dst.u.dma;
435 
436 	memset(&desc, 0, Q_DESC_SIZE);
437 
438 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_PASSTHRU;
439 
440 	CCP5_CMD_SOC(&desc) = 0;
441 	CCP5_CMD_IOC(&desc) = 1;
442 	CCP5_CMD_INIT(&desc) = 0;
443 	CCP5_CMD_EOM(&desc) = op->eom;
444 	CCP5_CMD_PROT(&desc) = 0;
445 
446 	function.raw = 0;
447 	CCP_PT_BYTESWAP(&function) = op->u.passthru.byte_swap;
448 	CCP_PT_BITWISE(&function) = op->u.passthru.bit_mod;
449 	CCP5_CMD_FUNCTION(&desc) = function.raw;
450 
451 	/* Length of source data is always 256 bytes */
452 	if (op->src.type == CCP_MEMTYPE_SYSTEM)
453 		CCP5_CMD_LEN(&desc) = saddr->length;
454 	else
455 		CCP5_CMD_LEN(&desc) = daddr->length;
456 
457 	if (op->src.type == CCP_MEMTYPE_SYSTEM) {
458 		CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
459 		CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
460 		CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
461 
462 		if (op->u.passthru.bit_mod != CCP_PASSTHRU_BITWISE_NOOP)
463 			CCP5_CMD_LSB_ID(&desc) = op->sb_key;
464 	} else {
465 		u32 key_addr = op->src.u.sb * CCP_SB_BYTES;
466 
467 		CCP5_CMD_SRC_LO(&desc) = lower_32_bits(key_addr);
468 		CCP5_CMD_SRC_HI(&desc) = 0;
469 		CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SB;
470 	}
471 
472 	if (op->dst.type == CCP_MEMTYPE_SYSTEM) {
473 		CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
474 		CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
475 		CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
476 	} else {
477 		u32 key_addr = op->dst.u.sb * CCP_SB_BYTES;
478 
479 		CCP5_CMD_DST_LO(&desc) = lower_32_bits(key_addr);
480 		CCP5_CMD_DST_HI(&desc) = 0;
481 		CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SB;
482 	}
483 
484 	return ccp5_do_cmd(&desc, op->cmd_q);
485 }
486 
487 static int ccp5_perform_ecc(struct ccp_op *op)
488 {
489 	struct ccp5_desc desc;
490 	union ccp_function function;
491 
492 	/* Zero out all the fields of the command desc */
493 	memset(&desc, 0, Q_DESC_SIZE);
494 
495 	CCP5_CMD_ENGINE(&desc) = CCP_ENGINE_ECC;
496 
497 	CCP5_CMD_SOC(&desc) = 0;
498 	CCP5_CMD_IOC(&desc) = 1;
499 	CCP5_CMD_INIT(&desc) = 0;
500 	CCP5_CMD_EOM(&desc) = 1;
501 	CCP5_CMD_PROT(&desc) = 0;
502 
503 	function.raw = 0;
504 	function.ecc.mode = op->u.ecc.function;
505 	CCP5_CMD_FUNCTION(&desc) = function.raw;
506 
507 	CCP5_CMD_LEN(&desc) = op->src.u.dma.length;
508 
509 	CCP5_CMD_SRC_LO(&desc) = ccp_addr_lo(&op->src.u.dma);
510 	CCP5_CMD_SRC_HI(&desc) = ccp_addr_hi(&op->src.u.dma);
511 	CCP5_CMD_SRC_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
512 
513 	CCP5_CMD_DST_LO(&desc) = ccp_addr_lo(&op->dst.u.dma);
514 	CCP5_CMD_DST_HI(&desc) = ccp_addr_hi(&op->dst.u.dma);
515 	CCP5_CMD_DST_MEM(&desc) = CCP_MEMTYPE_SYSTEM;
516 
517 	return ccp5_do_cmd(&desc, op->cmd_q);
518 }
519 
520 static int ccp_find_lsb_regions(struct ccp_cmd_queue *cmd_q, u64 status)
521 {
522 	int q_mask = 1 << cmd_q->id;
523 	int queues = 0;
524 	int j;
525 
526 	/* Build a bit mask to know which LSBs this queue has access to.
527 	 * Don't bother with segment 0 as it has special privileges.
528 	 */
529 	for (j = 1; j < MAX_LSB_CNT; j++) {
530 		if (status & q_mask)
531 			bitmap_set(cmd_q->lsbmask, j, 1);
532 		status >>= LSB_REGION_WIDTH;
533 	}
534 	queues = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
535 	dev_info(cmd_q->ccp->dev, "Queue %d can access %d LSB regions\n",
536 		 cmd_q->id, queues);
537 
538 	return queues ? 0 : -EINVAL;
539 }
540 
541 
542 static int ccp_find_and_assign_lsb_to_q(struct ccp_device *ccp,
543 					int lsb_cnt, int n_lsbs,
544 					unsigned long *lsb_pub)
545 {
546 	DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
547 	int bitno;
548 	int qlsb_wgt;
549 	int i;
550 
551 	/* For each queue:
552 	 * If the count of potential LSBs available to a queue matches the
553 	 * ordinal given to us in lsb_cnt:
554 	 * Copy the mask of possible LSBs for this queue into "qlsb";
555 	 * For each bit in qlsb, see if the corresponding bit in the
556 	 * aggregation mask is set; if so, we have a match.
557 	 *     If we have a match, clear the bit in the aggregation to
558 	 *     mark it as no longer available.
559 	 *     If there is no match, clear the bit in qlsb and keep looking.
560 	 */
561 	for (i = 0; i < ccp->cmd_q_count; i++) {
562 		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
563 
564 		qlsb_wgt = bitmap_weight(cmd_q->lsbmask, MAX_LSB_CNT);
565 
566 		if (qlsb_wgt == lsb_cnt) {
567 			bitmap_copy(qlsb, cmd_q->lsbmask, MAX_LSB_CNT);
568 
569 			bitno = find_first_bit(qlsb, MAX_LSB_CNT);
570 			while (bitno < MAX_LSB_CNT) {
571 				if (test_bit(bitno, lsb_pub)) {
572 					/* We found an available LSB
573 					 * that this queue can access
574 					 */
575 					cmd_q->lsb = bitno;
576 					bitmap_clear(lsb_pub, bitno, 1);
577 					dev_info(ccp->dev,
578 						 "Queue %d gets LSB %d\n",
579 						 i, bitno);
580 					break;
581 				}
582 				bitmap_clear(qlsb, bitno, 1);
583 				bitno = find_first_bit(qlsb, MAX_LSB_CNT);
584 			}
585 			if (bitno >= MAX_LSB_CNT)
586 				return -EINVAL;
587 			n_lsbs--;
588 		}
589 	}
590 	return n_lsbs;
591 }
592 
593 /* For each queue, from the most- to least-constrained:
594  * find an LSB that can be assigned to the queue. If there are N queues that
595  * can only use M LSBs, where N > M, fail; otherwise, every queue will get a
596  * dedicated LSB. Remaining LSB regions become a shared resource.
597  * If we have fewer LSBs than queues, all LSB regions become shared resources.
598  */
599 static int ccp_assign_lsbs(struct ccp_device *ccp)
600 {
601 	DECLARE_BITMAP(lsb_pub, MAX_LSB_CNT);
602 	DECLARE_BITMAP(qlsb, MAX_LSB_CNT);
603 	int n_lsbs = 0;
604 	int bitno;
605 	int i, lsb_cnt;
606 	int rc = 0;
607 
608 	bitmap_zero(lsb_pub, MAX_LSB_CNT);
609 
610 	/* Create an aggregate bitmap to get a total count of available LSBs */
611 	for (i = 0; i < ccp->cmd_q_count; i++)
612 		bitmap_or(lsb_pub,
613 			  lsb_pub, ccp->cmd_q[i].lsbmask,
614 			  MAX_LSB_CNT);
615 
616 	n_lsbs = bitmap_weight(lsb_pub, MAX_LSB_CNT);
617 
618 	if (n_lsbs >= ccp->cmd_q_count) {
619 		/* We have enough LSBS to give every queue a private LSB.
620 		 * Brute force search to start with the queues that are more
621 		 * constrained in LSB choice. When an LSB is privately
622 		 * assigned, it is removed from the public mask.
623 		 * This is an ugly N squared algorithm with some optimization.
624 		 */
625 		for (lsb_cnt = 1;
626 		     n_lsbs && (lsb_cnt <= MAX_LSB_CNT);
627 		     lsb_cnt++) {
628 			rc = ccp_find_and_assign_lsb_to_q(ccp, lsb_cnt, n_lsbs,
629 							  lsb_pub);
630 			if (rc < 0)
631 				return -EINVAL;
632 			n_lsbs = rc;
633 		}
634 	}
635 
636 	rc = 0;
637 	/* What's left of the LSBs, according to the public mask, now become
638 	 * shared. Any zero bits in the lsb_pub mask represent an LSB region
639 	 * that can't be used as a shared resource, so mark the LSB slots for
640 	 * them as "in use".
641 	 */
642 	bitmap_copy(qlsb, lsb_pub, MAX_LSB_CNT);
643 
644 	bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
645 	while (bitno < MAX_LSB_CNT) {
646 		bitmap_set(ccp->lsbmap, bitno * LSB_SIZE, LSB_SIZE);
647 		bitmap_set(qlsb, bitno, 1);
648 		bitno = find_first_zero_bit(qlsb, MAX_LSB_CNT);
649 	}
650 
651 	return rc;
652 }
653 
654 static int ccp5_init(struct ccp_device *ccp)
655 {
656 	struct device *dev = ccp->dev;
657 	struct ccp_cmd_queue *cmd_q;
658 	struct dma_pool *dma_pool;
659 	char dma_pool_name[MAX_DMAPOOL_NAME_LEN];
660 	unsigned int qmr, qim, i;
661 	u64 status;
662 	u32 status_lo, status_hi;
663 	int ret;
664 
665 	/* Find available queues */
666 	qim = 0;
667 	qmr = ioread32(ccp->io_regs + Q_MASK_REG);
668 	for (i = 0; i < MAX_HW_QUEUES; i++) {
669 
670 		if (!(qmr & (1 << i)))
671 			continue;
672 
673 		/* Allocate a dma pool for this queue */
674 		snprintf(dma_pool_name, sizeof(dma_pool_name), "%s_q%d",
675 			 ccp->name, i);
676 		dma_pool = dma_pool_create(dma_pool_name, dev,
677 					   CCP_DMAPOOL_MAX_SIZE,
678 					   CCP_DMAPOOL_ALIGN, 0);
679 		if (!dma_pool) {
680 			dev_err(dev, "unable to allocate dma pool\n");
681 			ret = -ENOMEM;
682 		}
683 
684 		cmd_q = &ccp->cmd_q[ccp->cmd_q_count];
685 		ccp->cmd_q_count++;
686 
687 		cmd_q->ccp = ccp;
688 		cmd_q->id = i;
689 		cmd_q->dma_pool = dma_pool;
690 		mutex_init(&cmd_q->q_mutex);
691 
692 		/* Page alignment satisfies our needs for N <= 128 */
693 		BUILD_BUG_ON(COMMANDS_PER_QUEUE > 128);
694 		cmd_q->qsize = Q_SIZE(Q_DESC_SIZE);
695 		cmd_q->qbase = dma_zalloc_coherent(dev, cmd_q->qsize,
696 						   &cmd_q->qbase_dma,
697 						   GFP_KERNEL);
698 		if (!cmd_q->qbase) {
699 			dev_err(dev, "unable to allocate command queue\n");
700 			ret = -ENOMEM;
701 			goto e_pool;
702 		}
703 
704 		cmd_q->qidx = 0;
705 		/* Preset some register values and masks that are queue
706 		 * number dependent
707 		 */
708 		cmd_q->reg_control = ccp->io_regs +
709 				     CMD5_Q_STATUS_INCR * (i + 1);
710 		cmd_q->reg_tail_lo = cmd_q->reg_control + CMD5_Q_TAIL_LO_BASE;
711 		cmd_q->reg_head_lo = cmd_q->reg_control + CMD5_Q_HEAD_LO_BASE;
712 		cmd_q->reg_int_enable = cmd_q->reg_control +
713 					CMD5_Q_INT_ENABLE_BASE;
714 		cmd_q->reg_interrupt_status = cmd_q->reg_control +
715 					      CMD5_Q_INTERRUPT_STATUS_BASE;
716 		cmd_q->reg_status = cmd_q->reg_control + CMD5_Q_STATUS_BASE;
717 		cmd_q->reg_int_status = cmd_q->reg_control +
718 					CMD5_Q_INT_STATUS_BASE;
719 		cmd_q->reg_dma_status = cmd_q->reg_control +
720 					CMD5_Q_DMA_STATUS_BASE;
721 		cmd_q->reg_dma_read_status = cmd_q->reg_control +
722 					     CMD5_Q_DMA_READ_STATUS_BASE;
723 		cmd_q->reg_dma_write_status = cmd_q->reg_control +
724 					      CMD5_Q_DMA_WRITE_STATUS_BASE;
725 
726 		init_waitqueue_head(&cmd_q->int_queue);
727 
728 		dev_dbg(dev, "queue #%u available\n", i);
729 	}
730 	if (ccp->cmd_q_count == 0) {
731 		dev_notice(dev, "no command queues available\n");
732 		ret = -EIO;
733 		goto e_pool;
734 	}
735 	dev_notice(dev, "%u command queues available\n", ccp->cmd_q_count);
736 
737 	/* Turn off the queues and disable interrupts until ready */
738 	for (i = 0; i < ccp->cmd_q_count; i++) {
739 		cmd_q = &ccp->cmd_q[i];
740 
741 		cmd_q->qcontrol = 0; /* Start with nothing */
742 		iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
743 
744 		/* Disable the interrupts */
745 		iowrite32(0x00, cmd_q->reg_int_enable);
746 		ioread32(cmd_q->reg_int_status);
747 		ioread32(cmd_q->reg_status);
748 
749 		/* Clear the interrupts */
750 		iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
751 	}
752 
753 	dev_dbg(dev, "Requesting an IRQ...\n");
754 	/* Request an irq */
755 	ret = ccp->get_irq(ccp);
756 	if (ret) {
757 		dev_err(dev, "unable to allocate an IRQ\n");
758 		goto e_pool;
759 	}
760 
761 	dev_dbg(dev, "Loading LSB map...\n");
762 	/* Copy the private LSB mask to the public registers */
763 	status_lo = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
764 	status_hi = ioread32(ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
765 	iowrite32(status_lo, ccp->io_regs + LSB_PUBLIC_MASK_LO_OFFSET);
766 	iowrite32(status_hi, ccp->io_regs + LSB_PUBLIC_MASK_HI_OFFSET);
767 	status = ((u64)status_hi<<30) | (u64)status_lo;
768 
769 	dev_dbg(dev, "Configuring virtual queues...\n");
770 	/* Configure size of each virtual queue accessible to host */
771 	for (i = 0; i < ccp->cmd_q_count; i++) {
772 		u32 dma_addr_lo;
773 		u32 dma_addr_hi;
774 
775 		cmd_q = &ccp->cmd_q[i];
776 
777 		cmd_q->qcontrol &= ~(CMD5_Q_SIZE << CMD5_Q_SHIFT);
778 		cmd_q->qcontrol |= QUEUE_SIZE_VAL << CMD5_Q_SHIFT;
779 
780 		cmd_q->qdma_tail = cmd_q->qbase_dma;
781 		dma_addr_lo = low_address(cmd_q->qdma_tail);
782 		iowrite32((u32)dma_addr_lo, cmd_q->reg_tail_lo);
783 		iowrite32((u32)dma_addr_lo, cmd_q->reg_head_lo);
784 
785 		dma_addr_hi = high_address(cmd_q->qdma_tail);
786 		cmd_q->qcontrol |= (dma_addr_hi << 16);
787 		iowrite32(cmd_q->qcontrol, cmd_q->reg_control);
788 
789 		/* Find the LSB regions accessible to the queue */
790 		ccp_find_lsb_regions(cmd_q, status);
791 		cmd_q->lsb = -1; /* Unassigned value */
792 	}
793 
794 	dev_dbg(dev, "Assigning LSBs...\n");
795 	ret = ccp_assign_lsbs(ccp);
796 	if (ret) {
797 		dev_err(dev, "Unable to assign LSBs (%d)\n", ret);
798 		goto e_irq;
799 	}
800 
801 	/* Optimization: pre-allocate LSB slots for each queue */
802 	for (i = 0; i < ccp->cmd_q_count; i++) {
803 		ccp->cmd_q[i].sb_key = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
804 		ccp->cmd_q[i].sb_ctx = ccp_lsb_alloc(&ccp->cmd_q[i], 2);
805 	}
806 
807 	dev_dbg(dev, "Starting threads...\n");
808 	/* Create a kthread for each queue */
809 	for (i = 0; i < ccp->cmd_q_count; i++) {
810 		struct task_struct *kthread;
811 
812 		cmd_q = &ccp->cmd_q[i];
813 
814 		kthread = kthread_create(ccp_cmd_queue_thread, cmd_q,
815 					 "%s-q%u", ccp->name, cmd_q->id);
816 		if (IS_ERR(kthread)) {
817 			dev_err(dev, "error creating queue thread (%ld)\n",
818 				PTR_ERR(kthread));
819 			ret = PTR_ERR(kthread);
820 			goto e_kthread;
821 		}
822 
823 		cmd_q->kthread = kthread;
824 		wake_up_process(kthread);
825 	}
826 
827 	dev_dbg(dev, "Enabling interrupts...\n");
828 	/* Enable interrupts */
829 	for (i = 0; i < ccp->cmd_q_count; i++) {
830 		cmd_q = &ccp->cmd_q[i];
831 		iowrite32(ALL_INTERRUPTS, cmd_q->reg_int_enable);
832 	}
833 
834 	dev_dbg(dev, "Registering device...\n");
835 	/* Put this on the unit list to make it available */
836 	ccp_add_device(ccp);
837 
838 	ret = ccp_register_rng(ccp);
839 	if (ret)
840 		goto e_kthread;
841 
842 	/* Register the DMA engine support */
843 	ret = ccp_dmaengine_register(ccp);
844 	if (ret)
845 		goto e_hwrng;
846 
847 	return 0;
848 
849 e_hwrng:
850 	ccp_unregister_rng(ccp);
851 
852 e_kthread:
853 	for (i = 0; i < ccp->cmd_q_count; i++)
854 		if (ccp->cmd_q[i].kthread)
855 			kthread_stop(ccp->cmd_q[i].kthread);
856 
857 e_irq:
858 	ccp->free_irq(ccp);
859 
860 e_pool:
861 	for (i = 0; i < ccp->cmd_q_count; i++)
862 		dma_pool_destroy(ccp->cmd_q[i].dma_pool);
863 
864 	return ret;
865 }
866 
867 static void ccp5_destroy(struct ccp_device *ccp)
868 {
869 	struct device *dev = ccp->dev;
870 	struct ccp_cmd_queue *cmd_q;
871 	struct ccp_cmd *cmd;
872 	unsigned int i;
873 
874 	/* Unregister the DMA engine */
875 	ccp_dmaengine_unregister(ccp);
876 
877 	/* Unregister the RNG */
878 	ccp_unregister_rng(ccp);
879 
880 	/* Remove this device from the list of available units first */
881 	ccp_del_device(ccp);
882 
883 	/* Disable and clear interrupts */
884 	for (i = 0; i < ccp->cmd_q_count; i++) {
885 		cmd_q = &ccp->cmd_q[i];
886 
887 		/* Turn off the run bit */
888 		iowrite32(cmd_q->qcontrol & ~CMD5_Q_RUN, cmd_q->reg_control);
889 
890 		/* Disable the interrupts */
891 		iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
892 
893 		/* Clear the interrupt status */
894 		iowrite32(0x00, cmd_q->reg_int_enable);
895 		ioread32(cmd_q->reg_int_status);
896 		ioread32(cmd_q->reg_status);
897 	}
898 
899 	/* Stop the queue kthreads */
900 	for (i = 0; i < ccp->cmd_q_count; i++)
901 		if (ccp->cmd_q[i].kthread)
902 			kthread_stop(ccp->cmd_q[i].kthread);
903 
904 	ccp->free_irq(ccp);
905 
906 	for (i = 0; i < ccp->cmd_q_count; i++) {
907 		cmd_q = &ccp->cmd_q[i];
908 		dma_free_coherent(dev, cmd_q->qsize, cmd_q->qbase,
909 				  cmd_q->qbase_dma);
910 	}
911 
912 	/* Flush the cmd and backlog queue */
913 	while (!list_empty(&ccp->cmd)) {
914 		/* Invoke the callback directly with an error code */
915 		cmd = list_first_entry(&ccp->cmd, struct ccp_cmd, entry);
916 		list_del(&cmd->entry);
917 		cmd->callback(cmd->data, -ENODEV);
918 	}
919 	while (!list_empty(&ccp->backlog)) {
920 		/* Invoke the callback directly with an error code */
921 		cmd = list_first_entry(&ccp->backlog, struct ccp_cmd, entry);
922 		list_del(&cmd->entry);
923 		cmd->callback(cmd->data, -ENODEV);
924 	}
925 }
926 
927 static irqreturn_t ccp5_irq_handler(int irq, void *data)
928 {
929 	struct device *dev = data;
930 	struct ccp_device *ccp = dev_get_drvdata(dev);
931 	u32 status;
932 	unsigned int i;
933 
934 	for (i = 0; i < ccp->cmd_q_count; i++) {
935 		struct ccp_cmd_queue *cmd_q = &ccp->cmd_q[i];
936 
937 		status = ioread32(cmd_q->reg_interrupt_status);
938 
939 		if (status) {
940 			cmd_q->int_status = status;
941 			cmd_q->q_status = ioread32(cmd_q->reg_status);
942 			cmd_q->q_int_status = ioread32(cmd_q->reg_int_status);
943 
944 			/* On error, only save the first error value */
945 			if ((status & INT_ERROR) && !cmd_q->cmd_error)
946 				cmd_q->cmd_error = CMD_Q_ERROR(cmd_q->q_status);
947 
948 			cmd_q->int_rcvd = 1;
949 
950 			/* Acknowledge the interrupt and wake the kthread */
951 			iowrite32(ALL_INTERRUPTS, cmd_q->reg_interrupt_status);
952 			wake_up_interruptible(&cmd_q->int_queue);
953 		}
954 	}
955 
956 	return IRQ_HANDLED;
957 }
958 
959 static void ccp5_config(struct ccp_device *ccp)
960 {
961 	/* Public side */
962 	iowrite32(0x00001249, ccp->io_regs + CMD5_REQID_CONFIG_OFFSET);
963 }
964 
965 static void ccp5other_config(struct ccp_device *ccp)
966 {
967 	int i;
968 	u32 rnd;
969 
970 	/* We own all of the queues on the NTB CCP */
971 
972 	iowrite32(0x00012D57, ccp->io_regs + CMD5_TRNG_CTL_OFFSET);
973 	iowrite32(0x00000003, ccp->io_regs + CMD5_CONFIG_0_OFFSET);
974 	for (i = 0; i < 12; i++) {
975 		rnd = ioread32(ccp->io_regs + TRNG_OUT_REG);
976 		iowrite32(rnd, ccp->io_regs + CMD5_AES_MASK_OFFSET);
977 	}
978 
979 	iowrite32(0x0000001F, ccp->io_regs + CMD5_QUEUE_MASK_OFFSET);
980 	iowrite32(0x00005B6D, ccp->io_regs + CMD5_QUEUE_PRIO_OFFSET);
981 	iowrite32(0x00000000, ccp->io_regs + CMD5_CMD_TIMEOUT_OFFSET);
982 
983 	iowrite32(0x3FFFFFFF, ccp->io_regs + LSB_PRIVATE_MASK_LO_OFFSET);
984 	iowrite32(0x000003FF, ccp->io_regs + LSB_PRIVATE_MASK_HI_OFFSET);
985 
986 	iowrite32(0x00108823, ccp->io_regs + CMD5_CLK_GATE_CTL_OFFSET);
987 
988 	ccp5_config(ccp);
989 }
990 
991 /* Version 5 adds some function, but is essentially the same as v5 */
992 static const struct ccp_actions ccp5_actions = {
993 	.aes = ccp5_perform_aes,
994 	.xts_aes = ccp5_perform_xts_aes,
995 	.sha = ccp5_perform_sha,
996 	.rsa = ccp5_perform_rsa,
997 	.passthru = ccp5_perform_passthru,
998 	.ecc = ccp5_perform_ecc,
999 	.sballoc = ccp_lsb_alloc,
1000 	.sbfree = ccp_lsb_free,
1001 	.init = ccp5_init,
1002 	.destroy = ccp5_destroy,
1003 	.get_free_slots = ccp5_get_free_slots,
1004 	.irqhandler = ccp5_irq_handler,
1005 };
1006 
1007 const struct ccp_vdata ccpv5a = {
1008 	.version = CCP_VERSION(5, 0),
1009 	.setup = ccp5_config,
1010 	.perform = &ccp5_actions,
1011 	.bar = 2,
1012 	.offset = 0x0,
1013 };
1014 
1015 const struct ccp_vdata ccpv5b = {
1016 	.version = CCP_VERSION(5, 0),
1017 	.setup = ccp5other_config,
1018 	.perform = &ccp5_actions,
1019 	.bar = 2,
1020 	.offset = 0x0,
1021 };
1022