xref: /linux/drivers/dma/amd/qdma/qdma.c (revision 2eff01ee2881becc9daaa0d53477ec202136b1f4)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3  * DMA driver for AMD Queue-based DMA Subsystem
4  *
5  * Copyright (C) 2023-2024, Advanced Micro Devices, Inc.
6  */
7 #include <linux/bitfield.h>
8 #include <linux/bitops.h>
9 #include <linux/dmaengine.h>
10 #include <linux/module.h>
11 #include <linux/mod_devicetable.h>
12 #include <linux/dma-map-ops.h>
13 #include <linux/platform_device.h>
14 #include <linux/platform_data/amd_qdma.h>
15 #include <linux/regmap.h>
16 
17 #include "qdma.h"
18 
/* Direction tag for log messages: "H2C" for DMA_MEM_TO_DEV queues, "C2H" otherwise */
#define CHAN_STR(q)		((q)->dir == DMA_MEM_TO_DEV ? "H2C" : "C2H")
/* Offset of register @r taken from device @d's register offset table */
#define QDMA_REG_OFF(d, r)	((d)->roffs[(r)].off)
21 
22 /* MMIO regmap config for all QDMA registers */
23 static const struct regmap_config qdma_regmap_config = {
24 	.reg_bits = 32,
25 	.val_bits = 32,
26 	.reg_stride = 4,
27 };
28 
29 static inline struct qdma_queue *to_qdma_queue(struct dma_chan *chan)
30 {
31 	return container_of(chan, struct qdma_queue, vchan.chan);
32 }
33 
34 static inline struct qdma_mm_vdesc *to_qdma_vdesc(struct virt_dma_desc *vdesc)
35 {
36 	return container_of(vdesc, struct qdma_mm_vdesc, vdesc);
37 }
38 
39 static inline u32 qdma_get_intr_ring_idx(struct qdma_device *qdev)
40 {
41 	u32 idx;
42 
43 	idx = qdev->qintr_rings[qdev->qintr_ring_idx++].ridx;
44 	qdev->qintr_ring_idx %= qdev->qintr_ring_num;
45 
46 	return idx;
47 }
48 
49 static u64 qdma_get_field(const struct qdma_device *qdev, const u32 *data,
50 			  enum qdma_reg_fields field)
51 {
52 	const struct qdma_reg_field *f = &qdev->rfields[field];
53 	u16 low_pos, hi_pos, low_bit, hi_bit;
54 	u64 value = 0, mask;
55 
56 	low_pos = f->lsb / BITS_PER_TYPE(*data);
57 	hi_pos = f->msb / BITS_PER_TYPE(*data);
58 
59 	if (low_pos == hi_pos) {
60 		low_bit = f->lsb % BITS_PER_TYPE(*data);
61 		hi_bit = f->msb % BITS_PER_TYPE(*data);
62 		mask = GENMASK(hi_bit, low_bit);
63 		value = (data[low_pos] & mask) >> low_bit;
64 	} else if (hi_pos == low_pos + 1) {
65 		low_bit = f->lsb % BITS_PER_TYPE(*data);
66 		hi_bit = low_bit + (f->msb - f->lsb);
67 		value = ((u64)data[hi_pos] << BITS_PER_TYPE(*data)) |
68 			data[low_pos];
69 		mask = GENMASK_ULL(hi_bit, low_bit);
70 		value = (value & mask) >> low_bit;
71 	} else {
72 		hi_bit = f->msb % BITS_PER_TYPE(*data);
73 		mask = GENMASK(hi_bit, 0);
74 		value = data[hi_pos] & mask;
75 		low_bit = f->msb - f->lsb - hi_bit;
76 		value <<= low_bit;
77 		low_bit -= 32;
78 		value |= (u64)data[hi_pos - 1] << low_bit;
79 		mask = GENMASK(31, 32 - low_bit);
80 		value |= (data[hi_pos - 2] & mask) >> low_bit;
81 	}
82 
83 	return value;
84 }
85 
86 static void qdma_set_field(const struct qdma_device *qdev, u32 *data,
87 			   enum qdma_reg_fields field, u64 value)
88 {
89 	const struct qdma_reg_field *f = &qdev->rfields[field];
90 	u16 low_pos, hi_pos, low_bit;
91 
92 	low_pos = f->lsb / BITS_PER_TYPE(*data);
93 	hi_pos = f->msb / BITS_PER_TYPE(*data);
94 	low_bit = f->lsb % BITS_PER_TYPE(*data);
95 
96 	data[low_pos++] |= value << low_bit;
97 	if (low_pos <= hi_pos)
98 		data[low_pos++] |= (u32)(value >> (32 - low_bit));
99 	if (low_pos <= hi_pos)
100 		data[low_pos] |= (u32)(value >> (64 - low_bit));
101 }
102 
103 static inline int qdma_reg_write(const struct qdma_device *qdev,
104 				 const u32 *data, enum qdma_regs reg)
105 {
106 	const struct qdma_reg *r = &qdev->roffs[reg];
107 	int ret;
108 
109 	if (r->count > 1)
110 		ret = regmap_bulk_write(qdev->regmap, r->off, data, r->count);
111 	else
112 		ret = regmap_write(qdev->regmap, r->off, *data);
113 
114 	return ret;
115 }
116 
117 static inline int qdma_reg_read(const struct qdma_device *qdev, u32 *data,
118 				enum qdma_regs reg)
119 {
120 	const struct qdma_reg *r = &qdev->roffs[reg];
121 	int ret;
122 
123 	if (r->count > 1)
124 		ret = regmap_bulk_read(qdev->regmap, r->off, data, r->count);
125 	else
126 		ret = regmap_read(qdev->regmap, r->off, data);
127 
128 	return ret;
129 }
130 
131 static int qdma_context_cmd_execute(const struct qdma_device *qdev,
132 				    enum qdma_ctxt_type type,
133 				    enum qdma_ctxt_cmd cmd, u16 index)
134 {
135 	u32 value = 0;
136 	int ret;
137 
138 	qdma_set_field(qdev, &value, QDMA_REGF_CMD_INDX, index);
139 	qdma_set_field(qdev, &value, QDMA_REGF_CMD_CMD, cmd);
140 	qdma_set_field(qdev, &value, QDMA_REGF_CMD_TYPE, type);
141 
142 	ret = qdma_reg_write(qdev, &value, QDMA_REGO_CTXT_CMD);
143 	if (ret)
144 		return ret;
145 
146 	ret = regmap_read_poll_timeout(qdev->regmap,
147 				       QDMA_REG_OFF(qdev, QDMA_REGO_CTXT_CMD),
148 				       value,
149 				       !qdma_get_field(qdev, &value,
150 						       QDMA_REGF_CMD_BUSY),
151 				       QDMA_POLL_INTRVL_US,
152 				       QDMA_POLL_TIMEOUT_US);
153 	if (ret) {
154 		qdma_err(qdev, "Context command execution timed out");
155 		return ret;
156 	}
157 
158 	return 0;
159 }
160 
161 static int qdma_context_write_data(const struct qdma_device *qdev,
162 				   const u32 *data)
163 {
164 	u32 mask[QDMA_CTXT_REGMAP_LEN];
165 	int ret;
166 
167 	memset(mask, ~0, sizeof(mask));
168 
169 	ret = qdma_reg_write(qdev, mask, QDMA_REGO_CTXT_MASK);
170 	if (ret)
171 		return ret;
172 
173 	ret = qdma_reg_write(qdev, data, QDMA_REGO_CTXT_DATA);
174 	if (ret)
175 		return ret;
176 
177 	return 0;
178 }
179 
180 static void qdma_prep_sw_desc_context(const struct qdma_device *qdev,
181 				      const struct qdma_ctxt_sw_desc *ctxt,
182 				      u32 *data)
183 {
184 	memset(data, 0, QDMA_CTXT_REGMAP_LEN * sizeof(*data));
185 	qdma_set_field(qdev, data, QDMA_REGF_DESC_BASE, ctxt->desc_base);
186 	qdma_set_field(qdev, data, QDMA_REGF_IRQ_VEC, ctxt->vec);
187 	qdma_set_field(qdev, data, QDMA_REGF_FUNCTION_ID, qdev->fid);
188 
189 	qdma_set_field(qdev, data, QDMA_REGF_DESC_SIZE, QDMA_DESC_SIZE_32B);
190 	qdma_set_field(qdev, data, QDMA_REGF_RING_ID, QDMA_DEFAULT_RING_ID);
191 	qdma_set_field(qdev, data, QDMA_REGF_QUEUE_MODE, QDMA_QUEUE_OP_MM);
192 	qdma_set_field(qdev, data, QDMA_REGF_IRQ_ENABLE, 1);
193 	qdma_set_field(qdev, data, QDMA_REGF_WBK_ENABLE, 1);
194 	qdma_set_field(qdev, data, QDMA_REGF_WBI_CHECK, 1);
195 	qdma_set_field(qdev, data, QDMA_REGF_IRQ_ARM, 1);
196 	qdma_set_field(qdev, data, QDMA_REGF_IRQ_AGG, 1);
197 	qdma_set_field(qdev, data, QDMA_REGF_WBI_INTVL_ENABLE, 1);
198 	qdma_set_field(qdev, data, QDMA_REGF_QUEUE_ENABLE, 1);
199 	qdma_set_field(qdev, data, QDMA_REGF_MRKR_DISABLE, 1);
200 }
201 
202 static void qdma_prep_intr_context(const struct qdma_device *qdev,
203 				   const struct qdma_ctxt_intr *ctxt,
204 				   u32 *data)
205 {
206 	memset(data, 0, QDMA_CTXT_REGMAP_LEN * sizeof(*data));
207 	qdma_set_field(qdev, data, QDMA_REGF_INTR_AGG_BASE, ctxt->agg_base);
208 	qdma_set_field(qdev, data, QDMA_REGF_INTR_VECTOR, ctxt->vec);
209 	qdma_set_field(qdev, data, QDMA_REGF_INTR_SIZE, ctxt->size);
210 	qdma_set_field(qdev, data, QDMA_REGF_INTR_VALID, ctxt->valid);
211 	qdma_set_field(qdev, data, QDMA_REGF_INTR_COLOR, ctxt->color);
212 	qdma_set_field(qdev, data, QDMA_REGF_INTR_FUNCTION_ID, qdev->fid);
213 }
214 
215 static void qdma_prep_fmap_context(const struct qdma_device *qdev,
216 				   const struct qdma_ctxt_fmap *ctxt,
217 				   u32 *data)
218 {
219 	memset(data, 0, QDMA_CTXT_REGMAP_LEN * sizeof(*data));
220 	qdma_set_field(qdev, data, QDMA_REGF_QUEUE_BASE, ctxt->qbase);
221 	qdma_set_field(qdev, data, QDMA_REGF_QUEUE_MAX, ctxt->qmax);
222 }
223 
224 /*
225  * Program the indirect context register space
226  *
227  * Once the queue is enabled, context is dynamically updated by hardware. Any
228  * modification of the context through this API when the queue is enabled can
229  * result in unexpected behavior. Reading the context when the queue is enabled
230  * is not recommended as it can result in reduced performance.
231  */
232 static int qdma_prog_context(struct qdma_device *qdev, enum qdma_ctxt_type type,
233 			     enum qdma_ctxt_cmd cmd, u16 index, u32 *ctxt)
234 {
235 	int ret;
236 
237 	mutex_lock(&qdev->ctxt_lock);
238 	if (cmd == QDMA_CTXT_WRITE) {
239 		ret = qdma_context_write_data(qdev, ctxt);
240 		if (ret)
241 			goto failed;
242 	}
243 
244 	ret = qdma_context_cmd_execute(qdev, type, cmd, index);
245 	if (ret)
246 		goto failed;
247 
248 	if (cmd == QDMA_CTXT_READ) {
249 		ret = qdma_reg_read(qdev, ctxt, QDMA_REGO_CTXT_DATA);
250 		if (ret)
251 			goto failed;
252 	}
253 
254 failed:
255 	mutex_unlock(&qdev->ctxt_lock);
256 
257 	return ret;
258 }
259 
260 static int qdma_check_queue_status(struct qdma_device *qdev,
261 				   enum dma_transfer_direction dir, u16 qid)
262 {
263 	u32 status, data[QDMA_CTXT_REGMAP_LEN] = {0};
264 	enum qdma_ctxt_type type;
265 	int ret;
266 
267 	if (dir == DMA_MEM_TO_DEV)
268 		type = QDMA_CTXT_DESC_SW_H2C;
269 	else
270 		type = QDMA_CTXT_DESC_SW_C2H;
271 	ret = qdma_prog_context(qdev, type, QDMA_CTXT_READ, qid, data);
272 	if (ret)
273 		return ret;
274 
275 	status = qdma_get_field(qdev, data, QDMA_REGF_QUEUE_ENABLE);
276 	if (status) {
277 		qdma_err(qdev, "queue %d already in use", qid);
278 		return -EBUSY;
279 	}
280 
281 	return 0;
282 }
283 
284 static int qdma_clear_queue_context(const struct qdma_queue *queue)
285 {
286 	enum qdma_ctxt_type h2c_types[] = { QDMA_CTXT_DESC_SW_H2C,
287 					    QDMA_CTXT_DESC_HW_H2C,
288 					    QDMA_CTXT_DESC_CR_H2C,
289 					    QDMA_CTXT_PFTCH, };
290 	enum qdma_ctxt_type c2h_types[] = { QDMA_CTXT_DESC_SW_C2H,
291 					    QDMA_CTXT_DESC_HW_C2H,
292 					    QDMA_CTXT_DESC_CR_C2H,
293 					    QDMA_CTXT_PFTCH, };
294 	struct qdma_device *qdev = queue->qdev;
295 	enum qdma_ctxt_type *type;
296 	int ret, num, i;
297 
298 	if (queue->dir == DMA_MEM_TO_DEV) {
299 		type = h2c_types;
300 		num = ARRAY_SIZE(h2c_types);
301 	} else {
302 		type = c2h_types;
303 		num = ARRAY_SIZE(c2h_types);
304 	}
305 	for (i = 0; i < num; i++) {
306 		ret = qdma_prog_context(qdev, type[i], QDMA_CTXT_CLEAR,
307 					queue->qid, NULL);
308 		if (ret) {
309 			qdma_err(qdev, "Failed to clear ctxt %d", type[i]);
310 			return ret;
311 		}
312 	}
313 
314 	return 0;
315 }
316 
317 static int qdma_setup_fmap_context(struct qdma_device *qdev)
318 {
319 	u32 ctxt[QDMA_CTXT_REGMAP_LEN];
320 	struct qdma_ctxt_fmap fmap;
321 	int ret;
322 
323 	ret = qdma_prog_context(qdev, QDMA_CTXT_FMAP, QDMA_CTXT_CLEAR,
324 				qdev->fid, NULL);
325 	if (ret) {
326 		qdma_err(qdev, "Failed clearing context");
327 		return ret;
328 	}
329 
330 	fmap.qbase = 0;
331 	fmap.qmax = qdev->chan_num * 2;
332 	qdma_prep_fmap_context(qdev, &fmap, ctxt);
333 	ret = qdma_prog_context(qdev, QDMA_CTXT_FMAP, QDMA_CTXT_WRITE,
334 				qdev->fid, ctxt);
335 	if (ret)
336 		qdma_err(qdev, "Failed setup fmap, ret %d", ret);
337 
338 	return ret;
339 }
340 
341 static int qdma_setup_queue_context(struct qdma_device *qdev,
342 				    const struct qdma_ctxt_sw_desc *sw_desc,
343 				    enum dma_transfer_direction dir, u16 qid)
344 {
345 	u32 ctxt[QDMA_CTXT_REGMAP_LEN];
346 	enum qdma_ctxt_type type;
347 	int ret;
348 
349 	if (dir == DMA_MEM_TO_DEV)
350 		type = QDMA_CTXT_DESC_SW_H2C;
351 	else
352 		type = QDMA_CTXT_DESC_SW_C2H;
353 
354 	qdma_prep_sw_desc_context(qdev, sw_desc, ctxt);
355 	/* Setup SW descriptor context */
356 	ret = qdma_prog_context(qdev, type, QDMA_CTXT_WRITE, qid, ctxt);
357 	if (ret)
358 		qdma_err(qdev, "Failed setup SW desc ctxt for queue: %d", qid);
359 
360 	return ret;
361 }
362 
363 /*
364  * Enable or disable memory-mapped DMA engines
365  * 1: enable, 0: disable
366  */
367 static int qdma_sgdma_control(struct qdma_device *qdev, u32 ctrl)
368 {
369 	int ret;
370 
371 	ret = qdma_reg_write(qdev, &ctrl, QDMA_REGO_MM_H2C_CTRL);
372 	ret |= qdma_reg_write(qdev, &ctrl, QDMA_REGO_MM_C2H_CTRL);
373 
374 	return ret;
375 }
376 
377 static int qdma_get_hw_info(struct qdma_device *qdev)
378 {
379 	struct qdma_platdata *pdata = dev_get_platdata(&qdev->pdev->dev);
380 	u32 value = 0;
381 	int ret;
382 
383 	ret = qdma_reg_read(qdev, &value, QDMA_REGO_QUEUE_COUNT);
384 	if (ret)
385 		return ret;
386 
387 	value = qdma_get_field(qdev, &value, QDMA_REGF_QUEUE_COUNT) + 1;
388 	if (pdata->max_mm_channels * 2 > value) {
389 		qdma_err(qdev, "not enough hw queues %d", value);
390 		return -EINVAL;
391 	}
392 	qdev->chan_num = pdata->max_mm_channels;
393 
394 	ret = qdma_reg_read(qdev, &qdev->fid, QDMA_REGO_FUNC_ID);
395 	if (ret)
396 		return ret;
397 
398 	qdma_info(qdev, "max channel %d, function id %d",
399 		  qdev->chan_num, qdev->fid);
400 
401 	return 0;
402 }
403 
404 static inline int qdma_update_pidx(const struct qdma_queue *queue, u16 pidx)
405 {
406 	struct qdma_device *qdev = queue->qdev;
407 
408 	return regmap_write(qdev->regmap, queue->pidx_reg,
409 			    pidx | QDMA_QUEUE_ARM_BIT);
410 }
411 
412 static inline int qdma_update_cidx(const struct qdma_queue *queue,
413 				   u16 ridx, u16 cidx)
414 {
415 	struct qdma_device *qdev = queue->qdev;
416 
417 	return regmap_write(qdev->regmap, queue->cidx_reg,
418 			    ((u32)ridx << 16) | cidx);
419 }
420 
/**
 * qdma_free_vdesc - Release a QDMA descriptor
 * @vdesc: Virtual DMA descriptor embedded in the descriptor to free
 */
static void qdma_free_vdesc(struct virt_dma_desc *vdesc)
{
	kfree(to_qdma_vdesc(vdesc));
}
431 
432 static int qdma_alloc_queues(struct qdma_device *qdev,
433 			     enum dma_transfer_direction dir)
434 {
435 	struct qdma_queue *q, **queues;
436 	u32 i, pidx_base;
437 	int ret;
438 
439 	if (dir == DMA_MEM_TO_DEV) {
440 		queues = &qdev->h2c_queues;
441 		pidx_base = QDMA_REG_OFF(qdev, QDMA_REGO_H2C_PIDX);
442 	} else {
443 		queues = &qdev->c2h_queues;
444 		pidx_base = QDMA_REG_OFF(qdev, QDMA_REGO_C2H_PIDX);
445 	}
446 
447 	*queues = devm_kcalloc(&qdev->pdev->dev, qdev->chan_num, sizeof(*q),
448 			       GFP_KERNEL);
449 	if (!*queues)
450 		return -ENOMEM;
451 
452 	for (i = 0; i < qdev->chan_num; i++) {
453 		ret = qdma_check_queue_status(qdev, dir, i);
454 		if (ret)
455 			return ret;
456 
457 		q = &(*queues)[i];
458 		q->ring_size = QDMA_DEFAULT_RING_SIZE;
459 		q->idx_mask = q->ring_size - 2;
460 		q->qdev = qdev;
461 		q->dir = dir;
462 		q->qid = i;
463 		q->pidx_reg = pidx_base + i * QDMA_DMAP_REG_STRIDE;
464 		q->cidx_reg = QDMA_REG_OFF(qdev, QDMA_REGO_INTR_CIDX) +
465 				i * QDMA_DMAP_REG_STRIDE;
466 		q->vchan.desc_free = qdma_free_vdesc;
467 		vchan_init(&q->vchan, &qdev->dma_dev);
468 	}
469 
470 	return 0;
471 }
472 
473 static int qdma_device_verify(struct qdma_device *qdev)
474 {
475 	u32 value;
476 	int ret;
477 
478 	ret = regmap_read(qdev->regmap, QDMA_IDENTIFIER_REGOFF, &value);
479 	if (ret)
480 		return ret;
481 
482 	value = FIELD_GET(QDMA_IDENTIFIER_MASK, value);
483 	if (value != QDMA_IDENTIFIER) {
484 		qdma_err(qdev, "Invalid identifier");
485 		return -ENODEV;
486 	}
487 	qdev->rfields = qdma_regfs_default;
488 	qdev->roffs = qdma_regos_default;
489 
490 	return 0;
491 }
492 
493 static int qdma_device_setup(struct qdma_device *qdev)
494 {
495 	struct device *dev = &qdev->pdev->dev;
496 	u32 ring_sz = QDMA_DEFAULT_RING_SIZE;
497 	int ret = 0;
498 
499 	while (dev && get_dma_ops(dev))
500 		dev = dev->parent;
501 	if (!dev) {
502 		qdma_err(qdev, "dma device not found");
503 		return -EINVAL;
504 	}
505 	set_dma_ops(&qdev->pdev->dev, get_dma_ops(dev));
506 
507 	ret = qdma_setup_fmap_context(qdev);
508 	if (ret) {
509 		qdma_err(qdev, "Failed setup fmap context");
510 		return ret;
511 	}
512 
513 	/* Setup global ring buffer size at QDMA_DEFAULT_RING_ID index */
514 	ret = qdma_reg_write(qdev, &ring_sz, QDMA_REGO_RING_SIZE);
515 	if (ret) {
516 		qdma_err(qdev, "Failed to setup ring %d of size %ld",
517 			 QDMA_DEFAULT_RING_ID, QDMA_DEFAULT_RING_SIZE);
518 		return ret;
519 	}
520 
521 	/* Enable memory-mapped DMA engine in both directions */
522 	ret = qdma_sgdma_control(qdev, 1);
523 	if (ret) {
524 		qdma_err(qdev, "Failed to SGDMA with error %d", ret);
525 		return ret;
526 	}
527 
528 	ret = qdma_alloc_queues(qdev, DMA_MEM_TO_DEV);
529 	if (ret) {
530 		qdma_err(qdev, "Failed to alloc H2C queues, ret %d", ret);
531 		return ret;
532 	}
533 
534 	ret = qdma_alloc_queues(qdev, DMA_DEV_TO_MEM);
535 	if (ret) {
536 		qdma_err(qdev, "Failed to alloc C2H queues, ret %d", ret);
537 		return ret;
538 	}
539 
540 	return 0;
541 }
542 
543 /**
544  * qdma_free_queue_resources() - Free queue resources
545  * @chan: DMA channel
546  */
547 static void qdma_free_queue_resources(struct dma_chan *chan)
548 {
549 	struct qdma_queue *queue = to_qdma_queue(chan);
550 	struct qdma_device *qdev = queue->qdev;
551 	struct device *dev = qdev->dma_dev.dev;
552 
553 	qdma_clear_queue_context(queue);
554 	vchan_free_chan_resources(&queue->vchan);
555 	dma_free_coherent(dev, queue->ring_size * QDMA_MM_DESC_SIZE,
556 			  queue->desc_base, queue->dma_desc_base);
557 }
558 
559 /**
560  * qdma_alloc_queue_resources() - Allocate queue resources
561  * @chan: DMA channel
562  */
563 static int qdma_alloc_queue_resources(struct dma_chan *chan)
564 {
565 	struct qdma_queue *queue = to_qdma_queue(chan);
566 	struct qdma_device *qdev = queue->qdev;
567 	struct qdma_ctxt_sw_desc desc;
568 	size_t size;
569 	int ret;
570 
571 	ret = qdma_clear_queue_context(queue);
572 	if (ret)
573 		return ret;
574 
575 	size = queue->ring_size * QDMA_MM_DESC_SIZE;
576 	queue->desc_base = dma_alloc_coherent(qdev->dma_dev.dev, size,
577 					      &queue->dma_desc_base,
578 					      GFP_KERNEL);
579 	if (!queue->desc_base) {
580 		qdma_err(qdev, "Failed to allocate descriptor ring");
581 		return -ENOMEM;
582 	}
583 
584 	/* Setup SW descriptor queue context for DMA memory map */
585 	desc.vec = qdma_get_intr_ring_idx(qdev);
586 	desc.desc_base = queue->dma_desc_base;
587 	ret = qdma_setup_queue_context(qdev, &desc, queue->dir, queue->qid);
588 	if (ret) {
589 		qdma_err(qdev, "Failed to setup SW desc ctxt for %s",
590 			 chan->name);
591 		dma_free_coherent(qdev->dma_dev.dev, size, queue->desc_base,
592 				  queue->dma_desc_base);
593 		return ret;
594 	}
595 
596 	queue->pidx = 0;
597 	queue->cidx = 0;
598 
599 	return 0;
600 }
601 
602 static bool qdma_filter_fn(struct dma_chan *chan, void *param)
603 {
604 	struct qdma_queue *queue = to_qdma_queue(chan);
605 	struct qdma_queue_info *info = param;
606 
607 	return info->dir == queue->dir;
608 }
609 
610 static int qdma_xfer_start(struct qdma_queue *queue)
611 {
612 	struct qdma_device *qdev = queue->qdev;
613 	int ret;
614 
615 	if (!vchan_next_desc(&queue->vchan))
616 		return 0;
617 
618 	qdma_dbg(qdev, "Tnx kickoff with P: %d for %s%d",
619 		 queue->issued_vdesc->pidx, CHAN_STR(queue), queue->qid);
620 
621 	ret = qdma_update_pidx(queue, queue->issued_vdesc->pidx);
622 	if (ret) {
623 		qdma_err(qdev, "Failed to update PIDX to %d for %s queue: %d",
624 			 queue->pidx, CHAN_STR(queue), queue->qid);
625 	}
626 
627 	return ret;
628 }
629 
630 static void qdma_issue_pending(struct dma_chan *chan)
631 {
632 	struct qdma_queue *queue = to_qdma_queue(chan);
633 	unsigned long flags;
634 
635 	spin_lock_irqsave(&queue->vchan.lock, flags);
636 	if (vchan_issue_pending(&queue->vchan)) {
637 		if (queue->submitted_vdesc) {
638 			queue->issued_vdesc = queue->submitted_vdesc;
639 			queue->submitted_vdesc = NULL;
640 		}
641 		qdma_xfer_start(queue);
642 	}
643 
644 	spin_unlock_irqrestore(&queue->vchan.lock, flags);
645 }
646 
647 static struct qdma_mm_desc *qdma_get_desc(struct qdma_queue *q)
648 {
649 	struct qdma_mm_desc *desc;
650 
651 	if (((q->pidx + 1) & q->idx_mask) == q->cidx)
652 		return NULL;
653 
654 	desc = q->desc_base + q->pidx;
655 	q->pidx = (q->pidx + 1) & q->idx_mask;
656 
657 	return desc;
658 }
659 
660 static int qdma_hw_enqueue(struct qdma_queue *q, struct qdma_mm_vdesc *vdesc)
661 {
662 	struct qdma_mm_desc *desc;
663 	struct scatterlist *sg;
664 	u64 addr, *src, *dst;
665 	u32 rest, len;
666 	int ret = 0;
667 	u32 i;
668 
669 	if (!vdesc->sg_len)
670 		return 0;
671 
672 	if (q->dir == DMA_MEM_TO_DEV) {
673 		dst = &vdesc->dev_addr;
674 		src = &addr;
675 	} else {
676 		dst = &addr;
677 		src = &vdesc->dev_addr;
678 	}
679 
680 	for_each_sg(vdesc->sgl, sg, vdesc->sg_len, i) {
681 		addr = sg_dma_address(sg) + vdesc->sg_off;
682 		rest = sg_dma_len(sg) - vdesc->sg_off;
683 		while (rest) {
684 			len = min_t(u32, rest, QDMA_MM_DESC_MAX_LEN);
685 			desc = qdma_get_desc(q);
686 			if (!desc) {
687 				ret = -EBUSY;
688 				goto out;
689 			}
690 
691 			desc->src_addr = cpu_to_le64(*src);
692 			desc->dst_addr = cpu_to_le64(*dst);
693 			desc->len = cpu_to_le32(len);
694 
695 			vdesc->dev_addr += len;
696 			vdesc->sg_off += len;
697 			vdesc->pending_descs++;
698 			addr += len;
699 			rest -= len;
700 		}
701 		vdesc->sg_off = 0;
702 	}
703 out:
704 	vdesc->sg_len -= i;
705 	vdesc->pidx = q->pidx;
706 	return ret;
707 }
708 
709 static void qdma_fill_pending_vdesc(struct qdma_queue *q)
710 {
711 	struct virt_dma_chan *vc = &q->vchan;
712 	struct qdma_mm_vdesc *vdesc = NULL;
713 	struct virt_dma_desc *vd;
714 	int ret;
715 
716 	if (!list_empty(&vc->desc_issued)) {
717 		vd = &q->issued_vdesc->vdesc;
718 		list_for_each_entry_from(vd, &vc->desc_issued, node) {
719 			vdesc = to_qdma_vdesc(vd);
720 			ret = qdma_hw_enqueue(q, vdesc);
721 			if (ret) {
722 				q->issued_vdesc = vdesc;
723 				return;
724 			}
725 		}
726 		q->issued_vdesc = vdesc;
727 	}
728 
729 	if (list_empty(&vc->desc_submitted))
730 		return;
731 
732 	if (q->submitted_vdesc)
733 		vd = &q->submitted_vdesc->vdesc;
734 	else
735 		vd = list_first_entry(&vc->desc_submitted, typeof(*vd), node);
736 
737 	list_for_each_entry_from(vd, &vc->desc_submitted, node) {
738 		vdesc = to_qdma_vdesc(vd);
739 		ret = qdma_hw_enqueue(q, vdesc);
740 		if (ret)
741 			break;
742 	}
743 	q->submitted_vdesc = vdesc;
744 }
745 
746 static dma_cookie_t qdma_tx_submit(struct dma_async_tx_descriptor *tx)
747 {
748 	struct virt_dma_chan *vc = to_virt_chan(tx->chan);
749 	struct qdma_queue *q = to_qdma_queue(&vc->chan);
750 	struct virt_dma_desc *vd;
751 	unsigned long flags;
752 	dma_cookie_t cookie;
753 
754 	vd = container_of(tx, struct virt_dma_desc, tx);
755 	spin_lock_irqsave(&vc->lock, flags);
756 	cookie = dma_cookie_assign(tx);
757 
758 	list_move_tail(&vd->node, &vc->desc_submitted);
759 	qdma_fill_pending_vdesc(q);
760 	spin_unlock_irqrestore(&vc->lock, flags);
761 
762 	return cookie;
763 }
764 
765 static struct dma_async_tx_descriptor *
766 qdma_prep_device_sg(struct dma_chan *chan, struct scatterlist *sgl,
767 		    unsigned int sg_len, enum dma_transfer_direction dir,
768 		    unsigned long flags, void *context)
769 {
770 	struct qdma_queue *q = to_qdma_queue(chan);
771 	struct dma_async_tx_descriptor *tx;
772 	struct qdma_mm_vdesc *vdesc;
773 
774 	vdesc = kzalloc(sizeof(*vdesc), GFP_NOWAIT);
775 	if (!vdesc)
776 		return NULL;
777 	vdesc->sgl = sgl;
778 	vdesc->sg_len = sg_len;
779 	if (dir == DMA_MEM_TO_DEV)
780 		vdesc->dev_addr = q->cfg.dst_addr;
781 	else
782 		vdesc->dev_addr = q->cfg.src_addr;
783 
784 	tx = vchan_tx_prep(&q->vchan, &vdesc->vdesc, flags);
785 	tx->tx_submit = qdma_tx_submit;
786 
787 	return tx;
788 }
789 
790 static int qdma_device_config(struct dma_chan *chan,
791 			      struct dma_slave_config *cfg)
792 {
793 	struct qdma_queue *q = to_qdma_queue(chan);
794 
795 	memcpy(&q->cfg, cfg, sizeof(*cfg));
796 
797 	return 0;
798 }
799 
800 static int qdma_arm_err_intr(const struct qdma_device *qdev)
801 {
802 	u32 value = 0;
803 
804 	qdma_set_field(qdev, &value, QDMA_REGF_ERR_INT_FUNC, qdev->fid);
805 	qdma_set_field(qdev, &value, QDMA_REGF_ERR_INT_VEC, qdev->err_irq_idx);
806 	qdma_set_field(qdev, &value, QDMA_REGF_ERR_INT_ARM, 1);
807 
808 	return qdma_reg_write(qdev, &value, QDMA_REGO_ERR_INT);
809 }
810 
811 static irqreturn_t qdma_error_isr(int irq, void *data)
812 {
813 	struct qdma_device *qdev = data;
814 	u32 err_stat = 0;
815 	int ret;
816 
817 	ret = qdma_reg_read(qdev, &err_stat, QDMA_REGO_ERR_STAT);
818 	if (ret) {
819 		qdma_err(qdev, "read error state failed, ret %d", ret);
820 		goto out;
821 	}
822 
823 	qdma_err(qdev, "global error %d", err_stat);
824 	ret = qdma_reg_write(qdev, &err_stat, QDMA_REGO_ERR_STAT);
825 	if (ret)
826 		qdma_err(qdev, "clear error state failed, ret %d", ret);
827 
828 out:
829 	qdma_arm_err_intr(qdev);
830 	return IRQ_HANDLED;
831 }
832 
833 static irqreturn_t qdma_queue_isr(int irq, void *data)
834 {
835 	struct qdma_intr_ring *intr = data;
836 	struct qdma_queue *q = NULL;
837 	struct qdma_device *qdev;
838 	u32 index, comp_desc;
839 	u64 intr_ent;
840 	u8 color;
841 	int ret;
842 	u16 qid;
843 
844 	qdev = intr->qdev;
845 	index = intr->cidx;
846 	while (1) {
847 		struct virt_dma_desc *vd;
848 		struct qdma_mm_vdesc *vdesc;
849 		unsigned long flags;
850 		u32 cidx;
851 
852 		intr_ent = le64_to_cpu(intr->base[index]);
853 		color = FIELD_GET(QDMA_INTR_MASK_COLOR, intr_ent);
854 		if (color != intr->color)
855 			break;
856 
857 		qid = FIELD_GET(QDMA_INTR_MASK_QID, intr_ent);
858 		if (FIELD_GET(QDMA_INTR_MASK_TYPE, intr_ent))
859 			q = qdev->c2h_queues;
860 		else
861 			q = qdev->h2c_queues;
862 		q += qid;
863 
864 		cidx = FIELD_GET(QDMA_INTR_MASK_CIDX, intr_ent);
865 
866 		spin_lock_irqsave(&q->vchan.lock, flags);
867 		comp_desc = (cidx - q->cidx) & q->idx_mask;
868 
869 		vd = vchan_next_desc(&q->vchan);
870 		if (!vd)
871 			goto skip;
872 
873 		vdesc = to_qdma_vdesc(vd);
874 		while (comp_desc > vdesc->pending_descs) {
875 			list_del(&vd->node);
876 			vchan_cookie_complete(vd);
877 			comp_desc -= vdesc->pending_descs;
878 			vd = vchan_next_desc(&q->vchan);
879 			vdesc = to_qdma_vdesc(vd);
880 		}
881 		vdesc->pending_descs -= comp_desc;
882 		if (!vdesc->pending_descs && QDMA_VDESC_QUEUED(vdesc)) {
883 			list_del(&vd->node);
884 			vchan_cookie_complete(vd);
885 		}
886 		q->cidx = cidx;
887 
888 		qdma_fill_pending_vdesc(q);
889 		qdma_xfer_start(q);
890 
891 skip:
892 		spin_unlock_irqrestore(&q->vchan.lock, flags);
893 
894 		/*
895 		 * Wrap the index value and flip the expected color value if
896 		 * interrupt aggregation PIDX has wrapped around.
897 		 */
898 		index++;
899 		index &= QDMA_INTR_RING_IDX_MASK;
900 		if (!index)
901 			intr->color = !intr->color;
902 	}
903 
904 	/*
905 	 * Update the software interrupt aggregation ring CIDX if a valid entry
906 	 * was found.
907 	 */
908 	if (q) {
909 		qdma_dbg(qdev, "update intr ring%d %d", intr->ridx, index);
910 
911 		/*
912 		 * Record the last read index of status descriptor from the
913 		 * interrupt aggregation ring.
914 		 */
915 		intr->cidx = index;
916 
917 		ret = qdma_update_cidx(q, intr->ridx, index);
918 		if (ret) {
919 			qdma_err(qdev, "Failed to update IRQ CIDX");
920 			return IRQ_NONE;
921 		}
922 	}
923 
924 	return IRQ_HANDLED;
925 }
926 
927 static int qdma_init_error_irq(struct qdma_device *qdev)
928 {
929 	struct device *dev = &qdev->pdev->dev;
930 	int ret;
931 	u32 vec;
932 
933 	vec = qdev->queue_irq_start - 1;
934 
935 	ret = devm_request_threaded_irq(dev, vec, NULL, qdma_error_isr,
936 					IRQF_ONESHOT, "amd-qdma-error", qdev);
937 	if (ret) {
938 		qdma_err(qdev, "Failed to request error IRQ vector: %d", vec);
939 		return ret;
940 	}
941 
942 	ret = qdma_arm_err_intr(qdev);
943 	if (ret)
944 		qdma_err(qdev, "Failed to arm err interrupt, ret %d", ret);
945 
946 	return ret;
947 }
948 
949 static int qdmam_alloc_qintr_rings(struct qdma_device *qdev)
950 {
951 	u32 ctxt[QDMA_CTXT_REGMAP_LEN];
952 	struct device *dev = &qdev->pdev->dev;
953 	struct qdma_intr_ring *ring;
954 	struct qdma_ctxt_intr intr_ctxt;
955 	u32 vector;
956 	int ret, i;
957 
958 	qdev->qintr_ring_num = qdev->queue_irq_num;
959 	qdev->qintr_rings = devm_kcalloc(dev, qdev->qintr_ring_num,
960 					 sizeof(*qdev->qintr_rings),
961 					 GFP_KERNEL);
962 	if (!qdev->qintr_rings)
963 		return -ENOMEM;
964 
965 	vector = qdev->queue_irq_start;
966 	for (i = 0; i < qdev->qintr_ring_num; i++, vector++) {
967 		ring = &qdev->qintr_rings[i];
968 		ring->qdev = qdev;
969 		ring->msix_id = qdev->err_irq_idx + i + 1;
970 		ring->ridx = i;
971 		ring->color = 1;
972 		ring->base = dmam_alloc_coherent(dev, QDMA_INTR_RING_SIZE,
973 						 &ring->dev_base, GFP_KERNEL);
974 		if (!ring->base) {
975 			qdma_err(qdev, "Failed to alloc intr ring %d", i);
976 			return -ENOMEM;
977 		}
978 		intr_ctxt.agg_base = QDMA_INTR_RING_BASE(ring->dev_base);
979 		intr_ctxt.size = (QDMA_INTR_RING_SIZE - 1) / 4096;
980 		intr_ctxt.vec = ring->msix_id;
981 		intr_ctxt.valid = true;
982 		intr_ctxt.color = true;
983 		ret = qdma_prog_context(qdev, QDMA_CTXT_INTR_COAL,
984 					QDMA_CTXT_CLEAR, ring->ridx, NULL);
985 		if (ret) {
986 			qdma_err(qdev, "Failed clear intr ctx, ret %d", ret);
987 			return ret;
988 		}
989 
990 		qdma_prep_intr_context(qdev, &intr_ctxt, ctxt);
991 		ret = qdma_prog_context(qdev, QDMA_CTXT_INTR_COAL,
992 					QDMA_CTXT_WRITE, ring->ridx, ctxt);
993 		if (ret) {
994 			qdma_err(qdev, "Failed setup intr ctx, ret %d", ret);
995 			return ret;
996 		}
997 
998 		ret = devm_request_threaded_irq(dev, vector, NULL,
999 						qdma_queue_isr, IRQF_ONESHOT,
1000 						"amd-qdma-queue", ring);
1001 		if (ret) {
1002 			qdma_err(qdev, "Failed to request irq %d", vector);
1003 			return ret;
1004 		}
1005 	}
1006 
1007 	return 0;
1008 }
1009 
/*
 * Set up all interrupts: the error IRQ first, then the queue interrupt
 * rings and their IRQs. Returns 0 or the error of the failing step.
 */
static int qdma_intr_init(struct qdma_device *qdev)
{
	int ret;

	ret = qdma_init_error_irq(qdev);
	if (ret) {
		qdma_err(qdev, "Failed to init error IRQs, ret %d", ret);
		return ret;
	}

	ret = qdmam_alloc_qintr_rings(qdev);
	if (ret)
		qdma_err(qdev, "Failed to init queue IRQs, ret %d", ret);

	return ret;
}
1028 
1029 static void amd_qdma_remove(struct platform_device *pdev)
1030 {
1031 	struct qdma_device *qdev = platform_get_drvdata(pdev);
1032 
1033 	qdma_sgdma_control(qdev, 0);
1034 	dma_async_device_unregister(&qdev->dma_dev);
1035 
1036 	mutex_destroy(&qdev->ctxt_lock);
1037 }
1038 
1039 static int amd_qdma_probe(struct platform_device *pdev)
1040 {
1041 	struct qdma_platdata *pdata = dev_get_platdata(&pdev->dev);
1042 	struct qdma_device *qdev;
1043 	struct resource *res;
1044 	void __iomem *regs;
1045 	int ret;
1046 
1047 	qdev = devm_kzalloc(&pdev->dev, sizeof(*qdev), GFP_KERNEL);
1048 	if (!qdev)
1049 		return -ENOMEM;
1050 
1051 	platform_set_drvdata(pdev, qdev);
1052 	qdev->pdev = pdev;
1053 	mutex_init(&qdev->ctxt_lock);
1054 
1055 	res = platform_get_resource(pdev, IORESOURCE_IRQ, 0);
1056 	if (!res) {
1057 		qdma_err(qdev, "Failed to get IRQ resource");
1058 		ret = -ENODEV;
1059 		goto failed;
1060 	}
1061 	qdev->err_irq_idx = pdata->irq_index;
1062 	qdev->queue_irq_start = res->start + 1;
1063 	qdev->queue_irq_num = resource_size(res) - 1;
1064 
1065 	regs = devm_platform_get_and_ioremap_resource(pdev, 0, NULL);
1066 	if (IS_ERR(regs)) {
1067 		ret = PTR_ERR(regs);
1068 		qdma_err(qdev, "Failed to map IO resource, err %d", ret);
1069 		goto failed;
1070 	}
1071 
1072 	qdev->regmap = devm_regmap_init_mmio(&pdev->dev, regs,
1073 					     &qdma_regmap_config);
1074 	if (IS_ERR(qdev->regmap)) {
1075 		ret = PTR_ERR(qdev->regmap);
1076 		qdma_err(qdev, "Regmap init failed, err %d", ret);
1077 		goto failed;
1078 	}
1079 
1080 	ret = qdma_device_verify(qdev);
1081 	if (ret)
1082 		goto failed;
1083 
1084 	ret = qdma_get_hw_info(qdev);
1085 	if (ret)
1086 		goto failed;
1087 
1088 	INIT_LIST_HEAD(&qdev->dma_dev.channels);
1089 
1090 	ret = qdma_device_setup(qdev);
1091 	if (ret)
1092 		goto failed;
1093 
1094 	ret = qdma_intr_init(qdev);
1095 	if (ret) {
1096 		qdma_err(qdev, "Failed to initialize IRQs %d", ret);
1097 		goto failed_disable_engine;
1098 	}
1099 
1100 	dma_cap_set(DMA_SLAVE, qdev->dma_dev.cap_mask);
1101 	dma_cap_set(DMA_PRIVATE, qdev->dma_dev.cap_mask);
1102 
1103 	qdev->dma_dev.dev = &pdev->dev;
1104 	qdev->dma_dev.filter.map = pdata->device_map;
1105 	qdev->dma_dev.filter.mapcnt = qdev->chan_num * 2;
1106 	qdev->dma_dev.filter.fn = qdma_filter_fn;
1107 	qdev->dma_dev.device_alloc_chan_resources = qdma_alloc_queue_resources;
1108 	qdev->dma_dev.device_free_chan_resources = qdma_free_queue_resources;
1109 	qdev->dma_dev.device_prep_slave_sg = qdma_prep_device_sg;
1110 	qdev->dma_dev.device_config = qdma_device_config;
1111 	qdev->dma_dev.device_issue_pending = qdma_issue_pending;
1112 	qdev->dma_dev.device_tx_status = dma_cookie_status;
1113 	qdev->dma_dev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
1114 
1115 	ret = dma_async_device_register(&qdev->dma_dev);
1116 	if (ret) {
1117 		qdma_err(qdev, "Failed to register AMD QDMA: %d", ret);
1118 		goto failed_disable_engine;
1119 	}
1120 
1121 	return 0;
1122 
1123 failed_disable_engine:
1124 	qdma_sgdma_control(qdev, 0);
1125 failed:
1126 	mutex_destroy(&qdev->ctxt_lock);
1127 	qdma_err(qdev, "Failed to probe AMD QDMA driver");
1128 	return ret;
1129 }
1130 
1131 static struct platform_driver amd_qdma_driver = {
1132 	.driver		= {
1133 		.name = "amd-qdma",
1134 	},
1135 	.probe		= amd_qdma_probe,
1136 	.remove		= amd_qdma_remove,
1137 };
1138 
1139 module_platform_driver(amd_qdma_driver);
1140 
1141 MODULE_DESCRIPTION("AMD QDMA driver");
1142 MODULE_AUTHOR("XRT Team <runtimeca39d@amd.com>");
1143 MODULE_LICENSE("GPL");
1144