xref: /linux/drivers/dma/amd/ptdma/ptdma-dmaengine.c (revision aa0fdccda4076c81d07a0c0b05602ee2aa17a2be)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * AMD Passthrough DMA device driver
4  * -- Based on the CCP driver
5  *
6  * Copyright (C) 2016,2021 Advanced Micro Devices, Inc.
7  *
8  * Author: Sanjay R Mehta <sanju.mehta@amd.com>
9  * Author: Gary R Hook <gary.hook@amd.com>
10  */
11 
12 #include <linux/bitfield.h>
13 #include "ptdma.h"
14 #include "../ae4dma/ae4dma.h"
15 #include "../../dmaengine.h"
16 
/*
 * Human-readable strings for AE4DMA hardware error codes 1-7.
 * Index 0 is intentionally empty (0 means "no error"); codes above 7
 * are decoded by range in ae4_log_error() below.
 */
static char *ae4_error_codes[] = {
	"",
	"ERR 01: INVALID HEADER DW0",
	"ERR 02: INVALID STATUS",
	"ERR 03: INVALID LENGTH - 4 BYTE ALIGNMENT",
	"ERR 04: INVALID SRC ADDR - 4 BYTE ALIGNMENT",
	"ERR 05: INVALID DST ADDR - 4 BYTE ALIGNMENT",
	"ERR 06: INVALID ALIGNMENT",
	"ERR 07: INVALID DESCRIPTOR",
};
27 
28 static void ae4_log_error(struct pt_device *d, int e)
29 {
30 	/* ERR 01 - 07 represents Invalid AE4 errors */
31 	if (e <= 7)
32 		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", ae4_error_codes[e], e);
33 	/* ERR 08 - 15 represents Invalid Descriptor errors */
34 	else if (e > 7 && e <= 15)
35 		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e);
36 	/* ERR 16 - 31 represents Firmware errors */
37 	else if (e > 15 && e <= 31)
38 		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "FIRMWARE ERROR", e);
39 	/* ERR 32 - 63 represents Fatal errors */
40 	else if (e > 31 && e <= 63)
41 		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "FATAL ERROR", e);
42 	/* ERR 64 - 255 represents PTE errors */
43 	else if (e > 63 && e <= 255)
44 		dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE ERROR", e);
45 	else
46 		dev_info(d->dev, "Unknown AE4DMA error");
47 }
48 
49 void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q, int idx)
50 {
51 	struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;
52 	struct ae4dma_desc desc;
53 	u8 status;
54 
55 	memcpy(&desc, &cmd_q->qbase[idx], sizeof(struct ae4dma_desc));
56 	status = desc.dw1.status;
57 	if (status && status != AE4_DESC_COMPLETED) {
58 		cmd_q->cmd_error = desc.dw1.err_code;
59 		if (cmd_q->cmd_error)
60 			ae4_log_error(cmd_q->pt, cmd_q->cmd_error);
61 	}
62 }
63 EXPORT_SYMBOL_GPL(ae4_check_status_error);
64 
/* Convert a generic dmaengine channel to its containing pt_dma_chan. */
static inline struct pt_dma_chan *to_pt_chan(struct dma_chan *dma_chan)
{
	return container_of(dma_chan, struct pt_dma_chan, vc.chan);
}
69 
/* Convert a virt-dma descriptor to its containing pt_dma_desc. */
static inline struct pt_dma_desc *to_pt_desc(struct virt_dma_desc *vd)
{
	return container_of(vd, struct pt_dma_desc, vd);
}
74 
/* dmaengine .device_free_chan_resources: release all virt-dma descriptors. */
static void pt_free_chan_resources(struct dma_chan *dma_chan)
{
	struct pt_dma_chan *chan = to_pt_chan(dma_chan);

	vchan_free_chan_resources(&chan->vc);
}
81 
/* dmaengine .device_synchronize: wait for in-flight callbacks to finish. */
static void pt_synchronize(struct dma_chan *dma_chan)
{
	struct pt_dma_chan *chan = to_pt_chan(dma_chan);

	vchan_synchronize(&chan->vc);
}
88 
89 static void pt_do_cleanup(struct virt_dma_desc *vd)
90 {
91 	struct pt_dma_desc *desc = to_pt_desc(vd);
92 	struct pt_device *pt = desc->pt;
93 
94 	kmem_cache_free(pt->dma_desc_cache, desc);
95 }
96 
97 static struct pt_cmd_queue *pt_get_cmd_queue(struct pt_device *pt, struct pt_dma_chan *chan)
98 {
99 	struct ae4_cmd_queue *ae4cmd_q;
100 	struct pt_cmd_queue *cmd_q;
101 	struct ae4_device *ae4;
102 
103 	if (pt->ver == AE4_DMA_VERSION) {
104 		ae4 = container_of(pt, struct ae4_device, pt);
105 		ae4cmd_q = &ae4->ae4cmd_q[chan->id];
106 		cmd_q = &ae4cmd_q->cmd_q;
107 	} else {
108 		cmd_q = &pt->cmd_q;
109 	}
110 
111 	return cmd_q;
112 }
113 
/*
 * ae4_core_execute_cmd - place one descriptor on the AE4 ring and notify HW
 * @desc: fully-populated descriptor (copied into the ring)
 * @ae4cmd_q: destination command queue
 *
 * Returns 0; submission itself cannot fail at this level.
 */
static int ae4_core_execute_cmd(struct ae4dma_desc *desc, struct ae4_cmd_queue *ae4cmd_q)
{
	bool soc = FIELD_GET(DWORD0_SOC, desc->dwouv.dw0);
	struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q;

	/*
	 * Convert a stop-on-completion request into interrupt-on-completion.
	 * NOTE(review): FIELD_PREP(DWORD0_IOC, desc->dwouv.dw0) passes the
	 * whole dw0 word as the field value instead of 1 — looks suspicious;
	 * confirm against the AE4 descriptor spec before changing.
	 */
	if (soc) {
		desc->dwouv.dw0 |= FIELD_PREP(DWORD0_IOC, desc->dwouv.dw0);
		desc->dwouv.dw0 &= ~DWORD0_SOC;
	}

	/* Serialize ring writes; tail_wi is the software write index */
	mutex_lock(&ae4cmd_q->cmd_lock);
	memcpy(&cmd_q->qbase[ae4cmd_q->tail_wi], desc, sizeof(struct ae4dma_desc));
	ae4cmd_q->q_cmd_count++;
	ae4cmd_q->tail_wi = (ae4cmd_q->tail_wi + 1) % CMD_Q_LEN;
	/* Publish the new write index to the hardware doorbell register */
	writel(ae4cmd_q->tail_wi, cmd_q->reg_control + AE4_WR_IDX_OFF);
	mutex_unlock(&ae4cmd_q->cmd_lock);

	/* Wake anyone waiting on this queue for newly submitted work */
	wake_up(&ae4cmd_q->q_w);

	return 0;
}
135 
/*
 * pt_core_perform_passthru_ae4 - build and submit an AE4 passthrough
 * (memcpy) descriptor for the given engine parameters.
 *
 * Returns the result of ae4_core_execute_cmd() (currently always 0).
 */
static int pt_core_perform_passthru_ae4(struct pt_cmd_queue *cmd_q,
					struct pt_passthru_engine *pt_engine)
{
	struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct ae4_cmd_queue, cmd_q);
	struct ae4dma_desc desc;

	cmd_q->cmd_error = 0;
	cmd_q->total_pt_ops++;
	memset(&desc, 0, sizeof(desc));
	desc.dwouv.dws.byte0 = CMD_AE4_DESC_DW0_VAL;

	desc.dw1.status = 0;
	desc.dw1.err_code = 0;
	desc.dw1.desc_id = 0;

	desc.length = pt_engine->src_len;

	/*
	 * NOTE(review): *_lo is assigned the upper 32 bits and *_hi the
	 * lower 32 bits — apparently matching the hardware descriptor
	 * layout despite the field names; confirm against the AE4 spec.
	 */
	desc.src_lo = upper_32_bits(pt_engine->src_dma);
	desc.src_hi = lower_32_bits(pt_engine->src_dma);
	desc.dst_lo = upper_32_bits(pt_engine->dst_dma);
	desc.dst_hi = lower_32_bits(pt_engine->dst_dma);

	return ae4_core_execute_cmd(&desc, ae4cmd_q);
}
160 
161 static int pt_dma_start_desc(struct pt_dma_desc *desc, struct pt_dma_chan *chan)
162 {
163 	struct pt_passthru_engine *pt_engine;
164 	struct pt_device *pt;
165 	struct pt_cmd *pt_cmd;
166 	struct pt_cmd_queue *cmd_q;
167 
168 	desc->issued_to_hw = 1;
169 
170 	pt_cmd = &desc->pt_cmd;
171 	pt = pt_cmd->pt;
172 
173 	cmd_q = pt_get_cmd_queue(pt, chan);
174 
175 	pt_engine = &pt_cmd->passthru;
176 
177 	pt->tdata.cmd = pt_cmd;
178 
179 	/* Execute the command */
180 	if (pt->ver == AE4_DMA_VERSION)
181 		pt_cmd->ret = pt_core_perform_passthru_ae4(cmd_q, pt_engine);
182 	else
183 		pt_cmd->ret = pt_core_perform_passthru(cmd_q, pt_engine);
184 
185 	return 0;
186 }
187 
/* Return the first descriptor on the channel's issued list, or NULL. */
static struct pt_dma_desc *pt_next_dma_desc(struct pt_dma_chan *chan)
{
	/* Get the next DMA descriptor on the active list */
	struct virt_dma_desc *vd = vchan_next_desc(&chan->vc);

	return vd ? to_pt_desc(vd) : NULL;
}
195 
/*
 * pt_handle_active_desc - retire completed descriptors and find the next
 * one to submit.
 * @chan: channel to process
 * @desc: descriptor the hardware just finished (may be NULL)
 *
 * Completes @desc (cookie, unmap, list removal) under the vc lock, then
 * invokes its callback and frees it with the lock dropped, repeating for
 * each subsequent completed descriptor. Returns the first descriptor not
 * yet issued to hardware, or NULL when the list is drained.
 */
static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan,
						 struct pt_dma_desc *desc)
{
	struct dma_async_tx_descriptor *tx_desc;
	struct virt_dma_desc *vd;
	unsigned long flags;

	/* Loop over descriptors until one is found with commands */
	do {
		if (desc) {
			if (!desc->issued_to_hw) {
				/* No errors, keep going */
				if (desc->status != DMA_ERROR)
					return desc;
			}

			tx_desc = &desc->vd.tx;
			vd = &desc->vd;
		} else {
			tx_desc = NULL;
		}

		spin_lock_irqsave(&chan->vc.lock, flags);

		if (desc) {
			/* DMA_COMPLETE means another path already retired it */
			if (desc->status != DMA_COMPLETE) {
				/* Preserve DMA_ERROR, otherwise mark complete */
				if (desc->status != DMA_ERROR)
					desc->status = DMA_COMPLETE;

				dma_cookie_complete(tx_desc);
				dma_descriptor_unmap(tx_desc);
				list_del(&desc->vd.node);
			} else {
				/* Don't handle it twice */
				tx_desc = NULL;
			}
		}

		/* Fetch the next queued descriptor while still locked */
		desc = pt_next_dma_desc(chan);

		spin_unlock_irqrestore(&chan->vc.lock, flags);

		/* Callback and teardown must run without the vc lock held */
		if (tx_desc) {
			dmaengine_desc_get_callback_invoke(tx_desc, NULL);
			dma_run_dependencies(tx_desc);
			vchan_vdesc_fini(vd);
		}
	} while (desc);

	return NULL;
}
247 
/*
 * pt_cmd_callback - completion handler for a submitted passthrough command
 * @data: the pt_dma_desc that completed (set up in pt_create_desc())
 * @err: 0 on success, -EINPROGRESS while still running, other negative
 *       value on hardware error
 *
 * Retires the finished descriptor and keeps submitting subsequent
 * descriptors until the queue is empty or a submission succeeds.
 */
static void pt_cmd_callback(void *data, int err)
{
	struct pt_dma_desc *desc = data;
	struct dma_chan *dma_chan;
	struct pt_dma_chan *chan;
	int ret;

	/* Nothing to do until the command actually finishes */
	if (err == -EINPROGRESS)
		return;

	dma_chan = desc->vd.tx.chan;
	chan = to_pt_chan(dma_chan);

	if (err)
		desc->status = DMA_ERROR;

	while (true) {
		/* Check for DMA descriptor completion */
		desc = pt_handle_active_desc(chan, desc);

		/* Don't submit cmd if no descriptor or DMA is paused */
		if (!desc)
			break;

		ret = pt_dma_start_desc(desc, chan);
		if (!ret)
			break;

		/* Submission failed: flag the descriptor and retire it next pass */
		desc->status = DMA_ERROR;
	}
}
279 
/*
 * pt_alloc_dma_desc - allocate and prepare a descriptor for @chan
 * @chan: owning channel
 * @flags: dmaengine prep flags (DMA_PREP_INTERRUPT enables the IRQ)
 *
 * Returns a cache-allocated, vchan-prepped descriptor, or NULL on
 * allocation failure (GFP_NOWAIT — may be called from atomic context).
 */
static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan,
					     unsigned long flags)
{
	struct pt_dma_desc *desc;

	desc = kmem_cache_zalloc(chan->pt->dma_desc_cache, GFP_NOWAIT);
	if (!desc)
		return NULL;

	vchan_tx_prep(&chan->vc, &desc->vd, flags);

	desc->pt = chan->pt;
	/*
	 * NOTE(review): this always writes the device's base cmd_q even on
	 * AE4 devices with per-channel queues — verify that is intended.
	 */
	desc->pt->cmd_q.int_en = !!(flags & DMA_PREP_INTERRUPT);
	desc->issued_to_hw = 0;
	desc->status = DMA_IN_PROGRESS;

	return desc;
}
298 
/*
 * pt_create_desc - build a memcpy descriptor and its passthrough command
 * @dma_chan: target channel
 * @dst: destination DMA address
 * @src: source DMA address
 * @len: transfer length in bytes
 * @flags: dmaengine prep flags
 *
 * On AE4 devices the command is also linked onto the per-queue command
 * list for later completion processing. Returns NULL on allocation
 * failure.
 */
static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan,
					  dma_addr_t dst,
					  dma_addr_t src,
					  unsigned int len,
					  unsigned long flags)
{
	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
	struct pt_passthru_engine *pt_engine;
	struct pt_device *pt = chan->pt;
	struct ae4_cmd_queue *ae4cmd_q;
	struct pt_dma_desc *desc;
	struct ae4_device *ae4;
	struct pt_cmd *pt_cmd;

	desc = pt_alloc_dma_desc(chan, flags);
	if (!desc)
		return NULL;

	/* Fill in the embedded passthrough command */
	pt_cmd = &desc->pt_cmd;
	pt_cmd->pt = pt;
	pt_engine = &pt_cmd->passthru;
	pt_cmd->engine = PT_ENGINE_PASSTHRU;
	pt_engine->src_dma = src;
	pt_engine->dst_dma = dst;
	pt_engine->src_len = len;
	pt_cmd->pt_cmd_callback = pt_cmd_callback;
	pt_cmd->data = desc;

	desc->len = len;

	/* AE4: track the command on the per-queue list under the queue lock */
	if (pt->ver == AE4_DMA_VERSION) {
		ae4 = container_of(pt, struct ae4_device, pt);
		ae4cmd_q = &ae4->ae4cmd_q[chan->id];
		mutex_lock(&ae4cmd_q->cmd_lock);
		list_add_tail(&pt_cmd->entry, &ae4cmd_q->cmd);
		mutex_unlock(&ae4cmd_q->cmd_lock);
	}

	return desc;
}
339 
340 static struct dma_async_tx_descriptor *
341 pt_prep_dma_memcpy(struct dma_chan *dma_chan, dma_addr_t dst,
342 		   dma_addr_t src, size_t len, unsigned long flags)
343 {
344 	struct pt_dma_desc *desc;
345 
346 	desc = pt_create_desc(dma_chan, dst, src, len, flags);
347 	if (!desc)
348 		return NULL;
349 
350 	return &desc->vd.tx;
351 }
352 
353 static struct dma_async_tx_descriptor *
354 pt_prep_dma_interrupt(struct dma_chan *dma_chan, unsigned long flags)
355 {
356 	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
357 	struct pt_dma_desc *desc;
358 
359 	desc = pt_alloc_dma_desc(chan, flags);
360 	if (!desc)
361 		return NULL;
362 
363 	return &desc->vd.tx;
364 }
365 
/*
 * dmaengine .device_issue_pending: move submitted descriptors to the
 * issued list and kick the hardware if it was idle.
 *
 * The idle check must happen before vchan_issue_pending(): if something
 * was already on the issued list, the in-flight completion path will
 * pick up the new work, and calling pt_cmd_callback() here as well
 * would double-submit.
 */
static void pt_issue_pending(struct dma_chan *dma_chan)
{
	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
	struct pt_dma_desc *desc;
	unsigned long flags;
	bool engine_is_idle = true;

	spin_lock_irqsave(&chan->vc.lock, flags);

	/* Anything already issued means the engine is busy */
	desc = pt_next_dma_desc(chan);
	if (desc)
		engine_is_idle = false;

	vchan_issue_pending(&chan->vc);

	desc = pt_next_dma_desc(chan);

	spin_unlock_irqrestore(&chan->vc.lock, flags);

	/* If there was nothing active, start processing */
	if (engine_is_idle && desc)
		pt_cmd_callback(desc, 0);
}
389 
390 static void pt_check_status_trans_ae4(struct pt_device *pt, struct pt_cmd_queue *cmd_q)
391 {
392 	struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct ae4_cmd_queue, cmd_q);
393 	int i;
394 
395 	for (i = 0; i < CMD_Q_LEN; i++)
396 		ae4_check_status_error(ae4cmd_q, i);
397 }
398 
399 static enum dma_status
400 pt_tx_status(struct dma_chan *c, dma_cookie_t cookie,
401 		struct dma_tx_state *txstate)
402 {
403 	struct pt_dma_chan *chan = to_pt_chan(c);
404 	struct pt_device *pt = chan->pt;
405 	struct pt_cmd_queue *cmd_q;
406 
407 	cmd_q = pt_get_cmd_queue(pt, chan);
408 
409 	if (pt->ver == AE4_DMA_VERSION)
410 		pt_check_status_trans_ae4(pt, cmd_q);
411 	else
412 		pt_check_status_trans(pt, cmd_q);
413 
414 	return dma_cookie_status(c, cookie, txstate);
415 }
416 
/*
 * dmaengine .device_pause: stop the hardware queue backing this channel.
 * Always succeeds.
 */
static int pt_pause(struct dma_chan *dma_chan)
{
	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
	struct pt_device *pt = chan->pt;
	struct pt_cmd_queue *cmd_q;
	unsigned long flags;

	/* Lock out the completion path while stopping the queue */
	spin_lock_irqsave(&chan->vc.lock, flags);
	cmd_q = pt_get_cmd_queue(pt, chan);
	pt_stop_queue(cmd_q);
	spin_unlock_irqrestore(&chan->vc.lock, flags);

	return 0;
}
431 
/*
 * dmaengine .device_resume: restart the hardware queue and, if work was
 * pending, re-kick submission. Always succeeds.
 */
static int pt_resume(struct dma_chan *dma_chan)
{
	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
	struct pt_dma_desc *desc = NULL;
	struct pt_device *pt = chan->pt;
	struct pt_cmd_queue *cmd_q;
	unsigned long flags;

	spin_lock_irqsave(&chan->vc.lock, flags);
	cmd_q = pt_get_cmd_queue(pt, chan);
	pt_start_queue(cmd_q);
	/* Peek for pending work while still holding the lock */
	desc = pt_next_dma_desc(chan);
	spin_unlock_irqrestore(&chan->vc.lock, flags);

	/* If there was something active, re-start */
	if (desc)
		pt_cmd_callback(desc, 0);

	return 0;
}
452 
/*
 * dmaengine .device_terminate_all: stop the queue and free every
 * outstanding descriptor on the channel. Always succeeds.
 */
static int pt_terminate_all(struct dma_chan *dma_chan)
{
	struct pt_dma_chan *chan = to_pt_chan(dma_chan);
	struct pt_device *pt = chan->pt;
	struct pt_cmd_queue *cmd_q;
	unsigned long flags;
	LIST_HEAD(head);

	cmd_q = pt_get_cmd_queue(pt, chan);
	if (pt->ver == AE4_DMA_VERSION)
		pt_stop_queue(cmd_q);
	else
		/*
		 * NOTE(review): 0x0010 appears to be the interrupt status
		 * register offset (write-1-to-clear of SUPPORTED_INTERRUPTS)
		 * — confirm against ptdma.h and replace with a named macro.
		 */
		iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010);

	/* Detach all descriptors under the lock, free them outside it */
	spin_lock_irqsave(&chan->vc.lock, flags);
	vchan_get_all_descriptors(&chan->vc, &head);
	spin_unlock_irqrestore(&chan->vc.lock, flags);

	vchan_dma_desc_free_list(&chan->vc, &head);
	vchan_free_chan_resources(&chan->vc);

	return 0;
}
476 
/*
 * pt_dmaengine_register - register the device with the dmaengine core
 * @pt: device to register (AE4 devices expose one channel per command
 *      queue; legacy PTDMA exposes a single channel)
 *
 * Allocates the channel array and descriptor cache, wires up the
 * dma_device callbacks, initializes the virt-dma channels and registers
 * the dma_device. Returns 0 on success or a negative errno.
 */
int pt_dmaengine_register(struct pt_device *pt)
{
	struct dma_device *dma_dev = &pt->dma_dev;
	struct ae4_cmd_queue *ae4cmd_q = NULL;
	struct ae4_device *ae4 = NULL;
	struct pt_dma_chan *chan;
	char *desc_cache_name;
	char *cmd_cache_name;
	int ret, i;

	if (pt->ver == AE4_DMA_VERSION)
		ae4 = container_of(pt, struct ae4_device, pt);

	/* One channel per AE4 command queue, or a single legacy channel */
	if (ae4)
		pt->pt_dma_chan = devm_kcalloc(pt->dev, ae4->cmd_q_count,
					       sizeof(*pt->pt_dma_chan), GFP_KERNEL);
	else
		pt->pt_dma_chan = devm_kzalloc(pt->dev, sizeof(*pt->pt_dma_chan),
					       GFP_KERNEL);

	if (!pt->pt_dma_chan)
		return -ENOMEM;

	/*
	 * NOTE(review): cmd_cache_name is generated but no matching
	 * kmem_cache_create() for a command cache appears in this file —
	 * possibly leftover from an earlier version; verify.
	 */
	cmd_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL,
					"%s-dmaengine-cmd-cache",
					dev_name(pt->dev));
	if (!cmd_cache_name)
		return -ENOMEM;

	desc_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL,
					 "%s-dmaengine-desc-cache",
					 dev_name(pt->dev));
	if (!desc_cache_name) {
		ret = -ENOMEM;
		goto err_cache;
	}

	pt->dma_desc_cache = kmem_cache_create(desc_cache_name,
					       sizeof(struct pt_dma_desc), 0,
					       SLAB_HWCACHE_ALIGN, NULL);
	if (!pt->dma_desc_cache) {
		ret = -ENOMEM;
		goto err_cache;
	}

	dma_dev->dev = pt->dev;
	dma_dev->src_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES;
	dma_dev->dst_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES;
	dma_dev->directions = DMA_MEM_TO_MEM;
	dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR;
	dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask);
	dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask);

	/*
	 * PTDMA is intended to be used with the AMD NTB devices, hence
	 * marking it as DMA_PRIVATE.
	 */
	dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask);

	INIT_LIST_HEAD(&dma_dev->channels);

	/* Set base and prep routines */
	dma_dev->device_free_chan_resources = pt_free_chan_resources;
	dma_dev->device_prep_dma_memcpy = pt_prep_dma_memcpy;
	dma_dev->device_prep_dma_interrupt = pt_prep_dma_interrupt;
	dma_dev->device_issue_pending = pt_issue_pending;
	dma_dev->device_tx_status = pt_tx_status;
	dma_dev->device_pause = pt_pause;
	dma_dev->device_resume = pt_resume;
	dma_dev->device_terminate_all = pt_terminate_all;
	dma_dev->device_synchronize = pt_synchronize;

	/* Initialize one virt-dma channel per hardware queue */
	if (ae4) {
		for (i = 0; i < ae4->cmd_q_count; i++) {
			chan = pt->pt_dma_chan + i;
			ae4cmd_q = &ae4->ae4cmd_q[i];
			chan->id = ae4cmd_q->id;
			chan->pt = pt;
			chan->vc.desc_free = pt_do_cleanup;
			vchan_init(&chan->vc, dma_dev);
		}
	} else {
		chan = pt->pt_dma_chan;
		chan->pt = pt;
		chan->vc.desc_free = pt_do_cleanup;
		vchan_init(&chan->vc, dma_dev);
	}

	ret = dma_async_device_register(dma_dev);
	if (ret)
		goto err_reg;

	return 0;

err_reg:
	kmem_cache_destroy(pt->dma_desc_cache);

err_cache:
	/*
	 * NOTE(review): dma_cmd_cache is never created in this function;
	 * kmem_cache_destroy(NULL) is a no-op so this is harmless, but it
	 * suggests dead error-path code — verify against the full driver.
	 */
	kmem_cache_destroy(pt->dma_cmd_cache);

	return ret;
}
EXPORT_SYMBOL_GPL(pt_dmaengine_register);
580 
581 void pt_dmaengine_unregister(struct pt_device *pt)
582 {
583 	struct dma_device *dma_dev = &pt->dma_dev;
584 
585 	dma_async_device_unregister(dma_dev);
586 
587 	kmem_cache_destroy(pt->dma_desc_cache);
588 	kmem_cache_destroy(pt->dma_cmd_cache);
589 }
590