1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * AMD Passthrough DMA device driver 4 * -- Based on the CCP driver 5 * 6 * Copyright (C) 2016,2021 Advanced Micro Devices, Inc. 7 * 8 * Author: Sanjay R Mehta <sanju.mehta@amd.com> 9 * Author: Gary R Hook <gary.hook@amd.com> 10 */ 11 12 #include <linux/bitfield.h> 13 #include "ptdma.h" 14 #include "../ae4dma/ae4dma.h" 15 #include "../../dmaengine.h" 16 17 static char *ae4_error_codes[] = { 18 "", 19 "ERR 01: INVALID HEADER DW0", 20 "ERR 02: INVALID STATUS", 21 "ERR 03: INVALID LENGTH - 4 BYTE ALIGNMENT", 22 "ERR 04: INVALID SRC ADDR - 4 BYTE ALIGNMENT", 23 "ERR 05: INVALID DST ADDR - 4 BYTE ALIGNMENT", 24 "ERR 06: INVALID ALIGNMENT", 25 "ERR 07: INVALID DESCRIPTOR", 26 }; 27 28 static void ae4_log_error(struct pt_device *d, int e) 29 { 30 /* ERR 01 - 07 represents Invalid AE4 errors */ 31 if (e <= 7) 32 dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", ae4_error_codes[e], e); 33 /* ERR 08 - 15 represents Invalid Descriptor errors */ 34 else if (e > 7 && e <= 15) 35 dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "INVALID DESCRIPTOR", e); 36 /* ERR 16 - 31 represents Firmware errors */ 37 else if (e > 15 && e <= 31) 38 dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "FIRMWARE ERROR", e); 39 /* ERR 32 - 63 represents Fatal errors */ 40 else if (e > 31 && e <= 63) 41 dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "FATAL ERROR", e); 42 /* ERR 64 - 255 represents PTE errors */ 43 else if (e > 63 && e <= 255) 44 dev_info(d->dev, "AE4DMA error: %s (0x%x)\n", "PTE ERROR", e); 45 else 46 dev_info(d->dev, "Unknown AE4DMA error"); 47 } 48 49 void ae4_check_status_error(struct ae4_cmd_queue *ae4cmd_q, int idx) 50 { 51 struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q; 52 struct ae4dma_desc desc; 53 u8 status; 54 55 memcpy(&desc, &cmd_q->qbase[idx], sizeof(struct ae4dma_desc)); 56 status = desc.dw1.status; 57 if (status && status != AE4_DESC_COMPLETED) { 58 cmd_q->cmd_error = desc.dw1.err_code; 59 if (cmd_q->cmd_error) 60 ae4_log_error(cmd_q->pt, cmd_q->cmd_error); 61 } 62 } 63 EXPORT_SYMBOL_GPL(ae4_check_status_error); 64 65 static inline struct pt_dma_chan *to_pt_chan(struct dma_chan *dma_chan) 66 { 67 return container_of(dma_chan, struct pt_dma_chan, vc.chan); 68 } 69 70 static inline struct pt_dma_desc *to_pt_desc(struct virt_dma_desc *vd) 71 { 72 return container_of(vd, struct pt_dma_desc, vd); 73 } 74 75 static void pt_free_chan_resources(struct dma_chan *dma_chan) 76 { 77 struct pt_dma_chan *chan = to_pt_chan(dma_chan); 78 79 vchan_free_chan_resources(&chan->vc); 80 } 81 82 static void pt_synchronize(struct dma_chan *dma_chan) 83 { 84 struct pt_dma_chan *chan = to_pt_chan(dma_chan); 85 86 vchan_synchronize(&chan->vc); 87 } 88 89 static void pt_do_cleanup(struct virt_dma_desc *vd) 90 { 91 struct pt_dma_desc *desc = to_pt_desc(vd); 92 struct pt_device *pt = desc->pt; 93 94 kmem_cache_free(pt->dma_desc_cache, desc); 95 } 96 97 static struct pt_cmd_queue *pt_get_cmd_queue(struct pt_device *pt, struct pt_dma_chan *chan) 98 { 99 struct ae4_cmd_queue *ae4cmd_q; 100 struct pt_cmd_queue *cmd_q; 101 struct ae4_device *ae4; 102 103 if (pt->ver == AE4_DMA_VERSION) { 104 ae4 = container_of(pt, struct ae4_device, pt); 105 ae4cmd_q = &ae4->ae4cmd_q[chan->id]; 106 cmd_q = &ae4cmd_q->cmd_q; 107 } else { 108 cmd_q = &pt->cmd_q; 109 } 110 111 return cmd_q; 112 } 113 114 static int ae4_core_execute_cmd(struct ae4dma_desc *desc, struct ae4_cmd_queue *ae4cmd_q) 115 { 116 bool soc = FIELD_GET(DWORD0_SOC, desc->dwouv.dw0); 117 struct pt_cmd_queue *cmd_q = &ae4cmd_q->cmd_q; 118 119 if (soc) { 120 desc->dwouv.dw0 |= FIELD_PREP(DWORD0_IOC, desc->dwouv.dw0); 121 desc->dwouv.dw0 &= ~DWORD0_SOC; 122 } 123 124 mutex_lock(&ae4cmd_q->cmd_lock); 125 memcpy(&cmd_q->qbase[ae4cmd_q->tail_wi], desc, sizeof(struct ae4dma_desc)); 126 ae4cmd_q->q_cmd_count++; 127 ae4cmd_q->tail_wi = (ae4cmd_q->tail_wi + 1) % CMD_Q_LEN; 128 writel(ae4cmd_q->tail_wi, cmd_q->reg_control + AE4_WR_IDX_OFF); 129 mutex_unlock(&ae4cmd_q->cmd_lock); 130 131 wake_up(&ae4cmd_q->q_w); 132 133 return 0; 134 } 135 136 static int pt_core_perform_passthru_ae4(struct pt_cmd_queue *cmd_q, 137 struct pt_passthru_engine *pt_engine) 138 { 139 struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct ae4_cmd_queue, cmd_q); 140 struct ae4dma_desc desc; 141 142 cmd_q->cmd_error = 0; 143 cmd_q->total_pt_ops++; 144 memset(&desc, 0, sizeof(desc)); 145 desc.dwouv.dws.byte0 = CMD_AE4_DESC_DW0_VAL; 146 147 desc.dw1.status = 0; 148 desc.dw1.err_code = 0; 149 desc.dw1.desc_id = 0; 150 151 desc.length = pt_engine->src_len; 152 153 desc.src_lo = upper_32_bits(pt_engine->src_dma); 154 desc.src_hi = lower_32_bits(pt_engine->src_dma); 155 desc.dst_lo = upper_32_bits(pt_engine->dst_dma); 156 desc.dst_hi = lower_32_bits(pt_engine->dst_dma); 157 158 return ae4_core_execute_cmd(&desc, ae4cmd_q); 159 } 160 161 static int pt_dma_start_desc(struct pt_dma_desc *desc, struct pt_dma_chan *chan) 162 { 163 struct pt_passthru_engine *pt_engine; 164 struct pt_device *pt; 165 struct pt_cmd *pt_cmd; 166 struct pt_cmd_queue *cmd_q; 167 168 desc->issued_to_hw = 1; 169 170 pt_cmd = &desc->pt_cmd; 171 pt = pt_cmd->pt; 172 173 cmd_q = pt_get_cmd_queue(pt, chan); 174 175 pt_engine = &pt_cmd->passthru; 176 177 pt->tdata.cmd = pt_cmd; 178 179 /* Execute the command */ 180 if (pt->ver == AE4_DMA_VERSION) 181 pt_cmd->ret = pt_core_perform_passthru_ae4(cmd_q, pt_engine); 182 else 183 pt_cmd->ret = pt_core_perform_passthru(cmd_q, pt_engine); 184 185 return 0; 186 } 187 188 static struct pt_dma_desc *pt_next_dma_desc(struct pt_dma_chan *chan) 189 { 190 /* Get the next DMA descriptor on the active list */ 191 struct virt_dma_desc *vd = vchan_next_desc(&chan->vc); 192 193 return vd ? to_pt_desc(vd) : NULL; 194 } 195 196 static struct pt_dma_desc *pt_handle_active_desc(struct pt_dma_chan *chan, 197 struct pt_dma_desc *desc) 198 { 199 struct dma_async_tx_descriptor *tx_desc; 200 struct virt_dma_desc *vd; 201 unsigned long flags; 202 203 /* Loop over descriptors until one is found with commands */ 204 do { 205 if (desc) { 206 if (!desc->issued_to_hw) { 207 /* No errors, keep going */ 208 if (desc->status != DMA_ERROR) 209 return desc; 210 } 211 212 tx_desc = &desc->vd.tx; 213 vd = &desc->vd; 214 } else { 215 tx_desc = NULL; 216 } 217 218 spin_lock_irqsave(&chan->vc.lock, flags); 219 220 if (desc) { 221 if (desc->status != DMA_COMPLETE) { 222 if (desc->status != DMA_ERROR) 223 desc->status = DMA_COMPLETE; 224 225 dma_cookie_complete(tx_desc); 226 dma_descriptor_unmap(tx_desc); 227 list_del(&desc->vd.node); 228 } else { 229 /* Don't handle it twice */ 230 tx_desc = NULL; 231 } 232 } 233 234 desc = pt_next_dma_desc(chan); 235 236 spin_unlock_irqrestore(&chan->vc.lock, flags); 237 238 if (tx_desc) { 239 dmaengine_desc_get_callback_invoke(tx_desc, NULL); 240 dma_run_dependencies(tx_desc); 241 vchan_vdesc_fini(vd); 242 } 243 } while (desc); 244 245 return NULL; 246 } 247 248 static void pt_cmd_callback(void *data, int err) 249 { 250 struct pt_dma_desc *desc = data; 251 struct dma_chan *dma_chan; 252 struct pt_dma_chan *chan; 253 int ret; 254 255 if (err == -EINPROGRESS) 256 return; 257 258 dma_chan = desc->vd.tx.chan; 259 chan = to_pt_chan(dma_chan); 260 261 if (err) 262 desc->status = DMA_ERROR; 263 264 while (true) { 265 /* Check for DMA descriptor completion */ 266 desc = pt_handle_active_desc(chan, desc); 267 268 /* Don't submit cmd if no descriptor or DMA is paused */ 269 if (!desc) 270 break; 271 272 ret = pt_dma_start_desc(desc, chan); 273 if (!ret) 274 break; 275 276 desc->status = DMA_ERROR; 277 } 278 } 279 280 static struct pt_dma_desc *pt_alloc_dma_desc(struct pt_dma_chan *chan, 281 unsigned long flags) 282 { 283 struct pt_dma_desc *desc; 284 285 desc = kmem_cache_zalloc(chan->pt->dma_desc_cache, GFP_NOWAIT); 286 if (!desc) 287 return NULL; 288 289 vchan_tx_prep(&chan->vc, &desc->vd, flags); 290 291 desc->pt = chan->pt; 292 desc->pt->cmd_q.int_en = !!(flags & DMA_PREP_INTERRUPT); 293 desc->issued_to_hw = 0; 294 desc->status = DMA_IN_PROGRESS; 295 296 return desc; 297 } 298 299 static struct pt_dma_desc *pt_create_desc(struct dma_chan *dma_chan, 300 dma_addr_t dst, 301 dma_addr_t src, 302 unsigned int len, 303 unsigned long flags) 304 { 305 struct pt_dma_chan *chan = to_pt_chan(dma_chan); 306 struct pt_passthru_engine *pt_engine; 307 struct pt_device *pt = chan->pt; 308 struct ae4_cmd_queue *ae4cmd_q; 309 struct pt_dma_desc *desc; 310 struct ae4_device *ae4; 311 struct pt_cmd *pt_cmd; 312 313 desc = pt_alloc_dma_desc(chan, flags); 314 if (!desc) 315 return NULL; 316 317 pt_cmd = &desc->pt_cmd; 318 pt_cmd->pt = pt; 319 pt_engine = &pt_cmd->passthru; 320 pt_cmd->engine = PT_ENGINE_PASSTHRU; 321 pt_engine->src_dma = src; 322 pt_engine->dst_dma = dst; 323 pt_engine->src_len = len; 324 pt_cmd->pt_cmd_callback = pt_cmd_callback; 325 pt_cmd->data = desc; 326 327 desc->len = len; 328 329 if (pt->ver == AE4_DMA_VERSION) { 330 ae4 = container_of(pt, struct ae4_device, pt); 331 ae4cmd_q = &ae4->ae4cmd_q[chan->id]; 332 mutex_lock(&ae4cmd_q->cmd_lock); 333 list_add_tail(&pt_cmd->entry, &ae4cmd_q->cmd); 334 mutex_unlock(&ae4cmd_q->cmd_lock); 335 } 336 337 return desc; 338 } 339 340 static struct dma_async_tx_descriptor * 341 pt_prep_dma_memcpy(struct dma_chan *dma_chan, dma_addr_t dst, 342 dma_addr_t src, size_t len, unsigned long flags) 343 { 344 struct pt_dma_desc *desc; 345 346 desc = pt_create_desc(dma_chan, dst, src, len, flags); 347 if (!desc) 348 return NULL; 349 350 return &desc->vd.tx; 351 } 352 353 static struct dma_async_tx_descriptor * 354 pt_prep_dma_interrupt(struct dma_chan *dma_chan, unsigned long flags) 355 { 356 struct pt_dma_chan *chan = to_pt_chan(dma_chan); 357 struct pt_dma_desc *desc; 358 359 desc = pt_alloc_dma_desc(chan, flags); 360 if (!desc) 361 return NULL; 362 363 return &desc->vd.tx; 364 } 365 366 static void pt_issue_pending(struct dma_chan *dma_chan) 367 { 368 struct pt_dma_chan *chan = to_pt_chan(dma_chan); 369 struct pt_dma_desc *desc; 370 unsigned long flags; 371 bool engine_is_idle = true; 372 373 spin_lock_irqsave(&chan->vc.lock, flags); 374 375 desc = pt_next_dma_desc(chan); 376 if (desc) 377 engine_is_idle = false; 378 379 vchan_issue_pending(&chan->vc); 380 381 desc = pt_next_dma_desc(chan); 382 383 spin_unlock_irqrestore(&chan->vc.lock, flags); 384 385 /* If there was nothing active, start processing */ 386 if (engine_is_idle && desc) 387 pt_cmd_callback(desc, 0); 388 } 389 390 static void pt_check_status_trans_ae4(struct pt_device *pt, struct pt_cmd_queue *cmd_q) 391 { 392 struct ae4_cmd_queue *ae4cmd_q = container_of(cmd_q, struct ae4_cmd_queue, cmd_q); 393 int i; 394 395 for (i = 0; i < CMD_Q_LEN; i++) 396 ae4_check_status_error(ae4cmd_q, i); 397 } 398 399 static enum dma_status 400 pt_tx_status(struct dma_chan *c, dma_cookie_t cookie, 401 struct dma_tx_state *txstate) 402 { 403 struct pt_dma_chan *chan = to_pt_chan(c); 404 struct pt_device *pt = chan->pt; 405 struct pt_cmd_queue *cmd_q; 406 407 cmd_q = pt_get_cmd_queue(pt, chan); 408 409 if (pt->ver == AE4_DMA_VERSION) 410 pt_check_status_trans_ae4(pt, cmd_q); 411 else 412 pt_check_status_trans(pt, cmd_q); 413 414 return dma_cookie_status(c, cookie, txstate); 415 } 416 417 static int pt_pause(struct dma_chan *dma_chan) 418 { 419 struct pt_dma_chan *chan = to_pt_chan(dma_chan); 420 struct pt_device *pt = chan->pt; 421 struct pt_cmd_queue *cmd_q; 422 unsigned long flags; 423 424 spin_lock_irqsave(&chan->vc.lock, flags); 425 cmd_q = pt_get_cmd_queue(pt, chan); 426 pt_stop_queue(cmd_q); 427 spin_unlock_irqrestore(&chan->vc.lock, flags); 428 429 return 0; 430 } 431 432 static int pt_resume(struct dma_chan *dma_chan) 433 { 434 struct pt_dma_chan *chan = to_pt_chan(dma_chan); 435 struct pt_dma_desc *desc = NULL; 436 struct pt_device *pt = chan->pt; 437 struct pt_cmd_queue *cmd_q; 438 unsigned long flags; 439 440 spin_lock_irqsave(&chan->vc.lock, flags); 441 cmd_q = pt_get_cmd_queue(pt, chan); 442 pt_start_queue(cmd_q); 443 desc = pt_next_dma_desc(chan); 444 spin_unlock_irqrestore(&chan->vc.lock, flags); 445 446 /* If there was something active, re-start */ 447 if (desc) 448 pt_cmd_callback(desc, 0); 449 450 return 0; 451 } 452 453 static int pt_terminate_all(struct dma_chan *dma_chan) 454 { 455 struct pt_dma_chan *chan = to_pt_chan(dma_chan); 456 struct pt_device *pt = chan->pt; 457 struct pt_cmd_queue *cmd_q; 458 unsigned long flags; 459 LIST_HEAD(head); 460 461 cmd_q = pt_get_cmd_queue(pt, chan); 462 if (pt->ver == AE4_DMA_VERSION) 463 pt_stop_queue(cmd_q); 464 else 465 iowrite32(SUPPORTED_INTERRUPTS, cmd_q->reg_control + 0x0010); 466 467 spin_lock_irqsave(&chan->vc.lock, flags); 468 vchan_get_all_descriptors(&chan->vc, &head); 469 spin_unlock_irqrestore(&chan->vc.lock, flags); 470 471 vchan_dma_desc_free_list(&chan->vc, &head); 472 vchan_free_chan_resources(&chan->vc); 473 474 return 0; 475 } 476 477 int pt_dmaengine_register(struct pt_device *pt) 478 { 479 struct dma_device *dma_dev = &pt->dma_dev; 480 struct ae4_cmd_queue *ae4cmd_q = NULL; 481 struct ae4_device *ae4 = NULL; 482 struct pt_dma_chan *chan; 483 char *desc_cache_name; 484 char *cmd_cache_name; 485 int ret, i; 486 487 if (pt->ver == AE4_DMA_VERSION) 488 ae4 = container_of(pt, struct ae4_device, pt); 489 490 if (ae4) 491 pt->pt_dma_chan = devm_kcalloc(pt->dev, ae4->cmd_q_count, 492 sizeof(*pt->pt_dma_chan), GFP_KERNEL); 493 else 494 pt->pt_dma_chan = devm_kzalloc(pt->dev, sizeof(*pt->pt_dma_chan), 495 GFP_KERNEL); 496 497 if (!pt->pt_dma_chan) 498 return -ENOMEM; 499 500 cmd_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL, 501 "%s-dmaengine-cmd-cache", 502 dev_name(pt->dev)); 503 if (!cmd_cache_name) 504 return -ENOMEM; 505 506 desc_cache_name = devm_kasprintf(pt->dev, GFP_KERNEL, 507 "%s-dmaengine-desc-cache", 508 dev_name(pt->dev)); 509 if (!desc_cache_name) { 510 ret = -ENOMEM; 511 goto err_cache; 512 } 513 514 pt->dma_desc_cache = kmem_cache_create(desc_cache_name, 515 sizeof(struct pt_dma_desc), 0, 516 SLAB_HWCACHE_ALIGN, NULL); 517 if (!pt->dma_desc_cache) { 518 ret = -ENOMEM; 519 goto err_cache; 520 } 521 522 dma_dev->dev = pt->dev; 523 dma_dev->src_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES; 524 dma_dev->dst_addr_widths = DMA_SLAVE_BUSWIDTH_64_BYTES; 525 dma_dev->directions = DMA_MEM_TO_MEM; 526 dma_dev->residue_granularity = DMA_RESIDUE_GRANULARITY_DESCRIPTOR; 527 dma_cap_set(DMA_MEMCPY, dma_dev->cap_mask); 528 dma_cap_set(DMA_INTERRUPT, dma_dev->cap_mask); 529 530 /* 531 * PTDMA is intended to be used with the AMD NTB devices, hence 532 * marking it as DMA_PRIVATE. 533 */ 534 dma_cap_set(DMA_PRIVATE, dma_dev->cap_mask); 535 536 INIT_LIST_HEAD(&dma_dev->channels); 537 538 /* Set base and prep routines */ 539 dma_dev->device_free_chan_resources = pt_free_chan_resources; 540 dma_dev->device_prep_dma_memcpy = pt_prep_dma_memcpy; 541 dma_dev->device_prep_dma_interrupt = pt_prep_dma_interrupt; 542 dma_dev->device_issue_pending = pt_issue_pending; 543 dma_dev->device_tx_status = pt_tx_status; 544 dma_dev->device_pause = pt_pause; 545 dma_dev->device_resume = pt_resume; 546 dma_dev->device_terminate_all = pt_terminate_all; 547 dma_dev->device_synchronize = pt_synchronize; 548 549 if (ae4) { 550 for (i = 0; i < ae4->cmd_q_count; i++) { 551 chan = pt->pt_dma_chan + i; 552 ae4cmd_q = &ae4->ae4cmd_q[i]; 553 chan->id = ae4cmd_q->id; 554 chan->pt = pt; 555 chan->vc.desc_free = pt_do_cleanup; 556 vchan_init(&chan->vc, dma_dev); 557 } 558 } else { 559 chan = pt->pt_dma_chan; 560 chan->pt = pt; 561 chan->vc.desc_free = pt_do_cleanup; 562 vchan_init(&chan->vc, dma_dev); 563 } 564 565 ret = dma_async_device_register(dma_dev); 566 if (ret) 567 goto err_reg; 568 569 return 0; 570 571 err_reg: 572 kmem_cache_destroy(pt->dma_desc_cache); 573 574 err_cache: 575 kmem_cache_destroy(pt->dma_cmd_cache); 576 577 return ret; 578 } 579 EXPORT_SYMBOL_GPL(pt_dmaengine_register); 580 581 void pt_dmaengine_unregister(struct pt_device *pt) 582 { 583 struct dma_device *dma_dev = &pt->dma_dev; 584 585 dma_async_device_unregister(dma_dev); 586 587 kmem_cache_destroy(pt->dma_desc_cache); 588 kmem_cache_destroy(pt->dma_cmd_cache); 589 } 590