1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <linux/delay.h> 3 #include <linux/dmaengine.h> 4 #include <linux/dma-mapping.h> 5 #include <linux/platform_device.h> 6 #include <linux/module.h> 7 #include <linux/of.h> 8 #include <linux/slab.h> 9 #include <linux/of_dma.h> 10 #include <linux/of_irq.h> 11 #include <linux/dmapool.h> 12 #include <linux/interrupt.h> 13 #include <linux/of_address.h> 14 #include <linux/pm_runtime.h> 15 #include "../dmaengine.h" 16 17 #define DESC_TYPE 27 18 #define DESC_TYPE_HOST 0x10 19 #define DESC_TYPE_TEARD 0x13 20 21 #define TD_DESC_IS_RX (1 << 16) 22 #define TD_DESC_DMA_NUM 10 23 24 #define DESC_LENGTH_BITS_NUM 21 25 26 #define DESC_TYPE_USB (5 << 26) 27 #define DESC_PD_COMPLETE (1 << 31) 28 29 /* DMA engine */ 30 #define DMA_TDFDQ 4 31 #define DMA_TXGCR(x) (0x800 + (x) * 0x20) 32 #define DMA_RXGCR(x) (0x808 + (x) * 0x20) 33 #define RXHPCRA0 4 34 35 #define GCR_CHAN_ENABLE (1 << 31) 36 #define GCR_TEARDOWN (1 << 30) 37 #define GCR_STARV_RETRY (1 << 24) 38 #define GCR_DESC_TYPE_HOST (1 << 14) 39 40 /* DMA scheduler */ 41 #define DMA_SCHED_CTRL 0 42 #define DMA_SCHED_CTRL_EN (1 << 31) 43 #define DMA_SCHED_WORD(x) ((x) * 4 + 0x800) 44 45 #define SCHED_ENTRY0_CHAN(x) ((x) << 0) 46 #define SCHED_ENTRY0_IS_RX (1 << 7) 47 48 #define SCHED_ENTRY1_CHAN(x) ((x) << 8) 49 #define SCHED_ENTRY1_IS_RX (1 << 15) 50 51 #define SCHED_ENTRY2_CHAN(x) ((x) << 16) 52 #define SCHED_ENTRY2_IS_RX (1 << 23) 53 54 #define SCHED_ENTRY3_CHAN(x) ((x) << 24) 55 #define SCHED_ENTRY3_IS_RX (1 << 31) 56 57 /* Queue manager */ 58 /* 4 KiB of memory for descriptors, 2 for each endpoint */ 59 #define ALLOC_DECS_NUM 128 60 #define DESCS_AREAS 1 61 #define TOTAL_DESCS_NUM (ALLOC_DECS_NUM * DESCS_AREAS) 62 #define QMGR_SCRATCH_SIZE (TOTAL_DESCS_NUM * 4) 63 64 #define QMGR_LRAM0_BASE 0x80 65 #define QMGR_LRAM_SIZE 0x84 66 #define QMGR_LRAM1_BASE 0x88 67 #define QMGR_MEMBASE(x) (0x1000 + (x) * 0x10) 68 #define QMGR_MEMCTRL(x) (0x1004 + (x) * 0x10) 69 #define QMGR_MEMCTRL_IDX_SH 16 70 #define QMGR_MEMCTRL_DESC_SH 8 71 72 #define QMGR_PEND(x) (0x90 + (x) * 4) 73 74 #define QMGR_PENDING_SLOT_Q(x) (x / 32) 75 #define QMGR_PENDING_BIT_Q(x) (x % 32) 76 77 #define QMGR_QUEUE_A(n) (0x2000 + (n) * 0x10) 78 #define QMGR_QUEUE_B(n) (0x2004 + (n) * 0x10) 79 #define QMGR_QUEUE_C(n) (0x2008 + (n) * 0x10) 80 #define QMGR_QUEUE_D(n) (0x200c + (n) * 0x10) 81 82 /* Packet Descriptor */ 83 #define PD2_ZERO_LENGTH (1 << 19) 84 85 struct cppi41_channel { 86 struct dma_chan chan; 87 struct dma_async_tx_descriptor txd; 88 struct cppi41_dd *cdd; 89 struct cppi41_desc *desc; 90 dma_addr_t desc_phys; 91 void __iomem *gcr_reg; 92 int is_tx; 93 u32 residue; 94 95 unsigned int q_num; 96 unsigned int q_comp_num; 97 unsigned int port_num; 98 99 unsigned td_retry; 100 unsigned td_queued:1; 101 unsigned td_seen:1; 102 unsigned td_desc_seen:1; 103 104 struct list_head node; /* Node for pending list */ 105 }; 106 107 struct cppi41_desc { 108 u32 pd0; 109 u32 pd1; 110 u32 pd2; 111 u32 pd3; 112 u32 pd4; 113 u32 pd5; 114 u32 pd6; 115 u32 pd7; 116 } __aligned(32); 117 118 struct chan_queues { 119 u16 submit; 120 u16 complete; 121 }; 122 123 struct cppi41_dd { 124 struct dma_device ddev; 125 126 void *qmgr_scratch; 127 dma_addr_t scratch_phys; 128 129 struct cppi41_desc *cd; 130 dma_addr_t descs_phys; 131 u32 first_td_desc; 132 struct cppi41_channel *chan_busy[ALLOC_DECS_NUM]; 133 134 void __iomem *ctrl_mem; 135 void __iomem *sched_mem; 136 void __iomem *qmgr_mem; 137 unsigned int irq; 138 const struct chan_queues *queues_rx; 139 const struct chan_queues *queues_tx; 140 struct chan_queues td_queue; 141 u16 first_completion_queue; 142 u16 qmgr_num_pend; 143 u32 n_chans; 144 u8 platform; 145 146 struct list_head pending; /* Pending queued transfers */ 147 spinlock_t lock; /* Lock for pending list */ 148 149 /* context for suspend/resume */ 150 unsigned int dma_tdfdq; 151 152 bool is_suspended; 153 }; 154 155 static struct chan_queues am335x_usb_queues_tx[] = { 156 /* USB0 ENDP 1 */ 157 [ 0] = { .submit = 32, .complete = 93}, 158 [ 1] = { .submit = 34, .complete = 94}, 159 [ 2] = { .submit = 36, .complete = 95}, 160 [ 3] = { .submit = 38, .complete = 96}, 161 [ 4] = { .submit = 40, .complete = 97}, 162 [ 5] = { .submit = 42, .complete = 98}, 163 [ 6] = { .submit = 44, .complete = 99}, 164 [ 7] = { .submit = 46, .complete = 100}, 165 [ 8] = { .submit = 48, .complete = 101}, 166 [ 9] = { .submit = 50, .complete = 102}, 167 [10] = { .submit = 52, .complete = 103}, 168 [11] = { .submit = 54, .complete = 104}, 169 [12] = { .submit = 56, .complete = 105}, 170 [13] = { .submit = 58, .complete = 106}, 171 [14] = { .submit = 60, .complete = 107}, 172 173 /* USB1 ENDP1 */ 174 [15] = { .submit = 62, .complete = 125}, 175 [16] = { .submit = 64, .complete = 126}, 176 [17] = { .submit = 66, .complete = 127}, 177 [18] = { .submit = 68, .complete = 128}, 178 [19] = { .submit = 70, .complete = 129}, 179 [20] = { .submit = 72, .complete = 130}, 180 [21] = { .submit = 74, .complete = 131}, 181 [22] = { .submit = 76, .complete = 132}, 182 [23] = { .submit = 78, .complete = 133}, 183 [24] = { .submit = 80, .complete = 134}, 184 [25] = { .submit = 82, .complete = 135}, 185 [26] = { .submit = 84, .complete = 136}, 186 [27] = { .submit = 86, .complete = 137}, 187 [28] = { .submit = 88, .complete = 138}, 188 [29] = { .submit = 90, .complete = 139}, 189 }; 190 191 static const struct chan_queues am335x_usb_queues_rx[] = { 192 /* USB0 ENDP 1 */ 193 [ 0] = { .submit = 1, .complete = 109}, 194 [ 1] = { .submit = 2, .complete = 110}, 195 [ 2] = { .submit = 3, .complete = 111}, 196 [ 3] = { .submit = 4, .complete = 112}, 197 [ 4] = { .submit = 5, .complete = 113}, 198 [ 5] = { .submit = 6, .complete = 114}, 199 [ 6] = { .submit = 7, .complete = 115}, 200 [ 7] = { .submit = 8, .complete = 116}, 201 [ 8] = { .submit = 9, .complete = 117}, 202 [ 9] = { .submit = 10, .complete = 118}, 203 [10] = { .submit = 11, .complete = 119}, 204 [11] = { .submit = 12, .complete = 120}, 205 [12] = { .submit = 13, .complete = 121}, 206 [13] = { .submit = 14, .complete = 122}, 207 [14] = { .submit = 15, .complete = 123}, 208 209 /* USB1 ENDP 1 */ 210 [15] = { .submit = 16, .complete = 141}, 211 [16] = { .submit = 17, .complete = 142}, 212 [17] = { .submit = 18, .complete = 143}, 213 [18] = { .submit = 19, .complete = 144}, 214 [19] = { .submit = 20, .complete = 145}, 215 [20] = { .submit = 21, .complete = 146}, 216 [21] = { .submit = 22, .complete = 147}, 217 [22] = { .submit = 23, .complete = 148}, 218 [23] = { .submit = 24, .complete = 149}, 219 [24] = { .submit = 25, .complete = 150}, 220 [25] = { .submit = 26, .complete = 151}, 221 [26] = { .submit = 27, .complete = 152}, 222 [27] = { .submit = 28, .complete = 153}, 223 [28] = { .submit = 29, .complete = 154}, 224 [29] = { .submit = 30, .complete = 155}, 225 }; 226 227 static const struct chan_queues da8xx_usb_queues_tx[] = { 228 [0] = { .submit = 16, .complete = 24}, 229 [1] = { .submit = 18, .complete = 24}, 230 [2] = { .submit = 20, .complete = 24}, 231 [3] = { .submit = 22, .complete = 24}, 232 }; 233 234 static const struct chan_queues da8xx_usb_queues_rx[] = { 235 [0] = { .submit = 1, .complete = 26}, 236 [1] = { .submit = 3, .complete = 26}, 237 [2] = { .submit = 5, .complete = 26}, 238 [3] = { .submit = 7, .complete = 26}, 239 }; 240 241 struct cppi_glue_infos { 242 const struct chan_queues *queues_rx; 243 const struct chan_queues *queues_tx; 244 struct chan_queues td_queue; 245 u16 first_completion_queue; 246 u16 qmgr_num_pend; 247 }; 248 249 static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c) 250 { 251 return container_of(c, struct cppi41_channel, chan); 252 } 253 254 static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc) 255 { 256 struct cppi41_channel *c; 257 u32 descs_size; 258 u32 desc_num; 259 260 descs_size = sizeof(struct cppi41_desc) * ALLOC_DECS_NUM; 261 262 if (!((desc >= cdd->descs_phys) && 263 (desc < (cdd->descs_phys + descs_size)))) { 264 return NULL; 265 } 266 267 desc_num = (desc - cdd->descs_phys) / sizeof(struct cppi41_desc); 268 BUG_ON(desc_num >= ALLOC_DECS_NUM); 269 c = cdd->chan_busy[desc_num]; 270 cdd->chan_busy[desc_num] = NULL; 271 272 /* Usecount for chan_busy[], paired with push_desc_queue() */ 273 pm_runtime_put(cdd->ddev.dev); 274 275 return c; 276 } 277 278 static void cppi_writel(u32 val, void *__iomem *mem) 279 { 280 __raw_writel(val, mem); 281 } 282 283 static u32 cppi_readl(void *__iomem *mem) 284 { 285 return __raw_readl(mem); 286 } 287 288 static u32 pd_trans_len(u32 val) 289 { 290 return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1); 291 } 292 293 static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num) 294 { 295 u32 desc; 296 297 desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num)); 298 desc &= ~0x1f; 299 return desc; 300 } 301 302 static irqreturn_t cppi41_irq(int irq, void *data) 303 { 304 struct cppi41_dd *cdd = data; 305 u16 first_completion_queue = cdd->first_completion_queue; 306 u16 qmgr_num_pend = cdd->qmgr_num_pend; 307 struct cppi41_channel *c; 308 int i; 309 310 for (i = QMGR_PENDING_SLOT_Q(first_completion_queue); i < qmgr_num_pend; 311 i++) { 312 u32 val; 313 u32 q_num; 314 315 val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i)); 316 if (i == QMGR_PENDING_SLOT_Q(first_completion_queue) && val) { 317 u32 mask; 318 /* set corresponding bit for completion Q 93 */ 319 mask = 1 << QMGR_PENDING_BIT_Q(first_completion_queue); 320 /* not set all bits for queues less than Q 93 */ 321 mask--; 322 /* now invert and keep only Q 93+ set */ 323 val &= ~mask; 324 } 325 326 if (val) 327 __iormb(); 328 329 while (val) { 330 u32 desc, len; 331 332 /* 333 * This should never trigger, see the comments in 334 * push_desc_queue() 335 */ 336 WARN_ON(cdd->is_suspended); 337 338 q_num = __fls(val); 339 val &= ~(1 << q_num); 340 q_num += 32 * i; 341 desc = cppi41_pop_desc(cdd, q_num); 342 c = desc_to_chan(cdd, desc); 343 if (WARN_ON(!c)) { 344 pr_err("%s() q %d desc %08x\n", __func__, 345 q_num, desc); 346 continue; 347 } 348 349 if (c->desc->pd2 & PD2_ZERO_LENGTH) 350 len = 0; 351 else 352 len = pd_trans_len(c->desc->pd0); 353 354 c->residue = pd_trans_len(c->desc->pd6) - len; 355 dma_cookie_complete(&c->txd); 356 dmaengine_desc_get_callback_invoke(&c->txd, NULL); 357 } 358 } 359 return IRQ_HANDLED; 360 } 361 362 static dma_cookie_t cppi41_tx_submit(struct dma_async_tx_descriptor *tx) 363 { 364 dma_cookie_t cookie; 365 366 cookie = dma_cookie_assign(tx); 367 368 return cookie; 369 } 370 371 static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan) 372 { 373 struct cppi41_channel *c = to_cpp41_chan(chan); 374 struct cppi41_dd *cdd = c->cdd; 375 int error; 376 377 error = pm_runtime_get_sync(cdd->ddev.dev); 378 if (error < 0) { 379 dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n", 380 __func__, error); 381 pm_runtime_put_noidle(cdd->ddev.dev); 382 383 return error; 384 } 385 386 dma_cookie_init(chan); 387 dma_async_tx_descriptor_init(&c->txd, chan); 388 c->txd.tx_submit = cppi41_tx_submit; 389 390 if (!c->is_tx) 391 cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0); 392 393 pm_runtime_mark_last_busy(cdd->ddev.dev); 394 pm_runtime_put_autosuspend(cdd->ddev.dev); 395 396 return 0; 397 } 398 399 static void cppi41_dma_free_chan_resources(struct dma_chan *chan) 400 { 401 struct cppi41_channel *c = to_cpp41_chan(chan); 402 struct cppi41_dd *cdd = c->cdd; 403 int error; 404 405 error = pm_runtime_get_sync(cdd->ddev.dev); 406 if (error < 0) { 407 pm_runtime_put_noidle(cdd->ddev.dev); 408 409 return; 410 } 411 412 WARN_ON(!list_empty(&cdd->pending)); 413 414 pm_runtime_mark_last_busy(cdd->ddev.dev); 415 pm_runtime_put_autosuspend(cdd->ddev.dev); 416 } 417 418 static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan, 419 dma_cookie_t cookie, struct dma_tx_state *txstate) 420 { 421 struct cppi41_channel *c = to_cpp41_chan(chan); 422 enum dma_status ret; 423 424 ret = dma_cookie_status(chan, cookie, txstate); 425 426 dma_set_residue(txstate, c->residue); 427 428 return ret; 429 } 430 431 static void push_desc_queue(struct cppi41_channel *c) 432 { 433 struct cppi41_dd *cdd = c->cdd; 434 u32 desc_num; 435 u32 desc_phys; 436 u32 reg; 437 438 c->residue = 0; 439 440 reg = GCR_CHAN_ENABLE; 441 if (!c->is_tx) { 442 reg |= GCR_STARV_RETRY; 443 reg |= GCR_DESC_TYPE_HOST; 444 reg |= c->q_comp_num; 445 } 446 447 cppi_writel(reg, c->gcr_reg); 448 449 /* 450 * We don't use writel() but __raw_writel() so we have to make sure 451 * that the DMA descriptor in coherent memory made to the main memory 452 * before starting the dma engine. 453 */ 454 __iowmb(); 455 456 /* 457 * DMA transfers can take at least 200ms to complete with USB mass 458 * storage connected. To prevent autosuspend timeouts, we must use 459 * pm_runtime_get/put() when chan_busy[] is modified. This will get 460 * cleared in desc_to_chan() or cppi41_stop_chan() depending on the 461 * outcome of the transfer. 462 */ 463 pm_runtime_get(cdd->ddev.dev); 464 465 desc_phys = lower_32_bits(c->desc_phys); 466 desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc); 467 WARN_ON(cdd->chan_busy[desc_num]); 468 cdd->chan_busy[desc_num] = c; 469 470 reg = (sizeof(struct cppi41_desc) - 24) / 4; 471 reg |= desc_phys; 472 cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num)); 473 } 474 475 /* 476 * Caller must hold cdd->lock to prevent push_desc_queue() 477 * getting called out of order. We have both cppi41_dma_issue_pending() 478 * and cppi41_runtime_resume() call this function. 479 */ 480 static void cppi41_run_queue(struct cppi41_dd *cdd) 481 { 482 struct cppi41_channel *c, *_c; 483 484 list_for_each_entry_safe(c, _c, &cdd->pending, node) { 485 push_desc_queue(c); 486 list_del(&c->node); 487 } 488 } 489 490 static void cppi41_dma_issue_pending(struct dma_chan *chan) 491 { 492 struct cppi41_channel *c = to_cpp41_chan(chan); 493 struct cppi41_dd *cdd = c->cdd; 494 unsigned long flags; 495 int error; 496 497 error = pm_runtime_get(cdd->ddev.dev); 498 if ((error != -EINPROGRESS) && error < 0) { 499 pm_runtime_put_noidle(cdd->ddev.dev); 500 dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n", 501 error); 502 503 return; 504 } 505 506 spin_lock_irqsave(&cdd->lock, flags); 507 list_add_tail(&c->node, &cdd->pending); 508 if (!cdd->is_suspended) 509 cppi41_run_queue(cdd); 510 spin_unlock_irqrestore(&cdd->lock, flags); 511 512 pm_runtime_mark_last_busy(cdd->ddev.dev); 513 pm_runtime_put_autosuspend(cdd->ddev.dev); 514 } 515 516 static u32 get_host_pd0(u32 length) 517 { 518 u32 reg; 519 520 reg = DESC_TYPE_HOST << DESC_TYPE; 521 reg |= length; 522 523 return reg; 524 } 525 526 static u32 get_host_pd1(struct cppi41_channel *c) 527 { 528 u32 reg; 529 530 reg = 0; 531 532 return reg; 533 } 534 535 static u32 get_host_pd2(struct cppi41_channel *c) 536 { 537 u32 reg; 538 539 reg = DESC_TYPE_USB; 540 reg |= c->q_comp_num; 541 542 return reg; 543 } 544 545 static u32 get_host_pd3(u32 length) 546 { 547 u32 reg; 548 549 /* PD3 = packet size */ 550 reg = length; 551 552 return reg; 553 } 554 555 static u32 get_host_pd6(u32 length) 556 { 557 u32 reg; 558 559 /* PD6 buffer size */ 560 reg = DESC_PD_COMPLETE; 561 reg |= length; 562 563 return reg; 564 } 565 566 static u32 get_host_pd4_or_7(u32 addr) 567 { 568 u32 reg; 569 570 reg = addr; 571 572 return reg; 573 } 574 575 static u32 get_host_pd5(void) 576 { 577 u32 reg; 578 579 reg = 0; 580 581 return reg; 582 } 583 584 static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg( 585 struct dma_chan *chan, struct scatterlist *sgl, unsigned sg_len, 586 enum dma_transfer_direction dir, unsigned long tx_flags, void *context) 587 { 588 struct cppi41_channel *c = to_cpp41_chan(chan); 589 struct dma_async_tx_descriptor *txd = NULL; 590 struct cppi41_dd *cdd = c->cdd; 591 struct cppi41_desc *d; 592 struct scatterlist *sg; 593 unsigned int i; 594 int error; 595 596 error = pm_runtime_get(cdd->ddev.dev); 597 if (error < 0) { 598 pm_runtime_put_noidle(cdd->ddev.dev); 599 600 return NULL; 601 } 602 603 if (cdd->is_suspended) 604 goto err_out_not_ready; 605 606 d = c->desc; 607 for_each_sg(sgl, sg, sg_len, i) { 608 u32 addr; 609 u32 len; 610 611 /* We need to use more than one desc once musb supports sg */ 612 addr = lower_32_bits(sg_dma_address(sg)); 613 len = sg_dma_len(sg); 614 615 d->pd0 = get_host_pd0(len); 616 d->pd1 = get_host_pd1(c); 617 d->pd2 = get_host_pd2(c); 618 d->pd3 = get_host_pd3(len); 619 d->pd4 = get_host_pd4_or_7(addr); 620 d->pd5 = get_host_pd5(); 621 d->pd6 = get_host_pd6(len); 622 d->pd7 = get_host_pd4_or_7(addr); 623 624 d++; 625 } 626 627 txd = &c->txd; 628 629 err_out_not_ready: 630 pm_runtime_mark_last_busy(cdd->ddev.dev); 631 pm_runtime_put_autosuspend(cdd->ddev.dev); 632 633 return txd; 634 } 635 636 static void cppi41_compute_td_desc(struct cppi41_desc *d) 637 { 638 d->pd0 = DESC_TYPE_TEARD << DESC_TYPE; 639 } 640 641 static int cppi41_tear_down_chan(struct cppi41_channel *c) 642 { 643 struct dmaengine_result abort_result; 644 struct cppi41_dd *cdd = c->cdd; 645 struct cppi41_desc *td; 646 u32 reg; 647 u32 desc_phys; 648 u32 td_desc_phys; 649 650 td = cdd->cd; 651 td += cdd->first_td_desc; 652 653 td_desc_phys = cdd->descs_phys; 654 td_desc_phys += cdd->first_td_desc * sizeof(struct cppi41_desc); 655 656 if (!c->td_queued) { 657 cppi41_compute_td_desc(td); 658 __iowmb(); 659 660 reg = (sizeof(struct cppi41_desc) - 24) / 4; 661 reg |= td_desc_phys; 662 cppi_writel(reg, cdd->qmgr_mem + 663 QMGR_QUEUE_D(cdd->td_queue.submit)); 664 665 reg = GCR_CHAN_ENABLE; 666 if (!c->is_tx) { 667 reg |= GCR_STARV_RETRY; 668 reg |= GCR_DESC_TYPE_HOST; 669 reg |= cdd->td_queue.complete; 670 } 671 reg |= GCR_TEARDOWN; 672 cppi_writel(reg, c->gcr_reg); 673 c->td_queued = 1; 674 c->td_retry = 500; 675 } 676 677 if (!c->td_seen || !c->td_desc_seen) { 678 679 desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete); 680 if (!desc_phys && c->is_tx) 681 desc_phys = cppi41_pop_desc(cdd, c->q_comp_num); 682 683 if (desc_phys == c->desc_phys) { 684 c->td_desc_seen = 1; 685 686 } else if (desc_phys == td_desc_phys) { 687 u32 pd0; 688 689 __iormb(); 690 pd0 = td->pd0; 691 WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD); 692 WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX)); 693 WARN_ON((pd0 & 0x1f) != c->port_num); 694 c->td_seen = 1; 695 } else if (desc_phys) { 696 WARN_ON_ONCE(1); 697 } 698 } 699 c->td_retry--; 700 /* 701 * If the TX descriptor / channel is in use, the caller needs to poke 702 * his TD bit multiple times. After that he hardware releases the 703 * transfer descriptor followed by TD descriptor. Waiting seems not to 704 * cause any difference. 705 * RX seems to be thrown out right away. However once the TearDown 706 * descriptor gets through we are done. If we have seen the transfer 707 * descriptor before the TD we fetch it from enqueue, it has to be 708 * there waiting for us. 709 */ 710 if (!c->td_seen && c->td_retry) { 711 udelay(1); 712 return -EAGAIN; 713 } 714 WARN_ON(!c->td_retry); 715 716 if (!c->td_desc_seen) { 717 desc_phys = cppi41_pop_desc(cdd, c->q_num); 718 if (!desc_phys) 719 desc_phys = cppi41_pop_desc(cdd, c->q_comp_num); 720 WARN_ON(!desc_phys); 721 } 722 723 c->td_queued = 0; 724 c->td_seen = 0; 725 c->td_desc_seen = 0; 726 cppi_writel(0, c->gcr_reg); 727 728 /* Invoke the callback to do the necessary clean-up */ 729 abort_result.result = DMA_TRANS_ABORTED; 730 dma_cookie_complete(&c->txd); 731 dmaengine_desc_get_callback_invoke(&c->txd, &abort_result); 732 733 return 0; 734 } 735 736 static int cppi41_stop_chan(struct dma_chan *chan) 737 { 738 struct cppi41_channel *c = to_cpp41_chan(chan); 739 struct cppi41_dd *cdd = c->cdd; 740 u32 desc_num; 741 u32 desc_phys; 742 int ret; 743 744 desc_phys = lower_32_bits(c->desc_phys); 745 desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc); 746 if (!cdd->chan_busy[desc_num]) { 747 struct cppi41_channel *cc, *_ct; 748 749 /* 750 * channels might still be in the pending list if 751 * cppi41_dma_issue_pending() is called after 752 * cppi41_runtime_suspend() is called 753 */ 754 list_for_each_entry_safe(cc, _ct, &cdd->pending, node) { 755 if (cc != c) 756 continue; 757 list_del(&cc->node); 758 break; 759 } 760 return 0; 761 } 762 763 ret = cppi41_tear_down_chan(c); 764 if (ret) 765 return ret; 766 767 WARN_ON(!cdd->chan_busy[desc_num]); 768 cdd->chan_busy[desc_num] = NULL; 769 770 /* Usecount for chan_busy[], paired with push_desc_queue() */ 771 pm_runtime_put(cdd->ddev.dev); 772 773 return 0; 774 } 775 776 static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd) 777 { 778 struct cppi41_channel *cchan, *chans; 779 int i; 780 u32 n_chans = cdd->n_chans; 781 782 /* 783 * The channels can only be used as TX or as RX. So we add twice 784 * that much dma channels because USB can only do RX or TX. 785 */ 786 n_chans *= 2; 787 788 chans = devm_kcalloc(dev, n_chans, sizeof(*chans), GFP_KERNEL); 789 if (!chans) 790 return -ENOMEM; 791 792 for (i = 0; i < n_chans; i++) { 793 cchan = &chans[i]; 794 795 cchan->cdd = cdd; 796 if (i & 1) { 797 cchan->gcr_reg = cdd->ctrl_mem + DMA_TXGCR(i >> 1); 798 cchan->is_tx = 1; 799 } else { 800 cchan->gcr_reg = cdd->ctrl_mem + DMA_RXGCR(i >> 1); 801 cchan->is_tx = 0; 802 } 803 cchan->port_num = i >> 1; 804 cchan->desc = &cdd->cd[i]; 805 cchan->desc_phys = cdd->descs_phys; 806 cchan->desc_phys += i * sizeof(struct cppi41_desc); 807 cchan->chan.device = &cdd->ddev; 808 list_add_tail(&cchan->chan.device_node, &cdd->ddev.channels); 809 } 810 cdd->first_td_desc = n_chans; 811 812 return 0; 813 } 814 815 static void purge_descs(struct device *dev, struct cppi41_dd *cdd) 816 { 817 unsigned int mem_decs; 818 int i; 819 820 mem_decs = ALLOC_DECS_NUM * sizeof(struct cppi41_desc); 821 822 for (i = 0; i < DESCS_AREAS; i++) { 823 824 cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i)); 825 cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i)); 826 827 dma_free_coherent(dev, mem_decs, cdd->cd, 828 cdd->descs_phys); 829 } 830 } 831 832 static void disable_sched(struct cppi41_dd *cdd) 833 { 834 cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL); 835 } 836 837 static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd) 838 { 839 disable_sched(cdd); 840 841 purge_descs(dev, cdd); 842 843 cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE); 844 cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE); 845 dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch, 846 cdd->scratch_phys); 847 } 848 849 static int init_descs(struct device *dev, struct cppi41_dd *cdd) 850 { 851 unsigned int desc_size; 852 unsigned int mem_decs; 853 int i; 854 u32 reg; 855 u32 idx; 856 857 BUILD_BUG_ON(sizeof(struct cppi41_desc) & 858 (sizeof(struct cppi41_desc) - 1)); 859 BUILD_BUG_ON(sizeof(struct cppi41_desc) < 32); 860 BUILD_BUG_ON(ALLOC_DECS_NUM < 32); 861 862 desc_size = sizeof(struct cppi41_desc); 863 mem_decs = ALLOC_DECS_NUM * desc_size; 864 865 idx = 0; 866 for (i = 0; i < DESCS_AREAS; i++) { 867 868 reg = idx << QMGR_MEMCTRL_IDX_SH; 869 reg |= (ilog2(desc_size) - 5) << QMGR_MEMCTRL_DESC_SH; 870 reg |= ilog2(ALLOC_DECS_NUM) - 5; 871 872 BUILD_BUG_ON(DESCS_AREAS != 1); 873 cdd->cd = dma_alloc_coherent(dev, mem_decs, 874 &cdd->descs_phys, GFP_KERNEL); 875 if (!cdd->cd) 876 return -ENOMEM; 877 878 cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i)); 879 cppi_writel(reg, cdd->qmgr_mem + QMGR_MEMCTRL(i)); 880 881 idx += ALLOC_DECS_NUM; 882 } 883 return 0; 884 } 885 886 static void init_sched(struct cppi41_dd *cdd) 887 { 888 unsigned ch; 889 unsigned word; 890 u32 reg; 891 892 word = 0; 893 cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL); 894 for (ch = 0; ch < cdd->n_chans; ch += 2) { 895 896 reg = SCHED_ENTRY0_CHAN(ch); 897 reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX; 898 899 reg |= SCHED_ENTRY2_CHAN(ch + 1); 900 reg |= SCHED_ENTRY3_CHAN(ch + 1) | SCHED_ENTRY3_IS_RX; 901 cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word)); 902 word++; 903 } 904 reg = cdd->n_chans * 2 - 1; 905 reg |= DMA_SCHED_CTRL_EN; 906 cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL); 907 } 908 909 static int init_cppi41(struct device *dev, struct cppi41_dd *cdd) 910 { 911 int ret; 912 913 BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1)); 914 cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE, 915 &cdd->scratch_phys, GFP_KERNEL); 916 if (!cdd->qmgr_scratch) 917 return -ENOMEM; 918 919 cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE); 920 cppi_writel(TOTAL_DESCS_NUM, cdd->qmgr_mem + QMGR_LRAM_SIZE); 921 cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE); 922 923 ret = init_descs(dev, cdd); 924 if (ret) 925 goto err_td; 926 927 cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ); 928 init_sched(cdd); 929 930 return 0; 931 err_td: 932 deinit_cppi41(dev, cdd); 933 return ret; 934 } 935 936 static struct platform_driver cpp41_dma_driver; 937 /* 938 * The param format is: 939 * X Y 940 * X: Port 941 * Y: 0 = RX else TX 942 */ 943 #define INFO_PORT 0 944 #define INFO_IS_TX 1 945 946 static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param) 947 { 948 struct cppi41_channel *cchan; 949 struct cppi41_dd *cdd; 950 const struct chan_queues *queues; 951 u32 *num = param; 952 953 if (chan->device->dev->driver != &cpp41_dma_driver.driver) 954 return false; 955 956 cchan = to_cpp41_chan(chan); 957 958 if (cchan->port_num != num[INFO_PORT]) 959 return false; 960 961 if (cchan->is_tx && !num[INFO_IS_TX]) 962 return false; 963 cdd = cchan->cdd; 964 if (cchan->is_tx) 965 queues = cdd->queues_tx; 966 else 967 queues = cdd->queues_rx; 968 969 BUILD_BUG_ON(ARRAY_SIZE(am335x_usb_queues_rx) != 970 ARRAY_SIZE(am335x_usb_queues_tx)); 971 if (WARN_ON(cchan->port_num >= ARRAY_SIZE(am335x_usb_queues_rx))) 972 return false; 973 974 cchan->q_num = queues[cchan->port_num].submit; 975 cchan->q_comp_num = queues[cchan->port_num].complete; 976 return true; 977 } 978 979 static struct of_dma_filter_info cpp41_dma_info = { 980 .filter_fn = cpp41_dma_filter_fn, 981 }; 982 983 static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec, 984 struct of_dma *ofdma) 985 { 986 int count = dma_spec->args_count; 987 struct of_dma_filter_info *info = ofdma->of_dma_data; 988 989 if (!info || !info->filter_fn) 990 return NULL; 991 992 if (count != 2) 993 return NULL; 994 995 return dma_request_channel(info->dma_cap, info->filter_fn, 996 &dma_spec->args[0]); 997 } 998 999 static const struct cppi_glue_infos am335x_usb_infos = { 1000 .queues_rx = am335x_usb_queues_rx, 1001 .queues_tx = am335x_usb_queues_tx, 1002 .td_queue = { .submit = 31, .complete = 0 }, 1003 .first_completion_queue = 93, 1004 .qmgr_num_pend = 5, 1005 }; 1006 1007 static const struct cppi_glue_infos da8xx_usb_infos = { 1008 .queues_rx = da8xx_usb_queues_rx, 1009 .queues_tx = da8xx_usb_queues_tx, 1010 .td_queue = { .submit = 31, .complete = 0 }, 1011 .first_completion_queue = 24, 1012 .qmgr_num_pend = 2, 1013 }; 1014 1015 static const struct of_device_id cppi41_dma_ids[] = { 1016 { .compatible = "ti,am3359-cppi41", .data = &am335x_usb_infos}, 1017 { .compatible = "ti,da830-cppi41", .data = &da8xx_usb_infos}, 1018 {}, 1019 }; 1020 MODULE_DEVICE_TABLE(of, cppi41_dma_ids); 1021 1022 static const struct cppi_glue_infos *get_glue_info(struct device *dev) 1023 { 1024 const struct of_device_id *of_id; 1025 1026 of_id = of_match_node(cppi41_dma_ids, dev->of_node); 1027 if (!of_id) 1028 return NULL; 1029 return of_id->data; 1030 } 1031 1032 #define CPPI41_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ 1033 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ 1034 BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \ 1035 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) 1036 1037 static int cppi41_dma_probe(struct platform_device *pdev) 1038 { 1039 struct cppi41_dd *cdd; 1040 struct device *dev = &pdev->dev; 1041 const struct cppi_glue_infos *glue_info; 1042 int index; 1043 int irq; 1044 int ret; 1045 1046 glue_info = get_glue_info(dev); 1047 if (!glue_info) 1048 return -EINVAL; 1049 1050 cdd = devm_kzalloc(&pdev->dev, sizeof(*cdd), GFP_KERNEL); 1051 if (!cdd) 1052 return -ENOMEM; 1053 1054 dma_cap_set(DMA_SLAVE, cdd->ddev.cap_mask); 1055 cdd->ddev.device_alloc_chan_resources = cppi41_dma_alloc_chan_resources; 1056 cdd->ddev.device_free_chan_resources = cppi41_dma_free_chan_resources; 1057 cdd->ddev.device_tx_status = cppi41_dma_tx_status; 1058 cdd->ddev.device_issue_pending = cppi41_dma_issue_pending; 1059 cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg; 1060 cdd->ddev.device_terminate_all = cppi41_stop_chan; 1061 cdd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); 1062 cdd->ddev.src_addr_widths = CPPI41_DMA_BUSWIDTHS; 1063 cdd->ddev.dst_addr_widths = CPPI41_DMA_BUSWIDTHS; 1064 cdd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; 1065 cdd->ddev.dev = dev; 1066 INIT_LIST_HEAD(&cdd->ddev.channels); 1067 cpp41_dma_info.dma_cap = cdd->ddev.cap_mask; 1068 1069 index = of_property_match_string(dev->of_node, 1070 "reg-names", "controller"); 1071 if (index < 0) 1072 return index; 1073 1074 cdd->ctrl_mem = devm_platform_ioremap_resource(pdev, index); 1075 if (IS_ERR(cdd->ctrl_mem)) 1076 return PTR_ERR(cdd->ctrl_mem); 1077 1078 cdd->sched_mem = devm_platform_ioremap_resource(pdev, index + 1); 1079 if (IS_ERR(cdd->sched_mem)) 1080 return PTR_ERR(cdd->sched_mem); 1081 1082 cdd->qmgr_mem = devm_platform_ioremap_resource(pdev, index + 2); 1083 if (IS_ERR(cdd->qmgr_mem)) 1084 return PTR_ERR(cdd->qmgr_mem); 1085 1086 spin_lock_init(&cdd->lock); 1087 INIT_LIST_HEAD(&cdd->pending); 1088 1089 platform_set_drvdata(pdev, cdd); 1090 1091 pm_runtime_enable(dev); 1092 pm_runtime_set_autosuspend_delay(dev, 100); 1093 pm_runtime_use_autosuspend(dev); 1094 ret = pm_runtime_get_sync(dev); 1095 if (ret < 0) 1096 goto err_get_sync; 1097 1098 cdd->queues_rx = glue_info->queues_rx; 1099 cdd->queues_tx = glue_info->queues_tx; 1100 cdd->td_queue = glue_info->td_queue; 1101 cdd->qmgr_num_pend = glue_info->qmgr_num_pend; 1102 cdd->first_completion_queue = glue_info->first_completion_queue; 1103 1104 /* Parse new and deprecated dma-channels properties */ 1105 ret = of_property_read_u32(dev->of_node, 1106 "dma-channels", &cdd->n_chans); 1107 if (ret) 1108 ret = of_property_read_u32(dev->of_node, 1109 "#dma-channels", &cdd->n_chans); 1110 if (ret) 1111 goto err_get_n_chans; 1112 1113 ret = init_cppi41(dev, cdd); 1114 if (ret) 1115 goto err_init_cppi; 1116 1117 ret = cppi41_add_chans(dev, cdd); 1118 if (ret) 1119 goto err_chans; 1120 1121 irq = irq_of_parse_and_map(dev->of_node, 0); 1122 if (!irq) { 1123 ret = -EINVAL; 1124 goto err_chans; 1125 } 1126 1127 ret = devm_request_irq(&pdev->dev, irq, cppi41_irq, IRQF_SHARED, 1128 dev_name(dev), cdd); 1129 if (ret) 1130 goto err_chans; 1131 cdd->irq = irq; 1132 1133 ret = dma_async_device_register(&cdd->ddev); 1134 if (ret) 1135 goto err_chans; 1136 1137 ret = of_dma_controller_register(dev->of_node, 1138 cppi41_dma_xlate, &cpp41_dma_info); 1139 if (ret) 1140 goto err_of; 1141 1142 pm_runtime_mark_last_busy(dev); 1143 pm_runtime_put_autosuspend(dev); 1144 1145 return 0; 1146 err_of: 1147 dma_async_device_unregister(&cdd->ddev); 1148 err_chans: 1149 deinit_cppi41(dev, cdd); 1150 err_init_cppi: 1151 pm_runtime_dont_use_autosuspend(dev); 1152 err_get_n_chans: 1153 err_get_sync: 1154 pm_runtime_put_sync(dev); 1155 pm_runtime_disable(dev); 1156 return ret; 1157 } 1158 1159 static void cppi41_dma_remove(struct platform_device *pdev) 1160 { 1161 struct cppi41_dd *cdd = platform_get_drvdata(pdev); 1162 int error; 1163 1164 error = pm_runtime_get_sync(&pdev->dev); 1165 if (error < 0) 1166 dev_err(&pdev->dev, "%s could not pm_runtime_get: %i\n", 1167 __func__, error); 1168 of_dma_controller_free(pdev->dev.of_node); 1169 dma_async_device_unregister(&cdd->ddev); 1170 1171 devm_free_irq(&pdev->dev, cdd->irq, cdd); 1172 deinit_cppi41(&pdev->dev, cdd); 1173 pm_runtime_dont_use_autosuspend(&pdev->dev); 1174 pm_runtime_put_sync(&pdev->dev); 1175 pm_runtime_disable(&pdev->dev); 1176 } 1177 1178 static int __maybe_unused cppi41_suspend(struct device *dev) 1179 { 1180 struct cppi41_dd *cdd = dev_get_drvdata(dev); 1181 1182 cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ); 1183 disable_sched(cdd); 1184 1185 return 0; 1186 } 1187 1188 static int __maybe_unused cppi41_resume(struct device *dev) 1189 { 1190 struct cppi41_dd *cdd = dev_get_drvdata(dev); 1191 struct cppi41_channel *c; 1192 int i; 1193 1194 for (i = 0; i < DESCS_AREAS; i++) 1195 cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i)); 1196 1197 list_for_each_entry(c, &cdd->ddev.channels, chan.device_node) 1198 if (!c->is_tx) 1199 cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0); 1200 1201 init_sched(cdd); 1202 1203 cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ); 1204 cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE); 1205 cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE); 1206 cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE); 1207 1208 return 0; 1209 } 1210 1211 static int __maybe_unused cppi41_runtime_suspend(struct device *dev) 1212 { 1213 struct cppi41_dd *cdd = dev_get_drvdata(dev); 1214 unsigned long flags; 1215 1216 spin_lock_irqsave(&cdd->lock, flags); 1217 cdd->is_suspended = true; 1218 WARN_ON(!list_empty(&cdd->pending)); 1219 spin_unlock_irqrestore(&cdd->lock, flags); 1220 1221 return 0; 1222 } 1223 1224 static int __maybe_unused cppi41_runtime_resume(struct device *dev) 1225 { 1226 struct cppi41_dd *cdd = dev_get_drvdata(dev); 1227 unsigned long flags; 1228 1229 spin_lock_irqsave(&cdd->lock, flags); 1230 cdd->is_suspended = false; 1231 cppi41_run_queue(cdd); 1232 spin_unlock_irqrestore(&cdd->lock, flags); 1233 1234 return 0; 1235 } 1236 1237 static const struct dev_pm_ops cppi41_pm_ops = { 1238 SET_LATE_SYSTEM_SLEEP_PM_OPS(cppi41_suspend, cppi41_resume) 1239 SET_RUNTIME_PM_OPS(cppi41_runtime_suspend, 1240 cppi41_runtime_resume, 1241 NULL) 1242 }; 1243 1244 static struct platform_driver cpp41_dma_driver = { 1245 .probe = cppi41_dma_probe, 1246 .remove = cppi41_dma_remove, 1247 .driver = { 1248 .name = "cppi41-dma-engine", 1249 .pm = &cppi41_pm_ops, 1250 .of_match_table = of_match_ptr(cppi41_dma_ids), 1251 }, 1252 }; 1253 1254 module_platform_driver(cpp41_dma_driver); 1255 MODULE_DESCRIPTION("Texas Instruments CPPI 4.1 DMA support"); 1256 MODULE_LICENSE("GPL"); 1257 MODULE_AUTHOR("Sebastian Andrzej Siewior <bigeasy@linutronix.de>"); 1258