1 // SPDX-License-Identifier: GPL-2.0-only 2 #include <linux/delay.h> 3 #include <linux/dmaengine.h> 4 #include <linux/dma-mapping.h> 5 #include <linux/platform_device.h> 6 #include <linux/module.h> 7 #include <linux/of.h> 8 #include <linux/slab.h> 9 #include <linux/of_dma.h> 10 #include <linux/of_irq.h> 11 #include <linux/dmapool.h> 12 #include <linux/interrupt.h> 13 #include <linux/of_address.h> 14 #include <linux/pm_runtime.h> 15 #include "../dmaengine.h" 16 17 #define DESC_TYPE 27 18 #define DESC_TYPE_HOST 0x10 19 #define DESC_TYPE_TEARD 0x13 20 21 #define TD_DESC_IS_RX (1 << 16) 22 #define TD_DESC_DMA_NUM 10 23 24 #define DESC_LENGTH_BITS_NUM 21 25 26 #define DESC_TYPE_USB (5 << 26) 27 #define DESC_PD_COMPLETE (1 << 31) 28 29 /* DMA engine */ 30 #define DMA_TDFDQ 4 31 #define DMA_TXGCR(x) (0x800 + (x) * 0x20) 32 #define DMA_RXGCR(x) (0x808 + (x) * 0x20) 33 #define RXHPCRA0 4 34 35 #define GCR_CHAN_ENABLE (1 << 31) 36 #define GCR_TEARDOWN (1 << 30) 37 #define GCR_STARV_RETRY (1 << 24) 38 #define GCR_DESC_TYPE_HOST (1 << 14) 39 40 /* DMA scheduler */ 41 #define DMA_SCHED_CTRL 0 42 #define DMA_SCHED_CTRL_EN (1 << 31) 43 #define DMA_SCHED_WORD(x) ((x) * 4 + 0x800) 44 45 #define SCHED_ENTRY0_CHAN(x) ((x) << 0) 46 #define SCHED_ENTRY0_IS_RX (1 << 7) 47 48 #define SCHED_ENTRY1_CHAN(x) ((x) << 8) 49 #define SCHED_ENTRY1_IS_RX (1 << 15) 50 51 #define SCHED_ENTRY2_CHAN(x) ((x) << 16) 52 #define SCHED_ENTRY2_IS_RX (1 << 23) 53 54 #define SCHED_ENTRY3_CHAN(x) ((x) << 24) 55 #define SCHED_ENTRY3_IS_RX (1 << 31) 56 57 /* Queue manager */ 58 /* 4 KiB of memory for descriptors, 2 for each endpoint */ 59 #define ALLOC_DECS_NUM 128 60 #define DESCS_AREAS 1 61 #define TOTAL_DESCS_NUM (ALLOC_DECS_NUM * DESCS_AREAS) 62 #define QMGR_SCRATCH_SIZE (TOTAL_DESCS_NUM * 4) 63 64 #define QMGR_LRAM0_BASE 0x80 65 #define QMGR_LRAM_SIZE 0x84 66 #define QMGR_LRAM1_BASE 0x88 67 #define QMGR_MEMBASE(x) (0x1000 + (x) * 0x10) 68 #define QMGR_MEMCTRL(x) (0x1004 + (x) * 0x10) 69 #define QMGR_MEMCTRL_IDX_SH 16 70 #define QMGR_MEMCTRL_DESC_SH 8 71 72 #define QMGR_PEND(x) (0x90 + (x) * 4) 73 74 #define QMGR_PENDING_SLOT_Q(x) (x / 32) 75 #define QMGR_PENDING_BIT_Q(x) (x % 32) 76 77 #define QMGR_QUEUE_A(n) (0x2000 + (n) * 0x10) 78 #define QMGR_QUEUE_B(n) (0x2004 + (n) * 0x10) 79 #define QMGR_QUEUE_C(n) (0x2008 + (n) * 0x10) 80 #define QMGR_QUEUE_D(n) (0x200c + (n) * 0x10) 81 82 /* Packet Descriptor */ 83 #define PD2_ZERO_LENGTH (1 << 19) 84 85 struct cppi41_channel { 86 struct dma_chan chan; 87 struct dma_async_tx_descriptor txd; 88 struct cppi41_dd *cdd; 89 struct cppi41_desc *desc; 90 dma_addr_t desc_phys; 91 void __iomem *gcr_reg; 92 int is_tx; 93 u32 residue; 94 95 unsigned int q_num; 96 unsigned int q_comp_num; 97 unsigned int port_num; 98 99 unsigned td_retry; 100 unsigned td_queued:1; 101 unsigned td_seen:1; 102 unsigned td_desc_seen:1; 103 104 struct list_head node; /* Node for pending list */ 105 }; 106 107 struct cppi41_desc { 108 u32 pd0; 109 u32 pd1; 110 u32 pd2; 111 u32 pd3; 112 u32 pd4; 113 u32 pd5; 114 u32 pd6; 115 u32 pd7; 116 } __aligned(32); 117 118 struct chan_queues { 119 u16 submit; 120 u16 complete; 121 }; 122 123 struct cppi41_dd { 124 struct dma_device ddev; 125 126 void *qmgr_scratch; 127 dma_addr_t scratch_phys; 128 129 struct cppi41_desc *cd; 130 dma_addr_t descs_phys; 131 u32 first_td_desc; 132 struct cppi41_channel *chan_busy[ALLOC_DECS_NUM]; 133 134 void __iomem *ctrl_mem; 135 void __iomem *sched_mem; 136 void __iomem *qmgr_mem; 137 unsigned int irq; 138 const struct chan_queues *queues_rx; 139 const struct chan_queues *queues_tx; 140 struct chan_queues td_queue; 141 u16 first_completion_queue; 142 u16 qmgr_num_pend; 143 u32 n_chans; 144 u8 platform; 145 146 struct list_head pending; /* Pending queued transfers */ 147 spinlock_t lock; /* Lock for pending list */ 148 149 /* context for suspend/resume */ 150 unsigned int dma_tdfdq; 151 152 bool is_suspended; 153 }; 154 155 static struct chan_queues am335x_usb_queues_tx[] = { 156 /* USB0 ENDP 1 */ 157 [ 0] = { .submit = 32, .complete = 93}, 158 [ 1] = { .submit = 34, .complete = 94}, 159 [ 2] = { .submit = 36, .complete = 95}, 160 [ 3] = { .submit = 38, .complete = 96}, 161 [ 4] = { .submit = 40, .complete = 97}, 162 [ 5] = { .submit = 42, .complete = 98}, 163 [ 6] = { .submit = 44, .complete = 99}, 164 [ 7] = { .submit = 46, .complete = 100}, 165 [ 8] = { .submit = 48, .complete = 101}, 166 [ 9] = { .submit = 50, .complete = 102}, 167 [10] = { .submit = 52, .complete = 103}, 168 [11] = { .submit = 54, .complete = 104}, 169 [12] = { .submit = 56, .complete = 105}, 170 [13] = { .submit = 58, .complete = 106}, 171 [14] = { .submit = 60, .complete = 107}, 172 173 /* USB1 ENDP1 */ 174 [15] = { .submit = 62, .complete = 125}, 175 [16] = { .submit = 64, .complete = 126}, 176 [17] = { .submit = 66, .complete = 127}, 177 [18] = { .submit = 68, .complete = 128}, 178 [19] = { .submit = 70, .complete = 129}, 179 [20] = { .submit = 72, .complete = 130}, 180 [21] = { .submit = 74, .complete = 131}, 181 [22] = { .submit = 76, .complete = 132}, 182 [23] = { .submit = 78, .complete = 133}, 183 [24] = { .submit = 80, .complete = 134}, 184 [25] = { .submit = 82, .complete = 135}, 185 [26] = { .submit = 84, .complete = 136}, 186 [27] = { .submit = 86, .complete = 137}, 187 [28] = { .submit = 88, .complete = 138}, 188 [29] = { .submit = 90, .complete = 139}, 189 }; 190 191 static const struct chan_queues am335x_usb_queues_rx[] = { 192 /* USB0 ENDP 1 */ 193 [ 0] = { .submit = 1, .complete = 109}, 194 [ 1] = { .submit = 2, .complete = 110}, 195 [ 2] = { .submit = 3, .complete = 111}, 196 [ 3] = { .submit = 4, .complete = 112}, 197 [ 4] = { .submit = 5, .complete = 113}, 198 [ 5] = { .submit = 6, .complete = 114}, 199 [ 6] = { .submit = 7, .complete = 115}, 200 [ 7] = { .submit = 8, .complete = 116}, 201 [ 8] = { .submit = 9, .complete = 117}, 202 [ 9] = { .submit = 10, .complete = 118}, 203 [10] = { .submit = 11, .complete = 119}, 204 [11] = { .submit = 12, .complete = 120}, 205 [12] = { .submit = 13, .complete = 121}, 206 [13] = { .submit = 14, .complete = 122}, 207 [14] = { .submit = 15, .complete = 123}, 208 209 /* USB1 ENDP 1 */ 210 [15] = { .submit = 16, .complete = 141}, 211 [16] = { .submit = 17, .complete = 142}, 212 [17] = { .submit = 18, .complete = 143}, 213 [18] = { .submit = 19, .complete = 144}, 214 [19] = { .submit = 20, .complete = 145}, 215 [20] = { .submit = 21, .complete = 146}, 216 [21] = { .submit = 22, .complete = 147}, 217 [22] = { .submit = 23, .complete = 148}, 218 [23] = { .submit = 24, .complete = 149}, 219 [24] = { .submit = 25, .complete = 150}, 220 [25] = { .submit = 26, .complete = 151}, 221 [26] = { .submit = 27, .complete = 152}, 222 [27] = { .submit = 28, .complete = 153}, 223 [28] = { .submit = 29, .complete = 154}, 224 [29] = { .submit = 30, .complete = 155}, 225 }; 226 227 static const struct chan_queues da8xx_usb_queues_tx[] = { 228 [0] = { .submit = 16, .complete = 24}, 229 [1] = { .submit = 18, .complete = 24}, 230 [2] = { .submit = 20, .complete = 24}, 231 [3] = { .submit = 22, .complete = 24}, 232 }; 233 234 static const struct chan_queues da8xx_usb_queues_rx[] = { 235 [0] = { .submit = 1, .complete = 26}, 236 [1] = { .submit = 3, .complete = 26}, 237 [2] = { .submit = 5, .complete = 26}, 238 [3] = { .submit = 7, .complete = 26}, 239 }; 240 241 struct cppi_glue_infos { 242 const struct chan_queues *queues_rx; 243 const struct chan_queues *queues_tx; 244 struct chan_queues td_queue; 245 u16 first_completion_queue; 246 u16 qmgr_num_pend; 247 }; 248 249 static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c) 250 { 251 return container_of(c, struct cppi41_channel, chan); 252 } 253 254 static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc) 255 { 256 struct cppi41_channel *c; 257 u32 descs_size; 258 u32 desc_num; 259 260 descs_size = sizeof(struct cppi41_desc) * ALLOC_DECS_NUM; 261 262 if (!((desc >= cdd->descs_phys) && 263 (desc < (cdd->descs_phys + descs_size)))) { 264 return NULL; 265 } 266 267 desc_num = (desc - cdd->descs_phys) / sizeof(struct cppi41_desc); 268 BUG_ON(desc_num >= ALLOC_DECS_NUM); 269 c = cdd->chan_busy[desc_num]; 270 cdd->chan_busy[desc_num] = NULL; 271 272 /* Usecount for chan_busy[], paired with push_desc_queue() */ 273 pm_runtime_put(cdd->ddev.dev); 274 275 return c; 276 } 277 278 static void cppi_writel(u32 val, void *__iomem *mem) 279 { 280 __raw_writel(val, mem); 281 } 282 283 static u32 cppi_readl(void *__iomem *mem) 284 { 285 return __raw_readl(mem); 286 } 287 288 static u32 pd_trans_len(u32 val) 289 { 290 return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1); 291 } 292 293 static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num) 294 { 295 u32 desc; 296 297 desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num)); 298 desc &= ~0x1f; 299 return desc; 300 } 301 302 static irqreturn_t cppi41_irq(int irq, void *data) 303 { 304 struct cppi41_dd *cdd = data; 305 u16 first_completion_queue = cdd->first_completion_queue; 306 u16 qmgr_num_pend = cdd->qmgr_num_pend; 307 struct cppi41_channel *c; 308 int i; 309 310 for (i = QMGR_PENDING_SLOT_Q(first_completion_queue); i < qmgr_num_pend; 311 i++) { 312 u32 val; 313 u32 q_num; 314 315 val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i)); 316 if (i == QMGR_PENDING_SLOT_Q(first_completion_queue) && val) { 317 u32 mask; 318 /* set corresponding bit for completetion Q 93 */ 319 mask = 1 << QMGR_PENDING_BIT_Q(first_completion_queue); 320 /* not set all bits for queues less than Q 93 */ 321 mask--; 322 /* now invert and keep only Q 93+ set */ 323 val &= ~mask; 324 } 325 326 if (val) 327 __iormb(); 328 329 while (val) { 330 u32 desc, len; 331 332 /* 333 * This should never trigger, see the comments in 334 * push_desc_queue() 335 */ 336 WARN_ON(cdd->is_suspended); 337 338 q_num = __fls(val); 339 val &= ~(1 << q_num); 340 q_num += 32 * i; 341 desc = cppi41_pop_desc(cdd, q_num); 342 c = desc_to_chan(cdd, desc); 343 if (WARN_ON(!c)) { 344 pr_err("%s() q %d desc %08x\n", __func__, 345 q_num, desc); 346 continue; 347 } 348 349 if (c->desc->pd2 & PD2_ZERO_LENGTH) 350 len = 0; 351 else 352 len = pd_trans_len(c->desc->pd0); 353 354 c->residue = pd_trans_len(c->desc->pd6) - len; 355 dma_cookie_complete(&c->txd); 356 dmaengine_desc_get_callback_invoke(&c->txd, NULL); 357 } 358 } 359 return IRQ_HANDLED; 360 } 361 362 static dma_cookie_t cppi41_tx_submit(struct dma_async_tx_descriptor *tx) 363 { 364 dma_cookie_t cookie; 365 366 cookie = dma_cookie_assign(tx); 367 368 return cookie; 369 } 370 371 static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan) 372 { 373 struct cppi41_channel *c = to_cpp41_chan(chan); 374 struct cppi41_dd *cdd = c->cdd; 375 int error; 376 377 error = pm_runtime_get_sync(cdd->ddev.dev); 378 if (error < 0) { 379 dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n", 380 __func__, error); 381 pm_runtime_put_noidle(cdd->ddev.dev); 382 383 return error; 384 } 385 386 dma_cookie_init(chan); 387 dma_async_tx_descriptor_init(&c->txd, chan); 388 c->txd.tx_submit = cppi41_tx_submit; 389 390 if (!c->is_tx) 391 cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0); 392 393 pm_runtime_mark_last_busy(cdd->ddev.dev); 394 pm_runtime_put_autosuspend(cdd->ddev.dev); 395 396 return 0; 397 } 398 399 static void cppi41_dma_free_chan_resources(struct dma_chan *chan) 400 { 401 struct cppi41_channel *c = to_cpp41_chan(chan); 402 struct cppi41_dd *cdd = c->cdd; 403 int error; 404 405 error = pm_runtime_get_sync(cdd->ddev.dev); 406 if (error < 0) { 407 pm_runtime_put_noidle(cdd->ddev.dev); 408 409 return; 410 } 411 412 WARN_ON(!list_empty(&cdd->pending)); 413 414 pm_runtime_mark_last_busy(cdd->ddev.dev); 415 pm_runtime_put_autosuspend(cdd->ddev.dev); 416 } 417 418 static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan, 419 dma_cookie_t cookie, struct dma_tx_state *txstate) 420 { 421 struct cppi41_channel *c = to_cpp41_chan(chan); 422 enum dma_status ret; 423 424 ret = dma_cookie_status(chan, cookie, txstate); 425 426 dma_set_residue(txstate, c->residue); 427 428 return ret; 429 } 430 431 static void push_desc_queue(struct cppi41_channel *c) 432 { 433 struct cppi41_dd *cdd = c->cdd; 434 u32 desc_num; 435 u32 desc_phys; 436 u32 reg; 437 438 c->residue = 0; 439 440 reg = GCR_CHAN_ENABLE; 441 if (!c->is_tx) { 442 reg |= GCR_STARV_RETRY; 443 reg |= GCR_DESC_TYPE_HOST; 444 reg |= c->q_comp_num; 445 } 446 447 cppi_writel(reg, c->gcr_reg); 448 449 /* 450 * We don't use writel() but __raw_writel() so we have to make sure 451 * that the DMA descriptor in coherent memory made to the main memory 452 * before starting the dma engine. 453 */ 454 __iowmb(); 455 456 /* 457 * DMA transfers can take at least 200ms to complete with USB mass 458 * storage connected. To prevent autosuspend timeouts, we must use 459 * pm_runtime_get/put() when chan_busy[] is modified. This will get 460 * cleared in desc_to_chan() or cppi41_stop_chan() depending on the 461 * outcome of the transfer. 462 */ 463 pm_runtime_get(cdd->ddev.dev); 464 465 desc_phys = lower_32_bits(c->desc_phys); 466 desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc); 467 WARN_ON(cdd->chan_busy[desc_num]); 468 cdd->chan_busy[desc_num] = c; 469 470 reg = (sizeof(struct cppi41_desc) - 24) / 4; 471 reg |= desc_phys; 472 cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num)); 473 } 474 475 /* 476 * Caller must hold cdd->lock to prevent push_desc_queue() 477 * getting called out of order. We have both cppi41_dma_issue_pending() 478 * and cppi41_runtime_resume() call this function. 479 */ 480 static void cppi41_run_queue(struct cppi41_dd *cdd) 481 { 482 struct cppi41_channel *c, *_c; 483 484 list_for_each_entry_safe(c, _c, &cdd->pending, node) { 485 push_desc_queue(c); 486 list_del(&c->node); 487 } 488 } 489 490 static void cppi41_dma_issue_pending(struct dma_chan *chan) 491 { 492 struct cppi41_channel *c = to_cpp41_chan(chan); 493 struct cppi41_dd *cdd = c->cdd; 494 unsigned long flags; 495 int error; 496 497 error = pm_runtime_get(cdd->ddev.dev); 498 if ((error != -EINPROGRESS) && error < 0) { 499 pm_runtime_put_noidle(cdd->ddev.dev); 500 dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n", 501 error); 502 503 return; 504 } 505 506 spin_lock_irqsave(&cdd->lock, flags); 507 list_add_tail(&c->node, &cdd->pending); 508 if (!cdd->is_suspended) 509 cppi41_run_queue(cdd); 510 spin_unlock_irqrestore(&cdd->lock, flags); 511 512 pm_runtime_mark_last_busy(cdd->ddev.dev); 513 pm_runtime_put_autosuspend(cdd->ddev.dev); 514 } 515 516 static u32 get_host_pd0(u32 length) 517 { 518 u32 reg; 519 520 reg = DESC_TYPE_HOST << DESC_TYPE; 521 reg |= length; 522 523 return reg; 524 } 525 526 static u32 get_host_pd1(struct cppi41_channel *c) 527 { 528 u32 reg; 529 530 reg = 0; 531 532 return reg; 533 } 534 535 static u32 get_host_pd2(struct cppi41_channel *c) 536 { 537 u32 reg; 538 539 reg = DESC_TYPE_USB; 540 reg |= c->q_comp_num; 541 542 return reg; 543 } 544 545 static u32 get_host_pd3(u32 length) 546 { 547 u32 reg; 548 549 /* PD3 = packet size */ 550 reg = length; 551 552 return reg; 553 } 554 555 static u32 get_host_pd6(u32 length) 556 { 557 u32 reg; 558 559 /* PD6 buffer size */ 560 reg = DESC_PD_COMPLETE; 561 reg |= length; 562 563 return reg; 564 } 565 566 static u32 get_host_pd4_or_7(u32 addr) 567 { 568 u32 reg; 569 570 reg = addr; 571 572 return reg; 573 } 574 575 static u32 get_host_pd5(void) 576 { 577 u32 reg; 578 579 reg = 0; 580 581 return reg; 582 } 583 584 static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg( 585 struct dma_chan *chan, struct scatterlist *sgl, unsigned sg_len, 586 enum dma_transfer_direction dir, unsigned long tx_flags, void *context) 587 { 588 struct cppi41_channel *c = to_cpp41_chan(chan); 589 struct cppi41_desc *d; 590 struct scatterlist *sg; 591 unsigned int i; 592 593 d = c->desc; 594 for_each_sg(sgl, sg, sg_len, i) { 595 u32 addr; 596 u32 len; 597 598 /* We need to use more than one desc once musb supports sg */ 599 addr = lower_32_bits(sg_dma_address(sg)); 600 len = sg_dma_len(sg); 601 602 d->pd0 = get_host_pd0(len); 603 d->pd1 = get_host_pd1(c); 604 d->pd2 = get_host_pd2(c); 605 d->pd3 = get_host_pd3(len); 606 d->pd4 = get_host_pd4_or_7(addr); 607 d->pd5 = get_host_pd5(); 608 d->pd6 = get_host_pd6(len); 609 d->pd7 = get_host_pd4_or_7(addr); 610 611 d++; 612 } 613 614 return &c->txd; 615 } 616 617 static void cppi41_compute_td_desc(struct cppi41_desc *d) 618 { 619 d->pd0 = DESC_TYPE_TEARD << DESC_TYPE; 620 } 621 622 static int cppi41_tear_down_chan(struct cppi41_channel *c) 623 { 624 struct dmaengine_result abort_result; 625 struct cppi41_dd *cdd = c->cdd; 626 struct cppi41_desc *td; 627 u32 reg; 628 u32 desc_phys; 629 u32 td_desc_phys; 630 631 td = cdd->cd; 632 td += cdd->first_td_desc; 633 634 td_desc_phys = cdd->descs_phys; 635 td_desc_phys += cdd->first_td_desc * sizeof(struct cppi41_desc); 636 637 if (!c->td_queued) { 638 cppi41_compute_td_desc(td); 639 __iowmb(); 640 641 reg = (sizeof(struct cppi41_desc) - 24) / 4; 642 reg |= td_desc_phys; 643 cppi_writel(reg, cdd->qmgr_mem + 644 QMGR_QUEUE_D(cdd->td_queue.submit)); 645 646 reg = GCR_CHAN_ENABLE; 647 if (!c->is_tx) { 648 reg |= GCR_STARV_RETRY; 649 reg |= GCR_DESC_TYPE_HOST; 650 reg |= cdd->td_queue.complete; 651 } 652 reg |= GCR_TEARDOWN; 653 cppi_writel(reg, c->gcr_reg); 654 c->td_queued = 1; 655 c->td_retry = 500; 656 } 657 658 if (!c->td_seen || !c->td_desc_seen) { 659 660 desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete); 661 if (!desc_phys && c->is_tx) 662 desc_phys = cppi41_pop_desc(cdd, c->q_comp_num); 663 664 if (desc_phys == c->desc_phys) { 665 c->td_desc_seen = 1; 666 667 } else if (desc_phys == td_desc_phys) { 668 u32 pd0; 669 670 __iormb(); 671 pd0 = td->pd0; 672 WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD); 673 WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX)); 674 WARN_ON((pd0 & 0x1f) != c->port_num); 675 c->td_seen = 1; 676 } else if (desc_phys) { 677 WARN_ON_ONCE(1); 678 } 679 } 680 c->td_retry--; 681 /* 682 * If the TX descriptor / channel is in use, the caller needs to poke 683 * his TD bit multiple times. After that he hardware releases the 684 * transfer descriptor followed by TD descriptor. Waiting seems not to 685 * cause any difference. 686 * RX seems to be thrown out right away. However once the TearDown 687 * descriptor gets through we are done. If we have seens the transfer 688 * descriptor before the TD we fetch it from enqueue, it has to be 689 * there waiting for us. 690 */ 691 if (!c->td_seen && c->td_retry) { 692 udelay(1); 693 return -EAGAIN; 694 } 695 WARN_ON(!c->td_retry); 696 697 if (!c->td_desc_seen) { 698 desc_phys = cppi41_pop_desc(cdd, c->q_num); 699 if (!desc_phys) 700 desc_phys = cppi41_pop_desc(cdd, c->q_comp_num); 701 WARN_ON(!desc_phys); 702 } 703 704 c->td_queued = 0; 705 c->td_seen = 0; 706 c->td_desc_seen = 0; 707 cppi_writel(0, c->gcr_reg); 708 709 /* Invoke the callback to do the necessary clean-up */ 710 abort_result.result = DMA_TRANS_ABORTED; 711 dma_cookie_complete(&c->txd); 712 dmaengine_desc_get_callback_invoke(&c->txd, &abort_result); 713 714 return 0; 715 } 716 717 static int cppi41_stop_chan(struct dma_chan *chan) 718 { 719 struct cppi41_channel *c = to_cpp41_chan(chan); 720 struct cppi41_dd *cdd = c->cdd; 721 u32 desc_num; 722 u32 desc_phys; 723 int ret; 724 725 desc_phys = lower_32_bits(c->desc_phys); 726 desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc); 727 if (!cdd->chan_busy[desc_num]) { 728 struct cppi41_channel *cc, *_ct; 729 730 /* 731 * channels might still be in the pendling list if 732 * cppi41_dma_issue_pending() is called after 733 * cppi41_runtime_suspend() is called 734 */ 735 list_for_each_entry_safe(cc, _ct, &cdd->pending, node) { 736 if (cc != c) 737 continue; 738 list_del(&cc->node); 739 break; 740 } 741 return 0; 742 } 743 744 ret = cppi41_tear_down_chan(c); 745 if (ret) 746 return ret; 747 748 WARN_ON(!cdd->chan_busy[desc_num]); 749 cdd->chan_busy[desc_num] = NULL; 750 751 /* Usecount for chan_busy[], paired with push_desc_queue() */ 752 pm_runtime_put(cdd->ddev.dev); 753 754 return 0; 755 } 756 757 static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd) 758 { 759 struct cppi41_channel *cchan, *chans; 760 int i; 761 u32 n_chans = cdd->n_chans; 762 763 /* 764 * The channels can only be used as TX or as RX. So we add twice 765 * that much dma channels because USB can only do RX or TX. 766 */ 767 n_chans *= 2; 768 769 chans = devm_kcalloc(dev, n_chans, sizeof(*chans), GFP_KERNEL); 770 if (!chans) 771 return -ENOMEM; 772 773 for (i = 0; i < n_chans; i++) { 774 cchan = &chans[i]; 775 776 cchan->cdd = cdd; 777 if (i & 1) { 778 cchan->gcr_reg = cdd->ctrl_mem + DMA_TXGCR(i >> 1); 779 cchan->is_tx = 1; 780 } else { 781 cchan->gcr_reg = cdd->ctrl_mem + DMA_RXGCR(i >> 1); 782 cchan->is_tx = 0; 783 } 784 cchan->port_num = i >> 1; 785 cchan->desc = &cdd->cd[i]; 786 cchan->desc_phys = cdd->descs_phys; 787 cchan->desc_phys += i * sizeof(struct cppi41_desc); 788 cchan->chan.device = &cdd->ddev; 789 list_add_tail(&cchan->chan.device_node, &cdd->ddev.channels); 790 } 791 cdd->first_td_desc = n_chans; 792 793 return 0; 794 } 795 796 static void purge_descs(struct device *dev, struct cppi41_dd *cdd) 797 { 798 unsigned int mem_decs; 799 int i; 800 801 mem_decs = ALLOC_DECS_NUM * sizeof(struct cppi41_desc); 802 803 for (i = 0; i < DESCS_AREAS; i++) { 804 805 cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i)); 806 cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i)); 807 808 dma_free_coherent(dev, mem_decs, cdd->cd, 809 cdd->descs_phys); 810 } 811 } 812 813 static void disable_sched(struct cppi41_dd *cdd) 814 { 815 cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL); 816 } 817 818 static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd) 819 { 820 disable_sched(cdd); 821 822 purge_descs(dev, cdd); 823 824 cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE); 825 cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE); 826 dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch, 827 cdd->scratch_phys); 828 } 829 830 static int init_descs(struct device *dev, struct cppi41_dd *cdd) 831 { 832 unsigned int desc_size; 833 unsigned int mem_decs; 834 int i; 835 u32 reg; 836 u32 idx; 837 838 BUILD_BUG_ON(sizeof(struct cppi41_desc) & 839 (sizeof(struct cppi41_desc) - 1)); 840 BUILD_BUG_ON(sizeof(struct cppi41_desc) < 32); 841 BUILD_BUG_ON(ALLOC_DECS_NUM < 32); 842 843 desc_size = sizeof(struct cppi41_desc); 844 mem_decs = ALLOC_DECS_NUM * desc_size; 845 846 idx = 0; 847 for (i = 0; i < DESCS_AREAS; i++) { 848 849 reg = idx << QMGR_MEMCTRL_IDX_SH; 850 reg |= (ilog2(desc_size) - 5) << QMGR_MEMCTRL_DESC_SH; 851 reg |= ilog2(ALLOC_DECS_NUM) - 5; 852 853 BUILD_BUG_ON(DESCS_AREAS != 1); 854 cdd->cd = dma_alloc_coherent(dev, mem_decs, 855 &cdd->descs_phys, GFP_KERNEL); 856 if (!cdd->cd) 857 return -ENOMEM; 858 859 cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i)); 860 cppi_writel(reg, cdd->qmgr_mem + QMGR_MEMCTRL(i)); 861 862 idx += ALLOC_DECS_NUM; 863 } 864 return 0; 865 } 866 867 static void init_sched(struct cppi41_dd *cdd) 868 { 869 unsigned ch; 870 unsigned word; 871 u32 reg; 872 873 word = 0; 874 cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL); 875 for (ch = 0; ch < cdd->n_chans; ch += 2) { 876 877 reg = SCHED_ENTRY0_CHAN(ch); 878 reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX; 879 880 reg |= SCHED_ENTRY2_CHAN(ch + 1); 881 reg |= SCHED_ENTRY3_CHAN(ch + 1) | SCHED_ENTRY3_IS_RX; 882 cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word)); 883 word++; 884 } 885 reg = cdd->n_chans * 2 - 1; 886 reg |= DMA_SCHED_CTRL_EN; 887 cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL); 888 } 889 890 static int init_cppi41(struct device *dev, struct cppi41_dd *cdd) 891 { 892 int ret; 893 894 BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1)); 895 cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE, 896 &cdd->scratch_phys, GFP_KERNEL); 897 if (!cdd->qmgr_scratch) 898 return -ENOMEM; 899 900 cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE); 901 cppi_writel(TOTAL_DESCS_NUM, cdd->qmgr_mem + QMGR_LRAM_SIZE); 902 cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE); 903 904 ret = init_descs(dev, cdd); 905 if (ret) 906 goto err_td; 907 908 cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ); 909 init_sched(cdd); 910 911 return 0; 912 err_td: 913 deinit_cppi41(dev, cdd); 914 return ret; 915 } 916 917 static struct platform_driver cpp41_dma_driver; 918 /* 919 * The param format is: 920 * X Y 921 * X: Port 922 * Y: 0 = RX else TX 923 */ 924 #define INFO_PORT 0 925 #define INFO_IS_TX 1 926 927 static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param) 928 { 929 struct cppi41_channel *cchan; 930 struct cppi41_dd *cdd; 931 const struct chan_queues *queues; 932 u32 *num = param; 933 934 if (chan->device->dev->driver != &cpp41_dma_driver.driver) 935 return false; 936 937 cchan = to_cpp41_chan(chan); 938 939 if (cchan->port_num != num[INFO_PORT]) 940 return false; 941 942 if (cchan->is_tx && !num[INFO_IS_TX]) 943 return false; 944 cdd = cchan->cdd; 945 if (cchan->is_tx) 946 queues = cdd->queues_tx; 947 else 948 queues = cdd->queues_rx; 949 950 BUILD_BUG_ON(ARRAY_SIZE(am335x_usb_queues_rx) != 951 ARRAY_SIZE(am335x_usb_queues_tx)); 952 if (WARN_ON(cchan->port_num >= ARRAY_SIZE(am335x_usb_queues_rx))) 953 return false; 954 955 cchan->q_num = queues[cchan->port_num].submit; 956 cchan->q_comp_num = queues[cchan->port_num].complete; 957 return true; 958 } 959 960 static struct of_dma_filter_info cpp41_dma_info = { 961 .filter_fn = cpp41_dma_filter_fn, 962 }; 963 964 static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec, 965 struct of_dma *ofdma) 966 { 967 int count = dma_spec->args_count; 968 struct of_dma_filter_info *info = ofdma->of_dma_data; 969 970 if (!info || !info->filter_fn) 971 return NULL; 972 973 if (count != 2) 974 return NULL; 975 976 return dma_request_channel(info->dma_cap, info->filter_fn, 977 &dma_spec->args[0]); 978 } 979 980 static const struct cppi_glue_infos am335x_usb_infos = { 981 .queues_rx = am335x_usb_queues_rx, 982 .queues_tx = am335x_usb_queues_tx, 983 .td_queue = { .submit = 31, .complete = 0 }, 984 .first_completion_queue = 93, 985 .qmgr_num_pend = 5, 986 }; 987 988 static const struct cppi_glue_infos da8xx_usb_infos = { 989 .queues_rx = da8xx_usb_queues_rx, 990 .queues_tx = da8xx_usb_queues_tx, 991 .td_queue = { .submit = 31, .complete = 0 }, 992 .first_completion_queue = 24, 993 .qmgr_num_pend = 2, 994 }; 995 996 static const struct of_device_id cppi41_dma_ids[] = { 997 { .compatible = "ti,am3359-cppi41", .data = &am335x_usb_infos}, 998 { .compatible = "ti,da830-cppi41", .data = &da8xx_usb_infos}, 999 {}, 1000 }; 1001 MODULE_DEVICE_TABLE(of, cppi41_dma_ids); 1002 1003 static const struct cppi_glue_infos *get_glue_info(struct device *dev) 1004 { 1005 const struct of_device_id *of_id; 1006 1007 of_id = of_match_node(cppi41_dma_ids, dev->of_node); 1008 if (!of_id) 1009 return NULL; 1010 return of_id->data; 1011 } 1012 1013 #define CPPI41_DMA_BUSWIDTHS (BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \ 1014 BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \ 1015 BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \ 1016 BIT(DMA_SLAVE_BUSWIDTH_4_BYTES)) 1017 1018 static int cppi41_dma_probe(struct platform_device *pdev) 1019 { 1020 struct cppi41_dd *cdd; 1021 struct device *dev = &pdev->dev; 1022 const struct cppi_glue_infos *glue_info; 1023 struct resource *mem; 1024 int index; 1025 int irq; 1026 int ret; 1027 1028 glue_info = get_glue_info(dev); 1029 if (!glue_info) 1030 return -EINVAL; 1031 1032 cdd = devm_kzalloc(&pdev->dev, sizeof(*cdd), GFP_KERNEL); 1033 if (!cdd) 1034 return -ENOMEM; 1035 1036 dma_cap_set(DMA_SLAVE, cdd->ddev.cap_mask); 1037 cdd->ddev.device_alloc_chan_resources = cppi41_dma_alloc_chan_resources; 1038 cdd->ddev.device_free_chan_resources = cppi41_dma_free_chan_resources; 1039 cdd->ddev.device_tx_status = cppi41_dma_tx_status; 1040 cdd->ddev.device_issue_pending = cppi41_dma_issue_pending; 1041 cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg; 1042 cdd->ddev.device_terminate_all = cppi41_stop_chan; 1043 cdd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV); 1044 cdd->ddev.src_addr_widths = CPPI41_DMA_BUSWIDTHS; 1045 cdd->ddev.dst_addr_widths = CPPI41_DMA_BUSWIDTHS; 1046 cdd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST; 1047 cdd->ddev.dev = dev; 1048 INIT_LIST_HEAD(&cdd->ddev.channels); 1049 cpp41_dma_info.dma_cap = cdd->ddev.cap_mask; 1050 1051 index = of_property_match_string(dev->of_node, 1052 "reg-names", "controller"); 1053 if (index < 0) 1054 return index; 1055 1056 mem = platform_get_resource(pdev, IORESOURCE_MEM, index); 1057 cdd->ctrl_mem = devm_ioremap_resource(dev, mem); 1058 if (IS_ERR(cdd->ctrl_mem)) 1059 return PTR_ERR(cdd->ctrl_mem); 1060 1061 mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 1); 1062 cdd->sched_mem = devm_ioremap_resource(dev, mem); 1063 if (IS_ERR(cdd->sched_mem)) 1064 return PTR_ERR(cdd->sched_mem); 1065 1066 mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 2); 1067 cdd->qmgr_mem = devm_ioremap_resource(dev, mem); 1068 if (IS_ERR(cdd->qmgr_mem)) 1069 return PTR_ERR(cdd->qmgr_mem); 1070 1071 spin_lock_init(&cdd->lock); 1072 INIT_LIST_HEAD(&cdd->pending); 1073 1074 platform_set_drvdata(pdev, cdd); 1075 1076 pm_runtime_enable(dev); 1077 pm_runtime_set_autosuspend_delay(dev, 100); 1078 pm_runtime_use_autosuspend(dev); 1079 ret = pm_runtime_get_sync(dev); 1080 if (ret < 0) 1081 goto err_get_sync; 1082 1083 cdd->queues_rx = glue_info->queues_rx; 1084 cdd->queues_tx = glue_info->queues_tx; 1085 cdd->td_queue = glue_info->td_queue; 1086 cdd->qmgr_num_pend = glue_info->qmgr_num_pend; 1087 cdd->first_completion_queue = glue_info->first_completion_queue; 1088 1089 ret = of_property_read_u32(dev->of_node, 1090 "#dma-channels", &cdd->n_chans); 1091 if (ret) 1092 goto err_get_n_chans; 1093 1094 ret = init_cppi41(dev, cdd); 1095 if (ret) 1096 goto err_init_cppi; 1097 1098 ret = cppi41_add_chans(dev, cdd); 1099 if (ret) 1100 goto err_chans; 1101 1102 irq = irq_of_parse_and_map(dev->of_node, 0); 1103 if (!irq) { 1104 ret = -EINVAL; 1105 goto err_chans; 1106 } 1107 1108 ret = devm_request_irq(&pdev->dev, irq, cppi41_irq, IRQF_SHARED, 1109 dev_name(dev), cdd); 1110 if (ret) 1111 goto err_chans; 1112 cdd->irq = irq; 1113 1114 ret = dma_async_device_register(&cdd->ddev); 1115 if (ret) 1116 goto err_chans; 1117 1118 ret = of_dma_controller_register(dev->of_node, 1119 cppi41_dma_xlate, &cpp41_dma_info); 1120 if (ret) 1121 goto err_of; 1122 1123 pm_runtime_mark_last_busy(dev); 1124 pm_runtime_put_autosuspend(dev); 1125 1126 return 0; 1127 err_of: 1128 dma_async_device_unregister(&cdd->ddev); 1129 err_chans: 1130 deinit_cppi41(dev, cdd); 1131 err_init_cppi: 1132 pm_runtime_dont_use_autosuspend(dev); 1133 err_get_n_chans: 1134 err_get_sync: 1135 pm_runtime_put_sync(dev); 1136 pm_runtime_disable(dev); 1137 return ret; 1138 } 1139 1140 static int cppi41_dma_remove(struct platform_device *pdev) 1141 { 1142 struct cppi41_dd *cdd = platform_get_drvdata(pdev); 1143 int error; 1144 1145 error = pm_runtime_get_sync(&pdev->dev); 1146 if (error < 0) 1147 dev_err(&pdev->dev, "%s could not pm_runtime_get: %i\n", 1148 __func__, error); 1149 of_dma_controller_free(pdev->dev.of_node); 1150 dma_async_device_unregister(&cdd->ddev); 1151 1152 devm_free_irq(&pdev->dev, cdd->irq, cdd); 1153 deinit_cppi41(&pdev->dev, cdd); 1154 pm_runtime_dont_use_autosuspend(&pdev->dev); 1155 pm_runtime_put_sync(&pdev->dev); 1156 pm_runtime_disable(&pdev->dev); 1157 return 0; 1158 } 1159 1160 static int __maybe_unused cppi41_suspend(struct device *dev) 1161 { 1162 struct cppi41_dd *cdd = dev_get_drvdata(dev); 1163 1164 cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ); 1165 disable_sched(cdd); 1166 1167 return 0; 1168 } 1169 1170 static int __maybe_unused cppi41_resume(struct device *dev) 1171 { 1172 struct cppi41_dd *cdd = dev_get_drvdata(dev); 1173 struct cppi41_channel *c; 1174 int i; 1175 1176 for (i = 0; i < DESCS_AREAS; i++) 1177 cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i)); 1178 1179 list_for_each_entry(c, &cdd->ddev.channels, chan.device_node) 1180 if (!c->is_tx) 1181 cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0); 1182 1183 init_sched(cdd); 1184 1185 cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ); 1186 cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE); 1187 cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE); 1188 cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE); 1189 1190 return 0; 1191 } 1192 1193 static int __maybe_unused cppi41_runtime_suspend(struct device *dev) 1194 { 1195 struct cppi41_dd *cdd = dev_get_drvdata(dev); 1196 unsigned long flags; 1197 1198 spin_lock_irqsave(&cdd->lock, flags); 1199 cdd->is_suspended = true; 1200 WARN_ON(!list_empty(&cdd->pending)); 1201 spin_unlock_irqrestore(&cdd->lock, flags); 1202 1203 return 0; 1204 } 1205 1206 static int __maybe_unused cppi41_runtime_resume(struct device *dev) 1207 { 1208 struct cppi41_dd *cdd = dev_get_drvdata(dev); 1209 unsigned long flags; 1210 1211 spin_lock_irqsave(&cdd->lock, flags); 1212 cdd->is_suspended = false; 1213 cppi41_run_queue(cdd); 1214 spin_unlock_irqrestore(&cdd->lock, flags); 1215 1216 return 0; 1217 } 1218 1219 static const struct dev_pm_ops cppi41_pm_ops = { 1220 SET_LATE_SYSTEM_SLEEP_PM_OPS(cppi41_suspend, cppi41_resume) 1221 SET_RUNTIME_PM_OPS(cppi41_runtime_suspend, 1222 cppi41_runtime_resume, 1223 NULL) 1224 }; 1225 1226 static struct platform_driver cpp41_dma_driver = { 1227 .probe = cppi41_dma_probe, 1228 .remove = cppi41_dma_remove, 1229 .driver = { 1230 .name = "cppi41-dma-engine", 1231 .pm = &cppi41_pm_ops, 1232 .of_match_table = of_match_ptr(cppi41_dma_ids), 1233 }, 1234 }; 1235 1236 module_platform_driver(cpp41_dma_driver); 1237 MODULE_LICENSE("GPL"); 1238 MODULE_AUTHOR("Sebastian Andrzej Siewior <bigeasy@linutronix.de>"); 1239