// SPDX-License-Identifier: GPL-2.0-only
#include <linux/delay.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/of_dma.h>
#include <linux/of_irq.h>
#include <linux/dmapool.h>
#include <linux/interrupt.h>
#include <linux/of_address.h>
#include <linux/pm_runtime.h>
#include "../dmaengine.h"

#define DESC_TYPE	27
#define DESC_TYPE_HOST	0x10
#define DESC_TYPE_TEARD	0x13

#define TD_DESC_IS_RX	(1 << 16)
#define TD_DESC_DMA_NUM	10

#define DESC_LENGTH_BITS_NUM	21

#define DESC_TYPE_USB	(5 << 26)
#define DESC_PD_COMPLETE	(1 << 31)

/* DMA engine */
#define DMA_TDFDQ	4
#define DMA_TXGCR(x)	(0x800 + (x) * 0x20)
#define DMA_RXGCR(x)	(0x808 + (x) * 0x20)
#define RXHPCRA0	4

#define GCR_CHAN_ENABLE		(1 << 31)
#define GCR_TEARDOWN		(1 << 30)
#define GCR_STARV_RETRY		(1 << 24)
#define GCR_DESC_TYPE_HOST	(1 << 14)

/* DMA scheduler */
#define DMA_SCHED_CTRL		0
#define DMA_SCHED_CTRL_EN	(1 << 31)
#define DMA_SCHED_WORD(x)	((x) * 4 + 0x800)

#define SCHED_ENTRY0_CHAN(x)	((x) << 0)
#define SCHED_ENTRY0_IS_RX	(1 << 7)

#define SCHED_ENTRY1_CHAN(x)	((x) << 8)
#define SCHED_ENTRY1_IS_RX	(1 << 15)

#define SCHED_ENTRY2_CHAN(x)	((x) << 16)
#define SCHED_ENTRY2_IS_RX	(1 << 23)

#define SCHED_ENTRY3_CHAN(x)	((x) << 24)
#define SCHED_ENTRY3_IS_RX	(1 << 31)

/* Queue manager */
/* 4 KiB of memory for descriptors, 2 for each endpoint */
#define ALLOC_DECS_NUM		128
#define DESCS_AREAS		1
#define TOTAL_DESCS_NUM		(ALLOC_DECS_NUM * DESCS_AREAS)
#define QMGR_SCRATCH_SIZE	(TOTAL_DESCS_NUM * 4)

#define QMGR_LRAM0_BASE		0x80
#define QMGR_LRAM_SIZE		0x84
#define QMGR_LRAM1_BASE		0x88
#define QMGR_MEMBASE(x)		(0x1000 + (x) * 0x10)
#define QMGR_MEMCTRL(x)		(0x1004 + (x) * 0x10)
#define QMGR_MEMCTRL_IDX_SH	16
#define QMGR_MEMCTRL_DESC_SH	8

#define QMGR_PEND(x)	(0x90 + (x) * 4)

#define QMGR_PENDING_SLOT_Q(x)	((x) / 32)
#define QMGR_PENDING_BIT_Q(x)	((x) % 32)
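
/*
 * Worked example for the two macros above: on AM335x the first completion
 * queue is 93, so its pending flag is bit 93 % 32 = 29 in register
 * QMGR_PEND(93 / 32) = QMGR_PEND(2).
 */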

#define QMGR_QUEUE_A(n)	(0x2000 + (n) * 0x10)
#define QMGR_QUEUE_B(n)	(0x2004 + (n) * 0x10)
#define QMGR_QUEUE_C(n)	(0x2008 + (n) * 0x10)
#define QMGR_QUEUE_D(n)	(0x200c + (n) * 0x10)

/* Packet Descriptor */
#define PD2_ZERO_LENGTH		(1 << 19)

struct cppi41_channel {
	struct dma_chan chan;
	struct dma_async_tx_descriptor txd;
	struct cppi41_dd *cdd;
	struct cppi41_desc *desc;
	dma_addr_t desc_phys;
	void __iomem *gcr_reg;
	int is_tx;
	u32 residue;

	unsigned int q_num;
	unsigned int q_comp_num;
	unsigned int port_num;

	unsigned td_retry;
	unsigned td_queued:1;
	unsigned td_seen:1;
	unsigned td_desc_seen:1;

	struct list_head node;		/* Node for pending list */
};

struct cppi41_desc {
	u32 pd0;
	u32 pd1;
	u32 pd2;
	u32 pd3;
	u32 pd4;
	u32 pd5;
	u32 pd6;
	u32 pd7;
} __aligned(32);

struct chan_queues {
	u16 submit;
	u16 complete;
};

struct cppi41_dd {
	struct dma_device ddev;

	void *qmgr_scratch;
	dma_addr_t scratch_phys;

	struct cppi41_desc *cd;
	dma_addr_t descs_phys;
	u32 first_td_desc;
	struct cppi41_channel *chan_busy[ALLOC_DECS_NUM];

	void __iomem *ctrl_mem;
	void __iomem *sched_mem;
	void __iomem *qmgr_mem;
	unsigned int irq;
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;
	u16 first_completion_queue;
	u16 qmgr_num_pend;
	u32 n_chans;
	u8 platform;

	struct list_head pending;	/* Pending queued transfers */
	spinlock_t lock;		/* Lock for pending list */

	/* context for suspend/resume */
	unsigned int dma_tdfdq;

	bool is_suspended;
};

static const struct chan_queues am335x_usb_queues_tx[] = {
	/* USB0 ENDP 1 */
	[ 0] = { .submit = 32, .complete =  93},
	[ 1] = { .submit = 34, .complete =  94},
	[ 2] = { .submit = 36, .complete =  95},
	[ 3] = { .submit = 38, .complete =  96},
	[ 4] = { .submit = 40, .complete =  97},
	[ 5] = { .submit = 42, .complete =  98},
	[ 6] = { .submit = 44, .complete =  99},
	[ 7] = { .submit = 46, .complete = 100},
	[ 8] = { .submit = 48, .complete = 101},
	[ 9] = { .submit = 50, .complete = 102},
	[10] = { .submit = 52, .complete = 103},
	[11] = { .submit = 54, .complete = 104},
	[12] = { .submit = 56, .complete = 105},
	[13] = { .submit = 58, .complete = 106},
	[14] = { .submit = 60, .complete = 107},

	/* USB1 ENDP1 */
	[15] = { .submit = 62, .complete = 125},
	[16] = { .submit = 64, .complete = 126},
	[17] = { .submit = 66, .complete = 127},
	[18] = { .submit = 68, .complete = 128},
	[19] = { .submit = 70, .complete = 129},
	[20] = { .submit = 72, .complete = 130},
	[21] = { .submit = 74, .complete = 131},
	[22] = { .submit = 76, .complete = 132},
	[23] = { .submit = 78, .complete = 133},
	[24] = { .submit = 80, .complete = 134},
	[25] = { .submit = 82, .complete = 135},
	[26] = { .submit = 84, .complete = 136},
	[27] = { .submit = 86, .complete = 137},
	[28] = { .submit = 88, .complete = 138},
	[29] = { .submit = 90, .complete = 139},
};

static const struct chan_queues am335x_usb_queues_rx[] = {
	/* USB0 ENDP 1 */
	[ 0] = { .submit =  1, .complete = 109},
	[ 1] = { .submit =  2, .complete = 110},
	[ 2] = { .submit =  3, .complete = 111},
	[ 3] = { .submit =  4, .complete = 112},
	[ 4] = { .submit =  5, .complete = 113},
	[ 5] = { .submit =  6, .complete = 114},
	[ 6] = { .submit =  7, .complete = 115},
	[ 7] = { .submit =  8, .complete = 116},
	[ 8] = { .submit =  9, .complete = 117},
	[ 9] = { .submit = 10, .complete = 118},
	[10] = { .submit = 11, .complete = 119},
	[11] = { .submit = 12, .complete = 120},
	[12] = { .submit = 13, .complete = 121},
	[13] = { .submit = 14, .complete = 122},
	[14] = { .submit = 15, .complete = 123},

	/* USB1 ENDP 1 */
	[15] = { .submit = 16, .complete = 141},
	[16] = { .submit = 17, .complete = 142},
	[17] = { .submit = 18, .complete = 143},
	[18] = { .submit = 19, .complete = 144},
	[19] = { .submit = 20, .complete = 145},
	[20] = { .submit = 21, .complete = 146},
	[21] = { .submit = 22, .complete = 147},
	[22] = { .submit = 23, .complete = 148},
	[23] = { .submit = 24, .complete = 149},
	[24] = { .submit = 25, .complete = 150},
	[25] = { .submit = 26, .complete = 151},
	[26] = { .submit = 27, .complete = 152},
	[27] = { .submit = 28, .complete = 153},
	[28] = { .submit = 29, .complete = 154},
	[29] = { .submit = 30, .complete = 155},
};

static const struct chan_queues da8xx_usb_queues_tx[] = {
	[0] = { .submit = 16, .complete = 24},
	[1] = { .submit = 18, .complete = 24},
	[2] = { .submit = 20, .complete = 24},
	[3] = { .submit = 22, .complete = 24},
};
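
/*
 * Note that all four DA8xx TX channels complete into the same queue (24),
 * and the RX channels below likewise share queue 26: cppi41_irq() tells
 * the channels apart by descriptor address via desc_to_chan(), not by
 * queue number.
 */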
static const struct chan_queues da8xx_usb_queues_rx[] = {
	[0] = { .submit = 1, .complete = 26},
	[1] = { .submit = 3, .complete = 26},
	[2] = { .submit = 5, .complete = 26},
	[3] = { .submit = 7, .complete = 26},
};

struct cppi_glue_infos {
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;
	u16 first_completion_queue;
	u16 qmgr_num_pend;
};

static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c)
{
	return container_of(c, struct cppi41_channel, chan);
}

static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc)
{
	struct cppi41_channel *c;
	u32 descs_size;
	u32 desc_num;

	descs_size = sizeof(struct cppi41_desc) * ALLOC_DECS_NUM;

	if (!((desc >= cdd->descs_phys) &&
			(desc < (cdd->descs_phys + descs_size)))) {
		return NULL;
	}

	desc_num = (desc - cdd->descs_phys) / sizeof(struct cppi41_desc);
	BUG_ON(desc_num >= ALLOC_DECS_NUM);
	c = cdd->chan_busy[desc_num];
	cdd->chan_busy[desc_num] = NULL;

	/* Usecount for chan_busy[], paired with push_desc_queue() */
	pm_runtime_put(cdd->ddev.dev);

	return c;
}

static void cppi_writel(u32 val, void __iomem *mem)
{
	__raw_writel(val, mem);
}

static u32 cppi_readl(void __iomem *mem)
{
	return __raw_readl(mem);
}

static u32 pd_trans_len(u32 val)
{
	return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1);
}

static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
{
	u32 desc;

	desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
	desc &= ~0x1f;
	return desc;
}

static irqreturn_t cppi41_irq(int irq, void *data)
{
	struct cppi41_dd *cdd = data;
	u16 first_completion_queue = cdd->first_completion_queue;
	u16 qmgr_num_pend = cdd->qmgr_num_pend;
	struct cppi41_channel *c;
	int i;

	for (i = QMGR_PENDING_SLOT_Q(first_completion_queue); i < qmgr_num_pend;
	     i++) {
		u32 val;
		u32 q_num;

		val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i));
		if (i == QMGR_PENDING_SLOT_Q(first_completion_queue) && val) {
			u32 mask;
			/* set corresponding bit for completion Q 93 */
			mask = 1 << QMGR_PENDING_BIT_Q(first_completion_queue);
			/* now set all bits for queues less than Q 93 */
			mask--;
			/* now invert and keep only Q 93+ set */
			val &= ~mask;
		}

		if (val)
			__iormb();

		while (val) {
			u32 desc, len;

			/*
			 * This should never trigger, see the comments in
			 * push_desc_queue()
			 */
			WARN_ON(cdd->is_suspended);

			q_num = __fls(val);
			val &= ~(1 << q_num);
			q_num += 32 * i;
			desc = cppi41_pop_desc(cdd, q_num);
			c = desc_to_chan(cdd, desc);
			if (WARN_ON(!c)) {
				pr_err("%s() q %d desc %08x\n", __func__,
				       q_num, desc);
				continue;
			}

			if (c->desc->pd2 & PD2_ZERO_LENGTH)
				len = 0;
			else
				len = pd_trans_len(c->desc->pd0);

			c->residue = pd_trans_len(c->desc->pd6) - len;
			dma_cookie_complete(&c->txd);
			dmaengine_desc_get_callback_invoke(&c->txd, NULL);
		}
	}
	return IRQ_HANDLED;
}

static dma_cookie_t cppi41_tx_submit(struct dma_async_tx_descriptor *tx)
{
	dma_cookie_t cookie;

	cookie = dma_cookie_assign(tx);

	return cookie;
}
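
/*
 * tx_submit only assigns a cookie; the descriptor reaches the hardware
 * queue later, in cppi41_dma_issue_pending(), so the push can be deferred
 * via cdd->pending while the device is runtime suspended.
 */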
static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	int error;

	error = pm_runtime_get_sync(cdd->ddev.dev);
	if (error < 0) {
		dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n",
			__func__, error);
		pm_runtime_put_noidle(cdd->ddev.dev);

		return error;
	}

	dma_cookie_init(chan);
	dma_async_tx_descriptor_init(&c->txd, chan);
	c->txd.tx_submit = cppi41_tx_submit;

	if (!c->is_tx)
		cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);

	pm_runtime_put_autosuspend(cdd->ddev.dev);

	return 0;
}

static void cppi41_dma_free_chan_resources(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	int error;

	error = pm_runtime_get_sync(cdd->ddev.dev);
	if (error < 0) {
		pm_runtime_put_noidle(cdd->ddev.dev);

		return;
	}

	WARN_ON(!list_empty(&cdd->pending));

	pm_runtime_put_autosuspend(cdd->ddev.dev);
}

static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
	dma_cookie_t cookie, struct dma_tx_state *txstate)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	enum dma_status ret;

	ret = dma_cookie_status(chan, cookie, txstate);

	dma_set_residue(txstate, c->residue);

	return ret;
}

static void push_desc_queue(struct cppi41_channel *c)
{
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	u32 reg;

	c->residue = 0;

	reg = GCR_CHAN_ENABLE;
	if (!c->is_tx) {
		reg |= GCR_STARV_RETRY;
		reg |= GCR_DESC_TYPE_HOST;
		reg |= c->q_comp_num;
	}

	cppi_writel(reg, c->gcr_reg);

	/*
	 * We don't use writel() but __raw_writel() so we have to make sure
	 * that the DMA descriptor in coherent memory made it to the main
	 * memory before starting the dma engine.
	 */
	__iowmb();

	/*
	 * DMA transfers can take at least 200ms to complete with USB mass
	 * storage connected. To prevent autosuspend timeouts, we must use
	 * pm_runtime_get/put() when chan_busy[] is modified. This will get
	 * cleared in desc_to_chan() or cppi41_stop_chan() depending on the
	 * outcome of the transfer.
	 */
	pm_runtime_get(cdd->ddev.dev);

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	WARN_ON(cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = c;

	reg = (sizeof(struct cppi41_desc) - 24) / 4;
	reg |= desc_phys;
	cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
}
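
/*
 * The value written to the queue register above packs a descriptor size
 * field ((sizeof(struct cppi41_desc) - 24) / 4 == 2 here) into the low
 * five bits of the descriptor address; this is why the descriptors are
 * 32-byte aligned and why cppi41_pop_desc() masks the address with ~0x1f.
 */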
/*
 * Caller must hold cdd->lock to prevent push_desc_queue()
 * getting called out of order. Both cppi41_dma_issue_pending()
 * and cppi41_runtime_resume() call this function.
 */
static void cppi41_run_queue(struct cppi41_dd *cdd)
{
	struct cppi41_channel *c, *_c;

	list_for_each_entry_safe(c, _c, &cdd->pending, node) {
		push_desc_queue(c);
		list_del(&c->node);
	}
}

static void cppi41_dma_issue_pending(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	unsigned long flags;
	int error;

	error = pm_runtime_get(cdd->ddev.dev);
	if ((error != -EINPROGRESS) && error < 0) {
		pm_runtime_put_noidle(cdd->ddev.dev);
		dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n",
			error);

		return;
	}

	spin_lock_irqsave(&cdd->lock, flags);
	list_add_tail(&c->node, &cdd->pending);
	if (!cdd->is_suspended)
		cppi41_run_queue(cdd);
	spin_unlock_irqrestore(&cdd->lock, flags);

	pm_runtime_put_autosuspend(cdd->ddev.dev);
}
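
/*
 * Usage sketch (illustrative only, not part of this driver; "tx1", buf and
 * done_fn are placeholders): a client such as the MUSB glue layer drives a
 * channel through the generic dmaengine API, roughly like this, assuming a
 * matching "dmas"/"dma-names" entry in its device tree node:
 *
 *	struct dma_async_tx_descriptor *tx;
 *	struct dma_chan *chan;
 *	struct scatterlist sg;
 *
 *	chan = dma_request_chan(dev, "tx1");
 *	sg_init_one(&sg, buf, len);
 *	dma_map_sg(dev, &sg, 1, DMA_TO_DEVICE);
 *	tx = dmaengine_prep_slave_sg(chan, &sg, 1, DMA_MEM_TO_DEV,
 *				     DMA_PREP_INTERRUPT);
 *	tx->callback = done_fn;			(invoked from cppi41_irq())
 *	dmaengine_submit(tx);			(cppi41_tx_submit())
 *	dma_async_issue_pending(chan);		(cppi41_dma_issue_pending())
 */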
static u32 get_host_pd0(u32 length)
{
	u32 reg;

	reg = DESC_TYPE_HOST << DESC_TYPE;
	reg |= length;

	return reg;
}

static u32 get_host_pd1(struct cppi41_channel *c)
{
	u32 reg;

	reg = 0;

	return reg;
}

static u32 get_host_pd2(struct cppi41_channel *c)
{
	u32 reg;

	reg = DESC_TYPE_USB;
	reg |= c->q_comp_num;

	return reg;
}

static u32 get_host_pd3(u32 length)
{
	u32 reg;

	/* PD3 = packet size */
	reg = length;

	return reg;
}

static u32 get_host_pd6(u32 length)
{
	u32 reg;

	/* PD6 buffer size */
	reg = DESC_PD_COMPLETE;
	reg |= length;

	return reg;
}

static u32 get_host_pd4_or_7(u32 addr)
{
	u32 reg;

	reg = addr;

	return reg;
}

static u32 get_host_pd5(void)
{
	u32 reg;

	reg = 0;

	return reg;
}

static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
	struct dma_chan *chan, struct scatterlist *sgl, unsigned sg_len,
	enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct dma_async_tx_descriptor *txd = NULL;
	struct cppi41_dd *cdd = c->cdd;
	struct cppi41_desc *d;
	struct scatterlist *sg;
	unsigned int i;
	int error;

	error = pm_runtime_get(cdd->ddev.dev);
	if (error < 0) {
		pm_runtime_put_noidle(cdd->ddev.dev);

		return NULL;
	}

	if (cdd->is_suspended)
		goto err_out_not_ready;

	d = c->desc;
	for_each_sg(sgl, sg, sg_len, i) {
		u32 addr;
		u32 len;

		/* We need to use more than one desc once musb supports sg */
		addr = lower_32_bits(sg_dma_address(sg));
		len = sg_dma_len(sg);

		d->pd0 = get_host_pd0(len);
		d->pd1 = get_host_pd1(c);
		d->pd2 = get_host_pd2(c);
		d->pd3 = get_host_pd3(len);
		d->pd4 = get_host_pd4_or_7(addr);
		d->pd5 = get_host_pd5();
		d->pd6 = get_host_pd6(len);
		d->pd7 = get_host_pd4_or_7(addr);

		d++;
	}

	txd = &c->txd;

err_out_not_ready:
	pm_runtime_put_autosuspend(cdd->ddev.dev);

	return txd;
}
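
/*
 * In summary, the host packet descriptor built above carries: the type and
 * packet length in pd0, the USB descriptor type plus completion queue in
 * pd2, the packet size again in pd3, the buffer pointer in pd4 and pd7,
 * and the original buffer length (with DESC_PD_COMPLETE preset) in pd6.
 * cppi41_irq() later computes the residue as pd6's length minus the
 * transferred length reported back in pd0.
 */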
static void cppi41_compute_td_desc(struct cppi41_desc *d)
{
	d->pd0 = DESC_TYPE_TEARD << DESC_TYPE;
}

static int cppi41_tear_down_chan(struct cppi41_channel *c)
{
	struct dmaengine_result abort_result;
	struct cppi41_dd *cdd = c->cdd;
	struct cppi41_desc *td;
	u32 reg;
	u32 desc_phys;
	u32 td_desc_phys;

	td = cdd->cd;
	td += cdd->first_td_desc;

	td_desc_phys = cdd->descs_phys;
	td_desc_phys += cdd->first_td_desc * sizeof(struct cppi41_desc);

	if (!c->td_queued) {
		cppi41_compute_td_desc(td);
		__iowmb();

		reg = (sizeof(struct cppi41_desc) - 24) / 4;
		reg |= td_desc_phys;
		cppi_writel(reg, cdd->qmgr_mem +
				QMGR_QUEUE_D(cdd->td_queue.submit));

		reg = GCR_CHAN_ENABLE;
		if (!c->is_tx) {
			reg |= GCR_STARV_RETRY;
			reg |= GCR_DESC_TYPE_HOST;
			reg |= cdd->td_queue.complete;
		}
		reg |= GCR_TEARDOWN;
		cppi_writel(reg, c->gcr_reg);
		c->td_queued = 1;
		c->td_retry = 500;
	}

	if (!c->td_seen || !c->td_desc_seen) {

		desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete);
		if (!desc_phys && c->is_tx)
			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);

		if (desc_phys == c->desc_phys) {
			c->td_desc_seen = 1;

		} else if (desc_phys == td_desc_phys) {
			u32 pd0;

			__iormb();
			pd0 = td->pd0;
			WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
			WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
			WARN_ON((pd0 & 0x1f) != c->port_num);
			c->td_seen = 1;
		} else if (desc_phys) {
			WARN_ON_ONCE(1);
		}
	}
	c->td_retry--;
	/*
	 * If the TX descriptor / channel is in use, the caller needs to poke
	 * the TD bit multiple times. After that the hardware releases the
	 * transfer descriptor followed by the TD descriptor. Waiting seems
	 * not to make any difference.
	 * RX seems to be thrown out right away. However once the TearDown
	 * descriptor gets through we are done. If we have seen the transfer
	 * descriptor before the TD we fetch it from the submit queue; it has
	 * to be there waiting for us.
	 */
	if (!c->td_seen && c->td_retry) {
		udelay(1);
		return -EAGAIN;
	}
	WARN_ON(!c->td_retry);

	if (!c->td_desc_seen) {
		desc_phys = cppi41_pop_desc(cdd, c->q_num);
		if (!desc_phys)
			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
		WARN_ON(!desc_phys);
	}

	c->td_queued = 0;
	c->td_seen = 0;
	c->td_desc_seen = 0;
	cppi_writel(0, c->gcr_reg);

	/* Invoke the callback to do the necessary clean-up */
	abort_result.result = DMA_TRANS_ABORTED;
	dma_cookie_complete(&c->txd);
	dmaengine_desc_get_callback_invoke(&c->txd, &abort_result);

	return 0;
}

static int cppi41_stop_chan(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	int ret;

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	if (!cdd->chan_busy[desc_num]) {
		struct cppi41_channel *cc, *_ct;

		/*
		 * channels might still be in the pending list if
		 * cppi41_dma_issue_pending() is called after
		 * cppi41_runtime_suspend() is called
		 */
		list_for_each_entry_safe(cc, _ct, &cdd->pending, node) {
			if (cc != c)
				continue;
			list_del(&cc->node);
			break;
		}
		return 0;
	}

	ret = cppi41_tear_down_chan(c);
	if (ret)
		return ret;

	WARN_ON(!cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = NULL;

	/* Usecount for chan_busy[], paired with push_desc_queue() */
	pm_runtime_put(cdd->ddev.dev);

	return 0;
}
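
/*
 * The teardown above is a small state machine: td_queued marks that the
 * teardown descriptor has been pushed, and td_seen/td_desc_seen record
 * which of the two descriptors (teardown and transfer) have drained.
 * Until the teardown descriptor is seen, cppi41_tear_down_chan() returns
 * -EAGAIN and the terminate_all caller is expected to retry, up to
 * td_retry (500) polls with a 1 us delay each; a transfer descriptor that
 * has not completed by then is reclaimed directly from the submit queue.
 */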
static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
{
	struct cppi41_channel *cchan, *chans;
	int i;
	u32 n_chans = cdd->n_chans;

	/*
	 * Each channel can only be used as TX or as RX, so we allocate
	 * twice as many DMA channels: one set for RX and one for TX.
	 */
	n_chans *= 2;

	chans = devm_kcalloc(dev, n_chans, sizeof(*chans), GFP_KERNEL);
	if (!chans)
		return -ENOMEM;

	for (i = 0; i < n_chans; i++) {
		cchan = &chans[i];

		cchan->cdd = cdd;
		if (i & 1) {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_TXGCR(i >> 1);
			cchan->is_tx = 1;
		} else {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_RXGCR(i >> 1);
			cchan->is_tx = 0;
		}
		cchan->port_num = i >> 1;
		cchan->desc = &cdd->cd[i];
		cchan->desc_phys = cdd->descs_phys;
		cchan->desc_phys += i * sizeof(struct cppi41_desc);
		cchan->chan.device = &cdd->ddev;
		list_add_tail(&cchan->chan.device_node, &cdd->ddev.channels);
	}
	cdd->first_td_desc = n_chans;

	return 0;
}

static void purge_descs(struct device *dev, struct cppi41_dd *cdd)
{
	unsigned int mem_decs;
	int i;

	mem_decs = ALLOC_DECS_NUM * sizeof(struct cppi41_desc);

	for (i = 0; i < DESCS_AREAS; i++) {

		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		dma_free_coherent(dev, mem_decs, cdd->cd,
				cdd->descs_phys);
	}
}

static void disable_sched(struct cppi41_dd *cdd)
{
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
}

static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd)
{
	disable_sched(cdd);

	purge_descs(dev, cdd);

	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);
	dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
			cdd->scratch_phys);
}

static int init_descs(struct device *dev, struct cppi41_dd *cdd)
{
	unsigned int desc_size;
	unsigned int mem_decs;
	int i;
	u32 reg;
	u32 idx;

	BUILD_BUG_ON(sizeof(struct cppi41_desc) &
			(sizeof(struct cppi41_desc) - 1));
	BUILD_BUG_ON(sizeof(struct cppi41_desc) < 32);
	BUILD_BUG_ON(ALLOC_DECS_NUM < 32);

	desc_size = sizeof(struct cppi41_desc);
	mem_decs = ALLOC_DECS_NUM * desc_size;

	idx = 0;
	for (i = 0; i < DESCS_AREAS; i++) {

		reg = idx << QMGR_MEMCTRL_IDX_SH;
		reg |= (ilog2(desc_size) - 5) << QMGR_MEMCTRL_DESC_SH;
		reg |= ilog2(ALLOC_DECS_NUM) - 5;

		BUILD_BUG_ON(DESCS_AREAS != 1);
		cdd->cd = dma_alloc_coherent(dev, mem_decs,
				&cdd->descs_phys, GFP_KERNEL);
		if (!cdd->cd)
			return -ENOMEM;

		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(reg, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		idx += ALLOC_DECS_NUM;
	}
	return 0;
}
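
/*
 * With the constants used here the QMGR_MEMCTRL value works out to:
 * start index 0 in bits 16+, descriptor size code ilog2(32) - 5 = 0 in
 * bits 8..15, and region size code ilog2(128) - 5 = 2 in the low bits,
 * i.e. reg == 0x2.
 */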
static void init_sched(struct cppi41_dd *cdd)
{
	unsigned ch;
	unsigned word;
	u32 reg;

	word = 0;
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
	for (ch = 0; ch < cdd->n_chans; ch += 2) {

		reg = SCHED_ENTRY0_CHAN(ch);
		reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX;

		reg |= SCHED_ENTRY2_CHAN(ch + 1);
		reg |= SCHED_ENTRY3_CHAN(ch + 1) | SCHED_ENTRY3_IS_RX;
		cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word));
		word++;
	}
	reg = cdd->n_chans * 2 - 1;
	reg |= DMA_SCHED_CTRL_EN;
	cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
}

static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
{
	int ret;

	BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1));
	cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE,
			&cdd->scratch_phys, GFP_KERNEL);
	if (!cdd->qmgr_scratch)
		return -ENOMEM;

	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(TOTAL_DESCS_NUM, cdd->qmgr_mem + QMGR_LRAM_SIZE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);

	ret = init_descs(dev, cdd);
	if (ret)
		goto err_td;

	cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ);
	init_sched(cdd);

	return 0;
err_td:
	deinit_cppi41(dev, cdd);
	return ret;
}

static struct platform_driver cpp41_dma_driver;
/*
 * The param format is:
 * X Y
 * X: Port
 * Y: 0 = RX else TX
 */
#define INFO_PORT	0
#define INFO_IS_TX	1

static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param)
{
	struct cppi41_channel *cchan;
	struct cppi41_dd *cdd;
	const struct chan_queues *queues;
	u32 *num = param;

	if (chan->device->dev->driver != &cpp41_dma_driver.driver)
		return false;

	cchan = to_cpp41_chan(chan);

	if (cchan->port_num != num[INFO_PORT])
		return false;

	if (cchan->is_tx && !num[INFO_IS_TX])
		return false;
	cdd = cchan->cdd;
	if (cchan->is_tx)
		queues = cdd->queues_tx;
	else
		queues = cdd->queues_rx;

	BUILD_BUG_ON(ARRAY_SIZE(am335x_usb_queues_rx) !=
		     ARRAY_SIZE(am335x_usb_queues_tx));
	if (WARN_ON(cchan->port_num >= ARRAY_SIZE(am335x_usb_queues_rx)))
		return false;

	cchan->q_num = queues[cchan->port_num].submit;
	cchan->q_comp_num = queues[cchan->port_num].complete;
	return true;
}

static struct of_dma_filter_info cpp41_dma_info = {
	.filter_fn = cpp41_dma_filter_fn,
};

static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec,
		struct of_dma *ofdma)
{
	int count = dma_spec->args_count;
	struct of_dma_filter_info *info = ofdma->of_dma_data;

	if (!info || !info->filter_fn)
		return NULL;

	if (count != 2)
		return NULL;

	return dma_request_channel(info->dma_cap, info->filter_fn,
			&dma_spec->args[0]);
}
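
/*
 * A consumer thus references a channel with two cells, port number and
 * direction, matching INFO_PORT/INFO_IS_TX above. An illustrative (not
 * normative) device tree snippet, with the &cppi41dma label assumed:
 *
 *	dmas = <&cppi41dma 0 0>, <&cppi41dma 0 1>;
 *	dma-names = "rx1", "tx1";
 */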
static const struct cppi_glue_infos am335x_usb_infos = {
	.queues_rx = am335x_usb_queues_rx,
	.queues_tx = am335x_usb_queues_tx,
	.td_queue = { .submit = 31, .complete = 0 },
	.first_completion_queue = 93,
	.qmgr_num_pend = 5,
};

static const struct cppi_glue_infos da8xx_usb_infos = {
	.queues_rx = da8xx_usb_queues_rx,
	.queues_tx = da8xx_usb_queues_tx,
	.td_queue = { .submit = 31, .complete = 0 },
	.first_completion_queue = 24,
	.qmgr_num_pend = 2,
};

static const struct of_device_id cppi41_dma_ids[] = {
	{ .compatible = "ti,am3359-cppi41", .data = &am335x_usb_infos},
	{ .compatible = "ti,da830-cppi41", .data = &da8xx_usb_infos},
	{},
};
MODULE_DEVICE_TABLE(of, cppi41_dma_ids);

static const struct cppi_glue_infos *get_glue_info(struct device *dev)
{
	const struct of_device_id *of_id;

	of_id = of_match_node(cppi41_dma_ids, dev->of_node);
	if (!of_id)
		return NULL;
	return of_id->data;
}

#define CPPI41_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
				BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
				BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
				BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))

static int cppi41_dma_probe(struct platform_device *pdev)
{
	struct cppi41_dd *cdd;
	struct device *dev = &pdev->dev;
	const struct cppi_glue_infos *glue_info;
	int index;
	int irq;
	int ret;

	glue_info = get_glue_info(dev);
	if (!glue_info)
		return -EINVAL;

	cdd = devm_kzalloc(&pdev->dev, sizeof(*cdd), GFP_KERNEL);
	if (!cdd)
		return -ENOMEM;

	dma_cap_set(DMA_SLAVE, cdd->ddev.cap_mask);
	cdd->ddev.device_alloc_chan_resources = cppi41_dma_alloc_chan_resources;
	cdd->ddev.device_free_chan_resources = cppi41_dma_free_chan_resources;
	cdd->ddev.device_tx_status = cppi41_dma_tx_status;
	cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
	cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
	cdd->ddev.device_terminate_all = cppi41_stop_chan;
	cdd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
	cdd->ddev.src_addr_widths = CPPI41_DMA_BUSWIDTHS;
	cdd->ddev.dst_addr_widths = CPPI41_DMA_BUSWIDTHS;
	cdd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
	cdd->ddev.dev = dev;
	INIT_LIST_HEAD(&cdd->ddev.channels);
	cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;

	index = of_property_match_string(dev->of_node,
					 "reg-names", "controller");
	if (index < 0)
		return index;

	cdd->ctrl_mem = devm_platform_ioremap_resource(pdev, index);
	if (IS_ERR(cdd->ctrl_mem))
		return PTR_ERR(cdd->ctrl_mem);

	cdd->sched_mem = devm_platform_ioremap_resource(pdev, index + 1);
	if (IS_ERR(cdd->sched_mem))
		return PTR_ERR(cdd->sched_mem);

	cdd->qmgr_mem = devm_platform_ioremap_resource(pdev, index + 2);
	if (IS_ERR(cdd->qmgr_mem))
		return PTR_ERR(cdd->qmgr_mem);

	spin_lock_init(&cdd->lock);
	INIT_LIST_HEAD(&cdd->pending);

	platform_set_drvdata(pdev, cdd);

	pm_runtime_enable(dev);
	pm_runtime_set_autosuspend_delay(dev, 100);
	pm_runtime_use_autosuspend(dev);
	ret = pm_runtime_get_sync(dev);
	if (ret < 0)
		goto err_get_sync;

	cdd->queues_rx = glue_info->queues_rx;
	cdd->queues_tx = glue_info->queues_tx;
	cdd->td_queue = glue_info->td_queue;
	cdd->qmgr_num_pend = glue_info->qmgr_num_pend;
	cdd->first_completion_queue = glue_info->first_completion_queue;

	/* Parse new and deprecated dma-channels properties */
	ret = of_property_read_u32(dev->of_node,
				   "dma-channels", &cdd->n_chans);
	if (ret)
		ret = of_property_read_u32(dev->of_node,
					   "#dma-channels", &cdd->n_chans);
	if (ret)
		goto err_get_n_chans;

	ret = init_cppi41(dev, cdd);
	if (ret)
		goto err_init_cppi;

	ret = cppi41_add_chans(dev, cdd);
	if (ret)
		goto err_chans;

	irq = irq_of_parse_and_map(dev->of_node, 0);
	if (!irq) {
		ret = -EINVAL;
		goto err_chans;
	}

	ret = devm_request_irq(&pdev->dev, irq, cppi41_irq, IRQF_SHARED,
			dev_name(dev), cdd);
	if (ret)
		goto err_chans;
	cdd->irq = irq;

	ret = dma_async_device_register(&cdd->ddev);
	if (ret)
		goto err_chans;

	ret = of_dma_controller_register(dev->of_node,
					 cppi41_dma_xlate, &cpp41_dma_info);
	if (ret)
		goto err_of;

	pm_runtime_put_autosuspend(dev);

	return 0;
err_of:
	dma_async_device_unregister(&cdd->ddev);
err_chans:
	deinit_cppi41(dev, cdd);
err_init_cppi:
	pm_runtime_dont_use_autosuspend(dev);
err_get_n_chans:
err_get_sync:
	pm_runtime_put_sync(dev);
	pm_runtime_disable(dev);
	return ret;
}

static void cppi41_dma_remove(struct platform_device *pdev)
{
	struct cppi41_dd *cdd = platform_get_drvdata(pdev);
	int error;

	error = pm_runtime_get_sync(&pdev->dev);
	if (error < 0)
		dev_err(&pdev->dev, "%s could not pm_runtime_get: %i\n",
			__func__, error);
	of_dma_controller_free(pdev->dev.of_node);
	dma_async_device_unregister(&cdd->ddev);

	devm_free_irq(&pdev->dev, cdd->irq, cdd);
	deinit_cppi41(&pdev->dev, cdd);
	pm_runtime_dont_use_autosuspend(&pdev->dev);
	pm_runtime_put_sync(&pdev->dev);
	pm_runtime_disable(&pdev->dev);
}

static int __maybe_unused cppi41_suspend(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);

	cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ);
	disable_sched(cdd);

	return 0;
}

static int __maybe_unused cppi41_resume(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	struct cppi41_channel *c;
	int i;

	for (i = 0; i < DESCS_AREAS; i++)
		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));

	list_for_each_entry(c, &cdd->ddev.channels, chan.device_node)
		if (!c->is_tx)
			cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);

	init_sched(cdd);

	cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ);
	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(TOTAL_DESCS_NUM, cdd->qmgr_mem + QMGR_LRAM_SIZE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);

	return 0;
}

static int __maybe_unused cppi41_runtime_suspend(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	unsigned long flags;

	spin_lock_irqsave(&cdd->lock, flags);
	cdd->is_suspended = true;
	WARN_ON(!list_empty(&cdd->pending));
	spin_unlock_irqrestore(&cdd->lock, flags);

	return 0;
}

static int __maybe_unused cppi41_runtime_resume(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	unsigned long flags;

	spin_lock_irqsave(&cdd->lock, flags);
	cdd->is_suspended = false;
	cppi41_run_queue(cdd);
	spin_unlock_irqrestore(&cdd->lock, flags);

	return 0;
}

static const struct dev_pm_ops cppi41_pm_ops = {
	SET_LATE_SYSTEM_SLEEP_PM_OPS(cppi41_suspend, cppi41_resume)
	SET_RUNTIME_PM_OPS(cppi41_runtime_suspend,
			   cppi41_runtime_resume,
			   NULL)
};

static struct platform_driver cpp41_dma_driver = {
	.probe = cppi41_dma_probe,
	.remove = cppi41_dma_remove,
	.driver = {
		.name = "cppi41-dma-engine",
		.pm = &cppi41_pm_ops,
		.of_match_table = of_match_ptr(cppi41_dma_ids),
	},
};

module_platform_driver(cpp41_dma_driver);
MODULE_DESCRIPTION("Texas Instruments CPPI 4.1 DMA support");
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Sebastian Andrzej Siewior <bigeasy@linutronix.de>");