#include <linux/delay.h>
#include <linux/dmaengine.h>
#include <linux/dma-mapping.h>
#include <linux/platform_device.h>
#include <linux/module.h>
#include <linux/of.h>
#include <linux/slab.h>
#include <linux/of_dma.h>
#include <linux/of_irq.h>
#include <linux/dmapool.h>
#include <linux/interrupt.h>
#include <linux/of_address.h>
#include <linux/pm_runtime.h>
#include "../dmaengine.h"

#define DESC_TYPE	27
#define DESC_TYPE_HOST	0x10
#define DESC_TYPE_TEARD	0x13

#define TD_DESC_IS_RX	(1 << 16)
#define TD_DESC_DMA_NUM	10

#define DESC_LENGTH_BITS_NUM	21

#define DESC_TYPE_USB		(5 << 26)
#define DESC_PD_COMPLETE	(1 << 31)

/* DMA engine */
#define DMA_TDFDQ	4
#define DMA_TXGCR(x)	(0x800 + (x) * 0x20)
#define DMA_RXGCR(x)	(0x808 + (x) * 0x20)
#define RXHPCRA0	4

#define GCR_CHAN_ENABLE		(1 << 31)
#define GCR_TEARDOWN		(1 << 30)
#define GCR_STARV_RETRY		(1 << 24)
#define GCR_DESC_TYPE_HOST	(1 << 14)

/* DMA scheduler */
#define DMA_SCHED_CTRL		0
#define DMA_SCHED_CTRL_EN	(1 << 31)
#define DMA_SCHED_WORD(x)	((x) * 4 + 0x800)

#define SCHED_ENTRY0_CHAN(x)	((x) << 0)
#define SCHED_ENTRY0_IS_RX	(1 << 7)

#define SCHED_ENTRY1_CHAN(x)	((x) << 8)
#define SCHED_ENTRY1_IS_RX	(1 << 15)

#define SCHED_ENTRY2_CHAN(x)	((x) << 16)
#define SCHED_ENTRY2_IS_RX	(1 << 23)

#define SCHED_ENTRY3_CHAN(x)	((x) << 24)
#define SCHED_ENTRY3_IS_RX	(1 << 31)

/* Queue manager */
/* 4 KiB of memory for descriptors, 2 for each endpoint */
#define ALLOC_DECS_NUM		128
#define DESCS_AREAS		1
#define TOTAL_DESCS_NUM		(ALLOC_DECS_NUM * DESCS_AREAS)
#define QMGR_SCRATCH_SIZE	(TOTAL_DESCS_NUM * 4)

#define QMGR_LRAM0_BASE		0x80
#define QMGR_LRAM_SIZE		0x84
#define QMGR_LRAM1_BASE		0x88
#define QMGR_MEMBASE(x)		(0x1000 + (x) * 0x10)
#define QMGR_MEMCTRL(x)		(0x1004 + (x) * 0x10)
#define QMGR_MEMCTRL_IDX_SH	16
#define QMGR_MEMCTRL_DESC_SH	8

#define QMGR_PEND(x)	(0x90 + (x) * 4)

#define QMGR_PENDING_SLOT_Q(x)	(x / 32)
#define QMGR_PENDING_BIT_Q(x)	(x % 32)

#define QMGR_QUEUE_A(n)	(0x2000 + (n) * 0x10)
#define QMGR_QUEUE_B(n)	(0x2004 + (n) * 0x10)
#define QMGR_QUEUE_C(n)	(0x2008 + (n) * 0x10)
#define QMGR_QUEUE_D(n)	(0x200c + (n) * 0x10)

/* Packet Descriptor */
#define PD2_ZERO_LENGTH		(1 << 19)
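
/*
 * Sizing sanity check (derived from the constants above, not from the TRM):
 * each cppi41_desc is 32 bytes, so ALLOC_DECS_NUM (128) descriptors occupy
 * 128 * 32 = 4 KiB, matching the comment above, and the queue manager
 * linking-RAM scratch area needs 4 bytes per descriptor, i.e.
 * QMGR_SCRATCH_SIZE = 128 * 4 = 512 bytes. A completion queue number maps
 * onto the pending registers as QMGR_PENDING_SLOT_Q(q) = q / 32 and
 * QMGR_PENDING_BIT_Q(q) = q % 32, e.g. queue 93 lives in pending register 2,
 * bit 29.
 */
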
struct cppi41_channel {
	struct dma_chan chan;
	struct dma_async_tx_descriptor txd;
	struct cppi41_dd *cdd;
	struct cppi41_desc *desc;
	dma_addr_t desc_phys;
	void __iomem *gcr_reg;
	int is_tx;
	u32 residue;

	unsigned int q_num;
	unsigned int q_comp_num;
	unsigned int port_num;

	unsigned td_retry;
	unsigned td_queued:1;
	unsigned td_seen:1;
	unsigned td_desc_seen:1;

	struct list_head node;		/* Node for pending list */
};

struct cppi41_desc {
	u32 pd0;
	u32 pd1;
	u32 pd2;
	u32 pd3;
	u32 pd4;
	u32 pd5;
	u32 pd6;
	u32 pd7;
} __aligned(32);

struct chan_queues {
	u16 submit;
	u16 complete;
};

struct cppi41_dd {
	struct dma_device ddev;

	void *qmgr_scratch;
	dma_addr_t scratch_phys;

	struct cppi41_desc *cd;
	dma_addr_t descs_phys;
	u32 first_td_desc;
	struct cppi41_channel *chan_busy[ALLOC_DECS_NUM];

	void __iomem *ctrl_mem;
	void __iomem *sched_mem;
	void __iomem *qmgr_mem;
	unsigned int irq;
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;
	u16 first_completion_queue;
	u16 qmgr_num_pend;
	u32 n_chans;
	u8 platform;

	struct list_head pending;	/* Pending queued transfers */
	spinlock_t lock;		/* Lock for pending list */

	/* context for suspend/resume */
	unsigned int dma_tdfdq;

	bool is_suspended;
};

static struct chan_queues am335x_usb_queues_tx[] = {
	/* USB0 ENDP 1 */
	[ 0] = { .submit = 32, .complete = 93},
	[ 1] = { .submit = 34, .complete = 94},
	[ 2] = { .submit = 36, .complete = 95},
	[ 3] = { .submit = 38, .complete = 96},
	[ 4] = { .submit = 40, .complete = 97},
	[ 5] = { .submit = 42, .complete = 98},
	[ 6] = { .submit = 44, .complete = 99},
	[ 7] = { .submit = 46, .complete = 100},
	[ 8] = { .submit = 48, .complete = 101},
	[ 9] = { .submit = 50, .complete = 102},
	[10] = { .submit = 52, .complete = 103},
	[11] = { .submit = 54, .complete = 104},
	[12] = { .submit = 56, .complete = 105},
	[13] = { .submit = 58, .complete = 106},
	[14] = { .submit = 60, .complete = 107},

	/* USB1 ENDP 1 */
	[15] = { .submit = 62, .complete = 125},
	[16] = { .submit = 64, .complete = 126},
	[17] = { .submit = 66, .complete = 127},
	[18] = { .submit = 68, .complete = 128},
	[19] = { .submit = 70, .complete = 129},
	[20] = { .submit = 72, .complete = 130},
	[21] = { .submit = 74, .complete = 131},
	[22] = { .submit = 76, .complete = 132},
	[23] = { .submit = 78, .complete = 133},
	[24] = { .submit = 80, .complete = 134},
	[25] = { .submit = 82, .complete = 135},
	[26] = { .submit = 84, .complete = 136},
	[27] = { .submit = 86, .complete = 137},
	[28] = { .submit = 88, .complete = 138},
	[29] = { .submit = 90, .complete = 139},
};

static const struct chan_queues am335x_usb_queues_rx[] = {
	/* USB0 ENDP 1 */
	[ 0] = { .submit = 1, .complete = 109},
	[ 1] = { .submit = 2, .complete = 110},
	[ 2] = { .submit = 3, .complete = 111},
	[ 3] = { .submit = 4, .complete = 112},
	[ 4] = { .submit = 5, .complete = 113},
	[ 5] = { .submit = 6, .complete = 114},
	[ 6] = { .submit = 7, .complete = 115},
	[ 7] = { .submit = 8, .complete = 116},
	[ 8] = { .submit = 9, .complete = 117},
	[ 9] = { .submit = 10, .complete = 118},
	[10] = { .submit = 11, .complete = 119},
	[11] = { .submit = 12, .complete = 120},
	[12] = { .submit = 13, .complete = 121},
	[13] = { .submit = 14, .complete = 122},
	[14] = { .submit = 15, .complete = 123},

	/* USB1 ENDP 1 */
	[15] = { .submit = 16, .complete = 141},
	[16] = { .submit = 17, .complete = 142},
	[17] = { .submit = 18, .complete = 143},
	[18] = { .submit = 19, .complete = 144},
	[19] = { .submit = 20, .complete = 145},
	[20] = { .submit = 21, .complete = 146},
	[21] = { .submit = 22, .complete = 147},
	[22] = { .submit = 23, .complete = 148},
	[23] = { .submit = 24, .complete = 149},
	[24] = { .submit = 25, .complete = 150},
	[25] = { .submit = 26, .complete = 151},
	[26] = { .submit = 27, .complete = 152},
	[27] = { .submit = 28, .complete = 153},
	[28] = { .submit = 29, .complete = 154},
	[29] = { .submit = 30, .complete = 155},
};
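
/*
 * Reading the tables above: on am335x each TX endpoint submits on an
 * even-numbered queue (32..90) while RX endpoints submit on queues 1..30;
 * all completion queues start at 93, which is why the am335x glue info
 * below sets first_completion_queue = 93 and the interrupt handler only
 * looks at pending bits from that queue upwards.
 */
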
static const struct chan_queues da8xx_usb_queues_tx[] = {
	[0] = { .submit = 16, .complete = 24},
	[1] = { .submit = 18, .complete = 24},
	[2] = { .submit = 20, .complete = 24},
	[3] = { .submit = 22, .complete = 24},
};

static const struct chan_queues da8xx_usb_queues_rx[] = {
	[0] = { .submit = 1, .complete = 26},
	[1] = { .submit = 3, .complete = 26},
	[2] = { .submit = 5, .complete = 26},
	[3] = { .submit = 7, .complete = 26},
};

struct cppi_glue_infos {
	const struct chan_queues *queues_rx;
	const struct chan_queues *queues_tx;
	struct chan_queues td_queue;
	u16 first_completion_queue;
	u16 qmgr_num_pend;
};

static struct cppi41_channel *to_cpp41_chan(struct dma_chan *c)
{
	return container_of(c, struct cppi41_channel, chan);
}

static struct cppi41_channel *desc_to_chan(struct cppi41_dd *cdd, u32 desc)
{
	struct cppi41_channel *c;
	u32 descs_size;
	u32 desc_num;

	descs_size = sizeof(struct cppi41_desc) * ALLOC_DECS_NUM;

	if (!((desc >= cdd->descs_phys) &&
			(desc < (cdd->descs_phys + descs_size)))) {
		return NULL;
	}

	desc_num = (desc - cdd->descs_phys) / sizeof(struct cppi41_desc);
	BUG_ON(desc_num >= ALLOC_DECS_NUM);
	c = cdd->chan_busy[desc_num];
	cdd->chan_busy[desc_num] = NULL;

	/* Usecount for chan_busy[], paired with push_desc_queue() */
	pm_runtime_put(cdd->ddev.dev);

	return c;
}

static void cppi_writel(u32 val, void *__iomem *mem)
{
	__raw_writel(val, mem);
}

static u32 cppi_readl(void *__iomem *mem)
{
	return __raw_readl(mem);
}

static u32 pd_trans_len(u32 val)
{
	return val & ((1 << (DESC_LENGTH_BITS_NUM + 1)) - 1);
}

static u32 cppi41_pop_desc(struct cppi41_dd *cdd, unsigned queue_num)
{
	u32 desc;

	desc = cppi_readl(cdd->qmgr_mem + QMGR_QUEUE_D(queue_num));
	desc &= ~0x1f;
	return desc;
}
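
/*
 * The interrupt handler below walks the pending registers starting at the
 * slot that holds the first completion queue. For the am335x case
 * (first_completion_queue = 93) that is slot 93 / 32 = 2, bit 93 % 32 = 29:
 * mask = (1 << 29) - 1 covers every queue below 93, so "val &= ~mask" keeps
 * only completion-queue bits in that first slot; later slots are taken
 * as-is.
 */
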
static irqreturn_t cppi41_irq(int irq, void *data)
{
	struct cppi41_dd *cdd = data;
	u16 first_completion_queue = cdd->first_completion_queue;
	u16 qmgr_num_pend = cdd->qmgr_num_pend;
	struct cppi41_channel *c;
	int i;

	for (i = QMGR_PENDING_SLOT_Q(first_completion_queue); i < qmgr_num_pend;
	     i++) {
		u32 val;
		u32 q_num;

		val = cppi_readl(cdd->qmgr_mem + QMGR_PEND(i));
		if (i == QMGR_PENDING_SLOT_Q(first_completion_queue) && val) {
			u32 mask;
			/* set corresponding bit for completion Q 93 */
			mask = 1 << QMGR_PENDING_BIT_Q(first_completion_queue);
			/* mask now covers all bits for queues below Q 93 */
			mask--;
			/* invert and keep only Q 93+ set */
			val &= ~mask;
		}

		if (val)
			__iormb();

		while (val) {
			u32 desc, len;

			/*
			 * This should never trigger, see the comments in
			 * push_desc_queue()
			 */
			WARN_ON(cdd->is_suspended);

			q_num = __fls(val);
			val &= ~(1 << q_num);
			q_num += 32 * i;
			desc = cppi41_pop_desc(cdd, q_num);
			c = desc_to_chan(cdd, desc);
			if (WARN_ON(!c)) {
				pr_err("%s() q %d desc %08x\n", __func__,
				       q_num, desc);
				continue;
			}

			if (c->desc->pd2 & PD2_ZERO_LENGTH)
				len = 0;
			else
				len = pd_trans_len(c->desc->pd0);

			c->residue = pd_trans_len(c->desc->pd6) - len;
			dma_cookie_complete(&c->txd);
			dmaengine_desc_get_callback_invoke(&c->txd, NULL);
		}
	}
	return IRQ_HANDLED;
}

static dma_cookie_t cppi41_tx_submit(struct dma_async_tx_descriptor *tx)
{
	dma_cookie_t cookie;

	cookie = dma_cookie_assign(tx);

	return cookie;
}

static int cppi41_dma_alloc_chan_resources(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	int error;

	error = pm_runtime_get_sync(cdd->ddev.dev);
	if (error < 0) {
		dev_err(cdd->ddev.dev, "%s pm runtime get: %i\n",
			__func__, error);
		pm_runtime_put_noidle(cdd->ddev.dev);

		return error;
	}

	dma_cookie_init(chan);
	dma_async_tx_descriptor_init(&c->txd, chan);
	c->txd.tx_submit = cppi41_tx_submit;

	if (!c->is_tx)
		cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);

	pm_runtime_mark_last_busy(cdd->ddev.dev);
	pm_runtime_put_autosuspend(cdd->ddev.dev);

	return 0;
}

static void cppi41_dma_free_chan_resources(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	int error;

	error = pm_runtime_get_sync(cdd->ddev.dev);
	if (error < 0) {
		pm_runtime_put_noidle(cdd->ddev.dev);

		return;
	}

	WARN_ON(!list_empty(&cdd->pending));

	pm_runtime_mark_last_busy(cdd->ddev.dev);
	pm_runtime_put_autosuspend(cdd->ddev.dev);
}

static enum dma_status cppi41_dma_tx_status(struct dma_chan *chan,
	dma_cookie_t cookie, struct dma_tx_state *txstate)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	enum dma_status ret;

	ret = dma_cookie_status(chan, cookie, txstate);

	dma_set_residue(txstate, c->residue);

	return ret;
}

static void push_desc_queue(struct cppi41_channel *c)
{
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	u32 reg;

	c->residue = 0;

	reg = GCR_CHAN_ENABLE;
	if (!c->is_tx) {
		reg |= GCR_STARV_RETRY;
		reg |= GCR_DESC_TYPE_HOST;
		reg |= c->q_comp_num;
	}

	cppi_writel(reg, c->gcr_reg);

	/*
	 * We don't use writel() but __raw_writel(), so we have to make sure
	 * that the DMA descriptor in coherent memory made it to the main
	 * memory before starting the dma engine.
	 */
	__iowmb();

	/*
	 * DMA transfers can take at least 200ms to complete with USB mass
	 * storage connected. To prevent autosuspend timeouts, we must use
	 * pm_runtime_get/put() when chan_busy[] is modified. This will get
	 * cleared in desc_to_chan() or cppi41_stop_chan() depending on the
	 * outcome of the transfer.
	 */
	pm_runtime_get(cdd->ddev.dev);

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	WARN_ON(cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = c;

	reg = (sizeof(struct cppi41_desc) - 24) / 4;
	reg |= desc_phys;
	cppi_writel(reg, cdd->qmgr_mem + QMGR_QUEUE_D(c->q_num));
}
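
/*
 * The value written to QMGR_QUEUE_D above packs the descriptor size into the
 * low bits of the (32-byte aligned) descriptor address:
 * (sizeof(struct cppi41_desc) - 24) / 4 = (32 - 24) / 4 = 2. The same
 * encoding is used when queueing the teardown descriptor in
 * cppi41_tear_down_chan(), and cppi41_pop_desc() strips these low bits
 * again with "desc &= ~0x1f".
 */
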
/*
 * Caller must hold cdd->lock to prevent push_desc_queue()
 * from being called out of order. Both cppi41_dma_issue_pending()
 * and cppi41_runtime_resume() call this function.
 */
static void cppi41_run_queue(struct cppi41_dd *cdd)
{
	struct cppi41_channel *c, *_c;

	list_for_each_entry_safe(c, _c, &cdd->pending, node) {
		push_desc_queue(c);
		list_del(&c->node);
	}
}

static void cppi41_dma_issue_pending(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	unsigned long flags;
	int error;

	error = pm_runtime_get(cdd->ddev.dev);
	if ((error != -EINPROGRESS) && error < 0) {
		pm_runtime_put_noidle(cdd->ddev.dev);
		dev_err(cdd->ddev.dev, "Failed to pm_runtime_get: %i\n",
			error);

		return;
	}

	spin_lock_irqsave(&cdd->lock, flags);
	list_add_tail(&c->node, &cdd->pending);
	if (!cdd->is_suspended)
		cppi41_run_queue(cdd);
	spin_unlock_irqrestore(&cdd->lock, flags);

	pm_runtime_mark_last_busy(cdd->ddev.dev);
	pm_runtime_put_autosuspend(cdd->ddev.dev);
}
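
/*
 * The helpers below fill in the eight words of a host packet descriptor as
 * this driver uses them: PD0 carries the host descriptor type and the
 * transfer length, PD2 the USB descriptor type and the completion queue,
 * PD3 the packet size, PD4 and PD7 the buffer address, and PD6 the
 * PD_COMPLETE flag plus the buffer length (PD1 and PD5 stay zero).
 */
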
static u32 get_host_pd0(u32 length)
{
	u32 reg;

	reg = DESC_TYPE_HOST << DESC_TYPE;
	reg |= length;

	return reg;
}

static u32 get_host_pd1(struct cppi41_channel *c)
{
	u32 reg;

	reg = 0;

	return reg;
}

static u32 get_host_pd2(struct cppi41_channel *c)
{
	u32 reg;

	reg = DESC_TYPE_USB;
	reg |= c->q_comp_num;

	return reg;
}

static u32 get_host_pd3(u32 length)
{
	u32 reg;

	/* PD3 = packet size */
	reg = length;

	return reg;
}

static u32 get_host_pd6(u32 length)
{
	u32 reg;

	/* PD6 buffer size */
	reg = DESC_PD_COMPLETE;
	reg |= length;

	return reg;
}

static u32 get_host_pd4_or_7(u32 addr)
{
	u32 reg;

	reg = addr;

	return reg;
}

static u32 get_host_pd5(void)
{
	u32 reg;

	reg = 0;

	return reg;
}

static struct dma_async_tx_descriptor *cppi41_dma_prep_slave_sg(
	struct dma_chan *chan, struct scatterlist *sgl, unsigned sg_len,
	enum dma_transfer_direction dir, unsigned long tx_flags, void *context)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_desc *d;
	struct scatterlist *sg;
	unsigned int i;

	d = c->desc;
	for_each_sg(sgl, sg, sg_len, i) {
		u32 addr;
		u32 len;

		/* We need to use more than one desc once musb supports sg */
		addr = lower_32_bits(sg_dma_address(sg));
		len = sg_dma_len(sg);

		d->pd0 = get_host_pd0(len);
		d->pd1 = get_host_pd1(c);
		d->pd2 = get_host_pd2(c);
		d->pd3 = get_host_pd3(len);
		d->pd4 = get_host_pd4_or_7(addr);
		d->pd5 = get_host_pd5();
		d->pd6 = get_host_pd6(len);
		d->pd7 = get_host_pd4_or_7(addr);

		d++;
	}

	return &c->txd;
}

static void cppi41_compute_td_desc(struct cppi41_desc *d)
{
	d->pd0 = DESC_TYPE_TEARD << DESC_TYPE;
}
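
/*
 * Teardown flow as implemented below: a teardown descriptor is queued on
 * td_queue.submit and GCR_TEARDOWN is set on the channel (td_queued); the
 * caller then re-invokes this function (it returns -EAGAIN) until both the
 * channel's transfer descriptor (td_desc_seen) and the teardown descriptor
 * (td_seen) have come back on a completion queue, retrying up to 500 times
 * with a 1us delay before giving up with a WARN.
 */
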
static int cppi41_tear_down_chan(struct cppi41_channel *c)
{
	struct dmaengine_result abort_result;
	struct cppi41_dd *cdd = c->cdd;
	struct cppi41_desc *td;
	u32 reg;
	u32 desc_phys;
	u32 td_desc_phys;

	td = cdd->cd;
	td += cdd->first_td_desc;

	td_desc_phys = cdd->descs_phys;
	td_desc_phys += cdd->first_td_desc * sizeof(struct cppi41_desc);

	if (!c->td_queued) {
		cppi41_compute_td_desc(td);
		__iowmb();

		reg = (sizeof(struct cppi41_desc) - 24) / 4;
		reg |= td_desc_phys;
		cppi_writel(reg, cdd->qmgr_mem +
				QMGR_QUEUE_D(cdd->td_queue.submit));

		reg = GCR_CHAN_ENABLE;
		if (!c->is_tx) {
			reg |= GCR_STARV_RETRY;
			reg |= GCR_DESC_TYPE_HOST;
			reg |= cdd->td_queue.complete;
		}
		reg |= GCR_TEARDOWN;
		cppi_writel(reg, c->gcr_reg);
		c->td_queued = 1;
		c->td_retry = 500;
	}

	if (!c->td_seen || !c->td_desc_seen) {

		desc_phys = cppi41_pop_desc(cdd, cdd->td_queue.complete);
		if (!desc_phys && c->is_tx)
			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);

		if (desc_phys == c->desc_phys) {
			c->td_desc_seen = 1;

		} else if (desc_phys == td_desc_phys) {
			u32 pd0;

			__iormb();
			pd0 = td->pd0;
			WARN_ON((pd0 >> DESC_TYPE) != DESC_TYPE_TEARD);
			WARN_ON(!c->is_tx && !(pd0 & TD_DESC_IS_RX));
			WARN_ON((pd0 & 0x1f) != c->port_num);
			c->td_seen = 1;
		} else if (desc_phys) {
			WARN_ON_ONCE(1);
		}
	}
	c->td_retry--;
	/*
	 * If the TX descriptor / channel is in use, the caller needs to poke
	 * its TD bit multiple times. After that the hardware releases the
	 * transfer descriptor followed by the TD descriptor. Waiting seems
	 * not to make any difference.
	 * RX seems to be thrown out right away. However, once the TearDown
	 * descriptor gets through we are done. If we have seen the transfer
	 * descriptor before the TD, we fetch it from the submit queue; it
	 * has to be there waiting for us.
	 */
	if (!c->td_seen && c->td_retry) {
		udelay(1);
		return -EAGAIN;
	}
	WARN_ON(!c->td_retry);

	if (!c->td_desc_seen) {
		desc_phys = cppi41_pop_desc(cdd, c->q_num);
		if (!desc_phys)
			desc_phys = cppi41_pop_desc(cdd, c->q_comp_num);
		WARN_ON(!desc_phys);
	}

	c->td_queued = 0;
	c->td_seen = 0;
	c->td_desc_seen = 0;
	cppi_writel(0, c->gcr_reg);

	/* Invoke the callback to do the necessary clean-up */
	abort_result.result = DMA_TRANS_ABORTED;
	dma_cookie_complete(&c->txd);
	dmaengine_desc_get_callback_invoke(&c->txd, &abort_result);

	return 0;
}

static int cppi41_stop_chan(struct dma_chan *chan)
{
	struct cppi41_channel *c = to_cpp41_chan(chan);
	struct cppi41_dd *cdd = c->cdd;
	u32 desc_num;
	u32 desc_phys;
	int ret;

	desc_phys = lower_32_bits(c->desc_phys);
	desc_num = (desc_phys - cdd->descs_phys) / sizeof(struct cppi41_desc);
	if (!cdd->chan_busy[desc_num])
		return 0;

	ret = cppi41_tear_down_chan(c);
	if (ret)
		return ret;

	WARN_ON(!cdd->chan_busy[desc_num]);
	cdd->chan_busy[desc_num] = NULL;

	/* Usecount for chan_busy[], paired with push_desc_queue() */
	pm_runtime_put(cdd->ddev.dev);

	return 0;
}

static int cppi41_add_chans(struct device *dev, struct cppi41_dd *cdd)
{
	struct cppi41_channel *cchan, *chans;
	int i;
	u32 n_chans = cdd->n_chans;

	/*
	 * Each hardware channel can only be used as TX or as RX, so register
	 * twice as many DMA channels: one TX and one RX channel per hardware
	 * channel, since USB needs both directions.
	 */
	n_chans *= 2;

	chans = devm_kcalloc(dev, n_chans, sizeof(*chans), GFP_KERNEL);
	if (!chans)
		return -ENOMEM;

	for (i = 0; i < n_chans; i++) {
		cchan = &chans[i];

		cchan->cdd = cdd;
		if (i & 1) {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_TXGCR(i >> 1);
			cchan->is_tx = 1;
		} else {
			cchan->gcr_reg = cdd->ctrl_mem + DMA_RXGCR(i >> 1);
			cchan->is_tx = 0;
		}
		cchan->port_num = i >> 1;
		cchan->desc = &cdd->cd[i];
		cchan->desc_phys = cdd->descs_phys;
		cchan->desc_phys += i * sizeof(struct cppi41_desc);
		cchan->chan.device = &cdd->ddev;
		list_add_tail(&cchan->chan.device_node, &cdd->ddev.channels);
	}
	cdd->first_td_desc = n_chans;

	return 0;
}

static void purge_descs(struct device *dev, struct cppi41_dd *cdd)
{
	unsigned int mem_decs;
	int i;

	mem_decs = ALLOC_DECS_NUM * sizeof(struct cppi41_desc);

	for (i = 0; i < DESCS_AREAS; i++) {

		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(0, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		dma_free_coherent(dev, mem_decs, cdd->cd,
				  cdd->descs_phys);
	}
}

static void disable_sched(struct cppi41_dd *cdd)
{
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
}

static void deinit_cppi41(struct device *dev, struct cppi41_dd *cdd)
{
	disable_sched(cdd);

	purge_descs(dev, cdd);

	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	dma_free_coherent(dev, QMGR_SCRATCH_SIZE, cdd->qmgr_scratch,
			  cdd->scratch_phys);
}

static int init_descs(struct device *dev, struct cppi41_dd *cdd)
{
	unsigned int desc_size;
	unsigned int mem_decs;
	int i;
	u32 reg;
	u32 idx;

	BUILD_BUG_ON(sizeof(struct cppi41_desc) &
			(sizeof(struct cppi41_desc) - 1));
	BUILD_BUG_ON(sizeof(struct cppi41_desc) < 32);
	BUILD_BUG_ON(ALLOC_DECS_NUM < 32);

	desc_size = sizeof(struct cppi41_desc);
	mem_decs = ALLOC_DECS_NUM * desc_size;

	idx = 0;
	for (i = 0; i < DESCS_AREAS; i++) {

		reg = idx << QMGR_MEMCTRL_IDX_SH;
		reg |= (ilog2(desc_size) - 5) << QMGR_MEMCTRL_DESC_SH;
		reg |= ilog2(ALLOC_DECS_NUM) - 5;

		BUILD_BUG_ON(DESCS_AREAS != 1);
		cdd->cd = dma_alloc_coherent(dev, mem_decs,
					     &cdd->descs_phys, GFP_KERNEL);
		if (!cdd->cd)
			return -ENOMEM;

		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));
		cppi_writel(reg, cdd->qmgr_mem + QMGR_MEMCTRL(i));

		idx += ALLOC_DECS_NUM;
	}
	return 0;
}
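
/*
 * Each 32-bit scheduler word programmed below carries four entries: for a
 * pair of hardware channels ch and ch + 1 it packs "ch TX, ch RX, ch+1 TX,
 * ch+1 RX". The final write enables the scheduler with the entry count
 * computed as n_chans * 2 - 1, e.g. 30 channels on am335x give 15 words and
 * DMA_SCHED_CTRL = DMA_SCHED_CTRL_EN | 59.
 */
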
static void init_sched(struct cppi41_dd *cdd)
{
	unsigned ch;
	unsigned word;
	u32 reg;

	word = 0;
	cppi_writel(0, cdd->sched_mem + DMA_SCHED_CTRL);
	for (ch = 0; ch < cdd->n_chans; ch += 2) {

		reg = SCHED_ENTRY0_CHAN(ch);
		reg |= SCHED_ENTRY1_CHAN(ch) | SCHED_ENTRY1_IS_RX;

		reg |= SCHED_ENTRY2_CHAN(ch + 1);
		reg |= SCHED_ENTRY3_CHAN(ch + 1) | SCHED_ENTRY3_IS_RX;
		cppi_writel(reg, cdd->sched_mem + DMA_SCHED_WORD(word));
		word++;
	}
	reg = cdd->n_chans * 2 - 1;
	reg |= DMA_SCHED_CTRL_EN;
	cppi_writel(reg, cdd->sched_mem + DMA_SCHED_CTRL);
}

static int init_cppi41(struct device *dev, struct cppi41_dd *cdd)
{
	int ret;

	BUILD_BUG_ON(QMGR_SCRATCH_SIZE > ((1 << 14) - 1));
	cdd->qmgr_scratch = dma_alloc_coherent(dev, QMGR_SCRATCH_SIZE,
					       &cdd->scratch_phys, GFP_KERNEL);
	if (!cdd->qmgr_scratch)
		return -ENOMEM;

	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(TOTAL_DESCS_NUM, cdd->qmgr_mem + QMGR_LRAM_SIZE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);

	ret = init_descs(dev, cdd);
	if (ret)
		goto err_td;

	cppi_writel(cdd->td_queue.submit, cdd->ctrl_mem + DMA_TDFDQ);
	init_sched(cdd);

	return 0;
err_td:
	deinit_cppi41(dev, cdd);
	return ret;
}

static struct platform_driver cpp41_dma_driver;
/*
 * The param format is:
 * X Y
 * X: Port
 * Y: 0 = RX else TX
 */
#define INFO_PORT	0
#define INFO_IS_TX	1

static bool cpp41_dma_filter_fn(struct dma_chan *chan, void *param)
{
	struct cppi41_channel *cchan;
	struct cppi41_dd *cdd;
	const struct chan_queues *queues;
	u32 *num = param;

	if (chan->device->dev->driver != &cpp41_dma_driver.driver)
		return false;

	cchan = to_cpp41_chan(chan);

	if (cchan->port_num != num[INFO_PORT])
		return false;

	if (cchan->is_tx && !num[INFO_IS_TX])
		return false;
	cdd = cchan->cdd;
	if (cchan->is_tx)
		queues = cdd->queues_tx;
	else
		queues = cdd->queues_rx;

	BUILD_BUG_ON(ARRAY_SIZE(am335x_usb_queues_rx) !=
		     ARRAY_SIZE(am335x_usb_queues_tx));
	if (WARN_ON(cchan->port_num >= ARRAY_SIZE(am335x_usb_queues_rx)))
		return false;

	cchan->q_num = queues[cchan->port_num].submit;
	cchan->q_comp_num = queues[cchan->port_num].complete;
	return true;
}

static struct of_dma_filter_info cpp41_dma_info = {
	.filter_fn = cpp41_dma_filter_fn,
};

static struct dma_chan *cppi41_dma_xlate(struct of_phandle_args *dma_spec,
					 struct of_dma *ofdma)
{
	int count = dma_spec->args_count;
	struct of_dma_filter_info *info = ofdma->of_dma_data;

	if (!info || !info->filter_fn)
		return NULL;

	if (count != 2)
		return NULL;

	return dma_request_channel(info->dma_cap, info->filter_fn,
				   &dma_spec->args[0]);
}
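
/*
 * Consumers reach these channels through the two-cell DT "dmas" specifier
 * handled by cppi41_dma_xlate() above (first cell: port, second cell:
 * 0 = RX, else TX), which reaches cpp41_dma_filter_fn() as the u32 param[2]
 * array. A hypothetical in-kernel user could do the same directly, e.g.:
 *
 *	dma_cap_mask_t mask;
 *	u32 param[2] = { 1, 1 };	// port 1, TX
 *
 *	dma_cap_zero(mask);
 *	dma_cap_set(DMA_SLAVE, mask);
 *	chan = dma_request_channel(mask, cpp41_dma_filter_fn, param);
 */
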
static const struct cppi_glue_infos am335x_usb_infos = {
	.queues_rx = am335x_usb_queues_rx,
	.queues_tx = am335x_usb_queues_tx,
	.td_queue = { .submit = 31, .complete = 0 },
	.first_completion_queue = 93,
	.qmgr_num_pend = 5,
};

static const struct cppi_glue_infos da8xx_usb_infos = {
	.queues_rx = da8xx_usb_queues_rx,
	.queues_tx = da8xx_usb_queues_tx,
	.td_queue = { .submit = 31, .complete = 0 },
	.first_completion_queue = 24,
	.qmgr_num_pend = 2,
};

static const struct of_device_id cppi41_dma_ids[] = {
	{ .compatible = "ti,am3359-cppi41", .data = &am335x_usb_infos},
	{ .compatible = "ti,da830-cppi41", .data = &da8xx_usb_infos},
	{},
};
MODULE_DEVICE_TABLE(of, cppi41_dma_ids);

static const struct cppi_glue_infos *get_glue_info(struct device *dev)
{
	const struct of_device_id *of_id;

	of_id = of_match_node(cppi41_dma_ids, dev->of_node);
	if (!of_id)
		return NULL;
	return of_id->data;
}

#define CPPI41_DMA_BUSWIDTHS	(BIT(DMA_SLAVE_BUSWIDTH_1_BYTE) | \
				BIT(DMA_SLAVE_BUSWIDTH_2_BYTES) | \
				BIT(DMA_SLAVE_BUSWIDTH_3_BYTES) | \
				BIT(DMA_SLAVE_BUSWIDTH_4_BYTES))

static int cppi41_dma_probe(struct platform_device *pdev)
{
	struct cppi41_dd *cdd;
	struct device *dev = &pdev->dev;
	const struct cppi_glue_infos *glue_info;
	struct resource *mem;
	int index;
	int irq;
	int ret;

	glue_info = get_glue_info(dev);
	if (!glue_info)
		return -EINVAL;

	cdd = devm_kzalloc(&pdev->dev, sizeof(*cdd), GFP_KERNEL);
	if (!cdd)
		return -ENOMEM;

	dma_cap_set(DMA_SLAVE, cdd->ddev.cap_mask);
	cdd->ddev.device_alloc_chan_resources = cppi41_dma_alloc_chan_resources;
	cdd->ddev.device_free_chan_resources = cppi41_dma_free_chan_resources;
	cdd->ddev.device_tx_status = cppi41_dma_tx_status;
	cdd->ddev.device_issue_pending = cppi41_dma_issue_pending;
	cdd->ddev.device_prep_slave_sg = cppi41_dma_prep_slave_sg;
	cdd->ddev.device_terminate_all = cppi41_stop_chan;
	cdd->ddev.directions = BIT(DMA_DEV_TO_MEM) | BIT(DMA_MEM_TO_DEV);
	cdd->ddev.src_addr_widths = CPPI41_DMA_BUSWIDTHS;
	cdd->ddev.dst_addr_widths = CPPI41_DMA_BUSWIDTHS;
	cdd->ddev.residue_granularity = DMA_RESIDUE_GRANULARITY_BURST;
	cdd->ddev.dev = dev;
	INIT_LIST_HEAD(&cdd->ddev.channels);
	cpp41_dma_info.dma_cap = cdd->ddev.cap_mask;

	index = of_property_match_string(dev->of_node,
					 "reg-names", "controller");
	if (index < 0)
		return index;

	mem = platform_get_resource(pdev, IORESOURCE_MEM, index);
	cdd->ctrl_mem = devm_ioremap_resource(dev, mem);
	if (IS_ERR(cdd->ctrl_mem))
		return PTR_ERR(cdd->ctrl_mem);

	mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 1);
	cdd->sched_mem = devm_ioremap_resource(dev, mem);
	if (IS_ERR(cdd->sched_mem))
		return PTR_ERR(cdd->sched_mem);

	mem = platform_get_resource(pdev, IORESOURCE_MEM, index + 2);
	cdd->qmgr_mem = devm_ioremap_resource(dev, mem);
	if (IS_ERR(cdd->qmgr_mem))
		return PTR_ERR(cdd->qmgr_mem);

	spin_lock_init(&cdd->lock);
	INIT_LIST_HEAD(&cdd->pending);

	platform_set_drvdata(pdev, cdd);

	pm_runtime_enable(dev);
	pm_runtime_set_autosuspend_delay(dev, 100);
	pm_runtime_use_autosuspend(dev);
	ret = pm_runtime_get_sync(dev);
	if (ret < 0)
		goto err_get_sync;

	cdd->queues_rx = glue_info->queues_rx;
	cdd->queues_tx = glue_info->queues_tx;
	cdd->td_queue = glue_info->td_queue;
	cdd->qmgr_num_pend = glue_info->qmgr_num_pend;
	cdd->first_completion_queue = glue_info->first_completion_queue;

	ret = of_property_read_u32(dev->of_node,
				   "#dma-channels", &cdd->n_chans);
	if (ret)
		goto err_get_n_chans;

	ret = init_cppi41(dev, cdd);
	if (ret)
		goto err_init_cppi;

	ret = cppi41_add_chans(dev, cdd);
	if (ret)
		goto err_chans;

	irq = irq_of_parse_and_map(dev->of_node, 0);
	if (!irq) {
		ret = -EINVAL;
		goto err_chans;
	}

	ret = devm_request_irq(&pdev->dev, irq, cppi41_irq, IRQF_SHARED,
			       dev_name(dev), cdd);
	if (ret)
		goto err_chans;
	cdd->irq = irq;

	ret = dma_async_device_register(&cdd->ddev);
	if (ret)
		goto err_chans;

	ret = of_dma_controller_register(dev->of_node,
					 cppi41_dma_xlate, &cpp41_dma_info);
	if (ret)
		goto err_of;

	pm_runtime_mark_last_busy(dev);
	pm_runtime_put_autosuspend(dev);

	return 0;
err_of:
	dma_async_device_unregister(&cdd->ddev);
err_chans:
	deinit_cppi41(dev, cdd);
err_init_cppi:
	pm_runtime_dont_use_autosuspend(dev);
err_get_n_chans:
err_get_sync:
	pm_runtime_put_sync(dev);
	pm_runtime_disable(dev);
	return ret;
}

static int cppi41_dma_remove(struct platform_device *pdev)
{
	struct cppi41_dd *cdd = platform_get_drvdata(pdev);
	int error;

	error = pm_runtime_get_sync(&pdev->dev);
	if (error < 0)
		dev_err(&pdev->dev, "%s could not pm_runtime_get: %i\n",
			__func__, error);
	of_dma_controller_free(pdev->dev.of_node);
	dma_async_device_unregister(&cdd->ddev);

	devm_free_irq(&pdev->dev, cdd->irq, cdd);
	deinit_cppi41(&pdev->dev, cdd);
	pm_runtime_dont_use_autosuspend(&pdev->dev);
	pm_runtime_put_sync(&pdev->dev);
	pm_runtime_disable(&pdev->dev);
	return 0;
}

static int __maybe_unused cppi41_suspend(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);

	cdd->dma_tdfdq = cppi_readl(cdd->ctrl_mem + DMA_TDFDQ);
	disable_sched(cdd);

	return 0;
}

static int __maybe_unused cppi41_resume(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	struct cppi41_channel *c;
	int i;

	for (i = 0; i < DESCS_AREAS; i++)
		cppi_writel(cdd->descs_phys, cdd->qmgr_mem + QMGR_MEMBASE(i));

	list_for_each_entry(c, &cdd->ddev.channels, chan.device_node)
		if (!c->is_tx)
			cppi_writel(c->q_num, c->gcr_reg + RXHPCRA0);

	init_sched(cdd);

	cppi_writel(cdd->dma_tdfdq, cdd->ctrl_mem + DMA_TDFDQ);
	cppi_writel(cdd->scratch_phys, cdd->qmgr_mem + QMGR_LRAM0_BASE);
	cppi_writel(QMGR_SCRATCH_SIZE, cdd->qmgr_mem + QMGR_LRAM_SIZE);
	cppi_writel(0, cdd->qmgr_mem + QMGR_LRAM1_BASE);

	return 0;
}
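
/*
 * Runtime PM and the pending list: push_desc_queue() takes a pm_runtime
 * reference for every descriptor it hands to the hardware, dropped again in
 * desc_to_chan() or cppi41_stop_chan(), so the device cannot autosuspend
 * while a transfer is in flight. The is_suspended flag set below makes
 * cppi41_dma_issue_pending() park new work on cdd->pending instead of
 * touching the hardware; cppi41_runtime_resume() flushes that list.
 */
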
static int __maybe_unused cppi41_runtime_suspend(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	unsigned long flags;

	spin_lock_irqsave(&cdd->lock, flags);
	cdd->is_suspended = true;
	WARN_ON(!list_empty(&cdd->pending));
	spin_unlock_irqrestore(&cdd->lock, flags);

	return 0;
}

static int __maybe_unused cppi41_runtime_resume(struct device *dev)
{
	struct cppi41_dd *cdd = dev_get_drvdata(dev);
	unsigned long flags;

	spin_lock_irqsave(&cdd->lock, flags);
	cdd->is_suspended = false;
	cppi41_run_queue(cdd);
	spin_unlock_irqrestore(&cdd->lock, flags);

	return 0;
}

static const struct dev_pm_ops cppi41_pm_ops = {
	SET_LATE_SYSTEM_SLEEP_PM_OPS(cppi41_suspend, cppi41_resume)
	SET_RUNTIME_PM_OPS(cppi41_runtime_suspend,
			   cppi41_runtime_resume,
			   NULL)
};

static struct platform_driver cpp41_dma_driver = {
	.probe  = cppi41_dma_probe,
	.remove = cppi41_dma_remove,
	.driver = {
		.name = "cppi41-dma-engine",
		.pm = &cppi41_pm_ops,
		.of_match_table = of_match_ptr(cppi41_dma_ids),
	},
};

module_platform_driver(cpp41_dma_driver);
MODULE_LICENSE("GPL");
MODULE_AUTHOR("Sebastian Andrzej Siewior <bigeasy@linutronix.de>");