1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Special handling for DW DMA core 4 * 5 * Copyright (c) 2009, 2014 Intel Corporation. 6 */ 7 8 #include <linux/completion.h> 9 #include <linux/dma-mapping.h> 10 #include <linux/dmaengine.h> 11 #include <linux/irqreturn.h> 12 #include <linux/jiffies.h> 13 #include <linux/pci.h> 14 #include <linux/platform_data/dma-dw.h> 15 #include <linux/spi/spi.h> 16 #include <linux/types.h> 17 18 #include "spi-dw.h" 19 20 #define RX_BUSY 0 21 #define RX_BURST_LEVEL 16 22 #define TX_BUSY 1 23 #define TX_BURST_LEVEL 16 24 25 static bool dw_spi_dma_chan_filter(struct dma_chan *chan, void *param) 26 { 27 struct dw_dma_slave *s = param; 28 29 if (s->dma_dev != chan->device->dev) 30 return false; 31 32 chan->private = s; 33 return true; 34 } 35 36 static void dw_spi_dma_maxburst_init(struct dw_spi *dws) 37 { 38 struct dma_slave_caps caps; 39 u32 max_burst, def_burst; 40 int ret; 41 42 def_burst = dws->fifo_len / 2; 43 44 ret = dma_get_slave_caps(dws->rxchan, &caps); 45 if (!ret && caps.max_burst) 46 max_burst = caps.max_burst; 47 else 48 max_burst = RX_BURST_LEVEL; 49 50 dws->rxburst = min(max_burst, def_burst); 51 dw_writel(dws, DW_SPI_DMARDLR, dws->rxburst - 1); 52 53 ret = dma_get_slave_caps(dws->txchan, &caps); 54 if (!ret && caps.max_burst) 55 max_burst = caps.max_burst; 56 else 57 max_burst = TX_BURST_LEVEL; 58 59 /* 60 * Having a Rx DMA channel serviced with higher priority than a Tx DMA 61 * channel might not be enough to provide a well balanced DMA-based 62 * SPI transfer interface. There might still be moments when the Tx DMA 63 * channel is occasionally handled faster than the Rx DMA channel. 64 * That in its turn will eventually cause the SPI Rx FIFO overflow if 65 * SPI bus speed is high enough to fill the SPI Rx FIFO in before it's 66 * cleared by the Rx DMA channel. In order to fix the problem the Tx 67 * DMA activity is intentionally slowed down by limiting the SPI Tx 68 * FIFO depth with a value twice bigger than the Tx burst length. 69 */ 70 dws->txburst = min(max_burst, def_burst); 71 dw_writel(dws, DW_SPI_DMATDLR, dws->txburst); 72 } 73 74 static void dw_spi_dma_sg_burst_init(struct dw_spi *dws) 75 { 76 struct dma_slave_caps tx = {0}, rx = {0}; 77 78 dma_get_slave_caps(dws->txchan, &tx); 79 dma_get_slave_caps(dws->rxchan, &rx); 80 81 if (tx.max_sg_burst > 0 && rx.max_sg_burst > 0) 82 dws->dma_sg_burst = min(tx.max_sg_burst, rx.max_sg_burst); 83 else if (tx.max_sg_burst > 0) 84 dws->dma_sg_burst = tx.max_sg_burst; 85 else if (rx.max_sg_burst > 0) 86 dws->dma_sg_burst = rx.max_sg_burst; 87 else 88 dws->dma_sg_burst = 0; 89 } 90 91 static int dw_spi_dma_init_mfld(struct device *dev, struct dw_spi *dws) 92 { 93 struct dw_dma_slave dma_tx = { .dst_id = 1 }, *tx = &dma_tx; 94 struct dw_dma_slave dma_rx = { .src_id = 0 }, *rx = &dma_rx; 95 struct pci_dev *dma_dev; 96 dma_cap_mask_t mask; 97 98 /* 99 * Get pci device for DMA controller, currently it could only 100 * be the DMA controller of Medfield 101 */ 102 dma_dev = pci_get_device(PCI_VENDOR_ID_INTEL, 0x0827, NULL); 103 if (!dma_dev) 104 return -ENODEV; 105 106 dma_cap_zero(mask); 107 dma_cap_set(DMA_SLAVE, mask); 108 109 /* 1. Init rx channel */ 110 rx->dma_dev = &dma_dev->dev; 111 dws->rxchan = dma_request_channel(mask, dw_spi_dma_chan_filter, rx); 112 if (!dws->rxchan) 113 goto err_exit; 114 115 /* 2. Init tx channel */ 116 tx->dma_dev = &dma_dev->dev; 117 dws->txchan = dma_request_channel(mask, dw_spi_dma_chan_filter, tx); 118 if (!dws->txchan) 119 goto free_rxchan; 120 121 dws->master->dma_rx = dws->rxchan; 122 dws->master->dma_tx = dws->txchan; 123 124 init_completion(&dws->dma_completion); 125 126 dw_spi_dma_maxburst_init(dws); 127 128 dw_spi_dma_sg_burst_init(dws); 129 130 return 0; 131 132 free_rxchan: 133 dma_release_channel(dws->rxchan); 134 dws->rxchan = NULL; 135 err_exit: 136 return -EBUSY; 137 } 138 139 static int dw_spi_dma_init_generic(struct device *dev, struct dw_spi *dws) 140 { 141 dws->rxchan = dma_request_slave_channel(dev, "rx"); 142 if (!dws->rxchan) 143 return -ENODEV; 144 145 dws->txchan = dma_request_slave_channel(dev, "tx"); 146 if (!dws->txchan) { 147 dma_release_channel(dws->rxchan); 148 dws->rxchan = NULL; 149 return -ENODEV; 150 } 151 152 dws->master->dma_rx = dws->rxchan; 153 dws->master->dma_tx = dws->txchan; 154 155 init_completion(&dws->dma_completion); 156 157 dw_spi_dma_maxburst_init(dws); 158 159 dw_spi_dma_sg_burst_init(dws); 160 161 return 0; 162 } 163 164 static void dw_spi_dma_exit(struct dw_spi *dws) 165 { 166 if (dws->txchan) { 167 dmaengine_terminate_sync(dws->txchan); 168 dma_release_channel(dws->txchan); 169 } 170 171 if (dws->rxchan) { 172 dmaengine_terminate_sync(dws->rxchan); 173 dma_release_channel(dws->rxchan); 174 } 175 } 176 177 static irqreturn_t dw_spi_dma_transfer_handler(struct dw_spi *dws) 178 { 179 dw_spi_check_status(dws, false); 180 181 complete(&dws->dma_completion); 182 183 return IRQ_HANDLED; 184 } 185 186 static bool dw_spi_can_dma(struct spi_controller *master, 187 struct spi_device *spi, struct spi_transfer *xfer) 188 { 189 struct dw_spi *dws = spi_controller_get_devdata(master); 190 191 return xfer->len > dws->fifo_len; 192 } 193 194 static enum dma_slave_buswidth dw_spi_dma_convert_width(u8 n_bytes) 195 { 196 if (n_bytes == 1) 197 return DMA_SLAVE_BUSWIDTH_1_BYTE; 198 else if (n_bytes == 2) 199 return DMA_SLAVE_BUSWIDTH_2_BYTES; 200 201 return DMA_SLAVE_BUSWIDTH_UNDEFINED; 202 } 203 204 static int dw_spi_dma_wait(struct dw_spi *dws, unsigned int len, u32 speed) 205 { 206 unsigned long long ms; 207 208 ms = len * MSEC_PER_SEC * BITS_PER_BYTE; 209 do_div(ms, speed); 210 ms += ms + 200; 211 212 if (ms > UINT_MAX) 213 ms = UINT_MAX; 214 215 ms = wait_for_completion_timeout(&dws->dma_completion, 216 msecs_to_jiffies(ms)); 217 218 if (ms == 0) { 219 dev_err(&dws->master->cur_msg->spi->dev, 220 "DMA transaction timed out\n"); 221 return -ETIMEDOUT; 222 } 223 224 return 0; 225 } 226 227 static inline bool dw_spi_dma_tx_busy(struct dw_spi *dws) 228 { 229 return !(dw_readl(dws, DW_SPI_SR) & SR_TF_EMPT); 230 } 231 232 static int dw_spi_dma_wait_tx_done(struct dw_spi *dws, 233 struct spi_transfer *xfer) 234 { 235 int retry = SPI_WAIT_RETRIES; 236 struct spi_delay delay; 237 u32 nents; 238 239 nents = dw_readl(dws, DW_SPI_TXFLR); 240 delay.unit = SPI_DELAY_UNIT_SCK; 241 delay.value = nents * dws->n_bytes * BITS_PER_BYTE; 242 243 while (dw_spi_dma_tx_busy(dws) && retry--) 244 spi_delay_exec(&delay, xfer); 245 246 if (retry < 0) { 247 dev_err(&dws->master->dev, "Tx hanged up\n"); 248 return -EIO; 249 } 250 251 return 0; 252 } 253 254 /* 255 * dws->dma_chan_busy is set before the dma transfer starts, callback for tx 256 * channel will clear a corresponding bit. 257 */ 258 static void dw_spi_dma_tx_done(void *arg) 259 { 260 struct dw_spi *dws = arg; 261 262 clear_bit(TX_BUSY, &dws->dma_chan_busy); 263 if (test_bit(RX_BUSY, &dws->dma_chan_busy)) 264 return; 265 266 complete(&dws->dma_completion); 267 } 268 269 static int dw_spi_dma_config_tx(struct dw_spi *dws) 270 { 271 struct dma_slave_config txconf; 272 273 memset(&txconf, 0, sizeof(txconf)); 274 txconf.direction = DMA_MEM_TO_DEV; 275 txconf.dst_addr = dws->dma_addr; 276 txconf.dst_maxburst = dws->txburst; 277 txconf.src_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; 278 txconf.dst_addr_width = dw_spi_dma_convert_width(dws->n_bytes); 279 txconf.device_fc = false; 280 281 return dmaengine_slave_config(dws->txchan, &txconf); 282 } 283 284 static int dw_spi_dma_submit_tx(struct dw_spi *dws, struct scatterlist *sgl, 285 unsigned int nents) 286 { 287 struct dma_async_tx_descriptor *txdesc; 288 dma_cookie_t cookie; 289 int ret; 290 291 txdesc = dmaengine_prep_slave_sg(dws->txchan, sgl, nents, 292 DMA_MEM_TO_DEV, 293 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 294 if (!txdesc) 295 return -ENOMEM; 296 297 txdesc->callback = dw_spi_dma_tx_done; 298 txdesc->callback_param = dws; 299 300 cookie = dmaengine_submit(txdesc); 301 ret = dma_submit_error(cookie); 302 if (ret) { 303 dmaengine_terminate_sync(dws->txchan); 304 return ret; 305 } 306 307 set_bit(TX_BUSY, &dws->dma_chan_busy); 308 309 return 0; 310 } 311 312 static inline bool dw_spi_dma_rx_busy(struct dw_spi *dws) 313 { 314 return !!(dw_readl(dws, DW_SPI_SR) & SR_RF_NOT_EMPT); 315 } 316 317 static int dw_spi_dma_wait_rx_done(struct dw_spi *dws) 318 { 319 int retry = SPI_WAIT_RETRIES; 320 struct spi_delay delay; 321 unsigned long ns, us; 322 u32 nents; 323 324 /* 325 * It's unlikely that DMA engine is still doing the data fetching, but 326 * if it's let's give it some reasonable time. The timeout calculation 327 * is based on the synchronous APB/SSI reference clock rate, on a 328 * number of data entries left in the Rx FIFO, times a number of clock 329 * periods normally needed for a single APB read/write transaction 330 * without PREADY signal utilized (which is true for the DW APB SSI 331 * controller). 332 */ 333 nents = dw_readl(dws, DW_SPI_RXFLR); 334 ns = 4U * NSEC_PER_SEC / dws->max_freq * nents; 335 if (ns <= NSEC_PER_USEC) { 336 delay.unit = SPI_DELAY_UNIT_NSECS; 337 delay.value = ns; 338 } else { 339 us = DIV_ROUND_UP(ns, NSEC_PER_USEC); 340 delay.unit = SPI_DELAY_UNIT_USECS; 341 delay.value = clamp_val(us, 0, USHRT_MAX); 342 } 343 344 while (dw_spi_dma_rx_busy(dws) && retry--) 345 spi_delay_exec(&delay, NULL); 346 347 if (retry < 0) { 348 dev_err(&dws->master->dev, "Rx hanged up\n"); 349 return -EIO; 350 } 351 352 return 0; 353 } 354 355 /* 356 * dws->dma_chan_busy is set before the dma transfer starts, callback for rx 357 * channel will clear a corresponding bit. 358 */ 359 static void dw_spi_dma_rx_done(void *arg) 360 { 361 struct dw_spi *dws = arg; 362 363 clear_bit(RX_BUSY, &dws->dma_chan_busy); 364 if (test_bit(TX_BUSY, &dws->dma_chan_busy)) 365 return; 366 367 complete(&dws->dma_completion); 368 } 369 370 static int dw_spi_dma_config_rx(struct dw_spi *dws) 371 { 372 struct dma_slave_config rxconf; 373 374 memset(&rxconf, 0, sizeof(rxconf)); 375 rxconf.direction = DMA_DEV_TO_MEM; 376 rxconf.src_addr = dws->dma_addr; 377 rxconf.src_maxburst = dws->rxburst; 378 rxconf.dst_addr_width = DMA_SLAVE_BUSWIDTH_4_BYTES; 379 rxconf.src_addr_width = dw_spi_dma_convert_width(dws->n_bytes); 380 rxconf.device_fc = false; 381 382 return dmaengine_slave_config(dws->rxchan, &rxconf); 383 } 384 385 static int dw_spi_dma_submit_rx(struct dw_spi *dws, struct scatterlist *sgl, 386 unsigned int nents) 387 { 388 struct dma_async_tx_descriptor *rxdesc; 389 dma_cookie_t cookie; 390 int ret; 391 392 rxdesc = dmaengine_prep_slave_sg(dws->rxchan, sgl, nents, 393 DMA_DEV_TO_MEM, 394 DMA_PREP_INTERRUPT | DMA_CTRL_ACK); 395 if (!rxdesc) 396 return -ENOMEM; 397 398 rxdesc->callback = dw_spi_dma_rx_done; 399 rxdesc->callback_param = dws; 400 401 cookie = dmaengine_submit(rxdesc); 402 ret = dma_submit_error(cookie); 403 if (ret) { 404 dmaengine_terminate_sync(dws->rxchan); 405 return ret; 406 } 407 408 set_bit(RX_BUSY, &dws->dma_chan_busy); 409 410 return 0; 411 } 412 413 static int dw_spi_dma_setup(struct dw_spi *dws, struct spi_transfer *xfer) 414 { 415 u16 imr, dma_ctrl; 416 int ret; 417 418 if (!xfer->tx_buf) 419 return -EINVAL; 420 421 /* Setup DMA channels */ 422 ret = dw_spi_dma_config_tx(dws); 423 if (ret) 424 return ret; 425 426 if (xfer->rx_buf) { 427 ret = dw_spi_dma_config_rx(dws); 428 if (ret) 429 return ret; 430 } 431 432 /* Set the DMA handshaking interface */ 433 dma_ctrl = SPI_DMA_TDMAE; 434 if (xfer->rx_buf) 435 dma_ctrl |= SPI_DMA_RDMAE; 436 dw_writel(dws, DW_SPI_DMACR, dma_ctrl); 437 438 /* Set the interrupt mask */ 439 imr = SPI_INT_TXOI; 440 if (xfer->rx_buf) 441 imr |= SPI_INT_RXUI | SPI_INT_RXOI; 442 spi_umask_intr(dws, imr); 443 444 reinit_completion(&dws->dma_completion); 445 446 dws->transfer_handler = dw_spi_dma_transfer_handler; 447 448 return 0; 449 } 450 451 static int dw_spi_dma_transfer_all(struct dw_spi *dws, 452 struct spi_transfer *xfer) 453 { 454 int ret; 455 456 /* Submit the DMA Tx transfer */ 457 ret = dw_spi_dma_submit_tx(dws, xfer->tx_sg.sgl, xfer->tx_sg.nents); 458 if (ret) 459 goto err_clear_dmac; 460 461 /* Submit the DMA Rx transfer if required */ 462 if (xfer->rx_buf) { 463 ret = dw_spi_dma_submit_rx(dws, xfer->rx_sg.sgl, 464 xfer->rx_sg.nents); 465 if (ret) 466 goto err_clear_dmac; 467 468 /* rx must be started before tx due to spi instinct */ 469 dma_async_issue_pending(dws->rxchan); 470 } 471 472 dma_async_issue_pending(dws->txchan); 473 474 ret = dw_spi_dma_wait(dws, xfer->len, xfer->effective_speed_hz); 475 476 err_clear_dmac: 477 dw_writel(dws, DW_SPI_DMACR, 0); 478 479 return ret; 480 } 481 482 /* 483 * In case if at least one of the requested DMA channels doesn't support the 484 * hardware accelerated SG list entries traverse, the DMA driver will most 485 * likely work that around by performing the IRQ-based SG list entries 486 * resubmission. That might and will cause a problem if the DMA Tx channel is 487 * recharged and re-executed before the Rx DMA channel. Due to 488 * non-deterministic IRQ-handler execution latency the DMA Tx channel will 489 * start pushing data to the SPI bus before the Rx DMA channel is even 490 * reinitialized with the next inbound SG list entry. By doing so the DMA Tx 491 * channel will implicitly start filling the DW APB SSI Rx FIFO up, which while 492 * the DMA Rx channel being recharged and re-executed will eventually be 493 * overflown. 494 * 495 * In order to solve the problem we have to feed the DMA engine with SG list 496 * entries one-by-one. It shall keep the DW APB SSI Tx and Rx FIFOs 497 * synchronized and prevent the Rx FIFO overflow. Since in general the tx_sg 498 * and rx_sg lists may have different number of entries of different lengths 499 * (though total length should match) let's virtually split the SG-lists to the 500 * set of DMA transfers, which length is a minimum of the ordered SG-entries 501 * lengths. An ASCII-sketch of the implemented algo is following: 502 * xfer->len 503 * |___________| 504 * tx_sg list: |___|____|__| 505 * rx_sg list: |_|____|____| 506 * DMA transfers: |_|_|__|_|__| 507 * 508 * Note in order to have this workaround solving the denoted problem the DMA 509 * engine driver should properly initialize the max_sg_burst capability and set 510 * the DMA device max segment size parameter with maximum data block size the 511 * DMA engine supports. 512 */ 513 514 static int dw_spi_dma_transfer_one(struct dw_spi *dws, 515 struct spi_transfer *xfer) 516 { 517 struct scatterlist *tx_sg = NULL, *rx_sg = NULL, tx_tmp, rx_tmp; 518 unsigned int tx_len = 0, rx_len = 0; 519 unsigned int base, len; 520 int ret; 521 522 sg_init_table(&tx_tmp, 1); 523 sg_init_table(&rx_tmp, 1); 524 525 for (base = 0, len = 0; base < xfer->len; base += len) { 526 /* Fetch next Tx DMA data chunk */ 527 if (!tx_len) { 528 tx_sg = !tx_sg ? &xfer->tx_sg.sgl[0] : sg_next(tx_sg); 529 sg_dma_address(&tx_tmp) = sg_dma_address(tx_sg); 530 tx_len = sg_dma_len(tx_sg); 531 } 532 533 /* Fetch next Rx DMA data chunk */ 534 if (!rx_len) { 535 rx_sg = !rx_sg ? &xfer->rx_sg.sgl[0] : sg_next(rx_sg); 536 sg_dma_address(&rx_tmp) = sg_dma_address(rx_sg); 537 rx_len = sg_dma_len(rx_sg); 538 } 539 540 len = min(tx_len, rx_len); 541 542 sg_dma_len(&tx_tmp) = len; 543 sg_dma_len(&rx_tmp) = len; 544 545 /* Submit DMA Tx transfer */ 546 ret = dw_spi_dma_submit_tx(dws, &tx_tmp, 1); 547 if (ret) 548 break; 549 550 /* Submit DMA Rx transfer */ 551 ret = dw_spi_dma_submit_rx(dws, &rx_tmp, 1); 552 if (ret) 553 break; 554 555 /* Rx must be started before Tx due to SPI instinct */ 556 dma_async_issue_pending(dws->rxchan); 557 558 dma_async_issue_pending(dws->txchan); 559 560 /* 561 * Here we only need to wait for the DMA transfer to be 562 * finished since SPI controller is kept enabled during the 563 * procedure this loop implements and there is no risk to lose 564 * data left in the Tx/Rx FIFOs. 565 */ 566 ret = dw_spi_dma_wait(dws, len, xfer->effective_speed_hz); 567 if (ret) 568 break; 569 570 reinit_completion(&dws->dma_completion); 571 572 sg_dma_address(&tx_tmp) += len; 573 sg_dma_address(&rx_tmp) += len; 574 tx_len -= len; 575 rx_len -= len; 576 } 577 578 dw_writel(dws, DW_SPI_DMACR, 0); 579 580 return ret; 581 } 582 583 static int dw_spi_dma_transfer(struct dw_spi *dws, struct spi_transfer *xfer) 584 { 585 unsigned int nents; 586 int ret; 587 588 nents = max(xfer->tx_sg.nents, xfer->rx_sg.nents); 589 590 /* 591 * Execute normal DMA-based transfer (which submits the Rx and Tx SG 592 * lists directly to the DMA engine at once) if either full hardware 593 * accelerated SG list traverse is supported by both channels, or the 594 * Tx-only SPI transfer is requested, or the DMA engine is capable to 595 * handle both SG lists on hardware accelerated basis. 596 */ 597 if (!dws->dma_sg_burst || !xfer->rx_buf || nents <= dws->dma_sg_burst) 598 ret = dw_spi_dma_transfer_all(dws, xfer); 599 else 600 ret = dw_spi_dma_transfer_one(dws, xfer); 601 if (ret) 602 return ret; 603 604 if (dws->master->cur_msg->status == -EINPROGRESS) { 605 ret = dw_spi_dma_wait_tx_done(dws, xfer); 606 if (ret) 607 return ret; 608 } 609 610 if (xfer->rx_buf && dws->master->cur_msg->status == -EINPROGRESS) 611 ret = dw_spi_dma_wait_rx_done(dws); 612 613 return ret; 614 } 615 616 static void dw_spi_dma_stop(struct dw_spi *dws) 617 { 618 if (test_bit(TX_BUSY, &dws->dma_chan_busy)) { 619 dmaengine_terminate_sync(dws->txchan); 620 clear_bit(TX_BUSY, &dws->dma_chan_busy); 621 } 622 if (test_bit(RX_BUSY, &dws->dma_chan_busy)) { 623 dmaengine_terminate_sync(dws->rxchan); 624 clear_bit(RX_BUSY, &dws->dma_chan_busy); 625 } 626 } 627 628 static const struct dw_spi_dma_ops dw_spi_dma_mfld_ops = { 629 .dma_init = dw_spi_dma_init_mfld, 630 .dma_exit = dw_spi_dma_exit, 631 .dma_setup = dw_spi_dma_setup, 632 .can_dma = dw_spi_can_dma, 633 .dma_transfer = dw_spi_dma_transfer, 634 .dma_stop = dw_spi_dma_stop, 635 }; 636 637 void dw_spi_dma_setup_mfld(struct dw_spi *dws) 638 { 639 dws->dma_ops = &dw_spi_dma_mfld_ops; 640 } 641 EXPORT_SYMBOL_GPL(dw_spi_dma_setup_mfld); 642 643 static const struct dw_spi_dma_ops dw_spi_dma_generic_ops = { 644 .dma_init = dw_spi_dma_init_generic, 645 .dma_exit = dw_spi_dma_exit, 646 .dma_setup = dw_spi_dma_setup, 647 .can_dma = dw_spi_can_dma, 648 .dma_transfer = dw_spi_dma_transfer, 649 .dma_stop = dw_spi_dma_stop, 650 }; 651 652 void dw_spi_dma_setup_generic(struct dw_spi *dws) 653 { 654 dws->dma_ops = &dw_spi_dma_generic_ops; 655 } 656 EXPORT_SYMBOL_GPL(dw_spi_dma_setup_generic); 657