1 /* 2 * Copyright (c) 2005-2011 Atheros Communications Inc. 3 * Copyright (c) 2011-2013 Qualcomm Atheros, Inc. 4 * 5 * Permission to use, copy, modify, and/or distribute this software for any 6 * purpose with or without fee is hereby granted, provided that the above 7 * copyright notice and this permission notice appear in all copies. 8 * 9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES 10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF 11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR 12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES 13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN 14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF 15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. 16 */ 17 18 #include "hif.h" 19 #include "pci.h" 20 #include "ce.h" 21 #include "debug.h" 22 23 /* 24 * Support for Copy Engine hardware, which is mainly used for 25 * communication between Host and Target over a PCIe interconnect. 26 */ 27 28 /* 29 * A single CopyEngine (CE) comprises two "rings": 30 * a source ring 31 * a destination ring 32 * 33 * Each ring consists of a number of descriptors which specify 34 * an address, length, and meta-data. 35 * 36 * Typically, one side of the PCIe interconnect (Host or Target) 37 * controls one ring and the other side controls the other ring. 38 * The source side chooses when to initiate a transfer and it 39 * chooses what to send (buffer address, length). The destination 40 * side keeps a supply of "anonymous receive buffers" available and 41 * it handles incoming data as it arrives (when the destination 42 * recieves an interrupt). 43 * 44 * The sender may send a simple buffer (address/length) or it may 45 * send a small list of buffers. When a small list is sent, hardware 46 * "gathers" these and they end up in a single destination buffer 47 * with a single interrupt. 48 * 49 * There are several "contexts" managed by this layer -- more, it 50 * may seem -- than should be needed. These are provided mainly for 51 * maximum flexibility and especially to facilitate a simpler HIF 52 * implementation. There are per-CopyEngine recv, send, and watermark 53 * contexts. These are supplied by the caller when a recv, send, 54 * or watermark handler is established and they are echoed back to 55 * the caller when the respective callbacks are invoked. There is 56 * also a per-transfer context supplied by the caller when a buffer 57 * (or sendlist) is sent and when a buffer is enqueued for recv. 58 * These per-transfer contexts are echoed back to the caller when 59 * the buffer is sent/received. 60 */ 61 62 static inline void ath10k_ce_dest_ring_write_index_set(struct ath10k *ar, 63 u32 ce_ctrl_addr, 64 unsigned int n) 65 { 66 ath10k_pci_write32(ar, ce_ctrl_addr + DST_WR_INDEX_ADDRESS, n); 67 } 68 69 static inline u32 ath10k_ce_dest_ring_write_index_get(struct ath10k *ar, 70 u32 ce_ctrl_addr) 71 { 72 return ath10k_pci_read32(ar, ce_ctrl_addr + DST_WR_INDEX_ADDRESS); 73 } 74 75 static inline void ath10k_ce_src_ring_write_index_set(struct ath10k *ar, 76 u32 ce_ctrl_addr, 77 unsigned int n) 78 { 79 ath10k_pci_write32(ar, ce_ctrl_addr + SR_WR_INDEX_ADDRESS, n); 80 } 81 82 static inline u32 ath10k_ce_src_ring_write_index_get(struct ath10k *ar, 83 u32 ce_ctrl_addr) 84 { 85 return ath10k_pci_read32(ar, ce_ctrl_addr + SR_WR_INDEX_ADDRESS); 86 } 87 88 static inline u32 ath10k_ce_src_ring_read_index_get(struct ath10k *ar, 89 u32 ce_ctrl_addr) 90 { 91 return ath10k_pci_read32(ar, ce_ctrl_addr + CURRENT_SRRI_ADDRESS); 92 } 93 94 static inline void ath10k_ce_src_ring_base_addr_set(struct ath10k *ar, 95 u32 ce_ctrl_addr, 96 unsigned int addr) 97 { 98 ath10k_pci_write32(ar, ce_ctrl_addr + SR_BA_ADDRESS, addr); 99 } 100 101 static inline void ath10k_ce_src_ring_size_set(struct ath10k *ar, 102 u32 ce_ctrl_addr, 103 unsigned int n) 104 { 105 ath10k_pci_write32(ar, ce_ctrl_addr + SR_SIZE_ADDRESS, n); 106 } 107 108 static inline void ath10k_ce_src_ring_dmax_set(struct ath10k *ar, 109 u32 ce_ctrl_addr, 110 unsigned int n) 111 { 112 u32 ctrl1_addr = ath10k_pci_read32((ar), 113 (ce_ctrl_addr) + CE_CTRL1_ADDRESS); 114 115 ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS, 116 (ctrl1_addr & ~CE_CTRL1_DMAX_LENGTH_MASK) | 117 CE_CTRL1_DMAX_LENGTH_SET(n)); 118 } 119 120 static inline void ath10k_ce_src_ring_byte_swap_set(struct ath10k *ar, 121 u32 ce_ctrl_addr, 122 unsigned int n) 123 { 124 u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS); 125 126 ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS, 127 (ctrl1_addr & ~CE_CTRL1_SRC_RING_BYTE_SWAP_EN_MASK) | 128 CE_CTRL1_SRC_RING_BYTE_SWAP_EN_SET(n)); 129 } 130 131 static inline void ath10k_ce_dest_ring_byte_swap_set(struct ath10k *ar, 132 u32 ce_ctrl_addr, 133 unsigned int n) 134 { 135 u32 ctrl1_addr = ath10k_pci_read32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS); 136 137 ath10k_pci_write32(ar, ce_ctrl_addr + CE_CTRL1_ADDRESS, 138 (ctrl1_addr & ~CE_CTRL1_DST_RING_BYTE_SWAP_EN_MASK) | 139 CE_CTRL1_DST_RING_BYTE_SWAP_EN_SET(n)); 140 } 141 142 static inline u32 ath10k_ce_dest_ring_read_index_get(struct ath10k *ar, 143 u32 ce_ctrl_addr) 144 { 145 return ath10k_pci_read32(ar, ce_ctrl_addr + CURRENT_DRRI_ADDRESS); 146 } 147 148 static inline void ath10k_ce_dest_ring_base_addr_set(struct ath10k *ar, 149 u32 ce_ctrl_addr, 150 u32 addr) 151 { 152 ath10k_pci_write32(ar, ce_ctrl_addr + DR_BA_ADDRESS, addr); 153 } 154 155 static inline void ath10k_ce_dest_ring_size_set(struct ath10k *ar, 156 u32 ce_ctrl_addr, 157 unsigned int n) 158 { 159 ath10k_pci_write32(ar, ce_ctrl_addr + DR_SIZE_ADDRESS, n); 160 } 161 162 static inline void ath10k_ce_src_ring_highmark_set(struct ath10k *ar, 163 u32 ce_ctrl_addr, 164 unsigned int n) 165 { 166 u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS); 167 168 ath10k_pci_write32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS, 169 (addr & ~SRC_WATERMARK_HIGH_MASK) | 170 SRC_WATERMARK_HIGH_SET(n)); 171 } 172 173 static inline void ath10k_ce_src_ring_lowmark_set(struct ath10k *ar, 174 u32 ce_ctrl_addr, 175 unsigned int n) 176 { 177 u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS); 178 179 ath10k_pci_write32(ar, ce_ctrl_addr + SRC_WATERMARK_ADDRESS, 180 (addr & ~SRC_WATERMARK_LOW_MASK) | 181 SRC_WATERMARK_LOW_SET(n)); 182 } 183 184 static inline void ath10k_ce_dest_ring_highmark_set(struct ath10k *ar, 185 u32 ce_ctrl_addr, 186 unsigned int n) 187 { 188 u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS); 189 190 ath10k_pci_write32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS, 191 (addr & ~DST_WATERMARK_HIGH_MASK) | 192 DST_WATERMARK_HIGH_SET(n)); 193 } 194 195 static inline void ath10k_ce_dest_ring_lowmark_set(struct ath10k *ar, 196 u32 ce_ctrl_addr, 197 unsigned int n) 198 { 199 u32 addr = ath10k_pci_read32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS); 200 201 ath10k_pci_write32(ar, ce_ctrl_addr + DST_WATERMARK_ADDRESS, 202 (addr & ~DST_WATERMARK_LOW_MASK) | 203 DST_WATERMARK_LOW_SET(n)); 204 } 205 206 static inline void ath10k_ce_copy_complete_inter_enable(struct ath10k *ar, 207 u32 ce_ctrl_addr) 208 { 209 u32 host_ie_addr = ath10k_pci_read32(ar, 210 ce_ctrl_addr + HOST_IE_ADDRESS); 211 212 ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS, 213 host_ie_addr | HOST_IE_COPY_COMPLETE_MASK); 214 } 215 216 static inline void ath10k_ce_copy_complete_intr_disable(struct ath10k *ar, 217 u32 ce_ctrl_addr) 218 { 219 u32 host_ie_addr = ath10k_pci_read32(ar, 220 ce_ctrl_addr + HOST_IE_ADDRESS); 221 222 ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS, 223 host_ie_addr & ~HOST_IE_COPY_COMPLETE_MASK); 224 } 225 226 static inline void ath10k_ce_watermark_intr_disable(struct ath10k *ar, 227 u32 ce_ctrl_addr) 228 { 229 u32 host_ie_addr = ath10k_pci_read32(ar, 230 ce_ctrl_addr + HOST_IE_ADDRESS); 231 232 ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IE_ADDRESS, 233 host_ie_addr & ~CE_WATERMARK_MASK); 234 } 235 236 static inline void ath10k_ce_error_intr_enable(struct ath10k *ar, 237 u32 ce_ctrl_addr) 238 { 239 u32 misc_ie_addr = ath10k_pci_read32(ar, 240 ce_ctrl_addr + MISC_IE_ADDRESS); 241 242 ath10k_pci_write32(ar, ce_ctrl_addr + MISC_IE_ADDRESS, 243 misc_ie_addr | CE_ERROR_MASK); 244 } 245 246 static inline void ath10k_ce_error_intr_disable(struct ath10k *ar, 247 u32 ce_ctrl_addr) 248 { 249 u32 misc_ie_addr = ath10k_pci_read32(ar, 250 ce_ctrl_addr + MISC_IE_ADDRESS); 251 252 ath10k_pci_write32(ar, ce_ctrl_addr + MISC_IE_ADDRESS, 253 misc_ie_addr & ~CE_ERROR_MASK); 254 } 255 256 static inline void ath10k_ce_engine_int_status_clear(struct ath10k *ar, 257 u32 ce_ctrl_addr, 258 unsigned int mask) 259 { 260 ath10k_pci_write32(ar, ce_ctrl_addr + HOST_IS_ADDRESS, mask); 261 } 262 263 264 /* 265 * Guts of ath10k_ce_send, used by both ath10k_ce_send and 266 * ath10k_ce_sendlist_send. 267 * The caller takes responsibility for any needed locking. 268 */ 269 int ath10k_ce_send_nolock(struct ath10k_ce_pipe *ce_state, 270 void *per_transfer_context, 271 u32 buffer, 272 unsigned int nbytes, 273 unsigned int transfer_id, 274 unsigned int flags) 275 { 276 struct ath10k *ar = ce_state->ar; 277 struct ath10k_ce_ring *src_ring = ce_state->src_ring; 278 struct ce_desc *desc, *sdesc; 279 unsigned int nentries_mask = src_ring->nentries_mask; 280 unsigned int sw_index = src_ring->sw_index; 281 unsigned int write_index = src_ring->write_index; 282 u32 ctrl_addr = ce_state->ctrl_addr; 283 u32 desc_flags = 0; 284 int ret = 0; 285 286 if (nbytes > ce_state->src_sz_max) 287 ath10k_warn("%s: send more we can (nbytes: %d, max: %d)\n", 288 __func__, nbytes, ce_state->src_sz_max); 289 290 ret = ath10k_pci_wake(ar); 291 if (ret) 292 return ret; 293 294 if (unlikely(CE_RING_DELTA(nentries_mask, 295 write_index, sw_index - 1) <= 0)) { 296 ret = -ENOSR; 297 goto exit; 298 } 299 300 desc = CE_SRC_RING_TO_DESC(src_ring->base_addr_owner_space, 301 write_index); 302 sdesc = CE_SRC_RING_TO_DESC(src_ring->shadow_base, write_index); 303 304 desc_flags |= SM(transfer_id, CE_DESC_FLAGS_META_DATA); 305 306 if (flags & CE_SEND_FLAG_GATHER) 307 desc_flags |= CE_DESC_FLAGS_GATHER; 308 if (flags & CE_SEND_FLAG_BYTE_SWAP) 309 desc_flags |= CE_DESC_FLAGS_BYTE_SWAP; 310 311 sdesc->addr = __cpu_to_le32(buffer); 312 sdesc->nbytes = __cpu_to_le16(nbytes); 313 sdesc->flags = __cpu_to_le16(desc_flags); 314 315 *desc = *sdesc; 316 317 src_ring->per_transfer_context[write_index] = per_transfer_context; 318 319 /* Update Source Ring Write Index */ 320 write_index = CE_RING_IDX_INCR(nentries_mask, write_index); 321 322 /* WORKAROUND */ 323 if (!(flags & CE_SEND_FLAG_GATHER)) 324 ath10k_ce_src_ring_write_index_set(ar, ctrl_addr, write_index); 325 326 src_ring->write_index = write_index; 327 exit: 328 ath10k_pci_sleep(ar); 329 return ret; 330 } 331 332 void __ath10k_ce_send_revert(struct ath10k_ce_pipe *pipe) 333 { 334 struct ath10k *ar = pipe->ar; 335 struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); 336 struct ath10k_ce_ring *src_ring = pipe->src_ring; 337 u32 ctrl_addr = pipe->ctrl_addr; 338 339 lockdep_assert_held(&ar_pci->ce_lock); 340 341 /* 342 * This function must be called only if there is an incomplete 343 * scatter-gather transfer (before index register is updated) 344 * that needs to be cleaned up. 345 */ 346 if (WARN_ON_ONCE(src_ring->write_index == src_ring->sw_index)) 347 return; 348 349 if (WARN_ON_ONCE(src_ring->write_index == 350 ath10k_ce_src_ring_write_index_get(ar, ctrl_addr))) 351 return; 352 353 src_ring->write_index--; 354 src_ring->write_index &= src_ring->nentries_mask; 355 356 src_ring->per_transfer_context[src_ring->write_index] = NULL; 357 } 358 359 int ath10k_ce_send(struct ath10k_ce_pipe *ce_state, 360 void *per_transfer_context, 361 u32 buffer, 362 unsigned int nbytes, 363 unsigned int transfer_id, 364 unsigned int flags) 365 { 366 struct ath10k *ar = ce_state->ar; 367 struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); 368 int ret; 369 370 spin_lock_bh(&ar_pci->ce_lock); 371 ret = ath10k_ce_send_nolock(ce_state, per_transfer_context, 372 buffer, nbytes, transfer_id, flags); 373 spin_unlock_bh(&ar_pci->ce_lock); 374 375 return ret; 376 } 377 378 int ath10k_ce_num_free_src_entries(struct ath10k_ce_pipe *pipe) 379 { 380 struct ath10k *ar = pipe->ar; 381 struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); 382 int delta; 383 384 spin_lock_bh(&ar_pci->ce_lock); 385 delta = CE_RING_DELTA(pipe->src_ring->nentries_mask, 386 pipe->src_ring->write_index, 387 pipe->src_ring->sw_index - 1); 388 spin_unlock_bh(&ar_pci->ce_lock); 389 390 return delta; 391 } 392 393 int ath10k_ce_recv_buf_enqueue(struct ath10k_ce_pipe *ce_state, 394 void *per_recv_context, 395 u32 buffer) 396 { 397 struct ath10k_ce_ring *dest_ring = ce_state->dest_ring; 398 u32 ctrl_addr = ce_state->ctrl_addr; 399 struct ath10k *ar = ce_state->ar; 400 struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); 401 unsigned int nentries_mask = dest_ring->nentries_mask; 402 unsigned int write_index; 403 unsigned int sw_index; 404 int ret; 405 406 spin_lock_bh(&ar_pci->ce_lock); 407 write_index = dest_ring->write_index; 408 sw_index = dest_ring->sw_index; 409 410 ret = ath10k_pci_wake(ar); 411 if (ret) 412 goto out; 413 414 if (CE_RING_DELTA(nentries_mask, write_index, sw_index - 1) > 0) { 415 struct ce_desc *base = dest_ring->base_addr_owner_space; 416 struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, write_index); 417 418 /* Update destination descriptor */ 419 desc->addr = __cpu_to_le32(buffer); 420 desc->nbytes = 0; 421 422 dest_ring->per_transfer_context[write_index] = 423 per_recv_context; 424 425 /* Update Destination Ring Write Index */ 426 write_index = CE_RING_IDX_INCR(nentries_mask, write_index); 427 ath10k_ce_dest_ring_write_index_set(ar, ctrl_addr, write_index); 428 dest_ring->write_index = write_index; 429 ret = 0; 430 } else { 431 ret = -EIO; 432 } 433 ath10k_pci_sleep(ar); 434 435 out: 436 spin_unlock_bh(&ar_pci->ce_lock); 437 438 return ret; 439 } 440 441 /* 442 * Guts of ath10k_ce_completed_recv_next. 443 * The caller takes responsibility for any necessary locking. 444 */ 445 static int ath10k_ce_completed_recv_next_nolock(struct ath10k_ce_pipe *ce_state, 446 void **per_transfer_contextp, 447 u32 *bufferp, 448 unsigned int *nbytesp, 449 unsigned int *transfer_idp, 450 unsigned int *flagsp) 451 { 452 struct ath10k_ce_ring *dest_ring = ce_state->dest_ring; 453 unsigned int nentries_mask = dest_ring->nentries_mask; 454 unsigned int sw_index = dest_ring->sw_index; 455 456 struct ce_desc *base = dest_ring->base_addr_owner_space; 457 struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index); 458 struct ce_desc sdesc; 459 u16 nbytes; 460 461 /* Copy in one go for performance reasons */ 462 sdesc = *desc; 463 464 nbytes = __le16_to_cpu(sdesc.nbytes); 465 if (nbytes == 0) { 466 /* 467 * This closes a relatively unusual race where the Host 468 * sees the updated DRRI before the update to the 469 * corresponding descriptor has completed. We treat this 470 * as a descriptor that is not yet done. 471 */ 472 return -EIO; 473 } 474 475 desc->nbytes = 0; 476 477 /* Return data from completed destination descriptor */ 478 *bufferp = __le32_to_cpu(sdesc.addr); 479 *nbytesp = nbytes; 480 *transfer_idp = MS(__le16_to_cpu(sdesc.flags), CE_DESC_FLAGS_META_DATA); 481 482 if (__le16_to_cpu(sdesc.flags) & CE_DESC_FLAGS_BYTE_SWAP) 483 *flagsp = CE_RECV_FLAG_SWAPPED; 484 else 485 *flagsp = 0; 486 487 if (per_transfer_contextp) 488 *per_transfer_contextp = 489 dest_ring->per_transfer_context[sw_index]; 490 491 /* sanity */ 492 dest_ring->per_transfer_context[sw_index] = NULL; 493 494 /* Update sw_index */ 495 sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index); 496 dest_ring->sw_index = sw_index; 497 498 return 0; 499 } 500 501 int ath10k_ce_completed_recv_next(struct ath10k_ce_pipe *ce_state, 502 void **per_transfer_contextp, 503 u32 *bufferp, 504 unsigned int *nbytesp, 505 unsigned int *transfer_idp, 506 unsigned int *flagsp) 507 { 508 struct ath10k *ar = ce_state->ar; 509 struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); 510 int ret; 511 512 spin_lock_bh(&ar_pci->ce_lock); 513 ret = ath10k_ce_completed_recv_next_nolock(ce_state, 514 per_transfer_contextp, 515 bufferp, nbytesp, 516 transfer_idp, flagsp); 517 spin_unlock_bh(&ar_pci->ce_lock); 518 519 return ret; 520 } 521 522 int ath10k_ce_revoke_recv_next(struct ath10k_ce_pipe *ce_state, 523 void **per_transfer_contextp, 524 u32 *bufferp) 525 { 526 struct ath10k_ce_ring *dest_ring; 527 unsigned int nentries_mask; 528 unsigned int sw_index; 529 unsigned int write_index; 530 int ret; 531 struct ath10k *ar; 532 struct ath10k_pci *ar_pci; 533 534 dest_ring = ce_state->dest_ring; 535 536 if (!dest_ring) 537 return -EIO; 538 539 ar = ce_state->ar; 540 ar_pci = ath10k_pci_priv(ar); 541 542 spin_lock_bh(&ar_pci->ce_lock); 543 544 nentries_mask = dest_ring->nentries_mask; 545 sw_index = dest_ring->sw_index; 546 write_index = dest_ring->write_index; 547 if (write_index != sw_index) { 548 struct ce_desc *base = dest_ring->base_addr_owner_space; 549 struct ce_desc *desc = CE_DEST_RING_TO_DESC(base, sw_index); 550 551 /* Return data from completed destination descriptor */ 552 *bufferp = __le32_to_cpu(desc->addr); 553 554 if (per_transfer_contextp) 555 *per_transfer_contextp = 556 dest_ring->per_transfer_context[sw_index]; 557 558 /* sanity */ 559 dest_ring->per_transfer_context[sw_index] = NULL; 560 561 /* Update sw_index */ 562 sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index); 563 dest_ring->sw_index = sw_index; 564 ret = 0; 565 } else { 566 ret = -EIO; 567 } 568 569 spin_unlock_bh(&ar_pci->ce_lock); 570 571 return ret; 572 } 573 574 /* 575 * Guts of ath10k_ce_completed_send_next. 576 * The caller takes responsibility for any necessary locking. 577 */ 578 static int ath10k_ce_completed_send_next_nolock(struct ath10k_ce_pipe *ce_state, 579 void **per_transfer_contextp, 580 u32 *bufferp, 581 unsigned int *nbytesp, 582 unsigned int *transfer_idp) 583 { 584 struct ath10k_ce_ring *src_ring = ce_state->src_ring; 585 u32 ctrl_addr = ce_state->ctrl_addr; 586 struct ath10k *ar = ce_state->ar; 587 unsigned int nentries_mask = src_ring->nentries_mask; 588 unsigned int sw_index = src_ring->sw_index; 589 struct ce_desc *sdesc, *sbase; 590 unsigned int read_index; 591 int ret; 592 593 if (src_ring->hw_index == sw_index) { 594 /* 595 * The SW completion index has caught up with the cached 596 * version of the HW completion index. 597 * Update the cached HW completion index to see whether 598 * the SW has really caught up to the HW, or if the cached 599 * value of the HW index has become stale. 600 */ 601 602 ret = ath10k_pci_wake(ar); 603 if (ret) 604 return ret; 605 606 read_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr); 607 if (read_index == 0xffffffff) 608 return -ENODEV; 609 610 read_index &= nentries_mask; 611 src_ring->hw_index = read_index; 612 613 ath10k_pci_sleep(ar); 614 } 615 616 read_index = src_ring->hw_index; 617 618 if (read_index == sw_index) 619 return -EIO; 620 621 sbase = src_ring->shadow_base; 622 sdesc = CE_SRC_RING_TO_DESC(sbase, sw_index); 623 624 /* Return data from completed source descriptor */ 625 *bufferp = __le32_to_cpu(sdesc->addr); 626 *nbytesp = __le16_to_cpu(sdesc->nbytes); 627 *transfer_idp = MS(__le16_to_cpu(sdesc->flags), 628 CE_DESC_FLAGS_META_DATA); 629 630 if (per_transfer_contextp) 631 *per_transfer_contextp = 632 src_ring->per_transfer_context[sw_index]; 633 634 /* sanity */ 635 src_ring->per_transfer_context[sw_index] = NULL; 636 637 /* Update sw_index */ 638 sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index); 639 src_ring->sw_index = sw_index; 640 641 return 0; 642 } 643 644 /* NB: Modeled after ath10k_ce_completed_send_next */ 645 int ath10k_ce_cancel_send_next(struct ath10k_ce_pipe *ce_state, 646 void **per_transfer_contextp, 647 u32 *bufferp, 648 unsigned int *nbytesp, 649 unsigned int *transfer_idp) 650 { 651 struct ath10k_ce_ring *src_ring; 652 unsigned int nentries_mask; 653 unsigned int sw_index; 654 unsigned int write_index; 655 int ret; 656 struct ath10k *ar; 657 struct ath10k_pci *ar_pci; 658 659 src_ring = ce_state->src_ring; 660 661 if (!src_ring) 662 return -EIO; 663 664 ar = ce_state->ar; 665 ar_pci = ath10k_pci_priv(ar); 666 667 spin_lock_bh(&ar_pci->ce_lock); 668 669 nentries_mask = src_ring->nentries_mask; 670 sw_index = src_ring->sw_index; 671 write_index = src_ring->write_index; 672 673 if (write_index != sw_index) { 674 struct ce_desc *base = src_ring->base_addr_owner_space; 675 struct ce_desc *desc = CE_SRC_RING_TO_DESC(base, sw_index); 676 677 /* Return data from completed source descriptor */ 678 *bufferp = __le32_to_cpu(desc->addr); 679 *nbytesp = __le16_to_cpu(desc->nbytes); 680 *transfer_idp = MS(__le16_to_cpu(desc->flags), 681 CE_DESC_FLAGS_META_DATA); 682 683 if (per_transfer_contextp) 684 *per_transfer_contextp = 685 src_ring->per_transfer_context[sw_index]; 686 687 /* sanity */ 688 src_ring->per_transfer_context[sw_index] = NULL; 689 690 /* Update sw_index */ 691 sw_index = CE_RING_IDX_INCR(nentries_mask, sw_index); 692 src_ring->sw_index = sw_index; 693 ret = 0; 694 } else { 695 ret = -EIO; 696 } 697 698 spin_unlock_bh(&ar_pci->ce_lock); 699 700 return ret; 701 } 702 703 int ath10k_ce_completed_send_next(struct ath10k_ce_pipe *ce_state, 704 void **per_transfer_contextp, 705 u32 *bufferp, 706 unsigned int *nbytesp, 707 unsigned int *transfer_idp) 708 { 709 struct ath10k *ar = ce_state->ar; 710 struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); 711 int ret; 712 713 spin_lock_bh(&ar_pci->ce_lock); 714 ret = ath10k_ce_completed_send_next_nolock(ce_state, 715 per_transfer_contextp, 716 bufferp, nbytesp, 717 transfer_idp); 718 spin_unlock_bh(&ar_pci->ce_lock); 719 720 return ret; 721 } 722 723 /* 724 * Guts of interrupt handler for per-engine interrupts on a particular CE. 725 * 726 * Invokes registered callbacks for recv_complete, 727 * send_complete, and watermarks. 728 */ 729 void ath10k_ce_per_engine_service(struct ath10k *ar, unsigned int ce_id) 730 { 731 struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); 732 struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id]; 733 u32 ctrl_addr = ce_state->ctrl_addr; 734 int ret; 735 736 ret = ath10k_pci_wake(ar); 737 if (ret) 738 return; 739 740 spin_lock_bh(&ar_pci->ce_lock); 741 742 /* Clear the copy-complete interrupts that will be handled here. */ 743 ath10k_ce_engine_int_status_clear(ar, ctrl_addr, 744 HOST_IS_COPY_COMPLETE_MASK); 745 746 spin_unlock_bh(&ar_pci->ce_lock); 747 748 if (ce_state->recv_cb) 749 ce_state->recv_cb(ce_state); 750 751 if (ce_state->send_cb) 752 ce_state->send_cb(ce_state); 753 754 spin_lock_bh(&ar_pci->ce_lock); 755 756 /* 757 * Misc CE interrupts are not being handled, but still need 758 * to be cleared. 759 */ 760 ath10k_ce_engine_int_status_clear(ar, ctrl_addr, CE_WATERMARK_MASK); 761 762 spin_unlock_bh(&ar_pci->ce_lock); 763 ath10k_pci_sleep(ar); 764 } 765 766 /* 767 * Handler for per-engine interrupts on ALL active CEs. 768 * This is used in cases where the system is sharing a 769 * single interrput for all CEs 770 */ 771 772 void ath10k_ce_per_engine_service_any(struct ath10k *ar) 773 { 774 int ce_id, ret; 775 u32 intr_summary; 776 777 ret = ath10k_pci_wake(ar); 778 if (ret) 779 return; 780 781 intr_summary = CE_INTERRUPT_SUMMARY(ar); 782 783 for (ce_id = 0; intr_summary && (ce_id < CE_COUNT); ce_id++) { 784 if (intr_summary & (1 << ce_id)) 785 intr_summary &= ~(1 << ce_id); 786 else 787 /* no intr pending on this CE */ 788 continue; 789 790 ath10k_ce_per_engine_service(ar, ce_id); 791 } 792 793 ath10k_pci_sleep(ar); 794 } 795 796 /* 797 * Adjust interrupts for the copy complete handler. 798 * If it's needed for either send or recv, then unmask 799 * this interrupt; otherwise, mask it. 800 * 801 * Called with ce_lock held. 802 */ 803 static void ath10k_ce_per_engine_handler_adjust(struct ath10k_ce_pipe *ce_state, 804 int disable_copy_compl_intr) 805 { 806 u32 ctrl_addr = ce_state->ctrl_addr; 807 struct ath10k *ar = ce_state->ar; 808 int ret; 809 810 ret = ath10k_pci_wake(ar); 811 if (ret) 812 return; 813 814 if ((!disable_copy_compl_intr) && 815 (ce_state->send_cb || ce_state->recv_cb)) 816 ath10k_ce_copy_complete_inter_enable(ar, ctrl_addr); 817 else 818 ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr); 819 820 ath10k_ce_watermark_intr_disable(ar, ctrl_addr); 821 822 ath10k_pci_sleep(ar); 823 } 824 825 int ath10k_ce_disable_interrupts(struct ath10k *ar) 826 { 827 int ce_id, ret; 828 829 ret = ath10k_pci_wake(ar); 830 if (ret) 831 return ret; 832 833 for (ce_id = 0; ce_id < CE_COUNT; ce_id++) { 834 u32 ctrl_addr = ath10k_ce_base_address(ce_id); 835 836 ath10k_ce_copy_complete_intr_disable(ar, ctrl_addr); 837 ath10k_ce_error_intr_disable(ar, ctrl_addr); 838 ath10k_ce_watermark_intr_disable(ar, ctrl_addr); 839 } 840 841 ath10k_pci_sleep(ar); 842 843 return 0; 844 } 845 846 void ath10k_ce_send_cb_register(struct ath10k_ce_pipe *ce_state, 847 void (*send_cb)(struct ath10k_ce_pipe *), 848 int disable_interrupts) 849 { 850 struct ath10k *ar = ce_state->ar; 851 struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); 852 853 spin_lock_bh(&ar_pci->ce_lock); 854 ce_state->send_cb = send_cb; 855 ath10k_ce_per_engine_handler_adjust(ce_state, disable_interrupts); 856 spin_unlock_bh(&ar_pci->ce_lock); 857 } 858 859 void ath10k_ce_recv_cb_register(struct ath10k_ce_pipe *ce_state, 860 void (*recv_cb)(struct ath10k_ce_pipe *)) 861 { 862 struct ath10k *ar = ce_state->ar; 863 struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); 864 865 spin_lock_bh(&ar_pci->ce_lock); 866 ce_state->recv_cb = recv_cb; 867 ath10k_ce_per_engine_handler_adjust(ce_state, 0); 868 spin_unlock_bh(&ar_pci->ce_lock); 869 } 870 871 static int ath10k_ce_init_src_ring(struct ath10k *ar, 872 unsigned int ce_id, 873 const struct ce_attr *attr) 874 { 875 struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); 876 struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id]; 877 struct ath10k_ce_ring *src_ring = ce_state->src_ring; 878 u32 nentries, ctrl_addr = ath10k_ce_base_address(ce_id); 879 880 nentries = roundup_pow_of_two(attr->src_nentries); 881 882 memset(src_ring->per_transfer_context, 0, 883 nentries * sizeof(*src_ring->per_transfer_context)); 884 885 src_ring->sw_index = ath10k_ce_src_ring_read_index_get(ar, ctrl_addr); 886 src_ring->sw_index &= src_ring->nentries_mask; 887 src_ring->hw_index = src_ring->sw_index; 888 889 src_ring->write_index = 890 ath10k_ce_src_ring_write_index_get(ar, ctrl_addr); 891 src_ring->write_index &= src_ring->nentries_mask; 892 893 ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr, 894 src_ring->base_addr_ce_space); 895 ath10k_ce_src_ring_size_set(ar, ctrl_addr, nentries); 896 ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, attr->src_sz_max); 897 ath10k_ce_src_ring_byte_swap_set(ar, ctrl_addr, 0); 898 ath10k_ce_src_ring_lowmark_set(ar, ctrl_addr, 0); 899 ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, nentries); 900 901 ath10k_dbg(ATH10K_DBG_BOOT, 902 "boot init ce src ring id %d entries %d base_addr %p\n", 903 ce_id, nentries, src_ring->base_addr_owner_space); 904 905 return 0; 906 } 907 908 static int ath10k_ce_init_dest_ring(struct ath10k *ar, 909 unsigned int ce_id, 910 const struct ce_attr *attr) 911 { 912 struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); 913 struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id]; 914 struct ath10k_ce_ring *dest_ring = ce_state->dest_ring; 915 u32 nentries, ctrl_addr = ath10k_ce_base_address(ce_id); 916 917 nentries = roundup_pow_of_two(attr->dest_nentries); 918 919 memset(dest_ring->per_transfer_context, 0, 920 nentries * sizeof(*dest_ring->per_transfer_context)); 921 922 dest_ring->sw_index = ath10k_ce_dest_ring_read_index_get(ar, ctrl_addr); 923 dest_ring->sw_index &= dest_ring->nentries_mask; 924 dest_ring->write_index = 925 ath10k_ce_dest_ring_write_index_get(ar, ctrl_addr); 926 dest_ring->write_index &= dest_ring->nentries_mask; 927 928 ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr, 929 dest_ring->base_addr_ce_space); 930 ath10k_ce_dest_ring_size_set(ar, ctrl_addr, nentries); 931 ath10k_ce_dest_ring_byte_swap_set(ar, ctrl_addr, 0); 932 ath10k_ce_dest_ring_lowmark_set(ar, ctrl_addr, 0); 933 ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, nentries); 934 935 ath10k_dbg(ATH10K_DBG_BOOT, 936 "boot ce dest ring id %d entries %d base_addr %p\n", 937 ce_id, nentries, dest_ring->base_addr_owner_space); 938 939 return 0; 940 } 941 942 static struct ath10k_ce_ring * 943 ath10k_ce_alloc_src_ring(struct ath10k *ar, unsigned int ce_id, 944 const struct ce_attr *attr) 945 { 946 struct ath10k_ce_ring *src_ring; 947 u32 nentries = attr->src_nentries; 948 dma_addr_t base_addr; 949 950 nentries = roundup_pow_of_two(nentries); 951 952 src_ring = kzalloc(sizeof(*src_ring) + 953 (nentries * 954 sizeof(*src_ring->per_transfer_context)), 955 GFP_KERNEL); 956 if (src_ring == NULL) 957 return ERR_PTR(-ENOMEM); 958 959 src_ring->nentries = nentries; 960 src_ring->nentries_mask = nentries - 1; 961 962 /* 963 * Legacy platforms that do not support cache 964 * coherent DMA are unsupported 965 */ 966 src_ring->base_addr_owner_space_unaligned = 967 dma_alloc_coherent(ar->dev, 968 (nentries * sizeof(struct ce_desc) + 969 CE_DESC_RING_ALIGN), 970 &base_addr, GFP_KERNEL); 971 if (!src_ring->base_addr_owner_space_unaligned) { 972 kfree(src_ring); 973 return ERR_PTR(-ENOMEM); 974 } 975 976 src_ring->base_addr_ce_space_unaligned = base_addr; 977 978 src_ring->base_addr_owner_space = PTR_ALIGN( 979 src_ring->base_addr_owner_space_unaligned, 980 CE_DESC_RING_ALIGN); 981 src_ring->base_addr_ce_space = ALIGN( 982 src_ring->base_addr_ce_space_unaligned, 983 CE_DESC_RING_ALIGN); 984 985 /* 986 * Also allocate a shadow src ring in regular 987 * mem to use for faster access. 988 */ 989 src_ring->shadow_base_unaligned = 990 kmalloc((nentries * sizeof(struct ce_desc) + 991 CE_DESC_RING_ALIGN), GFP_KERNEL); 992 if (!src_ring->shadow_base_unaligned) { 993 dma_free_coherent(ar->dev, 994 (nentries * sizeof(struct ce_desc) + 995 CE_DESC_RING_ALIGN), 996 src_ring->base_addr_owner_space, 997 src_ring->base_addr_ce_space); 998 kfree(src_ring); 999 return ERR_PTR(-ENOMEM); 1000 } 1001 1002 src_ring->shadow_base = PTR_ALIGN( 1003 src_ring->shadow_base_unaligned, 1004 CE_DESC_RING_ALIGN); 1005 1006 return src_ring; 1007 } 1008 1009 static struct ath10k_ce_ring * 1010 ath10k_ce_alloc_dest_ring(struct ath10k *ar, unsigned int ce_id, 1011 const struct ce_attr *attr) 1012 { 1013 struct ath10k_ce_ring *dest_ring; 1014 u32 nentries; 1015 dma_addr_t base_addr; 1016 1017 nentries = roundup_pow_of_two(attr->dest_nentries); 1018 1019 dest_ring = kzalloc(sizeof(*dest_ring) + 1020 (nentries * 1021 sizeof(*dest_ring->per_transfer_context)), 1022 GFP_KERNEL); 1023 if (dest_ring == NULL) 1024 return ERR_PTR(-ENOMEM); 1025 1026 dest_ring->nentries = nentries; 1027 dest_ring->nentries_mask = nentries - 1; 1028 1029 /* 1030 * Legacy platforms that do not support cache 1031 * coherent DMA are unsupported 1032 */ 1033 dest_ring->base_addr_owner_space_unaligned = 1034 dma_alloc_coherent(ar->dev, 1035 (nentries * sizeof(struct ce_desc) + 1036 CE_DESC_RING_ALIGN), 1037 &base_addr, GFP_KERNEL); 1038 if (!dest_ring->base_addr_owner_space_unaligned) { 1039 kfree(dest_ring); 1040 return ERR_PTR(-ENOMEM); 1041 } 1042 1043 dest_ring->base_addr_ce_space_unaligned = base_addr; 1044 1045 /* 1046 * Correctly initialize memory to 0 to prevent garbage 1047 * data crashing system when download firmware 1048 */ 1049 memset(dest_ring->base_addr_owner_space_unaligned, 0, 1050 nentries * sizeof(struct ce_desc) + CE_DESC_RING_ALIGN); 1051 1052 dest_ring->base_addr_owner_space = PTR_ALIGN( 1053 dest_ring->base_addr_owner_space_unaligned, 1054 CE_DESC_RING_ALIGN); 1055 dest_ring->base_addr_ce_space = ALIGN( 1056 dest_ring->base_addr_ce_space_unaligned, 1057 CE_DESC_RING_ALIGN); 1058 1059 return dest_ring; 1060 } 1061 1062 /* 1063 * Initialize a Copy Engine based on caller-supplied attributes. 1064 * This may be called once to initialize both source and destination 1065 * rings or it may be called twice for separate source and destination 1066 * initialization. It may be that only one side or the other is 1067 * initialized by software/firmware. 1068 */ 1069 int ath10k_ce_init_pipe(struct ath10k *ar, unsigned int ce_id, 1070 const struct ce_attr *attr) 1071 { 1072 struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); 1073 struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id]; 1074 int ret; 1075 1076 /* 1077 * Make sure there's enough CE ringbuffer entries for HTT TX to avoid 1078 * additional TX locking checks. 1079 * 1080 * For the lack of a better place do the check here. 1081 */ 1082 BUILD_BUG_ON(2*TARGET_NUM_MSDU_DESC > 1083 (CE_HTT_H2T_MSG_SRC_NENTRIES - 1)); 1084 BUILD_BUG_ON(2*TARGET_10X_NUM_MSDU_DESC > 1085 (CE_HTT_H2T_MSG_SRC_NENTRIES - 1)); 1086 1087 ret = ath10k_pci_wake(ar); 1088 if (ret) 1089 return ret; 1090 1091 spin_lock_bh(&ar_pci->ce_lock); 1092 ce_state->ar = ar; 1093 ce_state->id = ce_id; 1094 ce_state->ctrl_addr = ath10k_ce_base_address(ce_id); 1095 ce_state->attr_flags = attr->flags; 1096 ce_state->src_sz_max = attr->src_sz_max; 1097 spin_unlock_bh(&ar_pci->ce_lock); 1098 1099 if (attr->src_nentries) { 1100 ret = ath10k_ce_init_src_ring(ar, ce_id, attr); 1101 if (ret) { 1102 ath10k_err("Failed to initialize CE src ring for ID: %d (%d)\n", 1103 ce_id, ret); 1104 goto out; 1105 } 1106 } 1107 1108 if (attr->dest_nentries) { 1109 ret = ath10k_ce_init_dest_ring(ar, ce_id, attr); 1110 if (ret) { 1111 ath10k_err("Failed to initialize CE dest ring for ID: %d (%d)\n", 1112 ce_id, ret); 1113 goto out; 1114 } 1115 } 1116 1117 out: 1118 ath10k_pci_sleep(ar); 1119 return ret; 1120 } 1121 1122 static void ath10k_ce_deinit_src_ring(struct ath10k *ar, unsigned int ce_id) 1123 { 1124 u32 ctrl_addr = ath10k_ce_base_address(ce_id); 1125 1126 ath10k_ce_src_ring_base_addr_set(ar, ctrl_addr, 0); 1127 ath10k_ce_src_ring_size_set(ar, ctrl_addr, 0); 1128 ath10k_ce_src_ring_dmax_set(ar, ctrl_addr, 0); 1129 ath10k_ce_src_ring_highmark_set(ar, ctrl_addr, 0); 1130 } 1131 1132 static void ath10k_ce_deinit_dest_ring(struct ath10k *ar, unsigned int ce_id) 1133 { 1134 u32 ctrl_addr = ath10k_ce_base_address(ce_id); 1135 1136 ath10k_ce_dest_ring_base_addr_set(ar, ctrl_addr, 0); 1137 ath10k_ce_dest_ring_size_set(ar, ctrl_addr, 0); 1138 ath10k_ce_dest_ring_highmark_set(ar, ctrl_addr, 0); 1139 } 1140 1141 void ath10k_ce_deinit_pipe(struct ath10k *ar, unsigned int ce_id) 1142 { 1143 int ret; 1144 1145 ret = ath10k_pci_wake(ar); 1146 if (ret) 1147 return; 1148 1149 ath10k_ce_deinit_src_ring(ar, ce_id); 1150 ath10k_ce_deinit_dest_ring(ar, ce_id); 1151 1152 ath10k_pci_sleep(ar); 1153 } 1154 1155 int ath10k_ce_alloc_pipe(struct ath10k *ar, int ce_id, 1156 const struct ce_attr *attr) 1157 { 1158 struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); 1159 struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id]; 1160 int ret; 1161 1162 if (attr->src_nentries) { 1163 ce_state->src_ring = ath10k_ce_alloc_src_ring(ar, ce_id, attr); 1164 if (IS_ERR(ce_state->src_ring)) { 1165 ret = PTR_ERR(ce_state->src_ring); 1166 ath10k_err("failed to allocate copy engine source ring %d: %d\n", 1167 ce_id, ret); 1168 ce_state->src_ring = NULL; 1169 return ret; 1170 } 1171 } 1172 1173 if (attr->dest_nentries) { 1174 ce_state->dest_ring = ath10k_ce_alloc_dest_ring(ar, ce_id, 1175 attr); 1176 if (IS_ERR(ce_state->dest_ring)) { 1177 ret = PTR_ERR(ce_state->dest_ring); 1178 ath10k_err("failed to allocate copy engine destination ring %d: %d\n", 1179 ce_id, ret); 1180 ce_state->dest_ring = NULL; 1181 return ret; 1182 } 1183 } 1184 1185 return 0; 1186 } 1187 1188 void ath10k_ce_free_pipe(struct ath10k *ar, int ce_id) 1189 { 1190 struct ath10k_pci *ar_pci = ath10k_pci_priv(ar); 1191 struct ath10k_ce_pipe *ce_state = &ar_pci->ce_states[ce_id]; 1192 1193 if (ce_state->src_ring) { 1194 kfree(ce_state->src_ring->shadow_base_unaligned); 1195 dma_free_coherent(ar->dev, 1196 (ce_state->src_ring->nentries * 1197 sizeof(struct ce_desc) + 1198 CE_DESC_RING_ALIGN), 1199 ce_state->src_ring->base_addr_owner_space, 1200 ce_state->src_ring->base_addr_ce_space); 1201 kfree(ce_state->src_ring); 1202 } 1203 1204 if (ce_state->dest_ring) { 1205 dma_free_coherent(ar->dev, 1206 (ce_state->dest_ring->nentries * 1207 sizeof(struct ce_desc) + 1208 CE_DESC_RING_ALIGN), 1209 ce_state->dest_ring->base_addr_owner_space, 1210 ce_state->dest_ring->base_addr_ce_space); 1211 kfree(ce_state->dest_ring); 1212 } 1213 1214 ce_state->src_ring = NULL; 1215 ce_state->dest_ring = NULL; 1216 } 1217