1 // SPDX-License-Identifier: GPL-2.0 OR BSD-3-Clause
2 /*
3 * Copyright (C) 2003-2014, 2018-2021, 2023-2025 Intel Corporation
4 * Copyright (C) 2013-2015 Intel Mobile Communications GmbH
5 * Copyright (C) 2016-2017 Intel Deutschland GmbH
6 */
7 #include <linux/etherdevice.h>
8 #include <linux/ieee80211.h>
9 #include <linux/dmapool.h>
10 #include <linux/slab.h>
11 #include <linux/sched.h>
12 #include <linux/tcp.h>
13 #ifdef CONFIG_INET
14 #include <net/ip6_checksum.h>
15 #include <net/tso.h>
16 #endif
17 #if defined(__FreeBSD__)
18 #include <net/mac80211.h>
19 #endif
20
21 #include "fw/api/commands.h"
22 #include "fw/api/datapath.h"
23 #include "fw/api/debug.h"
24 #include "iwl-fh.h"
25 #include "iwl-debug.h"
26 #include "iwl-csr.h"
27 #include "iwl-prph.h"
28 #include "iwl-io.h"
29 #include "iwl-scd.h"
30 #include "iwl-op-mode.h"
31 #include "internal.h"
32 #include "fw/api/tx.h"
33 #include "fw/dbg.h"
34 #include "pcie/utils.h"
35
36 /*************** DMA-QUEUE-GENERAL-FUNCTIONS *****
37 * DMA services
38 *
39 * Theory of operation
40 *
41 * A Tx or Rx queue resides in host DRAM and consists of a circular buffer
42 * of buffer descriptors, each of which points to one or more data buffers for
43 * the device to read from or fill. Driver and device exchange the status of
44 * each queue via "read" and "write" pointers. The driver keeps a minimum of 2
45 * empty entries in each circular buffer, to protect against confusing the
46 * empty and full queue states.
47 *
48 * The device reads or writes the data in the queues via the device's several
49 * DMA/FIFO channels. Each queue is mapped to a single DMA channel.
50 *
51 * For a Tx queue, there are low mark and high mark limits. If, after queuing
52 * the packet for Tx, free space becomes < low mark, the Tx queue is stopped.
53 * When reclaiming packets (on the 'tx done' IRQ), if free space becomes
54 * > high mark, the Tx queue is resumed.
55 *
56 ***************************************************/
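
/*
 * Roughly speaking, the usable space in a queue is the distance from the
 * write pointer back to the read pointer, modulo the queue size, minus the
 * reserved slack described above; iwl_txq_space() encapsulates that
 * computation for the code that follows.
 */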
57
58
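/*
 * iwl_pcie_alloc_dma_ptr - allocate a coherent DMA buffer and record its
 * CPU address, bus address and size in @ptr; undone by
 * iwl_pcie_free_dma_ptr()
 */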
59 int iwl_pcie_alloc_dma_ptr(struct iwl_trans *trans,
60 struct iwl_dma_ptr *ptr, size_t size)
61 {
62 if (WARN_ON(ptr->addr))
63 return -EINVAL;
64
65 ptr->addr = dma_alloc_coherent(trans->dev, size,
66 &ptr->dma, GFP_KERNEL);
67 if (!ptr->addr)
68 return -ENOMEM;
69 ptr->size = size;
70 return 0;
71 }
72
73 void iwl_pcie_free_dma_ptr(struct iwl_trans *trans, struct iwl_dma_ptr *ptr)
74 {
75 if (unlikely(!ptr->addr))
76 return;
77
78 dma_free_coherent(trans->dev, ptr->size, ptr->addr, ptr->dma);
79 memset(ptr, 0, sizeof(*ptr));
80 }
81
82 /*
83 * iwl_pcie_txq_inc_wr_ptr - Send new write index to hardware
84 */
85 static void iwl_pcie_txq_inc_wr_ptr(struct iwl_trans *trans,
86 struct iwl_txq *txq)
87 {
88 u32 reg = 0;
89 int txq_id = txq->id;
90
91 lockdep_assert_held(&txq->lock);
92
93 /*
94 * explicitly wake up the NIC if:
95 * 1. shadow registers aren't enabled
96 * 2. NIC is woken up for CMD regardless of shadow outside this function
97 * 3. there is a chance that the NIC is asleep
98 */
99 if (!trans->mac_cfg->base->shadow_reg_enable &&
100 txq_id != trans->conf.cmd_queue &&
101 test_bit(STATUS_TPOWER_PMI, &trans->status)) {
102 /*
103 * wake up nic if it's powered down ...
104 * uCode will wake up, and interrupt us again, so next
105 * time we'll skip this part.
106 */
107 reg = iwl_read32(trans, CSR_UCODE_DRV_GP1);
108
109 if (reg & CSR_UCODE_DRV_GP1_BIT_MAC_SLEEP) {
110 IWL_DEBUG_INFO(trans, "Tx queue %d requesting wakeup, GP1 = 0x%x\n",
111 txq_id, reg);
112 iwl_set_bit(trans, CSR_GP_CNTRL,
113 CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
114 txq->need_update = true;
115 return;
116 }
117 }
118
119 /*
120 * if not in power-save mode, uCode will never sleep when we're
121 * trying to tx (during RFKILL, we're not trying to tx).
122 */
123 IWL_DEBUG_TX(trans, "Q:%d WR: 0x%x\n", txq_id, txq->write_ptr);
124 if (!txq->block)
125 iwl_write32(trans, HBUS_TARG_WRPTR,
126 txq->write_ptr | (txq_id << 8));
127 }
128
129 void iwl_pcie_txq_check_wrptrs(struct iwl_trans *trans)
130 {
131 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
132 int i;
133
134 for (i = 0; i < trans->mac_cfg->base->num_of_queues; i++) {
135 struct iwl_txq *txq = trans_pcie->txqs.txq[i];
136
137 if (!test_bit(i, trans_pcie->txqs.queue_used))
138 continue;
139
140 spin_lock_bh(&txq->lock);
141 if (txq->need_update) {
142 iwl_pcie_txq_inc_wr_ptr(trans, txq);
143 txq->need_update = false;
144 }
145 spin_unlock_bh(&txq->lock);
146 }
147 }
148
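/*
 * A gen1 TFD transfer buffer (TB) entry packs the low 32 address bits into
 * tb->lo and a 16-bit word holding the upper 4 address bits (bits 0-3) plus
 * the 12-bit length (bits 4-15) into tb->hi_n_len - hence the len << 4 below.
 */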
149 static inline void iwl_pcie_gen1_tfd_set_tb(struct iwl_tfd *tfd,
150 u8 idx, dma_addr_t addr, u16 len)
151 {
152 struct iwl_tfd_tb *tb = &tfd->tbs[idx];
153 u16 hi_n_len = len << 4;
154
155 put_unaligned_le32(addr, &tb->lo);
156 hi_n_len |= iwl_get_dma_hi_addr(addr);
157
158 tb->hi_n_len = cpu_to_le16(hi_n_len);
159
160 tfd->num_tbs = idx + 1;
161 }
162
163 static inline u8 iwl_txq_gen1_tfd_get_num_tbs(struct iwl_tfd *tfd)
164 {
165 return tfd->num_tbs & 0x1f;
166 }
167
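/*
 * Append one transfer buffer to the TFD at the queue's current write pointer;
 * returns the index of the TB that was added, or a negative error if the TFD
 * is already full or the address violates the DMA address mask.
 */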
168 static int iwl_pcie_txq_build_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
169 dma_addr_t addr, u16 len, bool reset)
170 {
171 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
172 void *tfd;
173 u32 num_tbs;
174
175 tfd = (u8 *)txq->tfds + trans_pcie->txqs.tfd.size * txq->write_ptr;
176
177 if (reset)
178 memset(tfd, 0, trans_pcie->txqs.tfd.size);
179
180 num_tbs = iwl_txq_gen1_tfd_get_num_tbs(tfd);
181
182 /* Each TFD can point to a maximum of max_tbs Tx buffers */
183 if (num_tbs >= trans_pcie->txqs.tfd.max_tbs) {
184 IWL_ERR(trans, "Error can not send more than %d chunks\n",
185 trans_pcie->txqs.tfd.max_tbs);
186 return -EINVAL;
187 }
188
189 if (WARN(addr & ~IWL_TX_DMA_MASK,
190 "Unaligned address = %llx\n", (unsigned long long)addr))
191 return -EINVAL;
192
193 iwl_pcie_gen1_tfd_set_tb(tfd, num_tbs, addr, len);
194
195 return num_tbs;
196 }
197
198 static void iwl_pcie_clear_cmd_in_flight(struct iwl_trans *trans)
199 {
200 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
201
202 if (!trans->mac_cfg->base->apmg_wake_up_wa)
203 return;
204
205 spin_lock(&trans_pcie->reg_lock);
206
207 if (WARN_ON(!trans_pcie->cmd_hold_nic_awake)) {
208 spin_unlock(&trans_pcie->reg_lock);
209 return;
210 }
211
212 trans_pcie->cmd_hold_nic_awake = false;
213 iwl_trans_clear_bit(trans, CSR_GP_CNTRL,
214 CSR_GP_CNTRL_REG_FLAG_MAC_ACCESS_REQ);
215 spin_unlock(&trans_pcie->reg_lock);
216 }
217
218 static void iwl_pcie_free_and_unmap_tso_page(struct iwl_trans *trans,
219 struct page *page)
220 {
221 struct iwl_tso_page_info *info = IWL_TSO_PAGE_INFO(page_address(page));
222
223 /* Decrease internal use count and unmap/free page if needed */
224 if (refcount_dec_and_test(&info->use_count)) {
225 dma_unmap_page(trans->dev, info->dma_addr, PAGE_SIZE,
226 DMA_TO_DEVICE);
227
228 __free_page(page);
229 }
230 }
231
232 void iwl_pcie_free_tso_pages(struct iwl_trans *trans, struct sk_buff *skb,
233 struct iwl_cmd_meta *cmd_meta)
234 {
235 struct page **page_ptr;
236 struct page *next;
237
238 page_ptr = (void *)((u8 *)skb->cb + trans->conf.cb_data_offs);
239 next = *page_ptr;
240 *page_ptr = NULL;
241
242 while (next) {
243 struct iwl_tso_page_info *info;
244 struct page *tmp = next;
245
246 info = IWL_TSO_PAGE_INFO(page_address(next));
247 next = info->next;
248
249 /* Unmap the scatter gather list that is on the last page */
250 if (!next && cmd_meta->sg_offset) {
251 struct sg_table *sgt;
252
253 sgt = (void *)((u8 *)page_address(tmp) +
254 cmd_meta->sg_offset);
255
256 dma_unmap_sgtable(trans->dev, sgt, DMA_TO_DEVICE, 0);
257 }
258
259 iwl_pcie_free_and_unmap_tso_page(trans, tmp);
260 }
261 }
262
263 static inline dma_addr_t
264 iwl_txq_gen1_tfd_tb_get_addr(struct iwl_tfd *tfd, u8 idx)
265 {
266 struct iwl_tfd_tb *tb = &tfd->tbs[idx];
267 dma_addr_t addr;
268 dma_addr_t hi_len;
269
270 addr = get_unaligned_le32(&tb->lo);
271
272 if (sizeof(dma_addr_t) <= sizeof(u32))
273 return addr;
274
275 hi_len = le16_to_cpu(tb->hi_n_len) & 0xF;
276
277 /*
278 * shift by 16 twice to avoid warnings on 32-bit
279 * (where this code never runs anyway due to the
280 * if statement above)
281 */
282 return addr | ((hi_len << 16) << 16);
283 }
284
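/*
 * Mark a TFD as invalid by pointing its first TB at the transport's
 * permanently mapped dummy "invalid TX command" buffer, so the hardware does
 * not fetch from a stale or freed address if it ever reads this TFD.
 */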
285 static void iwl_txq_set_tfd_invalid_gen1(struct iwl_trans *trans,
286 struct iwl_tfd *tfd)
287 {
288 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
289
290 tfd->num_tbs = 0;
291
292 iwl_pcie_gen1_tfd_set_tb(tfd, 0, trans_pcie->invalid_tx_cmd.dma,
293 trans_pcie->invalid_tx_cmd.size);
294 }
295
296 static void iwl_txq_gen1_tfd_unmap(struct iwl_trans *trans,
297 struct iwl_cmd_meta *meta,
298 struct iwl_txq *txq, int index)
299 {
300 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
301 int i, num_tbs;
302 struct iwl_tfd *tfd = iwl_txq_get_tfd(trans, txq, index);
303
304 /* Sanity check on number of chunks */
305 num_tbs = iwl_txq_gen1_tfd_get_num_tbs(tfd);
306
307 if (num_tbs > trans_pcie->txqs.tfd.max_tbs) {
308 IWL_ERR(trans, "Too many chunks: %i\n", num_tbs);
309 /* @todo issue fatal error, it is quite a serious situation */
310 return;
311 }
312
313 /* TB1 is mapped directly, the rest is the TSO page and SG list. */
314 if (meta->sg_offset)
315 num_tbs = 2;
316
317 /* first TB is never freed - it's the bidirectional DMA data */
318
319 for (i = 1; i < num_tbs; i++) {
320 if (meta->tbs & BIT(i))
321 dma_unmap_page(trans->dev,
322 iwl_txq_gen1_tfd_tb_get_addr(tfd, i),
323 iwl_txq_gen1_tfd_tb_get_len(trans,
324 tfd, i),
325 DMA_TO_DEVICE);
326 else
327 dma_unmap_single(trans->dev,
328 iwl_txq_gen1_tfd_tb_get_addr(tfd, i),
329 iwl_txq_gen1_tfd_tb_get_len(trans,
330 tfd, i),
331 DMA_TO_DEVICE);
332 }
333
334 meta->tbs = 0;
335
336 iwl_txq_set_tfd_invalid_gen1(trans, tfd);
337 }
338
339 /**
340 * iwl_txq_free_tfd - Free all chunks referenced by TFD [txq->q.read_ptr]
341 * @trans: transport private data
342 * @txq: tx queue
343 * @read_ptr: the TXQ read_ptr to free
344 *
345 * Does NOT advance any TFD circular buffer read/write indexes
346 * Does NOT free the TFD itself (which is within circular buffer)
347 */
348 static void iwl_txq_free_tfd(struct iwl_trans *trans, struct iwl_txq *txq,
349 int read_ptr)
350 {
351 /* rd_ptr is bounded by TFD_QUEUE_SIZE_MAX and
352 * idx is bounded by n_window
353 */
354 int idx = iwl_txq_get_cmd_index(txq, read_ptr);
355 struct sk_buff *skb;
356
357 lockdep_assert_held(&txq->reclaim_lock);
358
359 if (!txq->entries)
360 return;
361
362 /* We have only q->n_window txq->entries, but we use
363 * TFD_QUEUE_SIZE_MAX tfds
364 */
365 if (trans->mac_cfg->gen2)
366 iwl_txq_gen2_tfd_unmap(trans, &txq->entries[idx].meta,
367 iwl_txq_get_tfd(trans, txq, read_ptr));
368 else
369 iwl_txq_gen1_tfd_unmap(trans, &txq->entries[idx].meta,
370 txq, read_ptr);
371
372 /* free SKB */
373 skb = txq->entries[idx].skb;
374
375 /* Can be called from irqs-disabled context.
376 * If skb is not NULL, it means that the whole queue is being
377 * freed and that the queue is not empty - free the skb.
378 */
379 if (skb) {
380 iwl_op_mode_free_skb(trans->op_mode, skb);
381 txq->entries[idx].skb = NULL;
382 }
383 }
384
385 /*
386 * iwl_pcie_txq_unmap - Unmap any remaining DMA mappings and free skb's
387 */
388 static void iwl_pcie_txq_unmap(struct iwl_trans *trans, int txq_id)
389 {
390 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
391 struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
392
393 if (!txq) {
394 IWL_ERR(trans, "Trying to free a queue that wasn't allocated?\n");
395 return;
396 }
397
398 spin_lock_bh(&txq->reclaim_lock);
399 spin_lock(&txq->lock);
400 while (txq->write_ptr != txq->read_ptr) {
401 IWL_DEBUG_TX_REPLY(trans, "Q %d Free %d\n",
402 txq_id, txq->read_ptr);
403
404 if (txq_id != trans->conf.cmd_queue) {
405 struct sk_buff *skb = txq->entries[txq->read_ptr].skb;
406 struct iwl_cmd_meta *cmd_meta =
407 &txq->entries[txq->read_ptr].meta;
408
409 if (WARN_ON_ONCE(!skb))
410 continue;
411
412 iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
413 }
414 iwl_txq_free_tfd(trans, txq, txq->read_ptr);
415 txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr);
416
417 if (txq->read_ptr == txq->write_ptr &&
418 txq_id == trans->conf.cmd_queue)
419 iwl_pcie_clear_cmd_in_flight(trans);
420 }
421
422 while (!skb_queue_empty(&txq->overflow_q)) {
423 struct sk_buff *skb = __skb_dequeue(&txq->overflow_q);
424
425 iwl_op_mode_free_skb(trans->op_mode, skb);
426 }
427
428 spin_unlock(&txq->lock);
429 spin_unlock_bh(&txq->reclaim_lock);
430
431 /* just in case - this queue may have been stopped */
432 iwl_trans_pcie_wake_queue(trans, txq);
433 }
434
435 /*
436 * iwl_pcie_txq_free - Deallocate DMA queue.
437 * @txq: Transmit queue to deallocate.
438 *
439 * Empty queue by removing and destroying all BD's.
440 * Free all buffers.
441 * 0-fill, but do not free "txq" descriptor structure.
442 */
443 static void iwl_pcie_txq_free(struct iwl_trans *trans, int txq_id)
444 {
445 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
446 struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
447 struct device *dev = trans->dev;
448 int i;
449
450 if (WARN_ON(!txq))
451 return;
452
453 iwl_pcie_txq_unmap(trans, txq_id);
454
455 /* De-alloc array of command/tx buffers */
456 if (txq_id == trans->conf.cmd_queue)
457 for (i = 0; i < txq->n_window; i++) {
458 kfree_sensitive(txq->entries[i].cmd);
459 kfree_sensitive(txq->entries[i].free_buf);
460 }
461
462 /* De-alloc circular buffer of TFDs */
463 if (txq->tfds) {
464 dma_free_coherent(dev,
465 trans_pcie->txqs.tfd.size *
466 trans->mac_cfg->base->max_tfd_queue_size,
467 txq->tfds, txq->dma_addr);
468 txq->dma_addr = 0;
469 txq->tfds = NULL;
470
471 dma_free_coherent(dev,
472 sizeof(*txq->first_tb_bufs) * txq->n_window,
473 txq->first_tb_bufs, txq->first_tb_dma);
474 }
475
476 kfree(txq->entries);
477 txq->entries = NULL;
478
479 timer_delete_sync(&txq->stuck_timer);
480
481 /* 0-fill queue descriptor structure */
482 memset(txq, 0, sizeof(*txq));
483 }
484
485 void iwl_pcie_tx_start(struct iwl_trans *trans)
486 {
487 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
488 int nq = trans->mac_cfg->base->num_of_queues;
489 int chan;
490 u32 reg_val;
491 int clear_dwords = (SCD_TRANS_TBL_OFFSET_QUEUE(nq) -
492 SCD_CONTEXT_MEM_LOWER_BOUND) / sizeof(u32);
493
494 /* make sure all queues are not stopped/used */
495 memset(trans_pcie->txqs.queue_stopped, 0,
496 sizeof(trans_pcie->txqs.queue_stopped));
497 memset(trans_pcie->txqs.queue_used, 0,
498 sizeof(trans_pcie->txqs.queue_used));
499
500 trans_pcie->scd_base_addr =
501 iwl_read_prph(trans, SCD_SRAM_BASE_ADDR);
502
503 /* reset context data, TX status and translation data */
504 iwl_trans_write_mem(trans, trans_pcie->scd_base_addr +
505 SCD_CONTEXT_MEM_LOWER_BOUND,
506 NULL, clear_dwords);
507
508 iwl_write_prph(trans, SCD_DRAM_BASE_ADDR,
509 trans_pcie->txqs.scd_bc_tbls.dma >> 10);
510
511 /* The chain extension of the SCD doesn't work well. This feature is
512 * enabled by default by the HW, so we need to disable it manually.
513 */
514 if (trans->mac_cfg->base->scd_chain_ext_wa)
515 iwl_write_prph(trans, SCD_CHAINEXT_EN, 0);
516
517 iwl_trans_ac_txq_enable(trans, trans->conf.cmd_queue,
518 trans->conf.cmd_fifo,
519 IWL_DEF_WD_TIMEOUT);
520
521 /* Activate all Tx DMA/FIFO channels */
522 iwl_scd_activate_fifos(trans);
523
524 /* Enable DMA channel */
525 for (chan = 0; chan < FH_TCSR_CHNL_NUM; chan++)
526 iwl_write_direct32(trans, FH_TCSR_CHNL_TX_CONFIG_REG(chan),
527 FH_TCSR_TX_CONFIG_REG_VAL_DMA_CHNL_ENABLE |
528 FH_TCSR_TX_CONFIG_REG_VAL_DMA_CREDIT_ENABLE);
529
530 /* Update FH chicken bits */
531 reg_val = iwl_read_direct32(trans, FH_TX_CHICKEN_BITS_REG);
532 iwl_write_direct32(trans, FH_TX_CHICKEN_BITS_REG,
533 reg_val | FH_TX_CHICKEN_BITS_SCD_AUTO_RETRY_EN);
534
535 /* Enable L1-Active */
536 if (trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_8000)
537 iwl_clear_bits_prph(trans, APMG_PCIDEV_STT_REG,
538 APMG_PCIDEV_STT_VAL_L1_ACT_DIS);
539 }
540
541 void iwl_trans_pcie_tx_reset(struct iwl_trans *trans)
542 {
543 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
544 int txq_id;
545
546 /*
547 * we should never get here in gen2 trans mode; return early to avoid
548 * having invalid accesses
549 */
550 if (WARN_ON_ONCE(trans->mac_cfg->gen2))
551 return;
552
553 for (txq_id = 0; txq_id < trans->mac_cfg->base->num_of_queues;
554 txq_id++) {
555 struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
556 if (trans->mac_cfg->gen2)
557 iwl_write_direct64(trans,
558 FH_MEM_CBBC_QUEUE(trans, txq_id),
559 txq->dma_addr);
560 else
561 iwl_write_direct32(trans,
562 FH_MEM_CBBC_QUEUE(trans, txq_id),
563 txq->dma_addr >> 8);
564 iwl_pcie_txq_unmap(trans, txq_id);
565 txq->read_ptr = 0;
566 txq->write_ptr = 0;
567 }
568
569 /* Tell NIC where to find the "keep warm" buffer */
570 iwl_write_direct32(trans, FH_KW_MEM_ADDR_REG,
571 trans_pcie->kw.dma >> 4);
572
573 /*
574 * Send 0 as the scd_base_addr since the device may have been reset
575 * while we were in WoWLAN in which case SCD_SRAM_BASE_ADDR will
576 * contain garbage.
577 */
578 iwl_pcie_tx_start(trans);
579 }
580
581 static void iwl_pcie_tx_stop_fh(struct iwl_trans *trans)
582 {
583 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
584 int ch, ret;
585 u32 mask = 0;
586
587 spin_lock_bh(&trans_pcie->irq_lock);
588
589 if (!iwl_trans_grab_nic_access(trans))
590 goto out;
591
592 /* Stop each Tx DMA channel */
593 for (ch = 0; ch < FH_TCSR_CHNL_NUM; ch++) {
594 iwl_write32(trans, FH_TCSR_CHNL_TX_CONFIG_REG(ch), 0x0);
595 mask |= FH_TSSR_TX_STATUS_REG_MSK_CHNL_IDLE(ch);
596 }
597
598 /* Wait for DMA channels to be idle */
599 ret = iwl_poll_bits(trans, FH_TSSR_TX_STATUS_REG, mask, 5000);
600 if (ret)
601 IWL_ERR(trans,
602 "Failing on timeout while stopping DMA channel %d [0x%08x]\n",
603 ch, iwl_read32(trans, FH_TSSR_TX_STATUS_REG));
604
605 iwl_trans_release_nic_access(trans);
606
607 out:
608 spin_unlock_bh(&trans_pcie->irq_lock);
609 }
610
611 /*
612 * iwl_pcie_tx_stop - Stop all Tx DMA channels
613 */
614 int iwl_pcie_tx_stop(struct iwl_trans *trans)
615 {
616 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
617 int txq_id;
618
619 /* Turn off all Tx DMA fifos */
620 iwl_scd_deactivate_fifos(trans);
621
622 /* Turn off all Tx DMA channels */
623 iwl_pcie_tx_stop_fh(trans);
624
625 /*
626 * This function can be called before the op_mode disabled the
627 * queues. This happens when we have an rfkill interrupt.
628 * Since we stop Tx altogether - mark the queues as stopped.
629 */
630 memset(trans_pcie->txqs.queue_stopped, 0,
631 sizeof(trans_pcie->txqs.queue_stopped));
632 memset(trans_pcie->txqs.queue_used, 0,
633 sizeof(trans_pcie->txqs.queue_used));
634
635 /* This can happen: start_hw, stop_device */
636 if (!trans_pcie->txq_memory)
637 return 0;
638
639 /* Unmap DMA from host system and free skb's */
640 for (txq_id = 0; txq_id < trans->mac_cfg->base->num_of_queues;
641 txq_id++)
642 iwl_pcie_txq_unmap(trans, txq_id);
643
644 return 0;
645 }
646
647 /*
648 * iwl_trans_tx_free - Free TXQ Context
649 *
650 * Destroy all TX DMA queues and structures
651 */
652 void iwl_pcie_tx_free(struct iwl_trans *trans)
653 {
654 int txq_id;
655 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
656
657 memset(trans_pcie->txqs.queue_used, 0,
658 sizeof(trans_pcie->txqs.queue_used));
659
660 /* Tx queues */
661 if (trans_pcie->txq_memory) {
662 for (txq_id = 0;
663 txq_id < trans->mac_cfg->base->num_of_queues;
664 txq_id++) {
665 iwl_pcie_txq_free(trans, txq_id);
666 trans_pcie->txqs.txq[txq_id] = NULL;
667 }
668 }
669
670 kfree(trans_pcie->txq_memory);
671 trans_pcie->txq_memory = NULL;
672
673 iwl_pcie_free_dma_ptr(trans, &trans_pcie->kw);
674
675 iwl_pcie_free_dma_ptr(trans, &trans_pcie->txqs.scd_bc_tbls);
676 }
677
678 void iwl_txq_log_scd_error(struct iwl_trans *trans, struct iwl_txq *txq)
679 {
680 u32 txq_id = txq->id;
681 u32 status;
682 bool active;
683 u8 fifo;
684
685 if (trans->mac_cfg->gen2) {
686 IWL_ERR(trans, "Queue %d is stuck %d %d\n", txq_id,
687 txq->read_ptr, txq->write_ptr);
688 /* TODO: access new SCD registers and dump them */
689 return;
690 }
691
692 status = iwl_read_prph(trans, SCD_QUEUE_STATUS_BITS(txq_id));
693 fifo = (status >> SCD_QUEUE_STTS_REG_POS_TXF) & 0x7;
694 active = !!(status & BIT(SCD_QUEUE_STTS_REG_POS_ACTIVE));
695
696 IWL_ERR(trans,
697 "Queue %d is %sactive on fifo %d and stuck for %u ms. SW [%d, %d] HW [%d, %d] FH TRB=0x0%x\n",
698 txq_id, active ? "" : "in", fifo,
699 jiffies_to_msecs(txq->wd_timeout),
700 txq->read_ptr, txq->write_ptr,
701 iwl_read_prph(trans, SCD_QUEUE_RDPTR(txq_id)) &
702 (trans->mac_cfg->base->max_tfd_queue_size - 1),
703 iwl_read_prph(trans, SCD_QUEUE_WRPTR(txq_id)) &
704 (trans->mac_cfg->base->max_tfd_queue_size - 1),
705 iwl_read_direct32(trans, FH_TX_TRB_REG(fifo)));
706 }
707
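/*
 * The per-queue watchdog fired while the queue still had pending entries:
 * log the scheduler state and force an NMI so the firmware produces error
 * data for debugging.
 */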
708 static void iwl_txq_stuck_timer(struct timer_list *t)
709 {
710 struct iwl_txq *txq = timer_container_of(txq, t, stuck_timer);
711 struct iwl_trans *trans = txq->trans;
712
713 spin_lock(&txq->lock);
714 /* check if triggered erroneously */
715 if (txq->read_ptr == txq->write_ptr) {
716 spin_unlock(&txq->lock);
717 return;
718 }
719 spin_unlock(&txq->lock);
720
721 iwl_txq_log_scd_error(trans, txq);
722
723 iwl_force_nmi(trans);
724 }
725
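/*
 * iwl_pcie_txq_alloc - allocate the TFD circular buffer, the bi-directional
 * first-TB buffers and the per-entry bookkeeping for one Tx queue; the
 * hardware itself is not programmed here.
 */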
726 int iwl_pcie_txq_alloc(struct iwl_trans *trans, struct iwl_txq *txq,
727 int slots_num, bool cmd_queue)
728 {
729 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
730 size_t num_entries = trans->mac_cfg->gen2 ?
731 slots_num : trans->mac_cfg->base->max_tfd_queue_size;
732 size_t tfd_sz;
733 size_t tb0_buf_sz;
734 int i;
735
736 if (WARN_ONCE(slots_num <= 0, "Invalid slots num:%d\n", slots_num))
737 return -EINVAL;
738
739 if (WARN_ON(txq->entries || txq->tfds))
740 return -EINVAL;
741
742 tfd_sz = trans_pcie->txqs.tfd.size * num_entries;
743
744 timer_setup(&txq->stuck_timer, iwl_txq_stuck_timer, 0);
745 txq->trans = trans;
746
747 txq->n_window = slots_num;
748
749 txq->entries = kcalloc(slots_num,
750 sizeof(struct iwl_pcie_txq_entry),
751 GFP_KERNEL);
752
753 if (!txq->entries)
754 goto error;
755
756 if (cmd_queue)
757 for (i = 0; i < slots_num; i++) {
758 txq->entries[i].cmd =
759 kmalloc(sizeof(struct iwl_device_cmd),
760 GFP_KERNEL);
761 if (!txq->entries[i].cmd)
762 goto error;
763 }
764
765 /* Circular buffer of transmit frame descriptors (TFDs),
766 * shared with device
767 */
768 txq->tfds = dma_alloc_coherent(trans->dev, tfd_sz,
769 &txq->dma_addr, GFP_KERNEL);
770 if (!txq->tfds)
771 goto error;
772
773 BUILD_BUG_ON(sizeof(*txq->first_tb_bufs) != IWL_FIRST_TB_SIZE_ALIGN);
774
775 tb0_buf_sz = sizeof(*txq->first_tb_bufs) * slots_num;
776
777 txq->first_tb_bufs = dma_alloc_coherent(trans->dev, tb0_buf_sz,
778 &txq->first_tb_dma,
779 GFP_KERNEL);
780 if (!txq->first_tb_bufs)
781 goto err_free_tfds;
782
783 for (i = 0; i < num_entries; i++) {
784 void *tfd = iwl_txq_get_tfd(trans, txq, i);
785
786 if (trans->mac_cfg->gen2)
787 iwl_txq_set_tfd_invalid_gen2(trans, tfd);
788 else
789 iwl_txq_set_tfd_invalid_gen1(trans, tfd);
790 }
791
792 return 0;
793 err_free_tfds:
794 dma_free_coherent(trans->dev, tfd_sz, txq->tfds, txq->dma_addr);
795 txq->tfds = NULL;
796 error:
797 if (txq->entries && cmd_queue)
798 for (i = 0; i < slots_num; i++)
799 kfree(txq->entries[i].cmd);
800 kfree(txq->entries);
801 txq->entries = NULL;
802
803 return -ENOMEM;
804 }
805
806 #define BC_TABLE_SIZE (sizeof(struct iwl_bc_tbl_entry) * TFD_QUEUE_BC_SIZE)
807
808 /*
809 * iwl_pcie_tx_alloc - allocate TX context
810 * Allocate all Tx DMA structures and initialize them
811 */
812 static int iwl_pcie_tx_alloc(struct iwl_trans *trans)
813 {
814 int ret;
815 int txq_id, slots_num;
816 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
817 u16 bc_tbls_size = trans->mac_cfg->base->num_of_queues;
818
819 if (WARN_ON(trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_AX210))
820 return -EINVAL;
821
822 bc_tbls_size *= BC_TABLE_SIZE;
823
824 /* It is not allowed to allocate twice, so warn when this happens.
825 * We cannot rely on the previous allocation, so free and fail. */
826 if (WARN_ON(trans_pcie->txq_memory)) {
827 ret = -EINVAL;
828 goto error;
829 }
830
831 ret = iwl_pcie_alloc_dma_ptr(trans, &trans_pcie->txqs.scd_bc_tbls,
832 bc_tbls_size);
833 if (ret) {
834 IWL_ERR(trans, "Scheduler BC Table allocation failed\n");
835 goto error;
836 }
837
838 /* Alloc keep-warm buffer */
839 ret = iwl_pcie_alloc_dma_ptr(trans, &trans_pcie->kw, IWL_KW_SIZE);
840 if (ret) {
841 IWL_ERR(trans, "Keep Warm allocation failed\n");
842 goto error;
843 }
844
845 trans_pcie->txq_memory =
846 kcalloc(trans->mac_cfg->base->num_of_queues,
847 sizeof(struct iwl_txq), GFP_KERNEL);
848 if (!trans_pcie->txq_memory) {
849 IWL_ERR(trans, "Not enough memory for txq\n");
850 ret = -ENOMEM;
851 goto error;
852 }
853
854 /* Alloc and init all Tx queues, including the command queue (#4/#9) */
855 for (txq_id = 0; txq_id < trans->mac_cfg->base->num_of_queues;
856 txq_id++) {
857 bool cmd_queue = (txq_id == trans->conf.cmd_queue);
858
859 if (cmd_queue)
860 slots_num = max_t(u32, IWL_CMD_QUEUE_SIZE,
861 trans->mac_cfg->base->min_txq_size);
862 else
863 slots_num = max_t(u32, IWL_DEFAULT_QUEUE_SIZE,
864 trans->mac_cfg->base->min_ba_txq_size);
865 trans_pcie->txqs.txq[txq_id] = &trans_pcie->txq_memory[txq_id];
866 ret = iwl_pcie_txq_alloc(trans, trans_pcie->txqs.txq[txq_id],
867 slots_num, cmd_queue);
868 if (ret) {
869 IWL_ERR(trans, "Tx %d queue alloc failed\n", txq_id);
870 goto error;
871 }
872 trans_pcie->txqs.txq[txq_id]->id = txq_id;
873 }
874
875 return 0;
876
877 error:
878 iwl_pcie_tx_free(trans);
879
880 return ret;
881 }
882
883 /*
884 * iwl_queue_init - Initialize queue's high/low-water and read/write indexes
885 */
886 static int iwl_queue_init(struct iwl_txq *q, int slots_num)
887 {
888 q->n_window = slots_num;
889
890 /* slots_num must be power-of-two size, otherwise
891 * iwl_txq_get_cmd_index is broken.
892 */
893 if (WARN_ON(!is_power_of_2(slots_num)))
894 return -EINVAL;
895
896 q->low_mark = q->n_window / 4;
897 if (q->low_mark < 4)
898 q->low_mark = 4;
899
900 q->high_mark = q->n_window / 8;
901 if (q->high_mark < 2)
902 q->high_mark = 2;
903
904 q->write_ptr = 0;
905 q->read_ptr = 0;
906
907 return 0;
908 }
909
910 int iwl_txq_init(struct iwl_trans *trans, struct iwl_txq *txq,
911 int slots_num, bool cmd_queue)
912 {
913 u32 tfd_queue_max_size =
914 trans->mac_cfg->base->max_tfd_queue_size;
915 int ret;
916
917 txq->need_update = false;
918
919 /* max_tfd_queue_size must be power-of-two size, otherwise
920 * iwl_txq_inc_wrap and iwl_txq_dec_wrap are broken.
921 */
922 if (WARN_ONCE(tfd_queue_max_size & (tfd_queue_max_size - 1),
923 "Max tfd queue size must be a power of two, but is %d",
924 tfd_queue_max_size))
925 return -EINVAL;
926
927 /* Initialize queue's high/low-water marks, and head/tail indexes */
928 ret = iwl_queue_init(txq, slots_num);
929 if (ret)
930 return ret;
931
932 spin_lock_init(&txq->lock);
933 spin_lock_init(&txq->reclaim_lock);
934
935 if (cmd_queue) {
936 #if defined(__linux__)
937 static struct lock_class_key iwl_txq_cmd_queue_lock_class;
938
939 lockdep_set_class(&txq->lock, &iwl_txq_cmd_queue_lock_class);
940 #endif
941 }
942
943 __skb_queue_head_init(&txq->overflow_q);
944
945 return 0;
946 }
947
948 int iwl_pcie_tx_init(struct iwl_trans *trans)
949 {
950 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
951 int ret;
952 int txq_id, slots_num;
953 bool alloc = false;
954
955 if (!trans_pcie->txq_memory) {
956 ret = iwl_pcie_tx_alloc(trans);
957 if (ret)
958 goto error;
959 alloc = true;
960 }
961
962 spin_lock_bh(&trans_pcie->irq_lock);
963
964 /* Turn off all Tx DMA fifos */
965 iwl_scd_deactivate_fifos(trans);
966
967 /* Tell NIC where to find the "keep warm" buffer */
968 iwl_write_direct32(trans, FH_KW_MEM_ADDR_REG,
969 trans_pcie->kw.dma >> 4);
970
971 spin_unlock_bh(&trans_pcie->irq_lock);
972
973 /* Alloc and init all Tx queues, including the command queue (#4/#9) */
974 for (txq_id = 0; txq_id < trans->mac_cfg->base->num_of_queues;
975 txq_id++) {
976 bool cmd_queue = (txq_id == trans->conf.cmd_queue);
977
978 if (cmd_queue)
979 slots_num = max_t(u32, IWL_CMD_QUEUE_SIZE,
980 trans->mac_cfg->base->min_txq_size);
981 else
982 slots_num = max_t(u32, IWL_DEFAULT_QUEUE_SIZE,
983 trans->mac_cfg->base->min_ba_txq_size);
984 ret = iwl_txq_init(trans, trans_pcie->txqs.txq[txq_id], slots_num,
985 cmd_queue);
986 if (ret) {
987 IWL_ERR(trans, "Tx %d queue init failed\n", txq_id);
988 goto error;
989 }
990
991 /*
992 * Tell nic where to find circular buffer of TFDs for a
993 * given Tx queue, and enable the DMA channel used for that
994 * queue.
995 * Circular buffer (TFD queue in DRAM) physical base address
996 */
997 iwl_write_direct32(trans, FH_MEM_CBBC_QUEUE(trans, txq_id),
998 trans_pcie->txqs.txq[txq_id]->dma_addr >> 8);
999 }
1000
1001 iwl_set_bits_prph(trans, SCD_GP_CTRL, SCD_GP_CTRL_AUTO_ACTIVE_MODE);
1002 if (trans->mac_cfg->base->num_of_queues > 20)
1003 iwl_set_bits_prph(trans, SCD_GP_CTRL,
1004 SCD_GP_CTRL_ENABLE_31_QUEUES);
1005
1006 return 0;
1007 error:
1008 /* Upon error, free only if we allocated something */
1009 if (alloc)
1010 iwl_pcie_tx_free(trans);
1011 return ret;
1012 }
1013
1014 static int iwl_pcie_set_cmd_in_flight(struct iwl_trans *trans,
1015 const struct iwl_host_cmd *cmd)
1016 {
1017 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1018
1019 /* Make sure the NIC is still alive in the bus */
1020 if (test_bit(STATUS_TRANS_DEAD, &trans->status))
1021 return -ENODEV;
1022
1023 if (!trans->mac_cfg->base->apmg_wake_up_wa)
1024 return 0;
1025
1026 /*
1027 * wake up the NIC to make sure that the firmware will see the host
1028 * command - we will let the NIC sleep once all the host commands
1029 * have returned. This needs to be done only on NICs that have
1030 * apmg_wake_up_wa set (see above).
1031 */
1032 if (!_iwl_trans_pcie_grab_nic_access(trans, false))
1033 return -EIO;
1034
1035 /*
1036 * In iwl_trans_grab_nic_access(), we've acquired the reg_lock.
1037 * There, we also returned immediately if cmd_hold_nic_awake is
1038 * already true, so it's OK to unconditionally set it to true.
1039 */
1040 trans_pcie->cmd_hold_nic_awake = true;
1041 spin_unlock(&trans_pcie->reg_lock);
1042
1043 return 0;
1044 }
1045
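/*
 * Rearm, stop or leave the stuck-queue watchdog depending on whether the
 * queue is frozen, has drained completely, or is still making progress.
 */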
1046 static void iwl_txq_progress(struct iwl_txq *txq)
1047 {
1048 lockdep_assert_held(&txq->lock);
1049
1050 if (!txq->wd_timeout)
1051 return;
1052
1053 /*
1054 * station is asleep and we send data - that must
1055 * be uAPSD or PS-Poll. Don't rearm the timer.
1056 */
1057 if (txq->frozen)
1058 return;
1059
1060 /*
1061 * if empty delete timer, otherwise move timer forward
1062 * since we're making progress on this queue
1063 */
1064 if (txq->read_ptr == txq->write_ptr)
1065 timer_delete(&txq->stuck_timer);
1066 else
1067 mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
1068 }
1069
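/*
 * Return true if index i lies within the currently used region of the
 * circular buffer, i.e. between read_ptr and write_ptr, taking wrap-around
 * into account.
 */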
1070 static inline bool iwl_txq_used(const struct iwl_txq *q, int i,
1071 int read_ptr, int write_ptr)
1072 {
1073 int index = iwl_txq_get_cmd_index(q, i);
1074 int r = iwl_txq_get_cmd_index(q, read_ptr);
1075 int w = iwl_txq_get_cmd_index(q, write_ptr);
1076
1077 return w >= r ?
1078 (index >= r && index < w) :
1079 !(index < r && index >= w);
1080 }
1081
1082 /*
1083 * iwl_pcie_cmdq_reclaim - Reclaim TX command queue entries already Tx'd
1084 *
1085 * When FW advances 'R' index, all entries between old and new 'R' index
1086 * need to be reclaimed. As a result, some free space forms. If there is
1087 * enough free space (> low mark), wake the stack that feeds us.
1088 */
1089 static void iwl_pcie_cmdq_reclaim(struct iwl_trans *trans, int txq_id, int idx)
1090 {
1091 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1092 struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
1093 int nfreed = 0;
1094 u16 r;
1095
1096 lockdep_assert_held(&txq->lock);
1097
1098 idx = iwl_txq_get_cmd_index(txq, idx);
1099 r = iwl_txq_get_cmd_index(txq, txq->read_ptr);
1100
1101 if (idx >= trans->mac_cfg->base->max_tfd_queue_size ||
1102 (!iwl_txq_used(txq, idx, txq->read_ptr, txq->write_ptr))) {
1103 WARN_ONCE(test_bit(txq_id, trans_pcie->txqs.queue_used),
1104 "%s: Read index for DMA queue txq id (%d), index %d is out of range [0-%d] %d %d.\n",
1105 __func__, txq_id, idx,
1106 trans->mac_cfg->base->max_tfd_queue_size,
1107 txq->write_ptr, txq->read_ptr);
1108 return;
1109 }
1110
1111 for (idx = iwl_txq_inc_wrap(trans, idx); r != idx;
1112 r = iwl_txq_inc_wrap(trans, r)) {
1113 txq->read_ptr = iwl_txq_inc_wrap(trans, txq->read_ptr);
1114
1115 if (nfreed++ > 0) {
1116 IWL_ERR(trans, "HCMD skipped: index (%d) %d %d\n",
1117 idx, txq->write_ptr, r);
1118 iwl_force_nmi(trans);
1119 }
1120 }
1121
1122 if (txq->read_ptr == txq->write_ptr)
1123 iwl_pcie_clear_cmd_in_flight(trans);
1124
1125 iwl_txq_progress(txq);
1126 }
1127
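/*
 * Write the RA/TID-to-queue mapping into the scheduler's translation table
 * in SRAM; each 32-bit entry holds two queues, with odd queue IDs in the
 * upper half-word and even ones in the lower half-word.
 */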
1128 static int iwl_pcie_txq_set_ratid_map(struct iwl_trans *trans, u16 ra_tid,
1129 u16 txq_id)
1130 {
1131 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1132 u32 tbl_dw_addr;
1133 u32 tbl_dw;
1134 u16 scd_q2ratid;
1135
1136 scd_q2ratid = ra_tid & SCD_QUEUE_RA_TID_MAP_RATID_MSK;
1137
1138 tbl_dw_addr = trans_pcie->scd_base_addr +
1139 SCD_TRANS_TBL_OFFSET_QUEUE(txq_id);
1140
1141 tbl_dw = iwl_trans_read_mem32(trans, tbl_dw_addr);
1142
1143 if (txq_id & 0x1)
1144 tbl_dw = (scd_q2ratid << 16) | (tbl_dw & 0x0000FFFF);
1145 else
1146 tbl_dw = scd_q2ratid | (tbl_dw & 0xFFFF0000);
1147
1148 iwl_trans_write_mem32(trans, tbl_dw_addr, tbl_dw);
1149
1150 return 0;
1151 }
1152
1153 /* Receiver address (actually, Rx station's index into station table),
1154 * combined with Traffic ID (QOS priority), in format used by Tx Scheduler */
1155 #define BUILD_RAxTID(sta_id, tid) (((sta_id) << 4) + (tid))
1156
1157 bool iwl_trans_pcie_txq_enable(struct iwl_trans *trans, int txq_id, u16 ssn,
1158 const struct iwl_trans_txq_scd_cfg *cfg,
1159 unsigned int wdg_timeout)
1160 {
1161 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1162 struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
1163 int fifo = -1;
1164 bool scd_bug = false;
1165
1166 if (test_and_set_bit(txq_id, trans_pcie->txqs.queue_used))
1167 WARN_ONCE(1, "queue %d already used - expect issues", txq_id);
1168
1169 txq->wd_timeout = msecs_to_jiffies(wdg_timeout);
1170
1171 if (cfg) {
1172 fifo = cfg->fifo;
1173
1174 /* Disable the scheduler prior configuring the cmd queue */
1175 if (txq_id == trans->conf.cmd_queue &&
1176 trans->conf.scd_set_active)
1177 iwl_scd_enable_set_active(trans, 0);
1178
1179 /* Stop this Tx queue before configuring it */
1180 iwl_scd_txq_set_inactive(trans, txq_id);
1181
1182 /* Set this queue as a chain-building queue unless it is CMD */
1183 if (txq_id != trans->conf.cmd_queue)
1184 iwl_scd_txq_set_chain(trans, txq_id);
1185
1186 if (cfg->aggregate) {
1187 u16 ra_tid = BUILD_RAxTID(cfg->sta_id, cfg->tid);
1188
1189 /* Map receiver-address / traffic-ID to this queue */
1190 iwl_pcie_txq_set_ratid_map(trans, ra_tid, txq_id);
1191
1192 /* enable aggregations for the queue */
1193 iwl_scd_txq_enable_agg(trans, txq_id);
1194 txq->ampdu = true;
1195 } else {
1196 /*
1197 * disable aggregations for the queue, this will also
1198 * make the ra_tid mapping configuration irrelevant
1199 * since it is now a non-AGG queue.
1200 */
1201 iwl_scd_txq_disable_agg(trans, txq_id);
1202
1203 ssn = txq->read_ptr;
1204 }
1205 } else {
1206 /*
1207 * If we need to move the SCD write pointer by steps of
1208 * 0x40, 0x80 or 0xc0, it gets stuck. Avoid this and let
1209 * the op_mode know by returning true later.
1210 * Do this only in case cfg is NULL since this trick can
1211 * be done only if we have DQA enabled which is true for mvm
1212 * only. And mvm never sets a cfg pointer.
1213 * This is really ugly, but this is the easiest way out for
1214 * this sad hardware issue.
1215 * This bug has been fixed on devices 9000 and up.
1216 */
1217 scd_bug = !trans->mac_cfg->mq_rx_supported &&
1218 !((ssn - txq->write_ptr) & 0x3f) &&
1219 (ssn != txq->write_ptr);
1220 if (scd_bug)
1221 ssn++;
1222 }
1223
1224 /* Place first TFD at index corresponding to start sequence number.
1225 * Assumes that ssn_idx is valid (!= 0xFFF) */
1226 txq->read_ptr = (ssn & 0xff);
1227 txq->write_ptr = (ssn & 0xff);
1228 iwl_write_direct32(trans, HBUS_TARG_WRPTR,
1229 (ssn & 0xff) | (txq_id << 8));
1230
1231 if (cfg) {
1232 u8 frame_limit = cfg->frame_limit;
1233
1234 iwl_write_prph(trans, SCD_QUEUE_RDPTR(txq_id), ssn);
1235
1236 /* Set up Tx window size and frame limit for this queue */
1237 iwl_trans_write_mem32(trans, trans_pcie->scd_base_addr +
1238 SCD_CONTEXT_QUEUE_OFFSET(txq_id), 0);
1239 iwl_trans_write_mem32(trans,
1240 trans_pcie->scd_base_addr +
1241 SCD_CONTEXT_QUEUE_OFFSET(txq_id) + sizeof(u32),
1242 SCD_QUEUE_CTX_REG2_VAL(WIN_SIZE, frame_limit) |
1243 SCD_QUEUE_CTX_REG2_VAL(FRAME_LIMIT, frame_limit));
1244
1245 /* Set up status area in SRAM, map to Tx DMA/FIFO, activate */
1246 iwl_write_prph(trans, SCD_QUEUE_STATUS_BITS(txq_id),
1247 (1 << SCD_QUEUE_STTS_REG_POS_ACTIVE) |
1248 (cfg->fifo << SCD_QUEUE_STTS_REG_POS_TXF) |
1249 (1 << SCD_QUEUE_STTS_REG_POS_WSL) |
1250 SCD_QUEUE_STTS_REG_MSK);
1251
1252 /* enable the scheduler for this queue (only) */
1253 if (txq_id == trans->conf.cmd_queue &&
1254 trans->conf.scd_set_active)
1255 iwl_scd_enable_set_active(trans, BIT(txq_id));
1256
1257 IWL_DEBUG_TX_QUEUES(trans,
1258 "Activate queue %d on FIFO %d WrPtr: %d\n",
1259 txq_id, fifo, ssn & 0xff);
1260 } else {
1261 IWL_DEBUG_TX_QUEUES(trans,
1262 "Activate queue %d WrPtr: %d\n",
1263 txq_id, ssn & 0xff);
1264 }
1265
1266 return scd_bug;
1267 }
1268
1269 void iwl_trans_pcie_txq_set_shared_mode(struct iwl_trans *trans, u32 txq_id,
1270 bool shared_mode)
1271 {
1272 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1273 struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
1274
1275 txq->ampdu = !shared_mode;
1276 }
1277
1278 void iwl_trans_pcie_txq_disable(struct iwl_trans *trans, int txq_id,
1279 bool configure_scd)
1280 {
1281 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1282 u32 stts_addr = trans_pcie->scd_base_addr +
1283 SCD_TX_STTS_QUEUE_OFFSET(txq_id);
1284 static const u32 zero_val[4] = {};
1285
1286 trans_pcie->txqs.txq[txq_id]->frozen_expiry_remainder = 0;
1287 trans_pcie->txqs.txq[txq_id]->frozen = false;
1288
1289 /*
1290 * Upon HW Rfkill - we stop the device, and then stop the queues
1291 * in the op_mode. Just for the sake of the simplicity of the op_mode,
1292 * allow the op_mode to call txq_disable after it already called
1293 * stop_device.
1294 */
1295 if (!test_and_clear_bit(txq_id, trans_pcie->txqs.queue_used)) {
1296 WARN_ONCE(test_bit(STATUS_DEVICE_ENABLED, &trans->status),
1297 "queue %d not used", txq_id);
1298 return;
1299 }
1300
1301 if (configure_scd) {
1302 iwl_scd_txq_set_inactive(trans, txq_id);
1303
1304 iwl_trans_write_mem(trans, stts_addr, (const void *)zero_val,
1305 ARRAY_SIZE(zero_val));
1306 }
1307
1308 iwl_pcie_txq_unmap(trans, txq_id);
1309 trans_pcie->txqs.txq[txq_id]->ampdu = false;
1310
1311 IWL_DEBUG_TX_QUEUES(trans, "Deactivate queue %d\n", txq_id);
1312 }
1313
1314 /*************** HOST COMMAND QUEUE FUNCTIONS *****/
1315
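/*
 * Block or unblock write-pointer updates for all data queues (the command
 * queue is skipped). txq->block is a counter, so nested block requests must
 * be balanced before the write pointer is handed to the hardware again.
 */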
1316 static void iwl_trans_pcie_block_txq_ptrs(struct iwl_trans *trans, bool block)
1317 {
1318 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1319 int i;
1320
1321 for (i = 0; i < trans->mac_cfg->base->num_of_queues; i++) {
1322 struct iwl_txq *txq = trans_pcie->txqs.txq[i];
1323
1324 if (i == trans->conf.cmd_queue)
1325 continue;
1326
1327 /* we skip the command queue (obviously) so it's OK to nest */
1328 spin_lock_nested(&txq->lock, 1);
1329
1330 if (!block && !(WARN_ON_ONCE(!txq->block))) {
1331 txq->block--;
1332 if (!txq->block) {
1333 iwl_write32(trans, HBUS_TARG_WRPTR,
1334 txq->write_ptr | (i << 8));
1335 }
1336 } else if (block) {
1337 txq->block++;
1338 }
1339
1340 spin_unlock(&txq->lock);
1341 }
1342 }
1343
1344 /*
1345 * iwl_pcie_enqueue_hcmd - enqueue a uCode command
1346 * @trans: transport private data
1347 * @cmd: a pointer to the ucode command structure
1348 *
1349 * The function returns < 0 values to indicate the operation
1350 * failed. On success, it returns the index (>= 0) of the command in the
1351 * command queue.
1352 */
1353 int iwl_pcie_enqueue_hcmd(struct iwl_trans *trans,
1354 struct iwl_host_cmd *cmd)
1355 {
1356 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1357 struct iwl_txq *txq = trans_pcie->txqs.txq[trans->conf.cmd_queue];
1358 struct iwl_device_cmd *out_cmd;
1359 struct iwl_cmd_meta *out_meta;
1360 void *dup_buf = NULL;
1361 dma_addr_t phys_addr;
1362 int idx;
1363 u16 copy_size, cmd_size, tb0_size;
1364 bool had_nocopy = false;
1365 u8 group_id = iwl_cmd_groupid(cmd->id);
1366 int i, ret;
1367 u32 cmd_pos;
1368 const u8 *cmddata[IWL_MAX_CMD_TBS_PER_TFD];
1369 u16 cmdlen[IWL_MAX_CMD_TBS_PER_TFD];
1370 unsigned long flags;
1371
1372 if (WARN(!trans->conf.wide_cmd_header &&
1373 group_id > IWL_ALWAYS_LONG_GROUP,
1374 "unsupported wide command %#x\n", cmd->id))
1375 return -EINVAL;
1376
1377 if (group_id != 0) {
1378 copy_size = sizeof(struct iwl_cmd_header_wide);
1379 cmd_size = sizeof(struct iwl_cmd_header_wide);
1380 } else {
1381 copy_size = sizeof(struct iwl_cmd_header);
1382 cmd_size = sizeof(struct iwl_cmd_header);
1383 }
1384
1385 /* need one for the header if the first is NOCOPY */
1386 BUILD_BUG_ON(IWL_MAX_CMD_TBS_PER_TFD > IWL_NUM_OF_TBS - 1);
1387
1388 for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1389 cmddata[i] = cmd->data[i];
1390 cmdlen[i] = cmd->len[i];
1391
1392 if (!cmd->len[i])
1393 continue;
1394
1395 /* need at least IWL_FIRST_TB_SIZE copied */
1396 if (copy_size < IWL_FIRST_TB_SIZE) {
1397 int copy = IWL_FIRST_TB_SIZE - copy_size;
1398
1399 if (copy > cmdlen[i])
1400 copy = cmdlen[i];
1401 cmdlen[i] -= copy;
1402 cmddata[i] += copy;
1403 copy_size += copy;
1404 }
1405
1406 if (cmd->dataflags[i] & IWL_HCMD_DFL_NOCOPY) {
1407 had_nocopy = true;
1408 if (WARN_ON(cmd->dataflags[i] & IWL_HCMD_DFL_DUP)) {
1409 idx = -EINVAL;
1410 goto free_dup_buf;
1411 }
1412 } else if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP) {
1413 /*
1414 * This is also a chunk that isn't copied
1415 * to the static buffer so set had_nocopy.
1416 */
1417 had_nocopy = true;
1418
1419 /* only allowed once */
1420 if (WARN_ON(dup_buf)) {
1421 idx = -EINVAL;
1422 goto free_dup_buf;
1423 }
1424
1425 dup_buf = kmemdup(cmddata[i], cmdlen[i],
1426 GFP_ATOMIC);
1427 if (!dup_buf)
1428 return -ENOMEM;
1429 } else {
1430 /* NOCOPY must not be followed by normal! */
1431 if (WARN_ON(had_nocopy)) {
1432 idx = -EINVAL;
1433 goto free_dup_buf;
1434 }
1435 copy_size += cmdlen[i];
1436 }
1437 cmd_size += cmd->len[i];
1438 }
1439
1440 /*
1441 * If any of the command structures end up being larger than
1442 * the TFD_MAX_PAYLOAD_SIZE and they aren't dynamically
1443 * allocated into separate TFDs, then we will need to
1444 * increase the size of the buffers.
1445 */
1446 if (WARN(copy_size > TFD_MAX_PAYLOAD_SIZE,
1447 "Command %s (%#x) is too large (%d bytes)\n",
1448 iwl_get_cmd_string(trans, cmd->id),
1449 cmd->id, copy_size)) {
1450 idx = -EINVAL;
1451 goto free_dup_buf;
1452 }
1453
1454 spin_lock_irqsave(&txq->lock, flags);
1455
1456 if (iwl_txq_space(trans, txq) < ((cmd->flags & CMD_ASYNC) ? 2 : 1)) {
1457 spin_unlock_irqrestore(&txq->lock, flags);
1458
1459 IWL_ERR(trans, "No space in command queue\n");
1460 iwl_op_mode_nic_error(trans->op_mode,
1461 IWL_ERR_TYPE_CMD_QUEUE_FULL);
1462 iwl_trans_schedule_reset(trans, IWL_ERR_TYPE_CMD_QUEUE_FULL);
1463 idx = -ENOSPC;
1464 goto free_dup_buf;
1465 }
1466
1467 idx = iwl_txq_get_cmd_index(txq, txq->write_ptr);
1468 out_cmd = txq->entries[idx].cmd;
1469 out_meta = &txq->entries[idx].meta;
1470
1471 /* re-initialize, this also marks the SG list as unused */
1472 memset(out_meta, 0, sizeof(*out_meta));
1473 if (cmd->flags & CMD_WANT_SKB)
1474 out_meta->source = cmd;
1475
1476 /* set up the header */
1477 if (group_id != 0) {
1478 out_cmd->hdr_wide.cmd = iwl_cmd_opcode(cmd->id);
1479 out_cmd->hdr_wide.group_id = group_id;
1480 out_cmd->hdr_wide.version = iwl_cmd_version(cmd->id);
1481 out_cmd->hdr_wide.length =
1482 cpu_to_le16(cmd_size -
1483 sizeof(struct iwl_cmd_header_wide));
1484 out_cmd->hdr_wide.reserved = 0;
1485 out_cmd->hdr_wide.sequence =
1486 cpu_to_le16(QUEUE_TO_SEQ(trans->conf.cmd_queue) |
1487 INDEX_TO_SEQ(txq->write_ptr));
1488
1489 cmd_pos = sizeof(struct iwl_cmd_header_wide);
1490 copy_size = sizeof(struct iwl_cmd_header_wide);
1491 } else {
1492 out_cmd->hdr.cmd = iwl_cmd_opcode(cmd->id);
1493 out_cmd->hdr.sequence =
1494 cpu_to_le16(QUEUE_TO_SEQ(trans->conf.cmd_queue) |
1495 INDEX_TO_SEQ(txq->write_ptr));
1496 out_cmd->hdr.group_id = 0;
1497
1498 cmd_pos = sizeof(struct iwl_cmd_header);
1499 copy_size = sizeof(struct iwl_cmd_header);
1500 }
1501
1502 /* and copy the data that needs to be copied */
1503 for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1504 int copy;
1505
1506 if (!cmd->len[i])
1507 continue;
1508
1509 /* copy everything if not nocopy/dup */
1510 if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
1511 IWL_HCMD_DFL_DUP))) {
1512 copy = cmd->len[i];
1513
1514 memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
1515 cmd_pos += copy;
1516 copy_size += copy;
1517 continue;
1518 }
1519
1520 /*
1521 * Otherwise we need at least IWL_FIRST_TB_SIZE copied
1522 * in total (for bi-directional DMA), but copy up to what
1523 * we can fit into the payload for debug dump purposes.
1524 */
1525 copy = min_t(int, TFD_MAX_PAYLOAD_SIZE - cmd_pos, cmd->len[i]);
1526
1527 memcpy((u8 *)out_cmd + cmd_pos, cmd->data[i], copy);
1528 cmd_pos += copy;
1529
1530 /* However, treat copy_size the proper way, we need it below */
1531 if (copy_size < IWL_FIRST_TB_SIZE) {
1532 copy = IWL_FIRST_TB_SIZE - copy_size;
1533
1534 if (copy > cmd->len[i])
1535 copy = cmd->len[i];
1536 copy_size += copy;
1537 }
1538 }
1539
1540 IWL_DEBUG_HC(trans,
1541 "Sending command %s (%.2x.%.2x), seq: 0x%04X, %d bytes at %d[%d]:%d\n",
1542 iwl_get_cmd_string(trans, cmd->id),
1543 group_id, out_cmd->hdr.cmd,
1544 le16_to_cpu(out_cmd->hdr.sequence),
1545 cmd_size, txq->write_ptr, idx, trans->conf.cmd_queue);
1546
1547 /* start the TFD with the minimum copy bytes */
1548 tb0_size = min_t(int, copy_size, IWL_FIRST_TB_SIZE);
1549 memcpy(&txq->first_tb_bufs[idx], &out_cmd->hdr, tb0_size);
1550 iwl_pcie_txq_build_tfd(trans, txq,
1551 iwl_txq_get_first_tb_dma(txq, idx),
1552 tb0_size, true);
1553
1554 /* map first command fragment, if any remains */
1555 if (copy_size > tb0_size) {
1556 phys_addr = dma_map_single(trans->dev,
1557 ((u8 *)&out_cmd->hdr) + tb0_size,
1558 copy_size - tb0_size,
1559 DMA_TO_DEVICE);
1560 if (dma_mapping_error(trans->dev, phys_addr)) {
1561 iwl_txq_gen1_tfd_unmap(trans, out_meta, txq,
1562 txq->write_ptr);
1563 idx = -ENOMEM;
1564 goto out;
1565 }
1566
1567 iwl_pcie_txq_build_tfd(trans, txq, phys_addr,
1568 copy_size - tb0_size, false);
1569 }
1570
1571 /* map the remaining (adjusted) nocopy/dup fragments */
1572 for (i = 0; i < IWL_MAX_CMD_TBS_PER_TFD; i++) {
1573 void *data = (void *)(uintptr_t)cmddata[i];
1574
1575 if (!cmdlen[i])
1576 continue;
1577 if (!(cmd->dataflags[i] & (IWL_HCMD_DFL_NOCOPY |
1578 IWL_HCMD_DFL_DUP)))
1579 continue;
1580 if (cmd->dataflags[i] & IWL_HCMD_DFL_DUP)
1581 data = dup_buf;
1582 phys_addr = dma_map_single(trans->dev, data,
1583 cmdlen[i], DMA_TO_DEVICE);
1584 if (dma_mapping_error(trans->dev, phys_addr)) {
1585 iwl_txq_gen1_tfd_unmap(trans, out_meta, txq,
1586 txq->write_ptr);
1587 idx = -ENOMEM;
1588 goto out;
1589 }
1590
1591 iwl_pcie_txq_build_tfd(trans, txq, phys_addr, cmdlen[i], false);
1592 }
1593
1594 BUILD_BUG_ON(IWL_TFH_NUM_TBS > sizeof(out_meta->tbs) * BITS_PER_BYTE);
1595 out_meta->flags = cmd->flags;
1596 if (WARN_ON_ONCE(txq->entries[idx].free_buf))
1597 kfree_sensitive(txq->entries[idx].free_buf);
1598 txq->entries[idx].free_buf = dup_buf;
1599
1600 trace_iwlwifi_dev_hcmd(trans->dev, cmd, cmd_size, &out_cmd->hdr_wide);
1601
1602 /* start timer if queue currently empty */
1603 if (txq->read_ptr == txq->write_ptr && txq->wd_timeout)
1604 mod_timer(&txq->stuck_timer, jiffies + txq->wd_timeout);
1605
1606 ret = iwl_pcie_set_cmd_in_flight(trans, cmd);
1607 if (ret < 0) {
1608 idx = ret;
1609 goto out;
1610 }
1611
1612 if (cmd->flags & CMD_BLOCK_TXQS)
1613 iwl_trans_pcie_block_txq_ptrs(trans, true);
1614
1615 /* Increment and update queue's write index */
1616 txq->write_ptr = iwl_txq_inc_wrap(trans, txq->write_ptr);
1617 iwl_pcie_txq_inc_wr_ptr(trans, txq);
1618
1619 out:
1620 spin_unlock_irqrestore(&txq->lock, flags);
1621 free_dup_buf:
1622 if (idx < 0)
1623 kfree(dup_buf);
1624 return idx;
1625 }
1626
1627 /*
1628 * iwl_pcie_hcmd_complete - Pull unused buffers off the queue and reclaim them
1629 * @rxb: Rx buffer to reclaim
1630 */
1631 void iwl_pcie_hcmd_complete(struct iwl_trans *trans,
1632 struct iwl_rx_cmd_buffer *rxb)
1633 {
1634 struct iwl_rx_packet *pkt = rxb_addr(rxb);
1635 u16 sequence = le16_to_cpu(pkt->hdr.sequence);
1636 u8 group_id;
1637 u32 cmd_id;
1638 int txq_id = SEQ_TO_QUEUE(sequence);
1639 int index = SEQ_TO_INDEX(sequence);
1640 int cmd_index;
1641 struct iwl_device_cmd *cmd;
1642 struct iwl_cmd_meta *meta;
1643 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1644 struct iwl_txq *txq = trans_pcie->txqs.txq[trans->conf.cmd_queue];
1645
1646 /* If a Tx command is being handled and it isn't in the actual
1647 * command queue then a command routing bug has been introduced
1648 * in the queue management code. */
1649 if (IWL_FW_CHECK(trans, txq_id != trans->conf.cmd_queue,
1650 "wrong command queue %d (should be %d), sequence 0x%X readp=%d writep=%d pkt=%*phN\n",
1651 txq_id, trans->conf.cmd_queue, sequence, txq->read_ptr,
1652 txq->write_ptr, 32, pkt))
1653 return;
1654
1655 spin_lock_bh(&txq->lock);
1656
1657 cmd_index = iwl_txq_get_cmd_index(txq, index);
1658 cmd = txq->entries[cmd_index].cmd;
1659 meta = &txq->entries[cmd_index].meta;
1660 group_id = cmd->hdr.group_id;
1661 cmd_id = WIDE_ID(group_id, cmd->hdr.cmd);
1662
1663 if (trans->mac_cfg->gen2)
1664 iwl_txq_gen2_tfd_unmap(trans, meta,
1665 iwl_txq_get_tfd(trans, txq, index));
1666 else
1667 iwl_txq_gen1_tfd_unmap(trans, meta, txq, index);
1668
1669 /* Input error checking is done when commands are added to queue. */
1670 if (meta->flags & CMD_WANT_SKB) {
1671 struct page *p = rxb_steal_page(rxb);
1672
1673 meta->source->resp_pkt = pkt;
1674 #if defined(__linux__)
1675 meta->source->_rx_page_addr = (unsigned long)page_address(p);
1676 #elif defined(__FreeBSD__)
1677 meta->source->_page = p;
1678 #endif
1679 meta->source->_rx_page_order = trans_pcie->rx_page_order;
1680 }
1681
1682 if (meta->flags & CMD_BLOCK_TXQS)
1683 iwl_trans_pcie_block_txq_ptrs(trans, false);
1684
1685 iwl_pcie_cmdq_reclaim(trans, txq_id, index);
1686
1687 if (!(meta->flags & CMD_ASYNC)) {
1688 if (!test_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status)) {
1689 IWL_WARN(trans,
1690 "HCMD_ACTIVE already clear for command %s\n",
1691 iwl_get_cmd_string(trans, cmd_id));
1692 }
1693 clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
1694 IWL_DEBUG_INFO(trans, "Clearing HCMD_ACTIVE for command %s\n",
1695 iwl_get_cmd_string(trans, cmd_id));
1696 wake_up(&trans_pcie->wait_command_queue);
1697 }
1698
1699 meta->flags = 0;
1700
1701 spin_unlock_bh(&txq->lock);
1702 }
1703
1704 static int iwl_fill_data_tbs(struct iwl_trans *trans, struct sk_buff *skb,
1705 struct iwl_txq *txq, u8 hdr_len,
1706 struct iwl_cmd_meta *out_meta)
1707 {
1708 u16 head_tb_len;
1709 int i;
1710
1711 /*
1712 * Set up TFD's third entry to point directly to remainder
1713 * of skb's head, if any
1714 */
1715 head_tb_len = skb_headlen(skb) - hdr_len;
1716
1717 if (head_tb_len > 0) {
1718 dma_addr_t tb_phys = dma_map_single(trans->dev,
1719 skb->data + hdr_len,
1720 head_tb_len, DMA_TO_DEVICE);
1721 if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
1722 return -EINVAL;
1723 trace_iwlwifi_dev_tx_tb(trans->dev, skb, skb->data + hdr_len,
1724 tb_phys, head_tb_len);
1725 iwl_pcie_txq_build_tfd(trans, txq, tb_phys, head_tb_len, false);
1726 }
1727
1728 /* set up the remaining entries to point to the data */
1729 for (i = 0; i < skb_shinfo(skb)->nr_frags; i++) {
1730 const skb_frag_t *frag = &skb_shinfo(skb)->frags[i];
1731 dma_addr_t tb_phys;
1732 int tb_idx;
1733
1734 if (!skb_frag_size(frag))
1735 continue;
1736
1737 tb_phys = skb_frag_dma_map(trans->dev, frag, 0,
1738 skb_frag_size(frag), DMA_TO_DEVICE);
1739
1740 if (unlikely(dma_mapping_error(trans->dev, tb_phys)))
1741 return -EINVAL;
1742 trace_iwlwifi_dev_tx_tb(trans->dev, skb, skb_frag_address(frag),
1743 tb_phys, skb_frag_size(frag));
1744 tb_idx = iwl_pcie_txq_build_tfd(trans, txq, tb_phys,
1745 skb_frag_size(frag), false);
1746 if (tb_idx < 0)
1747 return tb_idx;
1748
1749 out_meta->tbs |= BIT(tb_idx);
1750 }
1751
1752 return 0;
1753 }
1754
1755 #ifdef CONFIG_INET
1756 static void *iwl_pcie_get_page_hdr(struct iwl_trans *trans,
1757 size_t len, struct sk_buff *skb)
1758 {
1759 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1760 struct iwl_tso_hdr_page *p = this_cpu_ptr(trans_pcie->txqs.tso_hdr_page);
1761 struct iwl_tso_page_info *info;
1762 struct page **page_ptr;
1763 dma_addr_t phys;
1764 void *ret;
1765
1766 page_ptr = (void *)((u8 *)skb->cb + trans->conf.cb_data_offs);
1767
1768 if (WARN_ON(*page_ptr))
1769 return NULL;
1770
1771 if (!p->page)
1772 goto alloc;
1773
1774 /*
1775 * Check if there's enough room on this page
1776 *
1777 * Note that we put a page chaining pointer *last* in the
1778 * page - we need it somewhere, and if it's there then we
1779 * avoid DMA mapping the last bits of the page which may
1780 * trigger the 32-bit boundary hardware bug.
1781 *
1782 * (see also get_workaround_page() in tx-gen2.c)
1783 */
1784 if (((unsigned long)p->pos & ~PAGE_MASK) + len < IWL_TSO_PAGE_DATA_SIZE) {
1785 info = IWL_TSO_PAGE_INFO(page_address(p->page));
1786 goto out;
1787 }
1788
1789 /* We don't have enough room on this page, get a new one. */
1790 iwl_pcie_free_and_unmap_tso_page(trans, p->page);
1791
1792 alloc:
1793 p->page = alloc_page(GFP_ATOMIC);
1794 if (!p->page)
1795 return NULL;
1796 p->pos = page_address(p->page);
1797
1798 info = IWL_TSO_PAGE_INFO(page_address(p->page));
1799
1800 /* set the chaining pointer to NULL */
1801 info->next = NULL;
1802
1803 /* Create a DMA mapping for the page */
1804 phys = dma_map_page_attrs(trans->dev, p->page, 0, PAGE_SIZE,
1805 DMA_TO_DEVICE, DMA_ATTR_SKIP_CPU_SYNC);
1806 if (unlikely(dma_mapping_error(trans->dev, phys))) {
1807 __free_page(p->page);
1808 p->page = NULL;
1809
1810 return NULL;
1811 }
1812
1813 /* Store physical address and set use count */
1814 info->dma_addr = phys;
1815 refcount_set(&info->use_count, 1);
1816 out:
1817 *page_ptr = p->page;
1818 /* Return an internal reference for the caller */
1819 refcount_inc(&info->use_count);
1820 ret = p->pos;
1821 p->pos += len;
1822
1823 return ret;
1824 }
1825
1826 /**
1827 * iwl_pcie_get_sgt_tb_phys - Find TB address in mapped SG list
1828 * @sgt: scatter gather table
1829 * @offset: Offset into the mapped memory (i.e. SKB payload data)
1830 * @len: Length of the area
1831 *
1832 * Find the DMA address that corresponds to the SKB payload data at the
1833 * position given by @offset.
1834 *
1835 * Returns: Address for TB entry
1836 */
1837 dma_addr_t iwl_pcie_get_sgt_tb_phys(struct sg_table *sgt, unsigned int offset,
1838 unsigned int len)
1839 {
1840 struct scatterlist *sg;
1841 unsigned int sg_offset = 0;
1842 int i;
1843
1844 /*
1845 * Search the mapped DMA areas in the SG for the area that contains the
1846 * data at offset with the given length.
1847 */
1848 for_each_sgtable_dma_sg(sgt, sg, i) {
1849 if (offset >= sg_offset &&
1850 offset + len <= sg_offset + sg_dma_len(sg))
1851 return sg_dma_address(sg) + offset - sg_offset;
1852
1853 sg_offset += sg_dma_len(sg);
1854 }
1855
1856 WARN_ON_ONCE(1);
1857
1858 return DMA_MAPPING_ERROR;
1859 }
1860
1861 /**
1862 * iwl_pcie_prep_tso - Prepare TSO page and SKB for sending
1863 * @trans: transport private data
1864 * @skb: the SKB to map
1865 * @cmd_meta: command meta to store the scatter list information for unmapping
1866 * @hdr: output argument for TSO headers
1867 * @hdr_room: requested length for TSO headers
1868 * @offset: offset into the data from which mapping should start
1869 *
1870 * Allocate space for a scatter gather list and TSO headers and map the SKB
1871 * using the scatter gather list. The SKB is unmapped when the page is
1872 * freed at the end of the operation.
1873 *
1874 * Returns: newly allocated and mapped scatter gather table with list
1875 */
1876 struct sg_table *iwl_pcie_prep_tso(struct iwl_trans *trans, struct sk_buff *skb,
1877 struct iwl_cmd_meta *cmd_meta,
1878 u8 **hdr, unsigned int hdr_room,
1879 unsigned int offset)
1880 {
1881 struct sg_table *sgt;
1882 unsigned int n_segments = skb_shinfo(skb)->nr_frags + 1;
1883 int orig_nents;
1884
1885 if (WARN_ON_ONCE(skb_has_frag_list(skb)))
1886 return NULL;
1887
1888 *hdr = iwl_pcie_get_page_hdr(trans,
1889 hdr_room + __alignof__(struct sg_table) +
1890 sizeof(struct sg_table) +
1891 n_segments * sizeof(struct scatterlist),
1892 skb);
1893 if (!*hdr)
1894 return NULL;
1895
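/*
* The sg_table and its scatterlist are carved out of the same TSO page,
* right after the requested header room, aligned for struct sg_table
* (see the size passed to iwl_pcie_get_page_hdr() above).
*/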
1896 sgt = (void *)PTR_ALIGN(*hdr + hdr_room, __alignof__(struct sg_table));
1897 sgt->sgl = (void *)(sgt + 1);
1898
1899 sg_init_table(sgt->sgl, n_segments);
1900
1901 /* Only map the data, not the header (it is copied to the TSO page) */
1902 orig_nents = skb_to_sgvec(skb, sgt->sgl, offset, skb->len - offset);
1903 if (WARN_ON_ONCE(orig_nents <= 0))
1904 return NULL;
1905
1906 sgt->orig_nents = orig_nents;
1907
1908 /* And map the entire SKB */
1909 if (dma_map_sgtable(trans->dev, sgt, DMA_TO_DEVICE, 0) < 0)
1910 return NULL;
1911
1912 /* Store non-zero (i.e. valid) offset for unmapping */
1913 cmd_meta->sg_offset = (unsigned long) sgt & ~PAGE_MASK;
1914
1915 return sgt;
1916 }
1917
1918 static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
1919 struct iwl_txq *txq, u8 hdr_len,
1920 struct iwl_cmd_meta *out_meta,
1921 struct iwl_device_tx_cmd *dev_cmd,
1922 u16 tb1_len)
1923 {
1924 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
1925 struct iwl_tx_cmd_v6 *tx_cmd = (void *)dev_cmd->payload;
1926 struct ieee80211_hdr *hdr = (void *)skb->data;
1927 unsigned int snap_ip_tcp_hdrlen, ip_hdrlen, total_len, hdr_room;
1928 unsigned int mss = skb_shinfo(skb)->gso_size;
1929 unsigned int data_offset = 0;
1930 u16 length, iv_len, amsdu_pad;
1931 dma_addr_t start_hdr_phys;
1932 u8 *start_hdr, *pos_hdr;
1933 struct sg_table *sgt;
1934 struct tso_t tso;
1935
1936 /* if the packet is protected, then it must be CCMP or GCMP */
1937 BUILD_BUG_ON(IEEE80211_CCMP_HDR_LEN != IEEE80211_GCMP_HDR_LEN);
1938 iv_len = ieee80211_has_protected(hdr->frame_control) ?
1939 IEEE80211_CCMP_HDR_LEN : 0;
1940
1941 trace_iwlwifi_dev_tx(trans->dev, skb,
1942 iwl_txq_get_tfd(trans, txq, txq->write_ptr),
1943 trans_pcie->txqs.tfd.size,
1944 &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len, 0);
1945
1946 ip_hdrlen = skb_network_header_len(skb);
1947 snap_ip_tcp_hdrlen = 8 + ip_hdrlen + tcp_hdrlen(skb);
1948 total_len = skb->len - snap_ip_tcp_hdrlen - hdr_len - iv_len;
1949 amsdu_pad = 0;
1950
1951 /* total amount of header we may need for this A-MSDU */
1952 hdr_room = DIV_ROUND_UP(total_len, mss) *
1953 (3 + snap_ip_tcp_hdrlen + sizeof(struct ethhdr)) + iv_len;
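/*
* Per subframe that is: up to 3 bytes of A-MSDU padding, an
* ethernet-style subframe header (DA, SA, length) and the
* SNAP/IP/TCP headers; the IV is copied only once, up front.
*/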
1954
1955 /* Our device supports 9 segments at most; the headers will fit in one page */
1956 sgt = iwl_pcie_prep_tso(trans, skb, out_meta, &start_hdr, hdr_room,
1957 snap_ip_tcp_hdrlen + hdr_len + iv_len);
1958 if (!sgt)
1959 return -ENOMEM;
1960
1961 start_hdr_phys = iwl_pcie_get_tso_page_phys(start_hdr);
1962 pos_hdr = start_hdr;
1963 memcpy(pos_hdr, skb->data + hdr_len, iv_len);
1964 pos_hdr += iv_len;
1965
1966 /*
1967 * Pull the ieee80211 header + IV to be able to use TSO core;
1968 * we will restore it for the tx_status flow.
1969 */
1970 skb_pull(skb, hdr_len + iv_len);
1971
1972 /*
1973 * Remove the length of all the headers that we don't actually
1974 * have in the MPDU by themselves, but that we duplicate into
1975 * all the different MSDUs inside the A-MSDU.
1976 */
1977 le16_add_cpu(&tx_cmd->params.len, -snap_ip_tcp_hdrlen);
1978
1979 tso_start(skb, &tso);
1980
1981 while (total_len) {
1982 /* this is the data left for this subframe */
1983 unsigned int data_left =
1984 min_t(unsigned int, mss, total_len);
1985 unsigned int hdr_tb_len;
1986 dma_addr_t hdr_tb_phys;
1987 u8 *subf_hdrs_start = pos_hdr;
1988
1989 total_len -= data_left;
1990
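/*
* A-MSDU subframes must be aligned to 4 bytes: zero out the padding
* left over from the previous subframe, then compute how much padding
* the next one will need.
*/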
1991 memset(pos_hdr, 0, amsdu_pad);
1992 pos_hdr += amsdu_pad;
1993 amsdu_pad = (4 - (sizeof(struct ethhdr) + snap_ip_tcp_hdrlen +
1994 data_left)) & 0x3;
1995 ether_addr_copy(pos_hdr, ieee80211_get_DA(hdr));
1996 pos_hdr += ETH_ALEN;
1997 ether_addr_copy(pos_hdr, ieee80211_get_SA(hdr));
1998 pos_hdr += ETH_ALEN;
1999
2000 length = snap_ip_tcp_hdrlen + data_left;
2001 *((__be16 *)pos_hdr) = cpu_to_be16(length);
2002 pos_hdr += sizeof(length);
2003
2004 /*
2005 * This also copies the SNAP header, which will be considered
2006 * part of the MAC header.
2007 */
2008 tso_build_hdr(skb, pos_hdr, &tso, data_left, !total_len);
2009
2010 pos_hdr += snap_ip_tcp_hdrlen;
2011
2012 hdr_tb_len = pos_hdr - start_hdr;
2013 hdr_tb_phys = iwl_pcie_get_tso_page_phys(start_hdr);
2014
2015 iwl_pcie_txq_build_tfd(trans, txq, hdr_tb_phys,
2016 hdr_tb_len, false);
2017 trace_iwlwifi_dev_tx_tb(trans->dev, skb, start_hdr,
2018 hdr_tb_phys, hdr_tb_len);
2019 /* add this subframe's headers' length to the tx_cmd */
2020 le16_add_cpu(&tx_cmd->params.len, pos_hdr - subf_hdrs_start);
2021
2022 /* prepare the start_hdr for the next subframe */
2023 start_hdr = pos_hdr;
2024
2025 /* put the payload */
2026 while (data_left) {
2027 unsigned int size = min_t(unsigned int, tso.size,
2028 data_left);
2029 dma_addr_t tb_phys;
2030
2031 tb_phys = iwl_pcie_get_sgt_tb_phys(sgt, data_offset, size);
2032 /* Not a real mapping error, use direct comparison */
2033 if (unlikely(tb_phys == DMA_MAPPING_ERROR))
2034 return -EINVAL;
2035
2036 iwl_pcie_txq_build_tfd(trans, txq, tb_phys,
2037 size, false);
2038 trace_iwlwifi_dev_tx_tb(trans->dev, skb, tso.data,
2039 tb_phys, size);
2040
2041 data_left -= size;
2042 data_offset += size;
2043 tso_build_data(skb, &tso, size);
2044 }
2045 }
2046
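/*
* The TSO page was mapped with DMA_ATTR_SKIP_CPU_SYNC, so sync the
* headers built above before the device reads them.
*/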
2047 dma_sync_single_for_device(trans->dev, start_hdr_phys, hdr_room,
2048 DMA_TO_DEVICE);
2049
2050 /* re-add the WiFi header and IV */
2051 skb_push(skb, hdr_len + iv_len);
2052
2053 return 0;
2054 }
2055 #else /* CONFIG_INET */
2056 static int iwl_fill_data_tbs_amsdu(struct iwl_trans *trans, struct sk_buff *skb,
2057 struct iwl_txq *txq, u8 hdr_len,
2058 struct iwl_cmd_meta *out_meta,
2059 struct iwl_device_tx_cmd *dev_cmd,
2060 u16 tb1_len)
2061 {
2062 /* No A-MSDU without CONFIG_INET */
2063 WARN_ON(1);
2064
2065 return -1;
2066 }
2067 #endif /* CONFIG_INET */
2068
2069 #define IWL_TX_CRC_SIZE 4
2070 #define IWL_TX_DELIMITER_SIZE 4
2071
2072 /*
2073 * iwl_txq_gen1_update_byte_cnt_tbl - Set up entry in Tx byte-count array
2074 */
2075 static void iwl_txq_gen1_update_byte_cnt_tbl(struct iwl_trans *trans,
2076 struct iwl_txq *txq, u16 byte_cnt,
2077 int num_tbs)
2078 {
2079 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
2080 struct iwl_bc_tbl_entry *scd_bc_tbl;
2081 int write_ptr = txq->write_ptr;
2082 int txq_id = txq->id;
2083 u8 sec_ctl = 0;
2084 u16 len = byte_cnt + IWL_TX_CRC_SIZE + IWL_TX_DELIMITER_SIZE;
2085 __le16 bc_ent;
2086 struct iwl_device_tx_cmd *dev_cmd = txq->entries[txq->write_ptr].cmd;
2087 struct iwl_tx_cmd_v6 *tx_cmd = (void *)dev_cmd->payload;
2088 u8 sta_id = tx_cmd->params.sta_id;
2089
2090 scd_bc_tbl = trans_pcie->txqs.scd_bc_tbls.addr;
2091
2092 sec_ctl = tx_cmd->params.sec_ctl;
2093
2094 switch (sec_ctl & TX_CMD_SEC_MSK) {
2095 case TX_CMD_SEC_CCM:
2096 len += IEEE80211_CCMP_MIC_LEN;
2097 break;
2098 case TX_CMD_SEC_TKIP:
2099 len += IEEE80211_TKIP_ICV_LEN;
2100 break;
2101 case TX_CMD_SEC_WEP:
2102 len += IEEE80211_WEP_IV_LEN + IEEE80211_WEP_ICV_LEN;
2103 break;
2104 }
2105
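/*
* On the 7000 up to (but not including) AX210 families the byte-count
* table entry is expressed in dwords, so round up and convert.
*/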
2106 if (trans->mac_cfg->device_family >= IWL_DEVICE_FAMILY_7000 &&
2107 trans->mac_cfg->device_family < IWL_DEVICE_FAMILY_AX210)
2108 len = DIV_ROUND_UP(len, 4);
2109
2110 if (WARN_ON(len > 0xFFF || write_ptr >= TFD_QUEUE_SIZE_MAX))
2111 return;
2112
2113 bc_ent = cpu_to_le16(len | (sta_id << 12));
2114
2115 scd_bc_tbl[txq_id * TFD_QUEUE_BC_SIZE + write_ptr].tfd_offset = bc_ent;
2116
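/*
* The first TFD_QUEUE_SIZE_BC_DUP entries are mirrored past the end of
* the table so that reads running beyond the ring wrap still see a
* valid byte count.
*/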
2117 if (write_ptr < TFD_QUEUE_SIZE_BC_DUP)
2118 scd_bc_tbl[txq_id * TFD_QUEUE_BC_SIZE + TFD_QUEUE_SIZE_MAX + write_ptr].tfd_offset =
2119 bc_ent;
2120 }
2121
2122 int iwl_trans_pcie_tx(struct iwl_trans *trans, struct sk_buff *skb,
2123 struct iwl_device_tx_cmd *dev_cmd, int txq_id)
2124 {
2125 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
2126 struct ieee80211_hdr *hdr;
2127 struct iwl_tx_cmd_v6 *tx_cmd = (struct iwl_tx_cmd_v6 *)dev_cmd->payload;
2128 struct iwl_cmd_meta *out_meta;
2129 struct iwl_txq *txq;
2130 dma_addr_t tb0_phys, tb1_phys, scratch_phys;
2131 void *tb1_addr;
2132 void *tfd;
2133 u16 len, tb1_len;
2134 bool wait_write_ptr;
2135 __le16 fc;
2136 u8 hdr_len;
2137 u16 wifi_seq;
2138 bool amsdu;
2139
2140 txq = trans_pcie->txqs.txq[txq_id];
2141
2142 if (WARN_ONCE(!test_bit(txq_id, trans_pcie->txqs.queue_used),
2143 "TX on unused queue %d\n", txq_id))
2144 return -EINVAL;
2145
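/*
* If the SKB carries more fragments than we can map into the TFD,
* linearize it so the data fits into the available TBs.
*/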
2146 if (skb_is_nonlinear(skb) &&
2147 skb_shinfo(skb)->nr_frags > IWL_TRANS_PCIE_MAX_FRAGS(trans_pcie) &&
2148 __skb_linearize(skb))
2149 return -ENOMEM;
2150
2151 /* mac80211 always puts the full header into the SKB's head,
2152 * so there's no need to check if it's readable there
2153 */
2154 hdr = (struct ieee80211_hdr *)skb->data;
2155 fc = hdr->frame_control;
2156 hdr_len = ieee80211_hdrlen(fc);
2157
2158 spin_lock(&txq->lock);
2159
2160 if (iwl_txq_space(trans, txq) < txq->high_mark) {
2161 iwl_txq_stop(trans, txq);
2162
2163 /* don't put the packet on the ring, if there is no room */
2164 if (unlikely(iwl_txq_space(trans, txq) < 3)) {
2165 struct iwl_device_tx_cmd **dev_cmd_ptr;
2166
2167 dev_cmd_ptr = (void *)((u8 *)skb->cb +
2168 trans->conf.cb_data_offs +
2169 sizeof(void *));
2170
2171 *dev_cmd_ptr = dev_cmd;
2172 __skb_queue_tail(&txq->overflow_q, skb);
2173
2174 spin_unlock(&txq->lock);
2175 return 0;
2176 }
2177 }
2178
2179 /* In AGG mode, the index in the ring must correspond to the WiFi
2180 * sequence number. This is a HW requirement to help the SCD parse
2181 * the BA.
2182 * Check here that the packets are in the right place on the ring.
2183 */
2184 wifi_seq = IEEE80211_SEQ_TO_SN(le16_to_cpu(hdr->seq_ctrl));
2185 WARN_ONCE(txq->ampdu &&
2186 (wifi_seq & 0xff) != txq->write_ptr,
2187 "Q: %d WiFi Seq %d tfdNum %d",
2188 txq_id, wifi_seq, txq->write_ptr);
2189
2190 /* Set up driver data for this TFD */
2191 txq->entries[txq->write_ptr].skb = skb;
2192 txq->entries[txq->write_ptr].cmd = dev_cmd;
2193
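/*
* The command header sequence encodes the queue id and the ring
* index, so the TX response can be matched back to this entry.
*/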
2194 dev_cmd->hdr.sequence =
2195 cpu_to_le16((u16)(QUEUE_TO_SEQ(txq_id) |
2196 INDEX_TO_SEQ(txq->write_ptr)));
2197
2198 tb0_phys = iwl_txq_get_first_tb_dma(txq, txq->write_ptr);
2199 scratch_phys = tb0_phys + sizeof(struct iwl_cmd_header) +
2200 offsetof(struct iwl_tx_cmd_v6_params, scratch);
2201
2202 tx_cmd->params.dram_lsb_ptr = cpu_to_le32(scratch_phys);
2203 tx_cmd->params.dram_msb_ptr = iwl_get_dma_hi_addr(scratch_phys);
2204
2205 /* Set up first empty entry in queue's array of Tx/cmd buffers */
2206 out_meta = &txq->entries[txq->write_ptr].meta;
2207 memset(out_meta, 0, sizeof(*out_meta));
2208
2209 /*
2210 * The second TB (tb1) points to the remainder of the TX command
2211 * and the 802.11 header - dword aligned size
2212 * (This calculation modifies the TX command, so do it before the
2213 * setup of the first TB)
2214 */
2215 len = sizeof(struct iwl_tx_cmd_v6) + sizeof(struct iwl_cmd_header) +
2216 hdr_len - IWL_FIRST_TB_SIZE;
2217 /* do not align A-MSDU to dword as the subframe header aligns it */
2218 amsdu = ieee80211_is_data_qos(fc) &&
2219 (*ieee80211_get_qos_ctl(hdr) &
2220 IEEE80211_QOS_CTL_A_MSDU_PRESENT);
2221 if (!amsdu) {
2222 tb1_len = ALIGN(len, 4);
2223 /* Tell NIC about any 2-byte padding after MAC header */
2224 if (tb1_len != len)
2225 tx_cmd->params.tx_flags |= cpu_to_le32(TX_CMD_FLG_MH_PAD);
2226 } else {
2227 tb1_len = len;
2228 }
2229
2230 /*
2231 * The first TB points to bi-directional DMA data; we'll
2232 * memcpy the data into it later.
2233 */
2234 iwl_pcie_txq_build_tfd(trans, txq, tb0_phys,
2235 IWL_FIRST_TB_SIZE, true);
2236
2237 /* there must be data left over for TB1 or this code must be changed */
2238 BUILD_BUG_ON(sizeof(struct iwl_tx_cmd_v6) < IWL_FIRST_TB_SIZE);
2239 BUILD_BUG_ON(sizeof(struct iwl_cmd_header) +
2240 offsetofend(struct iwl_tx_cmd_v6_params, scratch) >
2241 IWL_FIRST_TB_SIZE);
2242
2243 /* map the data for TB1 */
2244 tb1_addr = ((u8 *)&dev_cmd->hdr) + IWL_FIRST_TB_SIZE;
2245 tb1_phys = dma_map_single(trans->dev, tb1_addr, tb1_len, DMA_TO_DEVICE);
2246 if (unlikely(dma_mapping_error(trans->dev, tb1_phys)))
2247 goto out_err;
2248 iwl_pcie_txq_build_tfd(trans, txq, tb1_phys, tb1_len, false);
2249
2250 trace_iwlwifi_dev_tx(trans->dev, skb,
2251 iwl_txq_get_tfd(trans, txq, txq->write_ptr),
2252 trans_pcie->txqs.tfd.size,
2253 &dev_cmd->hdr, IWL_FIRST_TB_SIZE + tb1_len,
2254 hdr_len);
2255
2256 /*
2257 * If gso_size wasn't set, don't give the frame "amsdu treatment"
2258 * (adding subframes, etc.).
2259 * This can happen in some testing flows when the amsdu was already
2260 * pre-built, and we just need to send the resulting skb.
2261 */
2262 if (amsdu && skb_shinfo(skb)->gso_size) {
2263 if (unlikely(iwl_fill_data_tbs_amsdu(trans, skb, txq, hdr_len,
2264 out_meta, dev_cmd,
2265 tb1_len)))
2266 goto out_err;
2267 } else {
2268 struct sk_buff *frag;
2269
2270 if (unlikely(iwl_fill_data_tbs(trans, skb, txq, hdr_len,
2271 out_meta)))
2272 goto out_err;
2273
2274 skb_walk_frags(skb, frag) {
2275 if (unlikely(iwl_fill_data_tbs(trans, frag, txq, 0,
2276 out_meta)))
2277 goto out_err;
2278 }
2279 }
2280
2281 /* building the A-MSDU might have changed this data, so memcpy it now */
2282 memcpy(&txq->first_tb_bufs[txq->write_ptr], dev_cmd, IWL_FIRST_TB_SIZE);
2283
2284 tfd = iwl_txq_get_tfd(trans, txq, txq->write_ptr);
2285 /* Set up entry for this TFD in Tx byte-count array */
2286 iwl_txq_gen1_update_byte_cnt_tbl(trans, txq, le16_to_cpu(tx_cmd->params.len),
2287 iwl_txq_gen1_tfd_get_num_tbs(tfd));
2288
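/*
* For fragmented frames, defer telling the device about the new
* write pointer until the last fragment has been queued.
*/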
2289 wait_write_ptr = ieee80211_has_morefrags(fc);
2290
2291 /* start timer if queue currently empty */
2292 if (txq->read_ptr == txq->write_ptr && txq->wd_timeout) {
2293 /*
2294 * If the TXQ is active, then arm the timer; if not,
2295 * store the timeout in the remainder so that the timer
2296 * will be armed with the right value when the station
2297 * wakes up.
2298 */
2299 if (!txq->frozen)
2300 mod_timer(&txq->stuck_timer,
2301 jiffies + txq->wd_timeout);
2302 else
2303 txq->frozen_expiry_remainder = txq->wd_timeout;
2304 }
2305
2306 /* Tell device the write index *just past* this latest filled TFD */
2307 txq->write_ptr = iwl_txq_inc_wrap(trans, txq->write_ptr);
2308 if (!wait_write_ptr)
2309 iwl_pcie_txq_inc_wr_ptr(trans, txq);
2310
2311 /*
2312 * At this point the frame is "transmitted" successfully
2313 * and we will get a TX status notification eventually.
2314 */
2315 spin_unlock(&txq->lock);
2316 return 0;
2317 out_err:
2318 iwl_txq_gen1_tfd_unmap(trans, out_meta, txq, txq->write_ptr);
2319 spin_unlock(&txq->lock);
2320 return -1;
2321 }
2322
2323 static void iwl_txq_gen1_inval_byte_cnt_tbl(struct iwl_trans *trans,
2324 struct iwl_txq *txq,
2325 int read_ptr)
2326 {
2327 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
2328 struct iwl_bc_tbl_entry *scd_bc_tbl = trans_pcie->txqs.scd_bc_tbls.addr;
2329 int txq_id = txq->id;
2330 u8 sta_id = 0;
2331 __le16 bc_ent;
2332 struct iwl_device_tx_cmd *dev_cmd = txq->entries[read_ptr].cmd;
2333 struct iwl_tx_cmd_v6 *tx_cmd = (void *)dev_cmd->payload;
2334
2335 WARN_ON(read_ptr >= TFD_QUEUE_SIZE_MAX);
2336
2337 if (txq_id != trans->conf.cmd_queue)
2338 sta_id = tx_cmd->params.sta_id;
2339
2340 bc_ent = cpu_to_le16(1 | (sta_id << 12));
2341
2342 scd_bc_tbl[txq_id * TFD_QUEUE_BC_SIZE + read_ptr].tfd_offset = bc_ent;
2343
2344 if (read_ptr < TFD_QUEUE_SIZE_BC_DUP)
2345 scd_bc_tbl[txq_id * TFD_QUEUE_BC_SIZE + TFD_QUEUE_SIZE_MAX + read_ptr].tfd_offset =
2346 bc_ent;
2347 }
2348
2349 /* Frees buffers until index _not_ inclusive */
2350 void iwl_pcie_reclaim(struct iwl_trans *trans, int txq_id, int ssn,
2351 struct sk_buff_head *skbs, bool is_flush)
2352 {
2353 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
2354 struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
2355 int tfd_num, read_ptr, last_to_free;
2356 int txq_read_ptr, txq_write_ptr;
2357
2358 /* This function is not meant to release the cmd queue */
2359 if (WARN_ON(txq_id == trans->conf.cmd_queue))
2360 return;
2361
2362 if (WARN_ON(!txq))
2363 return;
2364
2365 tfd_num = iwl_txq_get_cmd_index(txq, ssn);
2366
2367 spin_lock_bh(&txq->reclaim_lock);
2368
2369 spin_lock(&txq->lock);
2370 txq_read_ptr = txq->read_ptr;
2371 txq_write_ptr = txq->write_ptr;
2372 spin_unlock(&txq->lock);
2373
2374 /* There is nothing to do if we are flushing an empty queue */
2375 if (is_flush && txq_write_ptr == txq_read_ptr)
2376 goto out;
2377
2378 read_ptr = iwl_txq_get_cmd_index(txq, txq_read_ptr);
2379
2380 if (!test_bit(txq_id, trans_pcie->txqs.queue_used)) {
2381 IWL_DEBUG_TX_QUEUES(trans, "Q %d inactive - ignoring idx %d\n",
2382 txq_id, ssn);
2383 goto out;
2384 }
2385
2386 if (read_ptr == tfd_num)
2387 goto out;
2388
2389 IWL_DEBUG_TX_REPLY(trans, "[Q %d] %d (%d) -> %d (%d)\n",
2390 txq_id, read_ptr, txq_read_ptr, tfd_num, ssn);
2391
2392 /* Since we free until index _not_ inclusive, the one before index is
2393 * the last we will free; that entry must be a used one.
2394 */
2395 last_to_free = iwl_txq_dec_wrap(trans, tfd_num);
2396
2397 if (!iwl_txq_used(txq, last_to_free, txq_read_ptr, txq_write_ptr)) {
2398 IWL_ERR(trans,
2399 "%s: Read index for txq id (%d), last_to_free %d is out of range [0-%d] %d %d.\n",
2400 __func__, txq_id, last_to_free,
2401 trans->mac_cfg->base->max_tfd_queue_size,
2402 txq_write_ptr, txq_read_ptr);
2403
2404 iwl_op_mode_time_point(trans->op_mode,
2405 IWL_FW_INI_TIME_POINT_FAKE_TX,
2406 NULL);
2407 goto out;
2408 }
2409
2410 if (WARN_ON(!skb_queue_empty(skbs)))
2411 goto out;
2412
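/*
* Walk the ring from the current read pointer up to (but not
* including) tfd_num: free TSO pages, hand the SKBs back to the
* caller and unmap/free each TFD.
*/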
2413 for (;
2414 read_ptr != tfd_num;
2415 txq_read_ptr = iwl_txq_inc_wrap(trans, txq_read_ptr),
2416 read_ptr = iwl_txq_get_cmd_index(txq, txq_read_ptr)) {
2417 struct iwl_cmd_meta *cmd_meta = &txq->entries[read_ptr].meta;
2418 struct sk_buff *skb = txq->entries[read_ptr].skb;
2419
2420 if (WARN_ONCE(!skb, "no SKB at %d (%d) on queue %d\n",
2421 read_ptr, txq_read_ptr, txq_id))
2422 continue;
2423
2424 iwl_pcie_free_tso_pages(trans, skb, cmd_meta);
2425
2426 __skb_queue_tail(skbs, skb);
2427
2428 txq->entries[read_ptr].skb = NULL;
2429
2430 if (!trans->mac_cfg->gen2)
2431 iwl_txq_gen1_inval_byte_cnt_tbl(trans, txq,
2432 txq_read_ptr);
2433
2434 iwl_txq_free_tfd(trans, txq, txq_read_ptr);
2435 }
2436
2437 spin_lock(&txq->lock);
2438 txq->read_ptr = txq_read_ptr;
2439
2440 iwl_txq_progress(txq);
2441
2442 if (iwl_txq_space(trans, txq) > txq->low_mark &&
2443 test_bit(txq_id, trans_pcie->txqs.queue_stopped)) {
2444 struct sk_buff_head overflow_skbs;
2445 struct sk_buff *skb;
2446
2447 __skb_queue_head_init(&overflow_skbs);
2448 skb_queue_splice_init(&txq->overflow_q,
2449 is_flush ? skbs : &overflow_skbs);
2450
2451 /*
2452 * We are going to transmit from the overflow queue.
2453 * Remember this state so that wait_for_txq_empty will know we
2454 * are adding more packets to the TFD queue. It cannot rely on
2455 * the state of &txq->overflow_q, as we just emptied it, but
2456 * haven't TXed the content yet.
2457 */
2458 txq->overflow_tx = true;
2459
2460 /*
2461 * This is tricky: we are in the reclaim path and are holding
2462 * reclaim_lock, so no one will try to access the txq data
2463 * from that path. The queue is stopped, so nothing is being queued either.
2464 * Bottom line, we can unlock and re-lock later.
2465 */
2466 spin_unlock(&txq->lock);
2467
2468 while ((skb = __skb_dequeue(&overflow_skbs))) {
2469 struct iwl_device_tx_cmd *dev_cmd_ptr;
2470
2471 dev_cmd_ptr = *(void **)((u8 *)skb->cb +
2472 trans->conf.cb_data_offs +
2473 sizeof(void *));
2474
2475 /*
2476 * Note that we can very well be overflowing again.
2477 * In that case, iwl_txq_space will be small again
2478 * and we won't wake mac80211's queue.
2479 */
2480 iwl_trans_tx(trans, skb, dev_cmd_ptr, txq_id);
2481 }
2482
2483 if (iwl_txq_space(trans, txq) > txq->low_mark)
2484 iwl_trans_pcie_wake_queue(trans, txq);
2485
2486 spin_lock(&txq->lock);
2487 txq->overflow_tx = false;
2488 }
2489
2490 spin_unlock(&txq->lock);
2491 out:
2492 spin_unlock_bh(&txq->reclaim_lock);
2493 }
2494
2495 /* Set wr_ptr of specific device and txq */
2496 void iwl_pcie_set_q_ptrs(struct iwl_trans *trans, int txq_id, int ptr)
2497 {
2498 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
2499 struct iwl_txq *txq = trans_pcie->txqs.txq[txq_id];
2500
2501 spin_lock_bh(&txq->lock);
2502
2503 txq->write_ptr = ptr;
2504 txq->read_ptr = txq->write_ptr;
2505
2506 spin_unlock_bh(&txq->lock);
2507 }
2508
2509 void iwl_pcie_freeze_txq_timer(struct iwl_trans *trans,
2510 unsigned long txqs, bool freeze)
2511 {
2512 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
2513 int queue;
2514
2515 for_each_set_bit(queue, &txqs, BITS_PER_LONG) {
2516 struct iwl_txq *txq = trans_pcie->txqs.txq[queue];
2517 unsigned long now;
2518
2519 spin_lock_bh(&txq->lock);
2520
2521 now = jiffies;
2522
2523 if (txq->frozen == freeze)
2524 goto next_queue;
2525
2526 IWL_DEBUG_TX_QUEUES(trans, "%s TXQ %d\n",
2527 freeze ? "Freezing" : "Waking", queue);
2528
2529 txq->frozen = freeze;
2530
2531 if (txq->read_ptr == txq->write_ptr)
2532 goto next_queue;
2533
2534 if (freeze) {
2535 if (unlikely(time_after(now,
2536 txq->stuck_timer.expires))) {
2537 /*
2538 * The timer should have fired, maybe it is
2539 * spinning right now on the lock.
2540 */
2541 goto next_queue;
2542 }
2543 /* remember how long until the timer fires */
2544 txq->frozen_expiry_remainder =
2545 txq->stuck_timer.expires - now;
2546 timer_delete(&txq->stuck_timer);
2547 goto next_queue;
2548 }
2549
2550 /*
2551 * Wake a non-empty queue -> arm the timer with the
2552 * remainder of the timeout from before it froze
2553 */
2554 mod_timer(&txq->stuck_timer,
2555 now + txq->frozen_expiry_remainder);
2556
2557 next_queue:
2558 spin_unlock_bh(&txq->lock);
2559 }
2560 }
2561
2562 #define HOST_COMPLETE_TIMEOUT (2 * HZ)
2563
2564 static int iwl_trans_pcie_send_hcmd_sync(struct iwl_trans *trans,
2565 struct iwl_host_cmd *cmd,
2566 const char *cmd_str)
2567 {
2568 struct iwl_trans_pcie *trans_pcie = IWL_TRANS_GET_PCIE_TRANS(trans);
2569 struct iwl_txq *txq = trans_pcie->txqs.txq[trans->conf.cmd_queue];
2570 int cmd_idx;
2571 int ret;
2572
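/*
* Sync flow: mark STATUS_SYNC_HCMD_ACTIVE, enqueue the command and
* sleep on wait_command_queue until the completion handler clears
* the bit, or time out after HOST_COMPLETE_TIMEOUT.
*/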
2573 IWL_DEBUG_INFO(trans, "Attempting to send sync command %s\n", cmd_str);
2574
2575 if (WARN(test_and_set_bit(STATUS_SYNC_HCMD_ACTIVE,
2576 &trans->status),
2577 "Command %s: a command is already active!\n", cmd_str))
2578 return -EIO;
2579
2580 IWL_DEBUG_INFO(trans, "Setting HCMD_ACTIVE for command %s\n", cmd_str);
2581
2582 if (trans->mac_cfg->gen2)
2583 cmd_idx = iwl_pcie_gen2_enqueue_hcmd(trans, cmd);
2584 else
2585 cmd_idx = iwl_pcie_enqueue_hcmd(trans, cmd);
2586
2587 if (cmd_idx < 0) {
2588 ret = cmd_idx;
2589 clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
2590 IWL_ERR(trans, "Error sending %s: enqueue_hcmd failed: %d\n",
2591 cmd_str, ret);
2592 return ret;
2593 }
2594
2595 ret = wait_event_timeout(trans_pcie->wait_command_queue,
2596 !test_bit(STATUS_SYNC_HCMD_ACTIVE,
2597 &trans->status),
2598 HOST_COMPLETE_TIMEOUT);
2599 if (!ret) {
2600 IWL_ERR(trans, "Error sending %s: time out after %dms.\n",
2601 cmd_str, jiffies_to_msecs(HOST_COMPLETE_TIMEOUT));
2602
2603 IWL_ERR(trans, "Current CMD queue read_ptr %d write_ptr %d\n",
2604 txq->read_ptr, txq->write_ptr);
2605
2606 clear_bit(STATUS_SYNC_HCMD_ACTIVE, &trans->status);
2607 IWL_DEBUG_INFO(trans, "Clearing HCMD_ACTIVE for command %s\n",
2608 cmd_str);
2609 ret = -ETIMEDOUT;
2610
2611 iwl_trans_pcie_sync_nmi(trans);
2612 goto cancel;
2613 }
2614
2615 if (test_bit(STATUS_FW_ERROR, &trans->status)) {
2616 if (!test_and_clear_bit(STATUS_SUPPRESS_CMD_ERROR_ONCE,
2617 &trans->status)) {
2618 IWL_ERR(trans, "FW error in SYNC CMD %s\n", cmd_str);
2619 dump_stack();
2620 }
2621 ret = -EIO;
2622 goto cancel;
2623 }
2624
2625 if (!(cmd->flags & CMD_SEND_IN_RFKILL) &&
2626 test_bit(STATUS_RFKILL_OPMODE, &trans->status)) {
2627 IWL_DEBUG_RF_KILL(trans, "RFKILL in SYNC CMD... no rsp\n");
2628 ret = -ERFKILL;
2629 goto cancel;
2630 }
2631
2632 if ((cmd->flags & CMD_WANT_SKB) && !cmd->resp_pkt) {
2633 IWL_ERR(trans, "Error: Response NULL in '%s'\n", cmd_str);
2634 ret = -EIO;
2635 goto cancel;
2636 }
2637
2638 return 0;
2639
2640 cancel:
2641 if (cmd->flags & CMD_WANT_SKB) {
2642 /*
2643 * Cancel the CMD_WANT_SKB flag for the cmd in the
2644 * TX cmd queue. Otherwise, if the response comes
2645 * in later, it could write to an invalid
2646 * address (cmd->meta.source).
2647 */
2648 txq->entries[cmd_idx].meta.flags &= ~CMD_WANT_SKB;
2649 }
2650
2651 if (cmd->resp_pkt) {
2652 iwl_free_resp(cmd);
2653 cmd->resp_pkt = NULL;
2654 }
2655
2656 return ret;
2657 }
2658
2659 int iwl_trans_pcie_send_hcmd(struct iwl_trans *trans,
2660 struct iwl_host_cmd *cmd)
2661 {
2662 const char *cmd_str = iwl_get_cmd_string(trans, cmd->id);
2663
2664 /* Make sure the NIC is still alive in the bus */
2665 if (test_bit(STATUS_TRANS_DEAD, &trans->status))
2666 return -ENODEV;
2667
2668 if (!(cmd->flags & CMD_SEND_IN_RFKILL) &&
2669 test_bit(STATUS_RFKILL_OPMODE, &trans->status)) {
2670 IWL_DEBUG_RF_KILL(trans, "Dropping CMD 0x%x: RF KILL\n",
2671 cmd->id);
2672 return -ERFKILL;
2673 }
2674
2675 if (cmd->flags & CMD_ASYNC) {
2676 int ret;
2677
2678 IWL_DEBUG_INFO(trans, "Sending async command %s\n", cmd_str);
2679
2680 /* An asynchronous command cannot expect an SKB to be set. */
2681 if (WARN_ON(cmd->flags & CMD_WANT_SKB))
2682 return -EINVAL;
2683
2684 if (trans->mac_cfg->gen2)
2685 ret = iwl_pcie_gen2_enqueue_hcmd(trans, cmd);
2686 else
2687 ret = iwl_pcie_enqueue_hcmd(trans, cmd);
2688
2689 if (ret < 0) {
2690 IWL_ERR(trans,
2691 "Error sending %s: enqueue_hcmd failed: %d\n",
2692 iwl_get_cmd_string(trans, cmd->id), ret);
2693 return ret;
2694 }
2695 return 0;
2696 }
2697
2698 return iwl_trans_pcie_send_hcmd_sync(trans, cmd, cmd_str);
2699 }
2700