xref: /freebsd/sys/dev/ath/if_ath_tx_edma.c (revision 273c26a3c3bea87a241d6879abd4f991db180bf0)
1 /*-
2  * Copyright (c) 2012 Adrian Chadd <adrian@FreeBSD.org>
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer,
10  *    without modification.
11  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
12  *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
13  *    redistribution must be conditioned upon including a substantially
14  *    similar Disclaimer requirement for further binary redistribution.
15  *
16  * NO WARRANTY
17  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
18  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
19  * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
20  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
21  * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
22  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
25  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
27  * THE POSSIBILITY OF SUCH DAMAGES.
28  */
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 /*
34  * Driver for the Atheros Wireless LAN controller.
35  *
36  * This software is derived from work of Atsushi Onoe; his contribution
37  * is greatly appreciated.
38  */
39 
40 #include "opt_inet.h"
41 #include "opt_ath.h"
42 /*
43  * This is needed for register operations which are performed
44  * by the driver - eg, calls to ath_hal_gettsf32().
45  *
46  * It's also required for any AH_DEBUG checks in here, eg the
47  * module dependencies.
48  */
49 #include "opt_ah.h"
50 #include "opt_wlan.h"
51 
52 #include <sys/param.h>
53 #include <sys/systm.h>
54 #include <sys/sysctl.h>
55 #include <sys/mbuf.h>
56 #include <sys/malloc.h>
57 #include <sys/lock.h>
58 #include <sys/mutex.h>
59 #include <sys/kernel.h>
60 #include <sys/socket.h>
61 #include <sys/sockio.h>
62 #include <sys/errno.h>
63 #include <sys/callout.h>
64 #include <sys/bus.h>
65 #include <sys/endian.h>
66 #include <sys/kthread.h>
67 #include <sys/taskqueue.h>
68 #include <sys/priv.h>
69 #include <sys/module.h>
70 #include <sys/ktr.h>
71 #include <sys/smp.h>	/* for mp_ncpus */
72 
73 #include <machine/bus.h>
74 
75 #include <net/if.h>
76 #include <net/if_var.h>
77 #include <net/if_dl.h>
78 #include <net/if_media.h>
79 #include <net/if_types.h>
80 #include <net/if_arp.h>
81 #include <net/ethernet.h>
82 #include <net/if_llc.h>
83 
84 #include <net80211/ieee80211_var.h>
85 #include <net80211/ieee80211_regdomain.h>
86 #ifdef IEEE80211_SUPPORT_SUPERG
87 #include <net80211/ieee80211_superg.h>
88 #endif
89 #ifdef IEEE80211_SUPPORT_TDMA
90 #include <net80211/ieee80211_tdma.h>
91 #endif
92 
93 #include <net/bpf.h>
94 
95 #ifdef INET
96 #include <netinet/in.h>
97 #include <netinet/if_ether.h>
98 #endif
99 
100 #include <dev/ath/if_athvar.h>
101 #include <dev/ath/ath_hal/ah_devid.h>		/* XXX for softled */
102 #include <dev/ath/ath_hal/ah_diagcodes.h>
103 
104 #include <dev/ath/if_ath_debug.h>
105 #include <dev/ath/if_ath_misc.h>
106 #include <dev/ath/if_ath_tsf.h>
107 #include <dev/ath/if_ath_tx.h>
108 #include <dev/ath/if_ath_sysctl.h>
109 #include <dev/ath/if_ath_led.h>
110 #include <dev/ath/if_ath_keycache.h>
111 #include <dev/ath/if_ath_rx.h>
112 #include <dev/ath/if_ath_beacon.h>
113 #include <dev/ath/if_athdfs.h>
114 #include <dev/ath/if_ath_descdma.h>
115 
116 #ifdef ATH_TX99_DIAG
117 #include <dev/ath/ath_tx99/ath_tx99.h>
118 #endif
119 
120 #include <dev/ath/if_ath_tx_edma.h>
121 
122 #ifdef	ATH_DEBUG_ALQ
123 #include <dev/ath/if_ath_alq.h>
124 #endif
125 
/*
 * Ring-index helpers: step _l by one and wrap it with a mask, so _sz
 * has to be a power of two for the wrap to be correct.
 *
 * Each macro expands to exactly one statement (do { } while (0)) so it
 * can be used as the body of an unbraced if/else.  Note that _l is
 * evaluated more than once; don't pass an expression with side effects.
 */
#define	INCR(_l, _sz)		do { (_l)++; (_l) &= ((_sz) - 1); } while (0)
#define	DECR(_l, _sz)		do { (_l)--; (_l) &= ((_sz) - 1); } while (0)

/*
 * Number of entries in the TX status (completion) descriptor ring that
 * ath_edma_dma_txsetup() allocates and hands to the HAL.
 *
 * XXX doesn't belong here, and should be tunable
 */
#define	ATH_TXSTATUS_RING_SIZE	512
136 
137 MALLOC_DECLARE(M_ATHDEV);
138 
139 static void ath_edma_tx_processq(struct ath_softc *sc, int dosched);
140 
#ifdef	ATH_DEBUG_ALQ
/*
 * Post a "TX FIFO push" record to the ALQ debug log.
 *
 * The queue number, the number of frames just pushed, the FIFO slot
 * depth and the total frame count are each converted to big-endian
 * 32-bit values before the record is posted.
 */
static void
ath_tx_alq_edma_push(struct ath_softc *sc, int txq, int nframes,
    int fifo_depth, int frame_cnt)
{
	struct if_ath_alq_tx_fifo_push rec;

	rec.frame_cnt = htobe32(frame_cnt);
	rec.fifo_depth = htobe32(fifo_depth);
	rec.nframes = htobe32(nframes);
	rec.txq = htobe32(txq);

	if_ath_alq_post(&sc->sc_alq, ATH_ALQ_TX_FIFO_PUSH, sizeof(rec),
	    (const char *)&rec);
}
#endif	/* ATH_DEBUG_ALQ */
158 
159 /*
160  * XXX TODO: push an aggregate as a single FIFO slot, even though
161  * it may not meet the TXOP for say, DBA-gated traffic in TDMA mode.
162  *
163  * The TX completion code handles a TX FIFO slot having multiple frames,
164  * aggregate or otherwise, but it may just make things easier to deal
165  * with.
166  *
167  * XXX TODO: track the number of aggregate subframes and put that in the
168  * push alq message.
169  */
170 static void
171 ath_tx_edma_push_staging_list(struct ath_softc *sc, struct ath_txq *txq,
172     int limit)
173 {
174 	struct ath_buf *bf, *bf_last;
175 	struct ath_buf *bfi, *bfp;
176 	int i, sqdepth;
177 	TAILQ_HEAD(axq_q_f_s, ath_buf)  sq;
178 
179 	ATH_TXQ_LOCK_ASSERT(txq);
180 
181 	/*
182 	 * Don't bother doing any work if it's full.
183 	 */
184 	if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH)
185 		return;
186 
187 	if (TAILQ_EMPTY(&txq->axq_q))
188 		return;
189 
190 	TAILQ_INIT(&sq);
191 
192 	/*
193 	 * First pass - walk sq, queue up to 'limit' entries,
194 	 * subtract them from the staging queue.
195 	 */
196 	sqdepth = 0;
197 	for (i = 0; i < limit; i++) {
198 		/* Grab the head entry */
199 		bf = ATH_TXQ_FIRST(txq);
200 		if (bf == NULL)
201 			break;
202 		ATH_TXQ_REMOVE(txq, bf, bf_list);
203 
204 		/* Queue it into our staging list */
205 		TAILQ_INSERT_TAIL(&sq, bf, bf_list);
206 
207 		/* Ensure the flags are cleared */
208 		bf->bf_flags &= ~(ATH_BUF_FIFOPTR | ATH_BUF_FIFOEND);
209 		sqdepth++;
210 	}
211 
212 	/*
213 	 * Ok, so now we have a staging list of up to 'limit'
214 	 * frames from the txq.  Now let's wrap that up
215 	 * into its own list and pass that to the hardware
216 	 * as one FIFO entry.
217 	 */
218 
219 	bf = TAILQ_FIRST(&sq);
220 	bf_last = TAILQ_LAST(&sq, axq_q_s);
221 
222 	/*
223 	 * Ok, so here's the gymnastics reqiured to make this
224 	 * all sensible.
225 	 */
226 
227 	/*
228 	 * Tag the first/last buffer appropriately.
229 	 */
230 	bf->bf_flags |= ATH_BUF_FIFOPTR;
231 	bf_last->bf_flags |= ATH_BUF_FIFOEND;
232 
233 	/*
234 	 * Walk the descriptor list and link them appropriately.
235 	 */
236 	bfp = NULL;
237 	TAILQ_FOREACH(bfi, &sq, bf_list) {
238 		if (bfp != NULL) {
239 			ath_hal_settxdesclink(sc->sc_ah, bfp->bf_lastds,
240 			    bfi->bf_daddr);
241 		}
242 		bfp = bfi;
243 	}
244 
245 	i = 0;
246 	TAILQ_FOREACH(bfi, &sq, bf_list) {
247 #ifdef	ATH_DEBUG
248 		if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
249 			ath_printtxbuf(sc, bfi, txq->axq_qnum, i, 0);
250 #endif/* ATH_DEBUG */
251 #ifdef	ATH_DEBUG_ALQ
252 		if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
253 			ath_tx_alq_post(sc, bfi);
254 #endif /* ATH_DEBUG_ALQ */
255 		i++;
256 	}
257 
258 	/*
259 	 * We now need to push this set of frames onto the tail
260 	 * of the FIFO queue.  We don't adjust the aggregate
261 	 * count, only the queue depth counter(s).
262 	 * We also need to blank the link pointer now.
263 	 */
264 
265 	TAILQ_CONCAT(&txq->fifo.axq_q, &sq, bf_list);
266 	/* Bump total queue tracking in FIFO queue */
267 	txq->fifo.axq_depth += sqdepth;
268 
269 	/* Bump FIFO queue */
270 	txq->axq_fifo_depth++;
271 	DPRINTF(sc, ATH_DEBUG_XMIT | ATH_DEBUG_TX_PROC,
272 	    "%s: queued %d packets; depth=%d, fifo depth=%d\n",
273 	    __func__, sqdepth, txq->fifo.axq_depth, txq->axq_fifo_depth);
274 
275 	/* Push the first entry into the hardware */
276 	ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
277 
278 	/* Push start on the DMA if it's not already started */
279 	ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
280 
281 #ifdef	ATH_DEBUG_ALQ
282 	ath_tx_alq_edma_push(sc, txq->axq_qnum, sqdepth,
283 	    txq->axq_fifo_depth,
284 	    txq->fifo.axq_depth);
285 #endif /* ATH_DEBUG_ALQ */
286 }
287 
288 #define	TX_BATCH_SIZE	32
289 
290 /*
291  * Push some frames into the TX FIFO if we have space.
292  */
293 static void
294 ath_edma_tx_fifo_fill(struct ath_softc *sc, struct ath_txq *txq)
295 {
296 
297 	ATH_TXQ_LOCK_ASSERT(txq);
298 
299 	DPRINTF(sc, ATH_DEBUG_TX_PROC,
300 	    "%s: Q%d: called; fifo.depth=%d, fifo depth=%d, depth=%d, aggr_depth=%d\n",
301 	    __func__,
302 	    txq->axq_qnum,
303 	    txq->fifo.axq_depth,
304 	    txq->axq_fifo_depth,
305 	    txq->axq_depth,
306 	    txq->axq_aggr_depth);
307 
308 	/*
309 	 * For now, push up to 32 frames per TX FIFO slot.
310 	 * If more are in the hardware queue then they'll
311 	 * get populated when we try to send another frame
312 	 * or complete a frame - so at most there'll be
313 	 * 32 non-AMPDU frames per node/TID anyway.
314 	 *
315 	 * Note that the hardware staging queue will limit
316 	 * how many frames in total we will have pushed into
317 	 * here.
318 	 *
319 	 * Later on, we'll want to push less frames into
320 	 * the TX FIFO since we don't want to necessarily
321 	 * fill tens or hundreds of milliseconds of potential
322 	 * frames.
323 	 *
324 	 * However, we need more frames right now because of
325 	 * how the MAC implements the frame scheduling policy.
326 	 * It only ungates a single FIFO entry at a time,
327 	 * and will run that until CHNTIME expires or the
328 	 * end of that FIFO entry descriptor list is reached.
329 	 * So for TDMA we suffer a big performance penalty -
330 	 * single TX FIFO entries mean the MAC only sends out
331 	 * one frame per DBA event, which turned out on average
332 	 * 6ms per TX frame.
333 	 *
334 	 * So, for aggregates it's okay - it'll push two at a
335 	 * time and this will just do them more efficiently.
336 	 * For non-aggregates it'll do 4 at a time, up to the
337 	 * non-aggr limit (non_aggr, which is 32.)  They should
338 	 * be time based rather than a hard count, but I also
339 	 * do need sleep.
340 	 */
341 
342 	/*
343 	 * Do some basic, basic batching to the hardware
344 	 * queue.
345 	 *
346 	 * If we have TX_BATCH_SIZE entries in the staging
347 	 * queue, then let's try to send them all in one hit.
348 	 *
349 	 * Ensure we don't push more than TX_BATCH_SIZE worth
350 	 * in, otherwise we end up draining 8 slots worth of
351 	 * 32 frames into the hardware queue and then we don't
352 	 * attempt to push more frames in until we empty the
353 	 * FIFO.
354 	 */
355 	if (txq->axq_depth >= TX_BATCH_SIZE / 2 &&
356 	    txq->fifo.axq_depth <= TX_BATCH_SIZE) {
357 		ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
358 	}
359 
360 	/*
361 	 * Aggregate check: if we have less than two FIFO slots
362 	 * busy and we have some aggregate frames, queue it.
363 	 *
364 	 * Now, ideally we'd just check to see if the scheduler
365 	 * has given us aggregate frames and push them into the FIFO
366 	 * as individual slots, as honestly we should just be pushing
367 	 * a single aggregate in as one FIFO slot.
368 	 *
369 	 * Let's do that next once I know this works.
370 	 */
371 	else if (txq->axq_aggr_depth > 0 && txq->axq_fifo_depth < 2)
372 		ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
373 
374 	/*
375 	 *
376 	 * If we have less, and the TXFIFO isn't empty, let's
377 	 * wait until we've finished sending the FIFO.
378 	 *
379 	 * If we have less, and the TXFIFO is empty, then
380 	 * send them.
381 	 */
382 	else if (txq->axq_fifo_depth == 0) {
383 		ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
384 	}
385 }
386 
387 /*
388  * Re-initialise the DMA FIFO with the current contents of
389  * said TXQ.
390  *
391  * This should only be called as part of the chip reset path, as it
392  * assumes the FIFO is currently empty.
393  */
394 static void
395 ath_edma_dma_restart(struct ath_softc *sc, struct ath_txq *txq)
396 {
397 	struct ath_buf *bf;
398 	int i = 0;
399 	int fifostart = 1;
400 	int old_fifo_depth;
401 
402 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: called\n",
403 	    __func__,
404 	    txq->axq_qnum);
405 
406 	ATH_TXQ_LOCK_ASSERT(txq);
407 
408 	/*
409 	 * Let's log if the tracked FIFO depth doesn't match
410 	 * what we actually push in.
411 	 */
412 	old_fifo_depth = txq->axq_fifo_depth;
413 	txq->axq_fifo_depth = 0;
414 
415 	/*
416 	 * Walk the FIFO staging list, looking for "head" entries.
417 	 * Since we may have a partially completed list of frames,
418 	 * we push the first frame we see into the FIFO and re-mark
419 	 * it as the head entry.  We then skip entries until we see
420 	 * FIFO end, at which point we get ready to push another
421 	 * entry into the FIFO.
422 	 */
423 	TAILQ_FOREACH(bf, &txq->fifo.axq_q, bf_list) {
424 		/*
425 		 * If we're looking for FIFOEND and we haven't found
426 		 * it, skip.
427 		 *
428 		 * If we're looking for FIFOEND and we've found it,
429 		 * reset for another descriptor.
430 		 */
431 #ifdef	ATH_DEBUG
432 		if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
433 			ath_printtxbuf(sc, bf, txq->axq_qnum, i, 0);
434 #endif/* ATH_DEBUG */
435 #ifdef	ATH_DEBUG_ALQ
436 		if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
437 			ath_tx_alq_post(sc, bf);
438 #endif /* ATH_DEBUG_ALQ */
439 
440 		if (fifostart == 0) {
441 			if (bf->bf_flags & ATH_BUF_FIFOEND)
442 				fifostart = 1;
443 			continue;
444 		}
445 
446 		/* Make sure we're not overflowing the FIFO! */
447 		if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH) {
448 			device_printf(sc->sc_dev,
449 			    "%s: Q%d: more frames in the queue; FIFO depth=%d?!\n",
450 			    __func__,
451 			    txq->axq_qnum,
452 			    txq->axq_fifo_depth);
453 		}
454 
455 #if 0
456 		DPRINTF(sc, ATH_DEBUG_RESET,
457 		    "%s: Q%d: depth=%d: pushing bf=%p; start=%d, end=%d\n",
458 		    __func__,
459 		    txq->axq_qnum,
460 		    txq->axq_fifo_depth,
461 		    bf,
462 		    !! (bf->bf_flags & ATH_BUF_FIFOPTR),
463 		    !! (bf->bf_flags & ATH_BUF_FIFOEND));
464 #endif
465 
466 		/*
467 		 * Set this to be the first buffer in the FIFO
468 		 * list - even if it's also the last buffer in
469 		 * a FIFO list!
470 		 */
471 		bf->bf_flags |= ATH_BUF_FIFOPTR;
472 
473 		/* Push it into the FIFO and bump the FIFO count */
474 		ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
475 		txq->axq_fifo_depth++;
476 
477 		/*
478 		 * If this isn't the last entry either, let's
479 		 * clear fifostart so we continue looking for
480 		 * said last entry.
481 		 */
482 		if (! (bf->bf_flags & ATH_BUF_FIFOEND))
483 			fifostart = 0;
484 		i++;
485 	}
486 
487 	/* Only bother starting the queue if there's something in it */
488 	if (i > 0)
489 		ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
490 
491 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: FIFO depth was %d, is %d\n",
492 	    __func__,
493 	    txq->axq_qnum,
494 	    old_fifo_depth,
495 	    txq->axq_fifo_depth);
496 
497 	/* And now, let's check! */
498 	if (txq->axq_fifo_depth != old_fifo_depth) {
499 		device_printf(sc->sc_dev,
500 		    "%s: Q%d: FIFO depth should be %d, is %d\n",
501 		    __func__,
502 		    txq->axq_qnum,
503 		    old_fifo_depth,
504 		    txq->axq_fifo_depth);
505 	}
506 }
507 
508 /*
509  * Hand off this frame to a hardware queue.
510  *
511  * Things are a bit hairy in the EDMA world.  The TX FIFO is only
512  * 8 entries deep, so we need to keep track of exactly what we've
513  * pushed into the FIFO and what's just sitting in the TX queue,
514  * waiting to go out.
515  *
516  * So this is split into two halves - frames get appended to the
517  * TXQ; then a scheduler is called to push some frames into the
518  * actual TX FIFO.
519  */
520 static void
521 ath_edma_xmit_handoff_hw(struct ath_softc *sc, struct ath_txq *txq,
522     struct ath_buf *bf)
523 {
524 
525 	ATH_TXQ_LOCK(txq);
526 
527 	KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
528 	    ("%s: busy status 0x%x", __func__, bf->bf_flags));
529 
530 	/*
531 	 * XXX TODO: write a hard-coded check to ensure that
532 	 * the queue id in the TX descriptor matches txq->axq_qnum.
533 	 */
534 
535 	/* Update aggr stats */
536 	if (bf->bf_state.bfs_aggr)
537 		txq->axq_aggr_depth++;
538 
539 	/* Push and update frame stats */
540 	ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
541 
542 	/*
543 	 * Finally, call the FIFO schedule routine to schedule some
544 	 * frames to the FIFO.
545 	 */
546 	ath_edma_tx_fifo_fill(sc, txq);
547 	ATH_TXQ_UNLOCK(txq);
548 }
549 
550 /*
551  * Hand off this frame to a multicast software queue.
552  *
553  * The EDMA TX CABQ will get a list of chained frames, chained
554  * together using the next pointer.  The single head of that
555  * particular queue is pushed to the hardware CABQ.
556  */
557 static void
558 ath_edma_xmit_handoff_mcast(struct ath_softc *sc, struct ath_txq *txq,
559     struct ath_buf *bf)
560 {
561 
562 	ATH_TX_LOCK_ASSERT(sc);
563 	KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
564 	    ("%s: busy status 0x%x", __func__, bf->bf_flags));
565 
566 	ATH_TXQ_LOCK(txq);
567 	/*
568 	 * XXX this is mostly duplicated in ath_tx_handoff_mcast().
569 	 */
570 	if (ATH_TXQ_LAST(txq, axq_q_s) != NULL) {
571 		struct ath_buf *bf_last = ATH_TXQ_LAST(txq, axq_q_s);
572 		struct ieee80211_frame *wh;
573 
574 		/* mark previous frame */
575 		wh = mtod(bf_last->bf_m, struct ieee80211_frame *);
576 		wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA;
577 
578 		/* re-sync buffer to memory */
579 		bus_dmamap_sync(sc->sc_dmat, bf_last->bf_dmamap,
580 		   BUS_DMASYNC_PREWRITE);
581 
582 		/* link descriptor */
583 		ath_hal_settxdesclink(sc->sc_ah,
584 		    bf_last->bf_lastds,
585 		    bf->bf_daddr);
586 	}
587 #ifdef	ATH_DEBUG_ALQ
588 	if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
589 		ath_tx_alq_post(sc, bf);
590 #endif	/* ATH_DEBUG_ALQ */
591 	ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
592 	ATH_TXQ_UNLOCK(txq);
593 }
594 
595 /*
596  * Handoff this frame to the hardware.
597  *
598  * For the multicast queue, this will treat it as a software queue
599  * and append it to the list, after updating the MORE_DATA flag
600  * in the previous frame.  The cabq processing code will ensure
601  * that the queue contents gets transferred over.
602  *
603  * For the hardware queues, this will queue a frame to the queue
604  * like before, then populate the FIFO from that.  Since the
605  * EDMA hardware has 8 FIFO slots per TXQ, this ensures that
606  * frames such as management frames don't get prematurely dropped.
607  *
608  * This does imply that a similar flush-hwq-to-fifoq method will
609  * need to be called from the processq function, before the
610  * per-node software scheduler is called.
611  */
612 static void
613 ath_edma_xmit_handoff(struct ath_softc *sc, struct ath_txq *txq,
614     struct ath_buf *bf)
615 {
616 
617 	DPRINTF(sc, ATH_DEBUG_XMIT_DESC,
618 	    "%s: called; bf=%p, txq=%p, qnum=%d\n",
619 	    __func__,
620 	    bf,
621 	    txq,
622 	    txq->axq_qnum);
623 
624 	if (txq->axq_qnum == ATH_TXQ_SWQ)
625 		ath_edma_xmit_handoff_mcast(sc, txq, bf);
626 	else
627 		ath_edma_xmit_handoff_hw(sc, txq, bf);
628 }
629 
630 static int
631 ath_edma_setup_txfifo(struct ath_softc *sc, int qnum)
632 {
633 	struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum];
634 
635 	te->m_fifo = malloc(sizeof(struct ath_buf *) * HAL_TXFIFO_DEPTH,
636 	    M_ATHDEV,
637 	    M_NOWAIT | M_ZERO);
638 	if (te->m_fifo == NULL) {
639 		device_printf(sc->sc_dev, "%s: malloc failed\n",
640 		    __func__);
641 		return (-ENOMEM);
642 	}
643 
644 	/*
645 	 * Set initial "empty" state.
646 	 */
647 	te->m_fifo_head = te->m_fifo_tail = te->m_fifo_depth = 0;
648 
649 	return (0);
650 }
651 
652 static int
653 ath_edma_free_txfifo(struct ath_softc *sc, int qnum)
654 {
655 	struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum];
656 
657 	/* XXX TODO: actually deref the ath_buf entries? */
658 	free(te->m_fifo, M_ATHDEV);
659 	return (0);
660 }
661 
662 static int
663 ath_edma_dma_txsetup(struct ath_softc *sc)
664 {
665 	int error;
666 	int i;
667 
668 	error = ath_descdma_alloc_desc(sc, &sc->sc_txsdma,
669 	    NULL, "txcomp", sc->sc_tx_statuslen, ATH_TXSTATUS_RING_SIZE);
670 	if (error != 0)
671 		return (error);
672 
673 	ath_hal_setuptxstatusring(sc->sc_ah,
674 	    (void *) sc->sc_txsdma.dd_desc,
675 	    sc->sc_txsdma.dd_desc_paddr,
676 	    ATH_TXSTATUS_RING_SIZE);
677 
678 	for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
679 		ath_edma_setup_txfifo(sc, i);
680 	}
681 
682 	return (0);
683 }
684 
685 static int
686 ath_edma_dma_txteardown(struct ath_softc *sc)
687 {
688 	int i;
689 
690 	for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
691 		ath_edma_free_txfifo(sc, i);
692 	}
693 
694 	ath_descdma_cleanup(sc, &sc->sc_txsdma, NULL);
695 	return (0);
696 }
697 
698 /*
699  * Drain all TXQs, potentially after completing the existing completed
700  * frames.
701  */
702 static void
703 ath_edma_tx_drain(struct ath_softc *sc, ATH_RESET_TYPE reset_type)
704 {
705 	int i;
706 
707 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__);
708 
709 	(void) ath_stoptxdma(sc);
710 
711 	/*
712 	 * If reset type is noloss, the TX FIFO needs to be serviced
713 	 * and those frames need to be handled.
714 	 *
715 	 * Otherwise, just toss everything in each TX queue.
716 	 */
717 	if (reset_type == ATH_RESET_NOLOSS) {
718 		ath_edma_tx_processq(sc, 0);
719 		for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
720 			if (ATH_TXQ_SETUP(sc, i)) {
721 				ATH_TXQ_LOCK(&sc->sc_txq[i]);
722 				/*
723 				 * Free the holding buffer; DMA is now
724 				 * stopped.
725 				 */
726 				ath_txq_freeholdingbuf(sc, &sc->sc_txq[i]);
727 				/*
728 				 * Reset the link pointer to NULL; there's
729 				 * no frames to chain DMA to.
730 				 */
731 				sc->sc_txq[i].axq_link = NULL;
732 				ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
733 			}
734 		}
735 	} else {
736 		for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
737 			if (ATH_TXQ_SETUP(sc, i))
738 				ath_tx_draintxq(sc, &sc->sc_txq[i]);
739 		}
740 	}
741 
742 	/* XXX dump out the TX completion FIFO contents */
743 
744 	/* XXX dump out the frames */
745 
746 	sc->sc_wd_timer = 0;
747 }
748 
/*
 * TX completion task handler.
 *
 * 'arg' is the softc that was registered with the task; 'npending' is
 * not used.  Processes the TX status queue with rescheduling enabled.
 */
static void
ath_edma_tx_proc(void *arg, int npending)
{
	struct ath_softc *sc = arg;

	ath_edma_tx_processq(sc, 1);
}
764 
765 /*
766  * Process the TX status queue.
767  */
768 static void
769 ath_edma_tx_processq(struct ath_softc *sc, int dosched)
770 {
771 	struct ath_hal *ah = sc->sc_ah;
772 	HAL_STATUS status;
773 	struct ath_tx_status ts;
774 	struct ath_txq *txq;
775 	struct ath_buf *bf;
776 	struct ieee80211_node *ni;
777 	int nacked = 0;
778 	int idx;
779 	int i;
780 
781 #ifdef	ATH_DEBUG
782 	/* XXX */
783 	uint32_t txstatus[32];
784 #endif
785 
786 	for (idx = 0; ; idx++) {
787 		bzero(&ts, sizeof(ts));
788 
789 		ATH_TXSTATUS_LOCK(sc);
790 #ifdef	ATH_DEBUG
791 		ath_hal_gettxrawtxdesc(ah, txstatus);
792 #endif
793 		status = ath_hal_txprocdesc(ah, NULL, (void *) &ts);
794 		ATH_TXSTATUS_UNLOCK(sc);
795 
796 		if (status == HAL_EINPROGRESS)
797 			break;
798 
799 #ifdef	ATH_DEBUG
800 		if (sc->sc_debug & ATH_DEBUG_TX_PROC)
801 			if (ts.ts_queue_id != sc->sc_bhalq)
802 			ath_printtxstatbuf(sc, NULL, txstatus, ts.ts_queue_id,
803 			    idx, (status == HAL_OK));
804 #endif
805 
806 		/*
807 		 * If there is an error with this descriptor, continue
808 		 * processing.
809 		 *
810 		 * XXX TBD: log some statistics?
811 		 */
812 		if (status == HAL_EIO) {
813 			device_printf(sc->sc_dev, "%s: invalid TX status?\n",
814 			    __func__);
815 			break;
816 		}
817 
818 #if defined(ATH_DEBUG_ALQ) && defined(ATH_DEBUG)
819 		if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS)) {
820 			if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS,
821 			    sc->sc_tx_statuslen,
822 			    (char *) txstatus);
823 		}
824 #endif /* ATH_DEBUG_ALQ */
825 
826 		/*
827 		 * At this point we have a valid status descriptor.
828 		 * The QID and descriptor ID (which currently isn't set)
829 		 * is part of the status.
830 		 *
831 		 * We then assume that the descriptor in question is the
832 		 * -head- of the given QID.  Eventually we should verify
833 		 * this by using the descriptor ID.
834 		 */
835 
836 		/*
837 		 * The beacon queue is not currently a "real" queue.
838 		 * Frames aren't pushed onto it and the lock isn't setup.
839 		 * So skip it for now; the beacon handling code will
840 		 * free and alloc more beacon buffers as appropriate.
841 		 */
842 		if (ts.ts_queue_id == sc->sc_bhalq)
843 			continue;
844 
845 		txq = &sc->sc_txq[ts.ts_queue_id];
846 
847 		ATH_TXQ_LOCK(txq);
848 		bf = ATH_TXQ_FIRST(&txq->fifo);
849 
850 		/*
851 		 * Work around the situation where I'm seeing notifications
852 		 * for Q1 when no frames are available.  That needs to be
853 		 * debugged but not by crashing _here_.
854 		 */
855 		if (bf == NULL) {
856 			device_printf(sc->sc_dev, "%s: Q%d: empty?\n",
857 			    __func__,
858 			    ts.ts_queue_id);
859 			ATH_TXQ_UNLOCK(txq);
860 			continue;
861 		}
862 
863 		DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d, bf=%p, start=%d, end=%d\n",
864 		    __func__,
865 		    ts.ts_queue_id, bf,
866 		    !! (bf->bf_flags & ATH_BUF_FIFOPTR),
867 		    !! (bf->bf_flags & ATH_BUF_FIFOEND));
868 
869 		/* XXX TODO: actually output debugging info about this */
870 
871 #if 0
872 		/* XXX assert the buffer/descriptor matches the status descid */
873 		if (ts.ts_desc_id != bf->bf_descid) {
874 			device_printf(sc->sc_dev,
875 			    "%s: mismatched descid (qid=%d, tsdescid=%d, "
876 			    "bfdescid=%d\n",
877 			    __func__,
878 			    ts.ts_queue_id,
879 			    ts.ts_desc_id,
880 			    bf->bf_descid);
881 		}
882 #endif
883 
884 		/* This removes the buffer and decrements the queue depth */
885 		ATH_TXQ_REMOVE(&txq->fifo, bf, bf_list);
886 		if (bf->bf_state.bfs_aggr)
887 			txq->axq_aggr_depth--;
888 
889 		/*
890 		 * If this was the end of a FIFO set, decrement FIFO depth
891 		 */
892 		if (bf->bf_flags & ATH_BUF_FIFOEND)
893 			txq->axq_fifo_depth--;
894 
895 		/*
896 		 * If this isn't the final buffer in a FIFO set, mark
897 		 * the buffer as busy so it goes onto the holding queue.
898 		 */
899 		if (! (bf->bf_flags & ATH_BUF_FIFOEND))
900 			bf->bf_flags |= ATH_BUF_BUSY;
901 
902 		DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d: FIFO depth is now %d (%d)\n",
903 		    __func__,
904 		    txq->axq_qnum,
905 		    txq->axq_fifo_depth,
906 		    txq->fifo.axq_depth);
907 
908 		/* XXX assert FIFO depth >= 0 */
909 		ATH_TXQ_UNLOCK(txq);
910 
911 		/*
912 		 * Outside of the TX lock - if the buffer is end
913 		 * end buffer in this FIFO, we don't need a holding
914 		 * buffer any longer.
915 		 */
916 		if (bf->bf_flags & ATH_BUF_FIFOEND) {
917 			ATH_TXQ_LOCK(txq);
918 			ath_txq_freeholdingbuf(sc, txq);
919 			ATH_TXQ_UNLOCK(txq);
920 		}
921 
922 		/*
923 		 * First we need to make sure ts_rate is valid.
924 		 *
925 		 * Pre-EDMA chips pass the whole TX descriptor to
926 		 * the proctxdesc function which will then fill out
927 		 * ts_rate based on the ts_finaltsi (final TX index)
928 		 * in the TX descriptor.  However the TX completion
929 		 * FIFO doesn't have this information.  So here we
930 		 * do a separate HAL call to populate that information.
931 		 *
932 		 * The same problem exists with ts_longretry.
933 		 * The FreeBSD HAL corrects ts_longretry in the HAL layer;
934 		 * the AR9380 HAL currently doesn't.  So until the HAL
935 		 * is imported and this can be added, we correct for it
936 		 * here.
937 		 */
938 		/* XXX TODO */
939 		/* XXX faked for now. Ew. */
940 		if (ts.ts_finaltsi < 4) {
941 			ts.ts_rate =
942 			    bf->bf_state.bfs_rc[ts.ts_finaltsi].ratecode;
943 			switch (ts.ts_finaltsi) {
944 			case 3: ts.ts_longretry +=
945 			    bf->bf_state.bfs_rc[2].tries;
946 			case 2: ts.ts_longretry +=
947 			    bf->bf_state.bfs_rc[1].tries;
948 			case 1: ts.ts_longretry +=
949 			    bf->bf_state.bfs_rc[0].tries;
950 			}
951 		} else {
952 			device_printf(sc->sc_dev, "%s: finaltsi=%d\n",
953 			    __func__,
954 			    ts.ts_finaltsi);
955 			ts.ts_rate = bf->bf_state.bfs_rc[0].ratecode;
956 		}
957 
958 		/*
959 		 * XXX This is terrible.
960 		 *
961 		 * Right now, some code uses the TX status that is
962 		 * passed in here, but the completion handlers in the
963 		 * software TX path also use bf_status.ds_txstat.
964 		 * Ew.  That should all go away.
965 		 *
966 		 * XXX It's also possible the rate control completion
967 		 * routine is called twice.
968 		 */
969 		memcpy(&bf->bf_status, &ts, sizeof(ts));
970 
971 		ni = bf->bf_node;
972 
973 		/* Update RSSI */
974 		/* XXX duplicate from ath_tx_processq */
975 		if (ni != NULL && ts.ts_status == 0 &&
976 		    ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0)) {
977 			nacked++;
978 			sc->sc_stats.ast_tx_rssi = ts.ts_rssi;
979 			ATH_RSSI_LPF(sc->sc_halstats.ns_avgtxrssi,
980 			    ts.ts_rssi);
981 		}
982 
983 		/* Handle frame completion and rate control update */
984 		ath_tx_process_buf_completion(sc, txq, &ts, bf);
985 
986 		/* NB: bf is invalid at this point */
987 	}
988 
989 	sc->sc_wd_timer = 0;
990 
991 	/*
992 	 * XXX It's inefficient to do this if the FIFO queue is full,
993 	 * but there's no easy way right now to only populate
994 	 * the txq task for _one_ TXQ.  This should be fixed.
995 	 */
996 	if (dosched) {
997 		/* Attempt to schedule more hardware frames to the TX FIFO */
998 		for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
999 			if (ATH_TXQ_SETUP(sc, i)) {
1000 				ATH_TXQ_LOCK(&sc->sc_txq[i]);
1001 				ath_edma_tx_fifo_fill(sc, &sc->sc_txq[i]);
1002 				ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
1003 			}
1004 		}
1005 		/* Kick software scheduler */
1006 		ath_tx_swq_kick(sc);
1007 	}
1008 }
1009 
1010 static void
1011 ath_edma_attach_comp_func(struct ath_softc *sc)
1012 {
1013 
1014 	TASK_INIT(&sc->sc_txtask, 0, ath_edma_tx_proc, sc);
1015 }
1016 
1017 void
1018 ath_xmit_setup_edma(struct ath_softc *sc)
1019 {
1020 
1021 	/* Fetch EDMA field and buffer sizes */
1022 	(void) ath_hal_gettxdesclen(sc->sc_ah, &sc->sc_tx_desclen);
1023 	(void) ath_hal_gettxstatuslen(sc->sc_ah, &sc->sc_tx_statuslen);
1024 	(void) ath_hal_getntxmaps(sc->sc_ah, &sc->sc_tx_nmaps);
1025 
1026 	if (bootverbose) {
1027 		device_printf(sc->sc_dev, "TX descriptor length: %d\n",
1028 		    sc->sc_tx_desclen);
1029 		device_printf(sc->sc_dev, "TX status length: %d\n",
1030 		    sc->sc_tx_statuslen);
1031 		device_printf(sc->sc_dev, "TX buffers per descriptor: %d\n",
1032 		    sc->sc_tx_nmaps);
1033 	}
1034 
1035 	sc->sc_tx.xmit_setup = ath_edma_dma_txsetup;
1036 	sc->sc_tx.xmit_teardown = ath_edma_dma_txteardown;
1037 	sc->sc_tx.xmit_attach_comp_func = ath_edma_attach_comp_func;
1038 
1039 	sc->sc_tx.xmit_dma_restart = ath_edma_dma_restart;
1040 	sc->sc_tx.xmit_handoff = ath_edma_xmit_handoff;
1041 	sc->sc_tx.xmit_drain = ath_edma_tx_drain;
1042 }
1043