xref: /freebsd/sys/dev/ath/if_ath_tx_edma.c (revision 52f72944b8f5abb2386eae924357dee8aea17d5b)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2012 Adrian Chadd <adrian@FreeBSD.org>
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer,
12  *    without modification.
13  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
14  *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
15  *    redistribution must be conditioned upon including a substantially
16  *    similar Disclaimer requirement for further binary redistribution.
17  *
18  * NO WARRANTY
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
22  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23  * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
24  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
27  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
29  * THE POSSIBILITY OF SUCH DAMAGES.
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 /*
36  * Driver for the Atheros Wireless LAN controller.
37  *
38  * This software is derived from work of Atsushi Onoe; his contribution
39  * is greatly appreciated.
40  */
41 
42 #include "opt_inet.h"
43 #include "opt_ath.h"
44 /*
45  * This is needed for register operations which are performed
46  * by the driver - eg, calls to ath_hal_gettsf32().
47  *
48  * It's also required for any AH_DEBUG checks in here, eg the
49  * module dependencies.
50  */
51 #include "opt_ah.h"
52 #include "opt_wlan.h"
53 
54 #include <sys/param.h>
55 #include <sys/systm.h>
56 #include <sys/sysctl.h>
57 #include <sys/mbuf.h>
58 #include <sys/malloc.h>
59 #include <sys/lock.h>
60 #include <sys/mutex.h>
61 #include <sys/kernel.h>
62 #include <sys/socket.h>
63 #include <sys/sockio.h>
64 #include <sys/errno.h>
65 #include <sys/callout.h>
66 #include <sys/bus.h>
67 #include <sys/endian.h>
68 #include <sys/kthread.h>
69 #include <sys/taskqueue.h>
70 #include <sys/priv.h>
71 #include <sys/module.h>
72 #include <sys/ktr.h>
73 #include <sys/smp.h>	/* for mp_ncpus */
74 
75 #include <machine/bus.h>
76 
77 #include <net/if.h>
78 #include <net/if_var.h>
79 #include <net/if_dl.h>
80 #include <net/if_media.h>
81 #include <net/if_types.h>
82 #include <net/if_arp.h>
83 #include <net/ethernet.h>
84 #include <net/if_llc.h>
85 
86 #include <net80211/ieee80211_var.h>
87 #include <net80211/ieee80211_regdomain.h>
88 #ifdef IEEE80211_SUPPORT_SUPERG
89 #include <net80211/ieee80211_superg.h>
90 #endif
91 #ifdef IEEE80211_SUPPORT_TDMA
92 #include <net80211/ieee80211_tdma.h>
93 #endif
94 
95 #include <net/bpf.h>
96 
97 #ifdef INET
98 #include <netinet/in.h>
99 #include <netinet/if_ether.h>
100 #endif
101 
102 #include <dev/ath/if_athvar.h>
103 #include <dev/ath/ath_hal/ah_devid.h>		/* XXX for softled */
104 #include <dev/ath/ath_hal/ah_diagcodes.h>
105 
106 #include <dev/ath/if_ath_debug.h>
107 #include <dev/ath/if_ath_misc.h>
108 #include <dev/ath/if_ath_tsf.h>
109 #include <dev/ath/if_ath_tx.h>
110 #include <dev/ath/if_ath_sysctl.h>
111 #include <dev/ath/if_ath_led.h>
112 #include <dev/ath/if_ath_keycache.h>
113 #include <dev/ath/if_ath_rx.h>
114 #include <dev/ath/if_ath_beacon.h>
115 #include <dev/ath/if_athdfs.h>
116 #include <dev/ath/if_ath_descdma.h>
117 
118 #ifdef ATH_TX99_DIAG
119 #include <dev/ath/ath_tx99/ath_tx99.h>
120 #endif
121 
122 #include <dev/ath/if_ath_tx_edma.h>
123 
124 #ifdef	ATH_DEBUG_ALQ
125 #include <dev/ath/if_ath_alq.h>
126 #endif
127 
/*
 * Ring-index helpers: step _l forward (INCR) or backward (DECR) by one
 * and wrap it by masking with (_sz - 1), so _sz must be a power of two.
 *
 * Each macro is wrapped in do { } while (0) so that it expands to a
 * single statement; without that, using one as the body of an unbraced
 * if/else/for would make only the increment/decrement conditional while
 * the mask would always run.
 */
#define	INCR(_l, _sz)		do { (_l)++; (_l) &= ((_sz) - 1); } while (0)
#define	DECR(_l, _sz)		do { (_l)--; (_l) &= ((_sz) - 1); } while (0)
133 
/*
 * Size of the TX status (completion) ring.  The same value is handed to
 * both ath_descdma_alloc_desc() and ath_hal_setuptxstatusring() in
 * ath_edma_dma_txsetup().
 *
 * XXX doesn't belong here, and should be tunable
 */
#define	ATH_TXSTATUS_RING_SIZE	512

/* malloc(9) type used in this file for the per-queue m_fifo arrays. */
MALLOC_DECLARE(M_ATHDEV);

/* Forward declaration: ath_edma_tx_drain() calls this before it is defined. */
static void ath_edma_tx_processq(struct ath_softc *sc, int dosched);
142 
#ifdef	ATH_DEBUG_ALQ
/*
 * Post an ATH_ALQ_TX_FIFO_PUSH record to the ALQ debug log.
 *
 * The record carries the queue number, the number of frames just
 * pushed, the FIFO depth and the total frame count; every field is
 * converted with htobe32() before the record is posted.
 */
static void
ath_tx_alq_edma_push(struct ath_softc *sc, int txq, int nframes,
    int fifo_depth, int frame_cnt)
{
	struct if_ath_alq_tx_fifo_push rec;

	rec.frame_cnt = htobe32(frame_cnt);
	rec.fifo_depth = htobe32(fifo_depth);
	rec.nframes = htobe32(nframes);
	rec.txq = htobe32(txq);

	if_ath_alq_post(&sc->sc_alq, ATH_ALQ_TX_FIFO_PUSH, sizeof(rec),
	    (const char *) &rec);
}
#endif	/* ATH_DEBUG_ALQ */
160 
161 /*
162  * XXX TODO: push an aggregate as a single FIFO slot, even though
163  * it may not meet the TXOP for say, DBA-gated traffic in TDMA mode.
164  *
165  * The TX completion code handles a TX FIFO slot having multiple frames,
166  * aggregate or otherwise, but it may just make things easier to deal
167  * with.
168  *
169  * XXX TODO: track the number of aggregate subframes and put that in the
170  * push alq message.
171  */
172 static void
173 ath_tx_edma_push_staging_list(struct ath_softc *sc, struct ath_txq *txq,
174     int limit)
175 {
176 	struct ath_buf *bf, *bf_last;
177 	struct ath_buf *bfi, *bfp;
178 	int i, sqdepth;
179 	TAILQ_HEAD(axq_q_f_s, ath_buf)  sq;
180 
181 	ATH_TXQ_LOCK_ASSERT(txq);
182 
183 	DPRINTF(sc, ATH_DEBUG_XMIT | ATH_DEBUG_TX_PROC,
184 	    "%s: called; TXQ=%d, fifo.depth=%d, axq_q empty=%d\n",
185 	    __func__,
186 	    txq->axq_qnum,
187 	    txq->axq_fifo_depth,
188 	    !! (TAILQ_EMPTY(&txq->axq_q)));
189 
190 	/*
191 	 * Don't bother doing any work if it's full.
192 	 */
193 	if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH)
194 		return;
195 
196 	if (TAILQ_EMPTY(&txq->axq_q))
197 		return;
198 
199 	TAILQ_INIT(&sq);
200 
201 	/*
202 	 * First pass - walk sq, queue up to 'limit' entries,
203 	 * subtract them from the staging queue.
204 	 */
205 	sqdepth = 0;
206 	for (i = 0; i < limit; i++) {
207 		/* Grab the head entry */
208 		bf = ATH_TXQ_FIRST(txq);
209 		if (bf == NULL)
210 			break;
211 		ATH_TXQ_REMOVE(txq, bf, bf_list);
212 
213 		/* Queue it into our staging list */
214 		TAILQ_INSERT_TAIL(&sq, bf, bf_list);
215 
216 		/* Ensure the flags are cleared */
217 		bf->bf_flags &= ~(ATH_BUF_FIFOPTR | ATH_BUF_FIFOEND);
218 		sqdepth++;
219 	}
220 
221 	/*
222 	 * Ok, so now we have a staging list of up to 'limit'
223 	 * frames from the txq.  Now let's wrap that up
224 	 * into its own list and pass that to the hardware
225 	 * as one FIFO entry.
226 	 */
227 
228 	bf = TAILQ_FIRST(&sq);
229 	bf_last = TAILQ_LAST(&sq, axq_q_s);
230 
231 	/*
232 	 * Ok, so here's the gymnastics reqiured to make this
233 	 * all sensible.
234 	 */
235 
236 	/*
237 	 * Tag the first/last buffer appropriately.
238 	 */
239 	bf->bf_flags |= ATH_BUF_FIFOPTR;
240 	bf_last->bf_flags |= ATH_BUF_FIFOEND;
241 
242 	/*
243 	 * Walk the descriptor list and link them appropriately.
244 	 */
245 	bfp = NULL;
246 	TAILQ_FOREACH(bfi, &sq, bf_list) {
247 		if (bfp != NULL) {
248 			ath_hal_settxdesclink(sc->sc_ah, bfp->bf_lastds,
249 			    bfi->bf_daddr);
250 		}
251 		bfp = bfi;
252 	}
253 
254 	i = 0;
255 	TAILQ_FOREACH(bfi, &sq, bf_list) {
256 #ifdef	ATH_DEBUG
257 		if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
258 			ath_printtxbuf(sc, bfi, txq->axq_qnum, i, 0);
259 #endif/* ATH_DEBUG */
260 #ifdef	ATH_DEBUG_ALQ
261 		if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
262 			ath_tx_alq_post(sc, bfi);
263 #endif /* ATH_DEBUG_ALQ */
264 		i++;
265 	}
266 
267 	/*
268 	 * We now need to push this set of frames onto the tail
269 	 * of the FIFO queue.  We don't adjust the aggregate
270 	 * count, only the queue depth counter(s).
271 	 * We also need to blank the link pointer now.
272 	 */
273 
274 	TAILQ_CONCAT(&txq->fifo.axq_q, &sq, bf_list);
275 	/* Bump total queue tracking in FIFO queue */
276 	txq->fifo.axq_depth += sqdepth;
277 
278 	/* Bump FIFO queue */
279 	txq->axq_fifo_depth++;
280 	DPRINTF(sc, ATH_DEBUG_XMIT | ATH_DEBUG_TX_PROC,
281 	    "%s: queued %d packets; depth=%d, fifo depth=%d\n",
282 	    __func__, sqdepth, txq->fifo.axq_depth, txq->axq_fifo_depth);
283 
284 	/* Push the first entry into the hardware */
285 	ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
286 
287 	/* Push start on the DMA if it's not already started */
288 	ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
289 
290 #ifdef	ATH_DEBUG_ALQ
291 	ath_tx_alq_edma_push(sc, txq->axq_qnum, sqdepth,
292 	    txq->axq_fifo_depth,
293 	    txq->fifo.axq_depth);
294 #endif /* ATH_DEBUG_ALQ */
295 }
296 
297 #define	TX_BATCH_SIZE	32
298 
299 /*
300  * Push some frames into the TX FIFO if we have space.
301  */
302 static void
303 ath_edma_tx_fifo_fill(struct ath_softc *sc, struct ath_txq *txq)
304 {
305 
306 	ATH_TXQ_LOCK_ASSERT(txq);
307 
308 	DPRINTF(sc, ATH_DEBUG_TX_PROC,
309 	    "%s: Q%d: called; fifo.depth=%d, fifo depth=%d, depth=%d, aggr_depth=%d\n",
310 	    __func__,
311 	    txq->axq_qnum,
312 	    txq->fifo.axq_depth,
313 	    txq->axq_fifo_depth,
314 	    txq->axq_depth,
315 	    txq->axq_aggr_depth);
316 
317 	/*
318 	 * For now, push up to 32 frames per TX FIFO slot.
319 	 * If more are in the hardware queue then they'll
320 	 * get populated when we try to send another frame
321 	 * or complete a frame - so at most there'll be
322 	 * 32 non-AMPDU frames per node/TID anyway.
323 	 *
324 	 * Note that the hardware staging queue will limit
325 	 * how many frames in total we will have pushed into
326 	 * here.
327 	 *
328 	 * Later on, we'll want to push less frames into
329 	 * the TX FIFO since we don't want to necessarily
330 	 * fill tens or hundreds of milliseconds of potential
331 	 * frames.
332 	 *
333 	 * However, we need more frames right now because of
334 	 * how the MAC implements the frame scheduling policy.
335 	 * It only ungates a single FIFO entry at a time,
336 	 * and will run that until CHNTIME expires or the
337 	 * end of that FIFO entry descriptor list is reached.
338 	 * So for TDMA we suffer a big performance penalty -
339 	 * single TX FIFO entries mean the MAC only sends out
340 	 * one frame per DBA event, which turned out on average
341 	 * 6ms per TX frame.
342 	 *
343 	 * So, for aggregates it's okay - it'll push two at a
344 	 * time and this will just do them more efficiently.
345 	 * For non-aggregates it'll do 4 at a time, up to the
346 	 * non-aggr limit (non_aggr, which is 32.)  They should
347 	 * be time based rather than a hard count, but I also
348 	 * do need sleep.
349 	 */
350 
351 	/*
352 	 * Do some basic, basic batching to the hardware
353 	 * queue.
354 	 *
355 	 * If we have TX_BATCH_SIZE entries in the staging
356 	 * queue, then let's try to send them all in one hit.
357 	 *
358 	 * Ensure we don't push more than TX_BATCH_SIZE worth
359 	 * in, otherwise we end up draining 8 slots worth of
360 	 * 32 frames into the hardware queue and then we don't
361 	 * attempt to push more frames in until we empty the
362 	 * FIFO.
363 	 */
364 	if (txq->axq_depth >= TX_BATCH_SIZE / 2 &&
365 	    txq->fifo.axq_depth <= TX_BATCH_SIZE) {
366 		ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
367 	}
368 
369 	/*
370 	 * Aggregate check: if we have less than two FIFO slots
371 	 * busy and we have some aggregate frames, queue it.
372 	 *
373 	 * Now, ideally we'd just check to see if the scheduler
374 	 * has given us aggregate frames and push them into the FIFO
375 	 * as individual slots, as honestly we should just be pushing
376 	 * a single aggregate in as one FIFO slot.
377 	 *
378 	 * Let's do that next once I know this works.
379 	 */
380 	else if (txq->axq_aggr_depth > 0 && txq->axq_fifo_depth < 2)
381 		ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
382 
383 	/*
384 	 *
385 	 * If we have less, and the TXFIFO isn't empty, let's
386 	 * wait until we've finished sending the FIFO.
387 	 *
388 	 * If we have less, and the TXFIFO is empty, then
389 	 * send them.
390 	 */
391 	else if (txq->axq_fifo_depth == 0) {
392 		ath_tx_edma_push_staging_list(sc, txq, TX_BATCH_SIZE);
393 	}
394 }
395 
396 /*
397  * Re-initialise the DMA FIFO with the current contents of
398  * said TXQ.
399  *
400  * This should only be called as part of the chip reset path, as it
401  * assumes the FIFO is currently empty.
402  */
403 static void
404 ath_edma_dma_restart(struct ath_softc *sc, struct ath_txq *txq)
405 {
406 	struct ath_buf *bf;
407 	int i = 0;
408 	int fifostart = 1;
409 	int old_fifo_depth;
410 
411 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: called\n",
412 	    __func__,
413 	    txq->axq_qnum);
414 
415 	ATH_TXQ_LOCK_ASSERT(txq);
416 
417 	/*
418 	 * Let's log if the tracked FIFO depth doesn't match
419 	 * what we actually push in.
420 	 */
421 	old_fifo_depth = txq->axq_fifo_depth;
422 	txq->axq_fifo_depth = 0;
423 
424 	/*
425 	 * Walk the FIFO staging list, looking for "head" entries.
426 	 * Since we may have a partially completed list of frames,
427 	 * we push the first frame we see into the FIFO and re-mark
428 	 * it as the head entry.  We then skip entries until we see
429 	 * FIFO end, at which point we get ready to push another
430 	 * entry into the FIFO.
431 	 */
432 	TAILQ_FOREACH(bf, &txq->fifo.axq_q, bf_list) {
433 		/*
434 		 * If we're looking for FIFOEND and we haven't found
435 		 * it, skip.
436 		 *
437 		 * If we're looking for FIFOEND and we've found it,
438 		 * reset for another descriptor.
439 		 */
440 #ifdef	ATH_DEBUG
441 		if (sc->sc_debug & ATH_DEBUG_XMIT_DESC)
442 			ath_printtxbuf(sc, bf, txq->axq_qnum, i, 0);
443 #endif/* ATH_DEBUG */
444 #ifdef	ATH_DEBUG_ALQ
445 		if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
446 			ath_tx_alq_post(sc, bf);
447 #endif /* ATH_DEBUG_ALQ */
448 
449 		if (fifostart == 0) {
450 			if (bf->bf_flags & ATH_BUF_FIFOEND)
451 				fifostart = 1;
452 			continue;
453 		}
454 
455 		/* Make sure we're not overflowing the FIFO! */
456 		if (txq->axq_fifo_depth >= HAL_TXFIFO_DEPTH) {
457 			device_printf(sc->sc_dev,
458 			    "%s: Q%d: more frames in the queue; FIFO depth=%d?!\n",
459 			    __func__,
460 			    txq->axq_qnum,
461 			    txq->axq_fifo_depth);
462 		}
463 
464 #if 0
465 		DPRINTF(sc, ATH_DEBUG_RESET,
466 		    "%s: Q%d: depth=%d: pushing bf=%p; start=%d, end=%d\n",
467 		    __func__,
468 		    txq->axq_qnum,
469 		    txq->axq_fifo_depth,
470 		    bf,
471 		    !! (bf->bf_flags & ATH_BUF_FIFOPTR),
472 		    !! (bf->bf_flags & ATH_BUF_FIFOEND));
473 #endif
474 
475 		/*
476 		 * Set this to be the first buffer in the FIFO
477 		 * list - even if it's also the last buffer in
478 		 * a FIFO list!
479 		 */
480 		bf->bf_flags |= ATH_BUF_FIFOPTR;
481 
482 		/* Push it into the FIFO and bump the FIFO count */
483 		ath_hal_puttxbuf(sc->sc_ah, txq->axq_qnum, bf->bf_daddr);
484 		txq->axq_fifo_depth++;
485 
486 		/*
487 		 * If this isn't the last entry either, let's
488 		 * clear fifostart so we continue looking for
489 		 * said last entry.
490 		 */
491 		if (! (bf->bf_flags & ATH_BUF_FIFOEND))
492 			fifostart = 0;
493 		i++;
494 	}
495 
496 	/* Only bother starting the queue if there's something in it */
497 	if (i > 0)
498 		ath_hal_txstart(sc->sc_ah, txq->axq_qnum);
499 
500 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: Q%d: FIFO depth was %d, is %d\n",
501 	    __func__,
502 	    txq->axq_qnum,
503 	    old_fifo_depth,
504 	    txq->axq_fifo_depth);
505 
506 	/* And now, let's check! */
507 	if (txq->axq_fifo_depth != old_fifo_depth) {
508 		device_printf(sc->sc_dev,
509 		    "%s: Q%d: FIFO depth should be %d, is %d\n",
510 		    __func__,
511 		    txq->axq_qnum,
512 		    old_fifo_depth,
513 		    txq->axq_fifo_depth);
514 	}
515 }
516 
517 /*
518  * Hand off this frame to a hardware queue.
519  *
520  * Things are a bit hairy in the EDMA world.  The TX FIFO is only
521  * 8 entries deep, so we need to keep track of exactly what we've
522  * pushed into the FIFO and what's just sitting in the TX queue,
523  * waiting to go out.
524  *
525  * So this is split into two halves - frames get appended to the
526  * TXQ; then a scheduler is called to push some frames into the
527  * actual TX FIFO.
528  */
529 static void
530 ath_edma_xmit_handoff_hw(struct ath_softc *sc, struct ath_txq *txq,
531     struct ath_buf *bf)
532 {
533 
534 	ATH_TXQ_LOCK(txq);
535 
536 	KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
537 	    ("%s: busy status 0x%x", __func__, bf->bf_flags));
538 
539 	/*
540 	 * XXX TODO: write a hard-coded check to ensure that
541 	 * the queue id in the TX descriptor matches txq->axq_qnum.
542 	 */
543 
544 	/* Update aggr stats */
545 	if (bf->bf_state.bfs_aggr)
546 		txq->axq_aggr_depth++;
547 
548 	/* Push and update frame stats */
549 	ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
550 
551 	/*
552 	 * Finally, call the FIFO schedule routine to schedule some
553 	 * frames to the FIFO.
554 	 */
555 	ath_edma_tx_fifo_fill(sc, txq);
556 	ATH_TXQ_UNLOCK(txq);
557 }
558 
559 /*
560  * Hand off this frame to a multicast software queue.
561  *
562  * The EDMA TX CABQ will get a list of chained frames, chained
563  * together using the next pointer.  The single head of that
564  * particular queue is pushed to the hardware CABQ.
565  */
566 static void
567 ath_edma_xmit_handoff_mcast(struct ath_softc *sc, struct ath_txq *txq,
568     struct ath_buf *bf)
569 {
570 
571 	ATH_TX_LOCK_ASSERT(sc);
572 	KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
573 	    ("%s: busy status 0x%x", __func__, bf->bf_flags));
574 
575 	ATH_TXQ_LOCK(txq);
576 	/*
577 	 * XXX this is mostly duplicated in ath_tx_handoff_mcast().
578 	 */
579 	if (ATH_TXQ_LAST(txq, axq_q_s) != NULL) {
580 		struct ath_buf *bf_last = ATH_TXQ_LAST(txq, axq_q_s);
581 		struct ieee80211_frame *wh;
582 
583 		/* mark previous frame */
584 		wh = mtod(bf_last->bf_m, struct ieee80211_frame *);
585 		wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA;
586 
587 		/* re-sync buffer to memory */
588 		bus_dmamap_sync(sc->sc_dmat, bf_last->bf_dmamap,
589 		   BUS_DMASYNC_PREWRITE);
590 
591 		/* link descriptor */
592 		ath_hal_settxdesclink(sc->sc_ah,
593 		    bf_last->bf_lastds,
594 		    bf->bf_daddr);
595 	}
596 #ifdef	ATH_DEBUG_ALQ
597 	if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXDESC))
598 		ath_tx_alq_post(sc, bf);
599 #endif	/* ATH_DEBUG_ALQ */
600 	ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
601 	ATH_TXQ_UNLOCK(txq);
602 }
603 
604 /*
605  * Handoff this frame to the hardware.
606  *
607  * For the multicast queue, this will treat it as a software queue
608  * and append it to the list, after updating the MORE_DATA flag
609  * in the previous frame.  The cabq processing code will ensure
610  * that the queue contents gets transferred over.
611  *
612  * For the hardware queues, this will queue a frame to the queue
613  * like before, then populate the FIFO from that.  Since the
614  * EDMA hardware has 8 FIFO slots per TXQ, this ensures that
615  * frames such as management frames don't get prematurely dropped.
616  *
617  * This does imply that a similar flush-hwq-to-fifoq method will
618  * need to be called from the processq function, before the
619  * per-node software scheduler is called.
620  */
621 static void
622 ath_edma_xmit_handoff(struct ath_softc *sc, struct ath_txq *txq,
623     struct ath_buf *bf)
624 {
625 
626 	DPRINTF(sc, ATH_DEBUG_XMIT_DESC,
627 	    "%s: called; bf=%p, txq=%p, qnum=%d\n",
628 	    __func__,
629 	    bf,
630 	    txq,
631 	    txq->axq_qnum);
632 
633 	if (txq->axq_qnum == ATH_TXQ_SWQ)
634 		ath_edma_xmit_handoff_mcast(sc, txq, bf);
635 	else
636 		ath_edma_xmit_handoff_hw(sc, txq, bf);
637 }
638 
639 static int
640 ath_edma_setup_txfifo(struct ath_softc *sc, int qnum)
641 {
642 	struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum];
643 
644 	te->m_fifo = malloc(sizeof(struct ath_buf *) * HAL_TXFIFO_DEPTH,
645 	    M_ATHDEV,
646 	    M_NOWAIT | M_ZERO);
647 	if (te->m_fifo == NULL) {
648 		device_printf(sc->sc_dev, "%s: malloc failed\n",
649 		    __func__);
650 		return (-ENOMEM);
651 	}
652 
653 	/*
654 	 * Set initial "empty" state.
655 	 */
656 	te->m_fifo_head = te->m_fifo_tail = te->m_fifo_depth = 0;
657 
658 	return (0);
659 }
660 
661 static int
662 ath_edma_free_txfifo(struct ath_softc *sc, int qnum)
663 {
664 	struct ath_tx_edma_fifo *te = &sc->sc_txedma[qnum];
665 
666 	/* XXX TODO: actually deref the ath_buf entries? */
667 	free(te->m_fifo, M_ATHDEV);
668 	return (0);
669 }
670 
671 static int
672 ath_edma_dma_txsetup(struct ath_softc *sc)
673 {
674 	int error;
675 	int i;
676 
677 	error = ath_descdma_alloc_desc(sc, &sc->sc_txsdma,
678 	    NULL, "txcomp", sc->sc_tx_statuslen, ATH_TXSTATUS_RING_SIZE);
679 	if (error != 0)
680 		return (error);
681 
682 	ath_hal_setuptxstatusring(sc->sc_ah,
683 	    (void *) sc->sc_txsdma.dd_desc,
684 	    sc->sc_txsdma.dd_desc_paddr,
685 	    ATH_TXSTATUS_RING_SIZE);
686 
687 	for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
688 		ath_edma_setup_txfifo(sc, i);
689 	}
690 
691 	return (0);
692 }
693 
694 static int
695 ath_edma_dma_txteardown(struct ath_softc *sc)
696 {
697 	int i;
698 
699 	for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
700 		ath_edma_free_txfifo(sc, i);
701 	}
702 
703 	ath_descdma_cleanup(sc, &sc->sc_txsdma, NULL);
704 	return (0);
705 }
706 
707 /*
708  * Drain all TXQs, potentially after completing the existing completed
709  * frames.
710  */
711 static void
712 ath_edma_tx_drain(struct ath_softc *sc, ATH_RESET_TYPE reset_type)
713 {
714 	int i;
715 
716 	DPRINTF(sc, ATH_DEBUG_RESET, "%s: called\n", __func__);
717 
718 	(void) ath_stoptxdma(sc);
719 
720 	/*
721 	 * If reset type is noloss, the TX FIFO needs to be serviced
722 	 * and those frames need to be handled.
723 	 *
724 	 * Otherwise, just toss everything in each TX queue.
725 	 */
726 	if (reset_type == ATH_RESET_NOLOSS) {
727 		ath_edma_tx_processq(sc, 0);
728 		for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
729 			if (ATH_TXQ_SETUP(sc, i)) {
730 				ATH_TXQ_LOCK(&sc->sc_txq[i]);
731 				/*
732 				 * Free the holding buffer; DMA is now
733 				 * stopped.
734 				 */
735 				ath_txq_freeholdingbuf(sc, &sc->sc_txq[i]);
736 				/*
737 				 * Reset the link pointer to NULL; there's
738 				 * no frames to chain DMA to.
739 				 */
740 				sc->sc_txq[i].axq_link = NULL;
741 				ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
742 			}
743 		}
744 	} else {
745 		for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
746 			if (ATH_TXQ_SETUP(sc, i))
747 				ath_tx_draintxq(sc, &sc->sc_txq[i]);
748 		}
749 	}
750 
751 	/* XXX dump out the TX completion FIFO contents */
752 
753 	/* XXX dump out the frames */
754 
755 	sc->sc_wd_timer = 0;
756 }
757 
758 /*
759  * TX completion tasklet.
760  */
761 
762 static void
763 ath_edma_tx_proc(void *arg, int npending)
764 {
765 	struct ath_softc *sc = (struct ath_softc *) arg;
766 
767 	ATH_PCU_LOCK(sc);
768 	sc->sc_txproc_cnt++;
769 	ATH_PCU_UNLOCK(sc);
770 
771 	ATH_LOCK(sc);
772 	ath_power_set_power_state(sc, HAL_PM_AWAKE);
773 	ATH_UNLOCK(sc);
774 
775 #if 0
776 	DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: called, npending=%d\n",
777 	    __func__, npending);
778 #endif
779 	ath_edma_tx_processq(sc, 1);
780 
781 
782 	ATH_PCU_LOCK(sc);
783 	sc->sc_txproc_cnt--;
784 	ATH_PCU_UNLOCK(sc);
785 
786 	ATH_LOCK(sc);
787 	ath_power_restore_power_state(sc);
788 	ATH_UNLOCK(sc);
789 
790 	ath_tx_kick(sc);
791 }
792 
793 /*
794  * Process the TX status queue.
795  */
796 static void
797 ath_edma_tx_processq(struct ath_softc *sc, int dosched)
798 {
799 	struct ath_hal *ah = sc->sc_ah;
800 	HAL_STATUS status;
801 	struct ath_tx_status ts;
802 	struct ath_txq *txq;
803 	struct ath_buf *bf;
804 	struct ieee80211_node *ni;
805 	int nacked = 0;
806 	int idx;
807 	int i;
808 
809 #ifdef	ATH_DEBUG
810 	/* XXX */
811 	uint32_t txstatus[32];
812 #endif
813 
814 	DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: called\n", __func__);
815 
816 	for (idx = 0; ; idx++) {
817 		bzero(&ts, sizeof(ts));
818 
819 		ATH_TXSTATUS_LOCK(sc);
820 #ifdef	ATH_DEBUG
821 		ath_hal_gettxrawtxdesc(ah, txstatus);
822 #endif
823 		status = ath_hal_txprocdesc(ah, NULL, (void *) &ts);
824 		ATH_TXSTATUS_UNLOCK(sc);
825 
826 		if (status == HAL_EINPROGRESS) {
827 			DPRINTF(sc, ATH_DEBUG_TX_PROC,
828 			    "%s: (%d): EINPROGRESS\n",
829 			    __func__, idx);
830 			break;
831 		}
832 
833 #ifdef	ATH_DEBUG
834 		if (sc->sc_debug & ATH_DEBUG_TX_PROC)
835 			if (ts.ts_queue_id != sc->sc_bhalq)
836 			ath_printtxstatbuf(sc, NULL, txstatus, ts.ts_queue_id,
837 			    idx, (status == HAL_OK));
838 #endif
839 
840 		/*
841 		 * If there is an error with this descriptor, continue
842 		 * processing.
843 		 *
844 		 * XXX TBD: log some statistics?
845 		 */
846 		if (status == HAL_EIO) {
847 			device_printf(sc->sc_dev, "%s: invalid TX status?\n",
848 			    __func__);
849 			break;
850 		}
851 
852 #if defined(ATH_DEBUG_ALQ) && defined(ATH_DEBUG)
853 		if (if_ath_alq_checkdebug(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS)) {
854 			if_ath_alq_post(&sc->sc_alq, ATH_ALQ_EDMA_TXSTATUS,
855 			    sc->sc_tx_statuslen,
856 			    (char *) txstatus);
857 		}
858 #endif /* ATH_DEBUG_ALQ */
859 
860 		/*
861 		 * At this point we have a valid status descriptor.
862 		 * The QID and descriptor ID (which currently isn't set)
863 		 * is part of the status.
864 		 *
865 		 * We then assume that the descriptor in question is the
866 		 * -head- of the given QID.  Eventually we should verify
867 		 * this by using the descriptor ID.
868 		 */
869 
870 		/*
871 		 * The beacon queue is not currently a "real" queue.
872 		 * Frames aren't pushed onto it and the lock isn't setup.
873 		 * So skip it for now; the beacon handling code will
874 		 * free and alloc more beacon buffers as appropriate.
875 		 */
876 		if (ts.ts_queue_id == sc->sc_bhalq)
877 			continue;
878 
879 		txq = &sc->sc_txq[ts.ts_queue_id];
880 
881 		ATH_TXQ_LOCK(txq);
882 		bf = ATH_TXQ_FIRST(&txq->fifo);
883 
884 		/*
885 		 * Work around the situation where I'm seeing notifications
886 		 * for Q1 when no frames are available.  That needs to be
887 		 * debugged but not by crashing _here_.
888 		 */
889 		if (bf == NULL) {
890 			device_printf(sc->sc_dev, "%s: Q%d: empty?\n",
891 			    __func__,
892 			    ts.ts_queue_id);
893 			ATH_TXQ_UNLOCK(txq);
894 			continue;
895 		}
896 
897 		DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d, bf=%p, start=%d, end=%d\n",
898 		    __func__,
899 		    ts.ts_queue_id, bf,
900 		    !! (bf->bf_flags & ATH_BUF_FIFOPTR),
901 		    !! (bf->bf_flags & ATH_BUF_FIFOEND));
902 
903 		/* XXX TODO: actually output debugging info about this */
904 
905 #if 0
906 		/* XXX assert the buffer/descriptor matches the status descid */
907 		if (ts.ts_desc_id != bf->bf_descid) {
908 			device_printf(sc->sc_dev,
909 			    "%s: mismatched descid (qid=%d, tsdescid=%d, "
910 			    "bfdescid=%d\n",
911 			    __func__,
912 			    ts.ts_queue_id,
913 			    ts.ts_desc_id,
914 			    bf->bf_descid);
915 		}
916 #endif
917 
918 		/* This removes the buffer and decrements the queue depth */
919 		ATH_TXQ_REMOVE(&txq->fifo, bf, bf_list);
920 		if (bf->bf_state.bfs_aggr)
921 			txq->axq_aggr_depth--;
922 
923 		/*
924 		 * If this was the end of a FIFO set, decrement FIFO depth
925 		 */
926 		if (bf->bf_flags & ATH_BUF_FIFOEND)
927 			txq->axq_fifo_depth--;
928 
929 		/*
930 		 * If this isn't the final buffer in a FIFO set, mark
931 		 * the buffer as busy so it goes onto the holding queue.
932 		 */
933 		if (! (bf->bf_flags & ATH_BUF_FIFOEND))
934 			bf->bf_flags |= ATH_BUF_BUSY;
935 
936 		DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: Q%d: FIFO depth is now %d (%d)\n",
937 		    __func__,
938 		    txq->axq_qnum,
939 		    txq->axq_fifo_depth,
940 		    txq->fifo.axq_depth);
941 
942 		/* XXX assert FIFO depth >= 0 */
943 		ATH_TXQ_UNLOCK(txq);
944 
945 		/*
946 		 * Outside of the TX lock - if the buffer is end
947 		 * end buffer in this FIFO, we don't need a holding
948 		 * buffer any longer.
949 		 */
950 		if (bf->bf_flags & ATH_BUF_FIFOEND) {
951 			ATH_TXQ_LOCK(txq);
952 			ath_txq_freeholdingbuf(sc, txq);
953 			ATH_TXQ_UNLOCK(txq);
954 		}
955 
956 		/*
957 		 * First we need to make sure ts_rate is valid.
958 		 *
959 		 * Pre-EDMA chips pass the whole TX descriptor to
960 		 * the proctxdesc function which will then fill out
961 		 * ts_rate based on the ts_finaltsi (final TX index)
962 		 * in the TX descriptor.  However the TX completion
963 		 * FIFO doesn't have this information.  So here we
964 		 * do a separate HAL call to populate that information.
965 		 *
966 		 * The same problem exists with ts_longretry.
967 		 * The FreeBSD HAL corrects ts_longretry in the HAL layer;
968 		 * the AR9380 HAL currently doesn't.  So until the HAL
969 		 * is imported and this can be added, we correct for it
970 		 * here.
971 		 */
972 		/* XXX TODO */
973 		/* XXX faked for now. Ew. */
974 		if (ts.ts_finaltsi < 4) {
975 			ts.ts_rate =
976 			    bf->bf_state.bfs_rc[ts.ts_finaltsi].ratecode;
977 			switch (ts.ts_finaltsi) {
978 			case 3: ts.ts_longretry +=
979 			    bf->bf_state.bfs_rc[2].tries;
980 			case 2: ts.ts_longretry +=
981 			    bf->bf_state.bfs_rc[1].tries;
982 			case 1: ts.ts_longretry +=
983 			    bf->bf_state.bfs_rc[0].tries;
984 			}
985 		} else {
986 			device_printf(sc->sc_dev, "%s: finaltsi=%d\n",
987 			    __func__,
988 			    ts.ts_finaltsi);
989 			ts.ts_rate = bf->bf_state.bfs_rc[0].ratecode;
990 		}
991 
992 		/*
993 		 * XXX This is terrible.
994 		 *
995 		 * Right now, some code uses the TX status that is
996 		 * passed in here, but the completion handlers in the
997 		 * software TX path also use bf_status.ds_txstat.
998 		 * Ew.  That should all go away.
999 		 *
1000 		 * XXX It's also possible the rate control completion
1001 		 * routine is called twice.
1002 		 */
1003 		memcpy(&bf->bf_status, &ts, sizeof(ts));
1004 
1005 		ni = bf->bf_node;
1006 
1007 		/* Update RSSI */
1008 		/* XXX duplicate from ath_tx_processq */
1009 		if (ni != NULL && ts.ts_status == 0 &&
1010 		    ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0)) {
1011 			nacked++;
1012 			sc->sc_stats.ast_tx_rssi = ts.ts_rssi;
1013 			ATH_RSSI_LPF(sc->sc_halstats.ns_avgtxrssi,
1014 			    ts.ts_rssi);
1015 		}
1016 
1017 		/* Handle frame completion and rate control update */
1018 		ath_tx_process_buf_completion(sc, txq, &ts, bf);
1019 
1020 		/* NB: bf is invalid at this point */
1021 	}
1022 
1023 	sc->sc_wd_timer = 0;
1024 
1025 	/*
1026 	 * XXX It's inefficient to do this if the FIFO queue is full,
1027 	 * but there's no easy way right now to only populate
1028 	 * the txq task for _one_ TXQ.  This should be fixed.
1029 	 */
1030 	if (dosched) {
1031 		/* Attempt to schedule more hardware frames to the TX FIFO */
1032 		for (i = 0; i < HAL_NUM_TX_QUEUES; i++) {
1033 			if (ATH_TXQ_SETUP(sc, i)) {
1034 				ATH_TX_LOCK(sc);
1035 				ath_txq_sched(sc, &sc->sc_txq[i]);
1036 				ATH_TX_UNLOCK(sc);
1037 
1038 				ATH_TXQ_LOCK(&sc->sc_txq[i]);
1039 				ath_edma_tx_fifo_fill(sc, &sc->sc_txq[i]);
1040 				ATH_TXQ_UNLOCK(&sc->sc_txq[i]);
1041 			}
1042 		}
1043 		/* Kick software scheduler */
1044 		ath_tx_swq_kick(sc);
1045 	}
1046 
1047 	DPRINTF(sc, ATH_DEBUG_TX_PROC, "%s: end\n", __func__);
1048 }
1049 
1050 static void
1051 ath_edma_attach_comp_func(struct ath_softc *sc)
1052 {
1053 
1054 	TASK_INIT(&sc->sc_txtask, 0, ath_edma_tx_proc, sc);
1055 }
1056 
1057 void
1058 ath_xmit_setup_edma(struct ath_softc *sc)
1059 {
1060 
1061 	/* Fetch EDMA field and buffer sizes */
1062 	(void) ath_hal_gettxdesclen(sc->sc_ah, &sc->sc_tx_desclen);
1063 	(void) ath_hal_gettxstatuslen(sc->sc_ah, &sc->sc_tx_statuslen);
1064 	(void) ath_hal_getntxmaps(sc->sc_ah, &sc->sc_tx_nmaps);
1065 
1066 	if (bootverbose) {
1067 		device_printf(sc->sc_dev, "TX descriptor length: %d\n",
1068 		    sc->sc_tx_desclen);
1069 		device_printf(sc->sc_dev, "TX status length: %d\n",
1070 		    sc->sc_tx_statuslen);
1071 		device_printf(sc->sc_dev, "TX buffers per descriptor: %d\n",
1072 		    sc->sc_tx_nmaps);
1073 	}
1074 
1075 	sc->sc_tx.xmit_setup = ath_edma_dma_txsetup;
1076 	sc->sc_tx.xmit_teardown = ath_edma_dma_txteardown;
1077 	sc->sc_tx.xmit_attach_comp_func = ath_edma_attach_comp_func;
1078 
1079 	sc->sc_tx.xmit_dma_restart = ath_edma_dma_restart;
1080 	sc->sc_tx.xmit_handoff = ath_edma_xmit_handoff;
1081 	sc->sc_tx.xmit_drain = ath_edma_tx_drain;
1082 }
1083