xref: /freebsd/sys/dev/ath/if_ath_tx.c (revision 9a41df2a0e6408e9b329bbd8b9e37c2b44461a1b)
1 /*-
2  * Copyright (c) 2002-2009 Sam Leffler, Errno Consulting
3  * Copyright (c) 2010-2012 Adrian Chadd, Xenion Pty Ltd
4  * All rights reserved.
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer,
11  *    without modification.
12  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
13  *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
14  *    redistribution must be conditioned upon including a substantially
15  *    similar Disclaimer requirement for further binary redistribution.
16  *
17  * NO WARRANTY
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
19  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
20  * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
21  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
22  * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
23  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
26  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
28  * THE POSSIBILITY OF SUCH DAMAGES.
29  */
30 
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 /*
35  * Driver for the Atheros Wireless LAN controller.
36  *
37  * This software is derived from work of Atsushi Onoe; his contribution
38  * is greatly appreciated.
39  */
40 
41 #include "opt_inet.h"
42 #include "opt_ath.h"
43 #include "opt_wlan.h"
44 
45 #include <sys/param.h>
46 #include <sys/systm.h>
47 #include <sys/sysctl.h>
48 #include <sys/mbuf.h>
49 #include <sys/malloc.h>
50 #include <sys/lock.h>
51 #include <sys/mutex.h>
52 #include <sys/kernel.h>
53 #include <sys/socket.h>
54 #include <sys/sockio.h>
55 #include <sys/errno.h>
56 #include <sys/callout.h>
57 #include <sys/bus.h>
58 #include <sys/endian.h>
59 #include <sys/kthread.h>
60 #include <sys/taskqueue.h>
61 #include <sys/priv.h>
62 
63 #include <machine/bus.h>
64 
65 #include <net/if.h>
66 #include <net/if_dl.h>
67 #include <net/if_media.h>
68 #include <net/if_types.h>
69 #include <net/if_arp.h>
70 #include <net/ethernet.h>
71 #include <net/if_llc.h>
72 
73 #include <net80211/ieee80211_var.h>
74 #include <net80211/ieee80211_regdomain.h>
75 #ifdef IEEE80211_SUPPORT_SUPERG
76 #include <net80211/ieee80211_superg.h>
77 #endif
78 #ifdef IEEE80211_SUPPORT_TDMA
79 #include <net80211/ieee80211_tdma.h>
80 #endif
81 #include <net80211/ieee80211_ht.h>
82 
83 #include <net/bpf.h>
84 
85 #ifdef INET
86 #include <netinet/in.h>
87 #include <netinet/if_ether.h>
88 #endif
89 
90 #include <dev/ath/if_athvar.h>
91 #include <dev/ath/ath_hal/ah_devid.h>		/* XXX for softled */
92 #include <dev/ath/ath_hal/ah_diagcodes.h>
93 
94 #include <dev/ath/if_ath_debug.h>
95 
96 #ifdef ATH_TX99_DIAG
97 #include <dev/ath/ath_tx99/ath_tx99.h>
98 #endif
99 
100 #include <dev/ath/if_ath_misc.h>
101 #include <dev/ath/if_ath_tx.h>
102 #include <dev/ath/if_ath_tx_ht.h>
103 
104 /*
105  * How many retries to perform in software
106  */
107 #define	SWMAX_RETRIES		10
108 
109 /*
110  * What queue to throw the non-QoS TID traffic into
111  */
112 #define	ATH_NONQOS_TID_AC	WME_AC_VO
113 
114 #if 0
115 static int ath_tx_node_is_asleep(struct ath_softc *sc, struct ath_node *an);
116 #endif
117 static int ath_tx_ampdu_pending(struct ath_softc *sc, struct ath_node *an,
118     int tid);
119 static int ath_tx_ampdu_running(struct ath_softc *sc, struct ath_node *an,
120     int tid);
121 static ieee80211_seq ath_tx_tid_seqno_assign(struct ath_softc *sc,
122     struct ieee80211_node *ni, struct ath_buf *bf, struct mbuf *m0);
123 static int ath_tx_action_frame_override_queue(struct ath_softc *sc,
124     struct ieee80211_node *ni, struct mbuf *m0, int *tid);
125 static struct ath_buf *
126 ath_tx_retry_clone(struct ath_softc *sc, struct ath_node *an,
127     struct ath_tid *tid, struct ath_buf *bf);
128 
129 /*
130  * Whether to use the 11n rate scenario functions or not
131  */
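
/*
 * (The magic values tested below are believed to be the AR5416-family
 * and AR9300/EDMA HAL magics respectively; the same 0x19741014 value
 * is used for the EDMA special case in ath_tx_chaindesclist() below.)
 */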
132 static inline int
133 ath_tx_is_11n(struct ath_softc *sc)
134 {
135 	return ((sc->sc_ah->ah_magic == 0x20065416) ||
136 		    (sc->sc_ah->ah_magic == 0x19741014));
137 }
138 
139 /*
140  * Obtain the current TID from the given frame.
141  *
142  * Non-QoS frames need to go into TID 16 (IEEE80211_NONQOS_TID.)
143  * This has implications for which AC/priority the packet is placed
144  * in.
145  */
146 static int
147 ath_tx_gettid(struct ath_softc *sc, const struct mbuf *m0)
148 {
149 	const struct ieee80211_frame *wh;
150 	int pri = M_WME_GETAC(m0);
151 
152 	wh = mtod(m0, const struct ieee80211_frame *);
153 	if (! IEEE80211_QOS_HAS_SEQ(wh))
154 		return IEEE80211_NONQOS_TID;
155 	else
156 		return WME_AC_TO_TID(pri);
157 }
158 
159 static void
160 ath_tx_set_retry(struct ath_softc *sc, struct ath_buf *bf)
161 {
162 	struct ieee80211_frame *wh;
163 
164 	wh = mtod(bf->bf_m, struct ieee80211_frame *);
165 	/* Only update/resync if needed */
166 	if (bf->bf_state.bfs_isretried == 0) {
167 		wh->i_fc[1] |= IEEE80211_FC1_RETRY;
168 		bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
169 		    BUS_DMASYNC_PREWRITE);
170 	}
171 	bf->bf_state.bfs_isretried = 1;
172 	bf->bf_state.bfs_retries ++;
173 }
174 
175 /*
176  * Determine what the correct AC queue for the given frame
177  * should be.
178  *
179  * This code assumes that the TIDs map consistently to
180  * the underlying hardware (or software) ath_txq.
181  * Since the sender may try to set an AC which is
182  * arbitrary, non-QoS TIDs may end up being put on
183  * completely different ACs. There's no way to put a
184  * TID into multiple ath_txq's for scheduling, so
185  * for now we override the AC/TXQ selection and map
186  * non-QoS TID frames into the ATH_NONQOS_TID_AC queue.
187  *
188  * This may be completely incorrect - specifically,
189  * some management frames may end up out of order
190  * compared to the QoS traffic they're controlling.
191  * I'll look into this later.
192  */
193 static int
194 ath_tx_getac(struct ath_softc *sc, const struct mbuf *m0)
195 {
196 	const struct ieee80211_frame *wh;
197 	int pri = M_WME_GETAC(m0);
198 	wh = mtod(m0, const struct ieee80211_frame *);
199 	if (IEEE80211_QOS_HAS_SEQ(wh))
200 		return pri;
201 
202 	return ATH_NONQOS_TID_AC;
203 }
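
#if 0
/*
 * Illustrative sketch (not compiled): how the two helpers above are
 * combined when classifying an outbound frame.  A QoS data frame keeps
 * the AC assigned by M_WME_GETAC(); a frame without a QoS control field
 * is forced into IEEE80211_NONQOS_TID / ATH_NONQOS_TID_AC.  The function
 * name here is hypothetical.
 */
static void
ath_tx_classify_sketch(struct ath_softc *sc, const struct mbuf *m0)
{
	int tid = ath_tx_gettid(sc, m0);
	int ac = ath_tx_getac(sc, m0);

	DPRINTF(sc, ATH_DEBUG_XMIT, "%s: tid=%d, ac=%d\n", __func__,
	    tid, ac);
}
#endif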
204 
205 void
206 ath_txfrag_cleanup(struct ath_softc *sc,
207 	ath_bufhead *frags, struct ieee80211_node *ni)
208 {
209 	struct ath_buf *bf, *next;
210 
211 	ATH_TXBUF_LOCK_ASSERT(sc);
212 
213 	TAILQ_FOREACH_SAFE(bf, frags, bf_list, next) {
214 		/* NB: bf assumed clean */
215 		TAILQ_REMOVE(frags, bf, bf_list);
216 		ath_returnbuf_head(sc, bf);
217 		ieee80211_node_decref(ni);
218 	}
219 }
220 
221 /*
222  * Setup xmit of a fragmented frame.  Allocate a buffer
223  * for each frag and bump the node reference count to
224  * reflect the held reference to be setup by ath_tx_start.
225  */
226 int
227 ath_txfrag_setup(struct ath_softc *sc, ath_bufhead *frags,
228 	struct mbuf *m0, struct ieee80211_node *ni)
229 {
230 	struct mbuf *m;
231 	struct ath_buf *bf;
232 
233 	ATH_TXBUF_LOCK(sc);
234 	for (m = m0->m_nextpkt; m != NULL; m = m->m_nextpkt) {
235 		/* XXX non-management? */
236 		bf = _ath_getbuf_locked(sc, ATH_BUFTYPE_NORMAL);
237 		if (bf == NULL) {	/* out of buffers, cleanup */
238 			device_printf(sc->sc_dev, "%s: no buffer?\n",
239 			    __func__);
240 			ath_txfrag_cleanup(sc, frags, ni);
241 			break;
242 		}
243 		ieee80211_node_incref(ni);
244 		TAILQ_INSERT_TAIL(frags, bf, bf_list);
245 	}
246 	ATH_TXBUF_UNLOCK(sc);
247 
248 	return !TAILQ_EMPTY(frags);
249 }
250 
251 /*
252  * Reclaim mbuf resources.  For fragmented frames we
253  * need to claim each frag chained with m_nextpkt.
254  */
255 void
256 ath_freetx(struct mbuf *m)
257 {
258 	struct mbuf *next;
259 
260 	do {
261 		next = m->m_nextpkt;
262 		m->m_nextpkt = NULL;
263 		m_freem(m);
264 	} while ((m = next) != NULL);
265 }
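
#if 0
/*
 * Illustrative sketch (not compiled): the expected pairing of
 * ath_txfrag_setup() and ath_freetx() when a fragmented chain can't be
 * fully provisioned with ath_buf entries.  The function name and the
 * local "frags" list are hypothetical; the real caller is the driver
 * transmit start path.
 */
static int
ath_txfrag_sketch(struct ath_softc *sc, struct mbuf *m0,
    struct ieee80211_node *ni)
{
	ath_bufhead frags;

	TAILQ_INIT(&frags);
	if ((m0->m_flags & M_FRAG) &&
	    !ath_txfrag_setup(sc, &frags, m0, ni)) {
		/* Out of ath_bufs; drop the whole fragment chain */
		ath_freetx(m0);
		return (ENOBUFS);
	}
	/* ... each fragment is then paired with a bf taken from frags ... */
	return (0);
}
#endif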
266 
267 static int
268 ath_tx_dmasetup(struct ath_softc *sc, struct ath_buf *bf, struct mbuf *m0)
269 {
270 	struct mbuf *m;
271 	int error;
272 
273 	/*
274 	 * Load the DMA map so any coalescing is done.  This
275 	 * also calculates the number of descriptors we need.
276 	 */
277 	error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m0,
278 				     bf->bf_segs, &bf->bf_nseg,
279 				     BUS_DMA_NOWAIT);
280 	if (error == EFBIG) {
281 		/* XXX packet requires too many descriptors */
282 		bf->bf_nseg = ATH_TXDESC+1;
283 	} else if (error != 0) {
284 		sc->sc_stats.ast_tx_busdma++;
285 		ath_freetx(m0);
286 		return error;
287 	}
288 	/*
289 	 * Discard null packets and check for packets that
290 	 * require too many TX descriptors.  We try to convert
291 	 * the latter to a cluster.
292 	 */
293 	if (bf->bf_nseg > ATH_TXDESC) {		/* too many desc's, linearize */
294 		sc->sc_stats.ast_tx_linear++;
295 		m = m_collapse(m0, M_DONTWAIT, ATH_TXDESC);
296 		if (m == NULL) {
297 			ath_freetx(m0);
298 			sc->sc_stats.ast_tx_nombuf++;
299 			return ENOMEM;
300 		}
301 		m0 = m;
302 		error = bus_dmamap_load_mbuf_sg(sc->sc_dmat, bf->bf_dmamap, m0,
303 					     bf->bf_segs, &bf->bf_nseg,
304 					     BUS_DMA_NOWAIT);
305 		if (error != 0) {
306 			sc->sc_stats.ast_tx_busdma++;
307 			ath_freetx(m0);
308 			return error;
309 		}
310 		KASSERT(bf->bf_nseg <= ATH_TXDESC,
311 		    ("too many segments after defrag; nseg %u", bf->bf_nseg));
312 	} else if (bf->bf_nseg == 0) {		/* null packet, discard */
313 		sc->sc_stats.ast_tx_nodata++;
314 		ath_freetx(m0);
315 		return EIO;
316 	}
317 	DPRINTF(sc, ATH_DEBUG_XMIT, "%s: m %p len %u\n",
318 		__func__, m0, m0->m_pkthdr.len);
319 	bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREWRITE);
320 	bf->bf_m = m0;
321 
322 	return 0;
323 }
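
#if 0
/*
 * Illustrative sketch (not compiled) of caller-side handling for
 * ath_tx_dmasetup(): on error the mbuf has already been freed via
 * ath_freetx(), so it must not be touched again; on success the
 * (possibly collapsed) chain must be re-fetched from bf->bf_m.  The
 * function name is hypothetical; see ath_tx_normal_setup() below for
 * the real usage.
 */
static int
ath_tx_dmasetup_sketch(struct ath_softc *sc, struct ath_buf *bf,
    struct mbuf *m0)
{
	int error;

	error = ath_tx_dmasetup(sc, bf, m0);
	if (error != 0)
		return (error);		/* m0 has been freed */
	m0 = bf->bf_m;			/* NB: may have changed */
	return (0);
}
#endif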
324 
325 /*
326  * Chain together segments+descriptors for a non-11n frame.
327  */
328 static void
329 ath_tx_chaindesclist(struct ath_softc *sc, struct ath_buf *bf)
330 {
331 	struct ath_hal *ah = sc->sc_ah;
332 	char *ds, *ds0;
333 	int i, bp, dsp;
334 	HAL_DMA_ADDR bufAddrList[4];
335 	uint32_t segLenList[4];
336 	int numTxMaps = 1;
337 	int isFirstDesc = 1;
338 	int qnum;
339 
340 	/*
341 	 * XXX There's txdma and txdma_mgmt; the descriptor
342 	 * sizes must match.
343 	 */
344 	struct ath_descdma *dd = &sc->sc_txdma;
345 
346 	/*
347 	 * Fill in the remainder of the descriptor info.
348 	 */
349 
350 	/*
351 	 * For now the HAL doesn't implement halNumTxMaps for non-EDMA
352 	 * (ie it's 0.)  So just work around it.
353 	 *
354 	 * XXX TODO: populate halNumTxMaps for each HAL chip and
355 	 * then undo this hack.
356 	 */
357 	if (sc->sc_ah->ah_magic == 0x19741014)
358 		numTxMaps = 4;
359 
360 	/*
361 	 * For EDMA and later chips ensure the TX map is fully populated
362 	 * before advancing to the next descriptor.
363 	 */
364 	ds0 = ds = (char *) bf->bf_desc;
365 	bp = dsp = 0;
366 	bzero(bufAddrList, sizeof(bufAddrList));
367 	bzero(segLenList, sizeof(segLenList));
368 	for (i = 0; i < bf->bf_nseg; i++) {
369 		bufAddrList[bp] = bf->bf_segs[i].ds_addr;
370 		segLenList[bp] = bf->bf_segs[i].ds_len;
371 		bp++;
372 
373 		/*
374 		 * Go to the next segment if this isn't the last segment
375 		 * and there's space in the current TX map.
376 		 */
377 		if ((i != bf->bf_nseg - 1) && (bp < numTxMaps))
378 			continue;
379 
380 		/*
381 		 * Last segment or we're out of buffer pointers.
382 		 */
383 		bp = 0;
384 
385 		if (i == bf->bf_nseg - 1)
386 			ath_hal_settxdesclink(ah, (struct ath_desc *) ds, 0);
387 		else
388 			ath_hal_settxdesclink(ah, (struct ath_desc *) ds,
389 			    bf->bf_daddr + dd->dd_descsize * (dsp + 1));
390 
391 		/*
392 		 * XXX this assumes that bfs_txq is the actual destination
393 		 * hardware queue at this point.  It may not have been assigned,
394 		 * it may actually be pointing to the multicast software
395 		 * TXQ id.  These must be fixed!
396 		 */
397 		qnum = bf->bf_state.bfs_txq->axq_qnum;
398 
399 		ath_hal_filltxdesc(ah, (struct ath_desc *) ds
400 			, bufAddrList
401 			, segLenList
402 			, bf->bf_descid		/* XXX desc id */
403 			, qnum
404 			, isFirstDesc		/* first segment */
405 			, i == bf->bf_nseg - 1	/* last segment */
406 			, (struct ath_desc *) ds0	/* first descriptor */
407 		);
408 
409 		/* Make sure the 11n aggregate fields are cleared */
410 		if (ath_tx_is_11n(sc))
411 			ath_hal_clr11n_aggr(sc->sc_ah, (struct ath_desc *) ds);
412 
413 		isFirstDesc = 0;
414 #ifdef	ATH_DEBUG
415 		if (sc->sc_debug & ATH_DEBUG_XMIT)
416 			ath_printtxbuf(sc, bf, qnum, 0, 0);
417 #endif
418 		bf->bf_lastds = (struct ath_desc *) ds;
419 
420 		/*
421 		 * Don't forget to skip to the next descriptor.
422 		 */
423 		ds += sc->sc_tx_desclen;
424 		dsp++;
425 
426 		/*
427 		 * .. and don't forget to blank these out!
428 		 */
429 		bzero(bufAddrList, sizeof(bufAddrList));
430 		bzero(segLenList, sizeof(segLenList));
431 	}
432 	bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap, BUS_DMASYNC_PREWRITE);
433 }
434 
435 /*
436  * Fill in the descriptor list for an aggregate subframe.
437  *
438  * The subframe is returned with the ds_link field in the last subframe
439  * pointing to 0.
440  */
441 static void
442 ath_tx_chaindesclist_subframe(struct ath_softc *sc, struct ath_buf *bf)
443 {
444 	struct ath_hal *ah = sc->sc_ah;
445 	struct ath_desc *ds, *ds0;
446 	int i;
447 	HAL_DMA_ADDR bufAddrList[4];
448 	uint32_t segLenList[4];
449 
450 	/*
451 	 * XXX There's txdma and txdma_mgmt; the descriptor
452 	 * sizes must match.
453 	 */
454 	struct ath_descdma *dd = &sc->sc_txdma;
455 
456 	ds0 = ds = bf->bf_desc;
457 
458 	/*
459 	 * There's no need to call ath_hal_setupfirsttxdesc here;
460 	 * That's only going to occur for the first frame in an aggregate.
461 	 */
462 	for (i = 0; i < bf->bf_nseg; i++, ds++) {
463 		bzero(bufAddrList, sizeof(bufAddrList));
464 		bzero(segLenList, sizeof(segLenList));
465 		if (i == bf->bf_nseg - 1)
466 			ath_hal_settxdesclink(ah, ds, 0);
467 		else
468 			ath_hal_settxdesclink(ah, ds,
469 			    bf->bf_daddr + dd->dd_descsize * (i + 1));
470 
471 		bufAddrList[0] = bf->bf_segs[i].ds_addr;
472 		segLenList[0] = bf->bf_segs[i].ds_len;
473 
474 		/*
475 		 * This performs the setup for an aggregate frame.
476 		 * This includes enabling the aggregate flags if needed.
477 		 */
478 		ath_hal_chaintxdesc(ah, ds,
479 		    bufAddrList,
480 		    segLenList,
481 		    bf->bf_state.bfs_pktlen,
482 		    bf->bf_state.bfs_hdrlen,
483 		    HAL_PKT_TYPE_AMPDU,	/* forces aggregate bits to be set */
484 		    bf->bf_state.bfs_keyix,
485 		    0,			/* cipher, calculated from keyix */
486 		    bf->bf_state.bfs_ndelim,
487 		    i == 0,		/* first segment */
488 		    i == bf->bf_nseg - 1,	/* last segment */
489 		    bf->bf_next == NULL		/* last sub-frame in aggr */
490 		);
491 
492 		DPRINTF(sc, ATH_DEBUG_XMIT,
493 			"%s: %d: %08x %08x %08x %08x %08x %08x\n",
494 			__func__, i, ds->ds_link, ds->ds_data,
495 			ds->ds_ctl0, ds->ds_ctl1, ds->ds_hw[0], ds->ds_hw[1]);
496 		bf->bf_lastds = ds;
497 		bus_dmamap_sync(sc->sc_dmat, bf->bf_dmamap,
498 		    BUS_DMASYNC_PREWRITE);
499 	}
500 }
501 
502 /*
503  * Set the rate control fields in the given descriptor based on
504  * the bf_state fields and node state.
505  *
506  * The bfs fields should already be set with the relevant rate
507  * control information, including whether MRR is to be enabled.
508  *
509  * Since the FreeBSD HAL currently sets up the first TX rate
510  * in ath_hal_setuptxdesc(), this will setup the MRR
511  * conditionally for the pre-11n chips, and call ath_buf_set_rate
512  * unconditionally for 11n chips. These require the 11n rate
513  * scenario to be set if MCS rates are enabled, so it's easier
514  * to just always call it. The caller can then only set rates 2, 3
515  * and 4 if multi-rate retry is needed.
516  */
517 static void
518 ath_tx_set_ratectrl(struct ath_softc *sc, struct ieee80211_node *ni,
519     struct ath_buf *bf)
520 {
521 	struct ath_rc_series *rc = bf->bf_state.bfs_rc;
522 
523 	/* If mrr is disabled, blank tries 1, 2, 3 */
524 	if (! bf->bf_state.bfs_ismrr)
525 		rc[1].tries = rc[2].tries = rc[3].tries = 0;
526 
527 	/*
528 	 * Always call - that way a retried descriptor will
529 	 * have the MRR fields overwritten.
530 	 *
531 	 * XXX TODO: see if this is really needed - setting up
532 	 * the first descriptor should set the MRR fields to 0
533 	 * for us anyway.
534 	 */
535 	if (ath_tx_is_11n(sc)) {
536 		ath_buf_set_rate(sc, ni, bf);
537 	} else {
538 		ath_hal_setupxtxdesc(sc->sc_ah, bf->bf_desc
539 			, rc[1].ratecode, rc[1].tries
540 			, rc[2].ratecode, rc[2].tries
541 			, rc[3].ratecode, rc[3].tries
542 		);
543 	}
544 }
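
/*
 * A sketch of the bfs_rc[] layout that the function above expects
 * (populated earlier by ath_tx_do_ratelookup() / ath_rate_getxtxrates()):
 *
 *	bfs_rc[0] - the primary rate: rix/ratecode/tries from the lookup
 *	bfs_rc[1..3] - fallback (MRR) rates, or tries == 0 when MRR is
 *		       disabled
 */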
545 
546 /*
547  * Setup segments+descriptors for an 11n aggregate.
548  * bf_first is the first buffer in the aggregate.
549  * The descriptor list must already have been linked together using
550  * bf->bf_next.
551  */
552 static void
553 ath_tx_setds_11n(struct ath_softc *sc, struct ath_buf *bf_first)
554 {
555 	struct ath_buf *bf, *bf_prev = NULL;
556 
557 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: nframes=%d, al=%d\n",
558 	    __func__, bf_first->bf_state.bfs_nframes,
559 	    bf_first->bf_state.bfs_al);
560 
561 	/*
562 	 * Setup all descriptors of all subframes.
563 	 */
564 	bf = bf_first;
565 	while (bf != NULL) {
566 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
567 		    "%s: bf=%p, nseg=%d, pktlen=%d, seqno=%d\n",
568 		    __func__, bf, bf->bf_nseg, bf->bf_state.bfs_pktlen,
569 		    SEQNO(bf->bf_state.bfs_seqno));
570 
571 		/* Sub-frame setup */
572 		ath_tx_chaindesclist_subframe(sc, bf);
573 
574 		/*
575 		 * Link the last descriptor of the previous frame
576 		 * to the beginning descriptor of this frame.
577 		 */
578 		if (bf_prev != NULL)
579 			ath_hal_settxdesclink(sc->sc_ah, bf_prev->bf_lastds,
580 			    bf->bf_daddr);
581 
582 		/* Save a copy so we can link the next descriptor in */
583 		bf_prev = bf;
584 		bf = bf->bf_next;
585 	}
586 
587 	/*
588 	 * Setup first descriptor of first frame.
589 	 * chaintxdesc() overwrites the descriptor entries;
590 	 * setupfirsttxdesc() merges in things.
591 	 * Otherwise various fields aren't set correctly (eg flags).
592 	 */
593 	ath_hal_setupfirsttxdesc(sc->sc_ah,
594 	    bf_first->bf_desc,
595 	    bf_first->bf_state.bfs_al,
596 	    bf_first->bf_state.bfs_txflags | HAL_TXDESC_INTREQ,
597 	    bf_first->bf_state.bfs_txpower,
598 	    bf_first->bf_state.bfs_txrate0,
599 	    bf_first->bf_state.bfs_try0,
600 	    bf_first->bf_state.bfs_txantenna,
601 	    bf_first->bf_state.bfs_ctsrate,
602 	    bf_first->bf_state.bfs_ctsduration);
603 
604 	/*
605 	 * Set the first descriptor bf_lastds field to point to
606 	 * the last descriptor in the last subframe, that's where
607 	 * the status update will occur.
608 	 */
609 	bf_first->bf_lastds = bf_prev->bf_lastds;
610 
611 	/*
612 	 * And bf_last in the first descriptor points to the end of
613 	 * the aggregate list.
614 	 */
615 	bf_first->bf_last = bf_prev;
616 
617 	/*
618 	 * setup first desc with rate and aggr info
619 	 */
620 	ath_tx_set_ratectrl(sc, bf_first->bf_node, bf_first);
621 
622 	/*
623 	 * Setup the last descriptor in the list.
624 	 *
625 	 * bf_first->bf_lastds already points to it; the rate
626 	 * control information needs to be squirreled away here
627  * as well, and the moreaggr/paddelim fields need to be cleared.
628 	 */
629 	ath_hal_setuplasttxdesc(sc->sc_ah, bf_first->bf_lastds,
630 	    bf_first->bf_desc);
631 
632 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: end\n", __func__);
633 }
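
/*
 * A sketch of the aggregate layout produced above:
 *
 *	bf_first -> bf -> ... -> bf_prev	(linked via bf_next)
 *
 * with bf_first->bf_last pointing at the final subframe (bf_prev) and
 * bf_first->bf_lastds pointing at that subframe's last descriptor,
 * which is where the hardware posts the aggregate completion status.
 */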
634 
635 /*
636  * Hand-off a frame to the multicast TX queue.
637  *
638  * This is a software TXQ which will be appended to the CAB queue
639  * during the beacon setup code.
640  *
641  * XXX TODO: since the AR9300 EDMA TX queue support wants the QCU ID
642  * as part of the TX descriptor, bf_state.bfs_txq must be updated
643  * with the actual hardware txq, or all of this will fall apart.
644  *
645  * XXX It may not be a bad idea to just stuff the QCU ID into bf_state
646  * and retire bfs_txq; then make sure the CABQ QCU ID is populated
647  * correctly.
648  */
649 static void
650 ath_tx_handoff_mcast(struct ath_softc *sc, struct ath_txq *txq,
651     struct ath_buf *bf)
652 {
653 	ATH_TXQ_LOCK_ASSERT(txq);
654 	KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
655 	     ("%s: busy status 0x%x", __func__, bf->bf_flags));
656 	if (txq->axq_link != NULL) {
657 		struct ath_buf *last = ATH_TXQ_LAST(txq, axq_q_s);
658 		struct ieee80211_frame *wh;
659 
660 		/* mark previous frame */
661 		wh = mtod(last->bf_m, struct ieee80211_frame *);
662 		wh->i_fc[1] |= IEEE80211_FC1_MORE_DATA;
663 		bus_dmamap_sync(sc->sc_dmat, last->bf_dmamap,
664 		    BUS_DMASYNC_PREWRITE);
665 
666 		/* link descriptor */
667 		*txq->axq_link = bf->bf_daddr;
668 	}
669 	ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
670 	ath_hal_gettxdesclinkptr(sc->sc_ah, bf->bf_lastds, &txq->axq_link);
671 }
672 
673 /*
674  * Hand-off packet to a hardware queue.
675  */
676 static void
677 ath_tx_handoff_hw(struct ath_softc *sc, struct ath_txq *txq,
678     struct ath_buf *bf)
679 {
680 	struct ath_hal *ah = sc->sc_ah;
681 
682 	/*
683 	 * Insert the frame on the outbound list and pass it on
684 	 * to the hardware.  Multicast frames buffered for power
685 	 * save stations and transmit from the CAB queue are stored
686 	 * on a s/w only queue and loaded on to the CAB queue in
687 	 * the SWBA handler since frames only go out on DTIM and
688 	 * to avoid possible races.
689 	 */
690 	ATH_TXQ_LOCK_ASSERT(txq);
691 	KASSERT((bf->bf_flags & ATH_BUF_BUSY) == 0,
692 	     ("%s: busy status 0x%x", __func__, bf->bf_flags));
693 	KASSERT(txq->axq_qnum != ATH_TXQ_SWQ,
694 	     ("ath_tx_handoff_hw called for mcast queue"));
695 
696 #if 0
697 	/*
698 	 * This causes a LOR. Find out where the PCU lock is being
699 	 * held whilst the TXQ lock is grabbed - that shouldn't
700 	 * be occuring.
701 	 */
702 	ATH_PCU_LOCK(sc);
703 	if (sc->sc_inreset_cnt) {
704 		ATH_PCU_UNLOCK(sc);
705 		DPRINTF(sc, ATH_DEBUG_RESET,
706 		    "%s: called with sc_in_reset != 0\n",
707 		    __func__);
708 		DPRINTF(sc, ATH_DEBUG_XMIT,
709 		    "%s: queued: TXDP[%u] = %p (%p) depth %d\n",
710 		    __func__, txq->axq_qnum,
711 		    (caddr_t)bf->bf_daddr, bf->bf_desc,
712 		    txq->axq_depth);
713 		ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
714 		if (bf->bf_state.bfs_aggr)
715 			txq->axq_aggr_depth++;
716 		/*
717 		 * There's no need to update axq_link; the hardware
718 		 * is in reset and once the reset is complete, any
719 		 * non-empty queues will simply have DMA restarted.
720 		 */
721 		return;
722 		}
723 	ATH_PCU_UNLOCK(sc);
724 #endif
725 
726 	/* For now, so not to generate whitespace diffs */
727 	if (1) {
728 #ifdef IEEE80211_SUPPORT_TDMA
729 		int qbusy;
730 
731 		ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
732 		qbusy = ath_hal_txqenabled(ah, txq->axq_qnum);
733 
734 		ATH_KTR(sc, ATH_KTR_TX, 4,
735 		    "ath_tx_handoff: txq=%u, add bf=%p, qbusy=%d, depth=%d",
736 		    txq->axq_qnum, bf, qbusy, txq->axq_depth);
737 		if (txq->axq_link == NULL) {
738 			/*
739 			 * Be careful writing the address to TXDP.  If
740 			 * the tx q is enabled then this write will be
741 			 * ignored.  Normally this is not an issue but
742 			 * when tdma is in use and the q is beacon gated
743 			 * this race can occur.  If the q is busy then
744 			 * defer the work to later--either when another
745 			 * packet comes along or when we prepare a beacon
746 			 * frame at SWBA.
747 			 */
748 			if (!qbusy) {
749 				ath_hal_puttxbuf(ah, txq->axq_qnum,
750 				    bf->bf_daddr);
751 				txq->axq_flags &= ~ATH_TXQ_PUTPENDING;
752 				DPRINTF(sc, ATH_DEBUG_XMIT,
753 				    "%s: TXDP[%u] = %p (%p) lastds=%p depth %d\n",
754 				    __func__, txq->axq_qnum,
755 				    (caddr_t)bf->bf_daddr, bf->bf_desc,
756 				    bf->bf_lastds,
757 				    txq->axq_depth);
758 				ATH_KTR(sc, ATH_KTR_TX, 5,
759 				    "ath_tx_handoff: TXDP[%u] = %p (%p) "
760 				    "lastds=%p depth %d",
761 				    txq->axq_qnum,
762 				    (caddr_t)bf->bf_daddr, bf->bf_desc,
763 				    bf->bf_lastds,
764 				    txq->axq_depth);
765 			} else {
766 				txq->axq_flags |= ATH_TXQ_PUTPENDING;
767 				DPRINTF(sc, ATH_DEBUG_TDMA | ATH_DEBUG_XMIT,
768 				    "%s: Q%u busy, defer enable\n", __func__,
769 				    txq->axq_qnum);
770 				ATH_KTR(sc, ATH_KTR_TX, 0, "defer enable");
771 			}
772 		} else {
773 			*txq->axq_link = bf->bf_daddr;
774 			DPRINTF(sc, ATH_DEBUG_XMIT,
775 			    "%s: link[%u](%p)=%p (%p) depth %d\n", __func__,
776 			    txq->axq_qnum, txq->axq_link,
777 			    (caddr_t)bf->bf_daddr, bf->bf_desc,
778 			    txq->axq_depth);
779 			ATH_KTR(sc, ATH_KTR_TX, 5,
780 			    "ath_tx_handoff: link[%u](%p)=%p (%p) lastds=%p",
781 			    txq->axq_qnum, txq->axq_link,
782 			    (caddr_t)bf->bf_daddr, bf->bf_desc,
783 			    bf->bf_lastds);
784 
785 			if ((txq->axq_flags & ATH_TXQ_PUTPENDING) && !qbusy) {
786 				/*
787 				 * The q was busy when we previously tried
788 				 * to write the address of the first buffer
789 				 * in the chain.  Since it's not busy now
790 				 * handle this chore.  We are certain the
791 				 * buffer at the front is the right one since
792 				 * axq_link is NULL only when the buffer list
793 				 * is/was empty.
794 				 */
795 				ath_hal_puttxbuf(ah, txq->axq_qnum,
796 					TAILQ_FIRST(&txq->axq_q)->bf_daddr);
797 				txq->axq_flags &= ~ATH_TXQ_PUTPENDING;
798 				DPRINTF(sc, ATH_DEBUG_TDMA | ATH_DEBUG_XMIT,
799 				    "%s: Q%u restarted\n", __func__,
800 				    txq->axq_qnum);
801 				ATH_KTR(sc, ATH_KTR_TX, 4,
802 				  "ath_tx_handoff: txq[%d] restarted, bf=%p "
803 				  "daddr=%p ds=%p",
804 				    txq->axq_qnum,
805 				    bf,
806 				    (caddr_t)bf->bf_daddr,
807 				    bf->bf_desc);
808 			}
809 		}
810 #else
811 		ATH_TXQ_INSERT_TAIL(txq, bf, bf_list);
812 		ATH_KTR(sc, ATH_KTR_TX, 3,
813 		    "ath_tx_handoff: non-tdma: txq=%u, add bf=%p "
814 		    "depth=%d",
815 		    txq->axq_qnum,
816 		    bf,
817 		    txq->axq_depth);
818 		if (txq->axq_link == NULL) {
819 			ath_hal_puttxbuf(ah, txq->axq_qnum, bf->bf_daddr);
820 			DPRINTF(sc, ATH_DEBUG_XMIT,
821 			    "%s: TXDP[%u] = %p (%p) depth %d\n",
822 			    __func__, txq->axq_qnum,
823 			    (caddr_t)bf->bf_daddr, bf->bf_desc,
824 			    txq->axq_depth);
825 			ATH_KTR(sc, ATH_KTR_TX, 5,
826 			    "ath_tx_handoff: non-tdma: TXDP[%u] = %p (%p) "
827 			    "lastds=%p depth %d",
828 			    txq->axq_qnum,
829 			    (caddr_t)bf->bf_daddr, bf->bf_desc,
830 			    bf->bf_lastds,
831 			    txq->axq_depth);
832 
833 		} else {
834 			*txq->axq_link = bf->bf_daddr;
835 			DPRINTF(sc, ATH_DEBUG_XMIT,
836 			    "%s: link[%u](%p)=%p (%p) depth %d\n", __func__,
837 			    txq->axq_qnum, txq->axq_link,
838 			    (caddr_t)bf->bf_daddr, bf->bf_desc,
839 			    txq->axq_depth);
840 			ATH_KTR(sc, ATH_KTR_TX, 5,
841 			    "ath_tx_handoff: non-tdma: link[%u](%p)=%p (%p) "
842 			    "lastds=%d",
843 			    txq->axq_qnum, txq->axq_link,
844 			    (caddr_t)bf->bf_daddr, bf->bf_desc,
845 			    bf->bf_lastds);
846 
847 		}
848 #endif /* IEEE80211_SUPPORT_TDMA */
849 		if (bf->bf_state.bfs_aggr)
850 			txq->axq_aggr_depth++;
851 		ath_hal_gettxdesclinkptr(ah, bf->bf_lastds, &txq->axq_link);
852 		ath_hal_txstart(ah, txq->axq_qnum);
853 		ATH_KTR(sc, ATH_KTR_TX, 1,
854 		    "ath_tx_handoff: txq=%u, txstart", txq->axq_qnum);
855 	}
856 }
857 
858 /*
859  * Restart TX DMA for the given TXQ.
860  *
861  * This must be called whether the queue is empty or not.
862  */
863 static void
864 ath_legacy_tx_dma_restart(struct ath_softc *sc, struct ath_txq *txq)
865 {
866 	struct ath_hal *ah = sc->sc_ah;
867 	struct ath_buf *bf, *bf_last;
868 
869 	ATH_TXQ_LOCK_ASSERT(txq);
870 
871 	/* This is always going to be cleared, empty or not */
872 	txq->axq_flags &= ~ATH_TXQ_PUTPENDING;
873 
874 	/* XXX make this ATH_TXQ_FIRST */
875 	bf = TAILQ_FIRST(&txq->axq_q);
876 	bf_last = ATH_TXQ_LAST(txq, axq_q_s);
877 
878 	if (bf == NULL)
879 		return;
880 
881 	ath_hal_puttxbuf(ah, txq->axq_qnum, bf->bf_daddr);
882 	ath_hal_gettxdesclinkptr(ah, bf_last->bf_lastds, &txq->axq_link);
883 	ath_hal_txstart(ah, txq->axq_qnum);
884 }
885 
886 /*
887  * Hand off a packet to the hardware (or mcast queue.)
888  *
889  * The relevant hardware txq should be locked.
890  */
891 static void
892 ath_legacy_xmit_handoff(struct ath_softc *sc, struct ath_txq *txq,
893     struct ath_buf *bf)
894 {
895 	ATH_TXQ_LOCK_ASSERT(txq);
896 
897 	if (txq->axq_qnum == ATH_TXQ_SWQ)
898 		ath_tx_handoff_mcast(sc, txq, bf);
899 	else
900 		ath_tx_handoff_hw(sc, txq, bf);
901 }
902 
903 static int
904 ath_tx_tag_crypto(struct ath_softc *sc, struct ieee80211_node *ni,
905     struct mbuf *m0, int iswep, int isfrag, int *hdrlen, int *pktlen,
906     int *keyix)
907 {
908 	DPRINTF(sc, ATH_DEBUG_XMIT,
909 	    "%s: hdrlen=%d, pktlen=%d, isfrag=%d, iswep=%d, m0=%p\n",
910 	    __func__,
911 	    *hdrlen,
912 	    *pktlen,
913 	    isfrag,
914 	    iswep,
915 	    m0);
916 
917 	if (iswep) {
918 		const struct ieee80211_cipher *cip;
919 		struct ieee80211_key *k;
920 
921 		/*
922 		 * Construct the 802.11 header+trailer for an encrypted
923 		 * frame. The only reason this can fail is because of an
924 		 * unknown or unsupported cipher/key type.
925 		 */
926 		k = ieee80211_crypto_encap(ni, m0);
927 		if (k == NULL) {
928 			/*
929 			 * This can happen when the key is yanked after the
930 			 * frame was queued.  Just discard the frame; the
931 			 * 802.11 layer counts failures and provides
932 			 * debugging/diagnostics.
933 			 */
934 			return (0);
935 		}
936 		/*
937 		 * Adjust the packet + header lengths for the crypto
938 		 * additions and calculate the h/w key index.  When
939 		 * a s/w mic is done the frame will have had any mic
940 		 * added to it prior to entry so m0->m_pkthdr.len will
941 		 * account for it. Otherwise we need to add it to the
942 		 * packet length.
943 		 */
944 		cip = k->wk_cipher;
945 		(*hdrlen) += cip->ic_header;
946 		(*pktlen) += cip->ic_header + cip->ic_trailer;
947 		/* NB: frags always have any TKIP MIC done in s/w */
948 		if ((k->wk_flags & IEEE80211_KEY_SWMIC) == 0 && !isfrag)
949 			(*pktlen) += cip->ic_miclen;
950 		(*keyix) = k->wk_keyix;
951 	} else if (ni->ni_ucastkey.wk_cipher == &ieee80211_cipher_none) {
952 		/*
953 		 * Use station key cache slot, if assigned.
954 		 */
955 		(*keyix) = ni->ni_ucastkey.wk_keyix;
956 		if ((*keyix) == IEEE80211_KEYIX_NONE)
957 			(*keyix) = HAL_TXKEYIX_INVALID;
958 	} else
959 		(*keyix) = HAL_TXKEYIX_INVALID;
960 
961 	return (1);
962 }
963 
964 /*
965  * Calculate whether interoperability protection is required for
966  * this frame.
967  *
968  * This requires the rate control information be filled in,
969  * as the protection requirement depends upon the current
970  * operating mode / PHY.
971  */
972 static void
973 ath_tx_calc_protection(struct ath_softc *sc, struct ath_buf *bf)
974 {
975 	struct ieee80211_frame *wh;
976 	uint8_t rix;
977 	uint16_t flags;
978 	int shortPreamble;
979 	const HAL_RATE_TABLE *rt = sc->sc_currates;
980 	struct ifnet *ifp = sc->sc_ifp;
981 	struct ieee80211com *ic = ifp->if_l2com;
982 
983 	flags = bf->bf_state.bfs_txflags;
984 	rix = bf->bf_state.bfs_rc[0].rix;
985 	shortPreamble = bf->bf_state.bfs_shpream;
986 	wh = mtod(bf->bf_m, struct ieee80211_frame *);
987 
988 	/*
989 	 * If 802.11g protection is enabled, determine whether
990 	 * to use RTS/CTS or just CTS.  Note that this is only
991 	 * done for OFDM unicast frames.
992 	 */
993 	if ((ic->ic_flags & IEEE80211_F_USEPROT) &&
994 	    rt->info[rix].phy == IEEE80211_T_OFDM &&
995 	    (flags & HAL_TXDESC_NOACK) == 0) {
996 		bf->bf_state.bfs_doprot = 1;
997 		/* XXX fragments must use CCK rates w/ protection */
998 		if (ic->ic_protmode == IEEE80211_PROT_RTSCTS) {
999 			flags |= HAL_TXDESC_RTSENA;
1000 		} else if (ic->ic_protmode == IEEE80211_PROT_CTSONLY) {
1001 			flags |= HAL_TXDESC_CTSENA;
1002 		}
1003 		/*
1004 		 * For frags it would be desirable to use the
1005 		 * highest CCK rate for RTS/CTS.  But stations
1006 		 * farther away may detect it at a lower CCK rate
1007 		 * so use the configured protection rate instead
1008 		 * (for now).
1009 		 */
1010 		sc->sc_stats.ast_tx_protect++;
1011 	}
1012 
1013 	/*
1014 	 * If 11n protection is enabled and it's a HT frame,
1015 	 * enable RTS.
1016 	 *
1017 	 * XXX ic_htprotmode or ic_curhtprotmode?
1018 	 * XXX should ic_htprotmode only matter if ic_curhtprotmode
1019 	 * XXX indicates it's not a HT pure environment?
1020 	 */
1021 	if ((ic->ic_htprotmode == IEEE80211_PROT_RTSCTS) &&
1022 	    rt->info[rix].phy == IEEE80211_T_HT &&
1023 	    (flags & HAL_TXDESC_NOACK) == 0) {
1024 		flags |= HAL_TXDESC_RTSENA;
1025 		sc->sc_stats.ast_tx_htprotect++;
1026 	}
1027 	bf->bf_state.bfs_txflags = flags;
1028 }
1029 
1030 /*
1031  * Update the frame duration given the currently selected rate.
1032  *
1033  * This also writes the updated duration value into the 802.11 header
1034  * itself, so the frame contents will require a DMA sync/flush.
1035  */
1036 static void
1037 ath_tx_calc_duration(struct ath_softc *sc, struct ath_buf *bf)
1038 {
1039 	struct ieee80211_frame *wh;
1040 	uint8_t rix;
1041 	uint16_t flags;
1042 	int shortPreamble;
1043 	struct ath_hal *ah = sc->sc_ah;
1044 	const HAL_RATE_TABLE *rt = sc->sc_currates;
1045 	int isfrag = bf->bf_m->m_flags & M_FRAG;
1046 
1047 	flags = bf->bf_state.bfs_txflags;
1048 	rix = bf->bf_state.bfs_rc[0].rix;
1049 	shortPreamble = bf->bf_state.bfs_shpream;
1050 	wh = mtod(bf->bf_m, struct ieee80211_frame *);
1051 
1052 	/*
1053 	 * Calculate duration.  This logically belongs in the 802.11
1054 	 * layer but it lacks sufficient information to calculate it.
1055 	 */
1056 	if ((flags & HAL_TXDESC_NOACK) == 0 &&
1057 	    (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) != IEEE80211_FC0_TYPE_CTL) {
1058 		u_int16_t dur;
1059 		if (shortPreamble)
1060 			dur = rt->info[rix].spAckDuration;
1061 		else
1062 			dur = rt->info[rix].lpAckDuration;
1063 		if (wh->i_fc[1] & IEEE80211_FC1_MORE_FRAG) {
1064 			dur += dur;		/* additional SIFS+ACK */
1065 			KASSERT(bf->bf_m->m_nextpkt != NULL, ("no fragment"));
1066 			/*
1067 			 * Include the size of next fragment so NAV is
1068 			 * updated properly.  The last fragment uses only
1069 			 * the ACK duration
1070 			 */
1071 			dur += ath_hal_computetxtime(ah, rt,
1072 					bf->bf_m->m_nextpkt->m_pkthdr.len,
1073 					rix, shortPreamble);
1074 		}
1075 		if (isfrag) {
1076 			/*
1077 			 * Force hardware to use computed duration for next
1078 			 * fragment by disabling multi-rate retry which updates
1079 			 * duration based on the multi-rate duration table.
1080 			 */
1081 			bf->bf_state.bfs_ismrr = 0;
1082 			bf->bf_state.bfs_try0 = ATH_TXMGTTRY;
1083 			/* XXX update bfs_rc[0].try? */
1084 		}
1085 
1086 		/* Update the duration field itself */
1087 		*(u_int16_t *)wh->i_dur = htole16(dur);
1088 	}
1089 }
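
/*
 * Sketch of the resulting duration values (assuming an ACK is
 * expected): the final (or only) fragment carries just the ACK
 * duration for the chosen rate/preamble, while a non-final fragment
 * carries roughly
 *
 *	dur = 2 * (SIFS + ACK) + txtime(next fragment)
 *
 * so the NAV covers the following fragment exchange as well.
 */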
1090 
1091 static uint8_t
1092 ath_tx_get_rtscts_rate(struct ath_hal *ah, const HAL_RATE_TABLE *rt,
1093     int cix, int shortPreamble)
1094 {
1095 	uint8_t ctsrate;
1096 
1097 	/*
1098 	 * CTS transmit rate is derived from the transmit rate
1099 	 * by looking in the h/w rate table.  We must also factor
1100 	 * in whether or not a short preamble is to be used.
1101 	 */
1102 	/* NB: cix is set above where RTS/CTS is enabled */
1103 	KASSERT(cix != 0xff, ("cix not setup"));
1104 	ctsrate = rt->info[cix].rateCode;
1105 
1106 	/* XXX this should only matter for legacy rates */
1107 	if (shortPreamble)
1108 		ctsrate |= rt->info[cix].shortPreamble;
1109 
1110 	return (ctsrate);
1111 }
1112 
1113 /*
1114  * Calculate the RTS/CTS duration for legacy frames.
1115  */
1116 static int
1117 ath_tx_calc_ctsduration(struct ath_hal *ah, int rix, int cix,
1118     int shortPreamble, int pktlen, const HAL_RATE_TABLE *rt,
1119     int flags)
1120 {
1121 	int ctsduration = 0;
1122 
1123 	/* This mustn't be called for HT modes */
1124 	if (rt->info[cix].phy == IEEE80211_T_HT) {
1125 		printf("%s: HT rate where it shouldn't be (0x%x)\n",
1126 		    __func__, rt->info[cix].rateCode);
1127 		return (-1);
1128 	}
1129 
1130 	/*
1131 	 * Compute the transmit duration based on the frame
1132 	 * size and the size of an ACK frame.  We call into the
1133 	 * HAL to do the computation since it depends on the
1134 	 * characteristics of the actual PHY being used.
1135 	 *
1136 	 * NB: CTS is assumed the same size as an ACK so we can
1137 	 *     use the precalculated ACK durations.
1138 	 */
1139 	if (shortPreamble) {
1140 		if (flags & HAL_TXDESC_RTSENA)		/* SIFS + CTS */
1141 			ctsduration += rt->info[cix].spAckDuration;
1142 		ctsduration += ath_hal_computetxtime(ah,
1143 			rt, pktlen, rix, AH_TRUE);
1144 		if ((flags & HAL_TXDESC_NOACK) == 0)	/* SIFS + ACK */
1145 			ctsduration += rt->info[rix].spAckDuration;
1146 	} else {
1147 		if (flags & HAL_TXDESC_RTSENA)		/* SIFS + CTS */
1148 			ctsduration += rt->info[cix].lpAckDuration;
1149 		ctsduration += ath_hal_computetxtime(ah,
1150 			rt, pktlen, rix, AH_FALSE);
1151 		if ((flags & HAL_TXDESC_NOACK) == 0)	/* SIFS + ACK */
1152 			ctsduration += rt->info[rix].lpAckDuration;
1153 	}
1154 
1155 	return (ctsduration);
1156 }
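
/*
 * Sketch of what the code above computes for a legacy RTS-protected
 * frame that expects an ACK:
 *
 *	ctsduration = (SIFS + CTS) + txtime(pktlen, rix) + (SIFS + ACK)
 *
 * where the CTS and ACK components come from the rate table's
 * precalculated sp/lpAckDuration entries (CTS is assumed to be the
 * same size as an ACK, per the comment above).
 */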
1157 
1158 /*
1159  * Update the given ath_buf with updated rts/cts setup and duration
1160  * values.
1161  *
1162  * To support rate lookups for each software retry, the rts/cts rate
1163  * and cts duration must be re-calculated.
1164  *
1165  * This function assumes the RTS/CTS flags have been set as needed;
1166  * mrr has been disabled; and the rate control lookup has been done.
1167  *
1168  * XXX TODO: MRR need only be disabled for the pre-11n NICs.
1169  * XXX The 11n NICs support per-rate RTS/CTS configuration.
1170  */
1171 static void
1172 ath_tx_set_rtscts(struct ath_softc *sc, struct ath_buf *bf)
1173 {
1174 	uint16_t ctsduration = 0;
1175 	uint8_t ctsrate = 0;
1176 	uint8_t rix = bf->bf_state.bfs_rc[0].rix;
1177 	uint8_t cix = 0;
1178 	const HAL_RATE_TABLE *rt = sc->sc_currates;
1179 
1180 	/*
1181 	 * No RTS/CTS enabled? Don't bother.
1182 	 */
1183 	if ((bf->bf_state.bfs_txflags &
1184 	    (HAL_TXDESC_RTSENA | HAL_TXDESC_CTSENA)) == 0) {
1185 		/* XXX is this really needed? */
1186 		bf->bf_state.bfs_ctsrate = 0;
1187 		bf->bf_state.bfs_ctsduration = 0;
1188 		return;
1189 	}
1190 
1191 	/*
1192 	 * If protection is enabled, use the protection rix control
1193 	 * rate. Otherwise use the rate0 control rate.
1194 	 */
1195 	if (bf->bf_state.bfs_doprot)
1196 		rix = sc->sc_protrix;
1197 	else
1198 		rix = bf->bf_state.bfs_rc[0].rix;
1199 
1200 	/*
1201 	 * If the raw path has hard-coded ctsrate0 to something,
1202 	 * use it.
1203 	 */
1204 	if (bf->bf_state.bfs_ctsrate0 != 0)
1205 		cix = ath_tx_findrix(sc, bf->bf_state.bfs_ctsrate0);
1206 	else
1207 		/* Control rate from above */
1208 		cix = rt->info[rix].controlRate;
1209 
1210 	/* Calculate the rtscts rate for the given cix */
1211 	ctsrate = ath_tx_get_rtscts_rate(sc->sc_ah, rt, cix,
1212 	    bf->bf_state.bfs_shpream);
1213 
1214 	/* The 11n chipsets do ctsduration calculations for you */
1215 	if (! ath_tx_is_11n(sc))
1216 		ctsduration = ath_tx_calc_ctsduration(sc->sc_ah, rix, cix,
1217 		    bf->bf_state.bfs_shpream, bf->bf_state.bfs_pktlen,
1218 		    rt, bf->bf_state.bfs_txflags);
1219 
1220 	/* Squirrel away in ath_buf */
1221 	bf->bf_state.bfs_ctsrate = ctsrate;
1222 	bf->bf_state.bfs_ctsduration = ctsduration;
1223 
1224 	/*
1225 	 * Must disable multi-rate retry when using RTS/CTS.
1226 	 */
1227 	if (!sc->sc_mrrprot) {
1228 		bf->bf_state.bfs_ismrr = 0;
1229 		bf->bf_state.bfs_try0 =
1230 		    bf->bf_state.bfs_rc[0].tries = ATH_TXMGTTRY; /* XXX ew */
1231 	}
1232 }
1233 
1234 /*
1235  * Setup the descriptor chain for a normal or fast-frame
1236  * frame.
1237  *
1238  * XXX TODO: extend to include the destination hardware QCU ID.
1239  * Make sure that is correct.  Make sure that when being added
1240  * to the mcastq, the CABQ QCUID is set or things will get a bit
1241  * odd.
1242  */
1243 static void
1244 ath_tx_setds(struct ath_softc *sc, struct ath_buf *bf)
1245 {
1246 	struct ath_desc *ds = bf->bf_desc;
1247 	struct ath_hal *ah = sc->sc_ah;
1248 
1249 	ath_hal_setuptxdesc(ah, ds
1250 		, bf->bf_state.bfs_pktlen	/* packet length */
1251 		, bf->bf_state.bfs_hdrlen	/* header length */
1252 		, bf->bf_state.bfs_atype	/* Atheros packet type */
1253 		, bf->bf_state.bfs_txpower	/* txpower */
1254 		, bf->bf_state.bfs_txrate0
1255 		, bf->bf_state.bfs_try0		/* series 0 rate/tries */
1256 		, bf->bf_state.bfs_keyix	/* key cache index */
1257 		, bf->bf_state.bfs_txantenna	/* antenna mode */
1258 		, bf->bf_state.bfs_txflags	/* flags */
1259 		, bf->bf_state.bfs_ctsrate	/* rts/cts rate */
1260 		, bf->bf_state.bfs_ctsduration	/* rts/cts duration */
1261 	);
1262 
1263 	/*
1264 	 * This will be overriden when the descriptor chain is written.
1265 	 */
1266 	bf->bf_lastds = ds;
1267 	bf->bf_last = bf;
1268 
1269 	/* Set rate control and descriptor chain for this frame */
1270 	ath_tx_set_ratectrl(sc, bf->bf_node, bf);
1271 	ath_tx_chaindesclist(sc, bf);
1272 }
1273 
1274 /*
1275  * Do a rate lookup.
1276  *
1277  * This performs a rate lookup for the given ath_buf only if it's required.
1278  * Non-data frames and raw frames don't require it.
1279  *
1280  * This populates the primary and MRR entries; MRR values are
1281  * then disabled later on if something requires it (eg RTS/CTS on
1282  * pre-11n chipsets).
1283  *
1284  * This needs to be done before the RTS/CTS fields are calculated
1285  * as they may depend upon the rate chosen.
1286  */
1287 static void
1288 ath_tx_do_ratelookup(struct ath_softc *sc, struct ath_buf *bf)
1289 {
1290 	uint8_t rate, rix;
1291 	int try0;
1292 
1293 	if (! bf->bf_state.bfs_doratelookup)
1294 		return;
1295 
1296 	/* Get rid of any previous state */
1297 	bzero(bf->bf_state.bfs_rc, sizeof(bf->bf_state.bfs_rc));
1298 
1299 	ATH_NODE_LOCK(ATH_NODE(bf->bf_node));
1300 	ath_rate_findrate(sc, ATH_NODE(bf->bf_node), bf->bf_state.bfs_shpream,
1301 	    bf->bf_state.bfs_pktlen, &rix, &try0, &rate);
1302 
1303 	/* In case MRR is disabled, make sure rc[0] is setup correctly */
1304 	bf->bf_state.bfs_rc[0].rix = rix;
1305 	bf->bf_state.bfs_rc[0].ratecode = rate;
1306 	bf->bf_state.bfs_rc[0].tries = try0;
1307 
1308 	if (bf->bf_state.bfs_ismrr && try0 != ATH_TXMAXTRY)
1309 		ath_rate_getxtxrates(sc, ATH_NODE(bf->bf_node), rix,
1310 		    bf->bf_state.bfs_rc);
1311 	ATH_NODE_UNLOCK(ATH_NODE(bf->bf_node));
1312 
1313 	sc->sc_txrix = rix;	/* for LED blinking */
1314 	sc->sc_lastdatarix = rix;	/* for fast frames */
1315 	bf->bf_state.bfs_try0 = try0;
1316 	bf->bf_state.bfs_txrate0 = rate;
1317 }
1318 
1319 /*
1320  * Update the CLRDMASK bit in the ath_buf if it needs to be set.
1321  */
1322 static void
1323 ath_tx_update_clrdmask(struct ath_softc *sc, struct ath_tid *tid,
1324     struct ath_buf *bf)
1325 {
1326 
1327 	ATH_TID_LOCK_ASSERT(sc, tid);
1328 
1329 	if (tid->clrdmask == 1) {
1330 		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
1331 		tid->clrdmask = 0;
1332 	}
1333 }
1334 
1335 /*
1336  * Transmit the given frame to the hardware.
1337  *
1338  * The frame must already be setup; rate control must already have
1339  * been done.
1340  *
1341  * XXX since the TXQ lock is being held here (and I dislike holding
1342  * it for this long when not doing software aggregation), later on
1343  * break this function into "setup_normal" and "xmit_normal". The
1344  * lock only needs to be held for the ath_tx_handoff call.
1345  */
1346 static void
1347 ath_tx_xmit_normal(struct ath_softc *sc, struct ath_txq *txq,
1348     struct ath_buf *bf)
1349 {
1350 	struct ath_node *an = ATH_NODE(bf->bf_node);
1351 	struct ath_tid *tid = &an->an_tid[bf->bf_state.bfs_tid];
1352 
1353 	ATH_TXQ_LOCK_ASSERT(txq);
1354 
1355 	/*
1356 	 * For now, just enable CLRDMASK.  ath_tx_xmit_normal() does
1357 	 * set a completion handler; however, it doesn't (yet) properly
1358 	 * handle the strict ordering requirements needed for normal,
1359 	 * non-aggregate session frames.
1360 	 *
1361 	 * Once this is implemented, only set CLRDMASK like this for
1362 	 * frames that must go out - eg management/raw frames.
1363 	 */
1364 	bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
1365 
1366 	/* Setup the descriptor before handoff */
1367 	ath_tx_do_ratelookup(sc, bf);
1368 	ath_tx_calc_duration(sc, bf);
1369 	ath_tx_calc_protection(sc, bf);
1370 	ath_tx_set_rtscts(sc, bf);
1371 	ath_tx_rate_fill_rcflags(sc, bf);
1372 	ath_tx_setds(sc, bf);
1373 
1374 	/* Track per-TID hardware queue depth correctly */
1375 	tid->hwq_depth++;
1376 
1377 	/* Assign the completion handler */
1378 	bf->bf_comp = ath_tx_normal_comp;
1379 
1380 	/* Hand off to hardware */
1381 	ath_tx_handoff(sc, txq, bf);
1382 }
1383 
1384 /*
1385  * Do the basic frame setup stuff that's required before the frame
1386  * is added to a software queue.
1387  *
1388  * All frames get mostly the same treatment and it's done once.
1389  * Retransmits fiddle with things like the rate control setup,
1390  * setting the retransmit bit in the packet; doing relevant DMA/bus
1391  * syncing and relinking it (back) into the hardware TX queue.
1392  *
1393  * Note that this may cause the mbuf to be reallocated, so
1394  * m0 may not be valid.
1395  */
1396 static int
1397 ath_tx_normal_setup(struct ath_softc *sc, struct ieee80211_node *ni,
1398     struct ath_buf *bf, struct mbuf *m0, struct ath_txq *txq)
1399 {
1400 	struct ieee80211vap *vap = ni->ni_vap;
1401 	struct ath_hal *ah = sc->sc_ah;
1402 	struct ifnet *ifp = sc->sc_ifp;
1403 	struct ieee80211com *ic = ifp->if_l2com;
1404 	const struct chanAccParams *cap = &ic->ic_wme.wme_chanParams;
1405 	int error, iswep, ismcast, isfrag, ismrr;
1406 	int keyix, hdrlen, pktlen, try0 = 0;
1407 	u_int8_t rix = 0, txrate = 0;
1408 	struct ath_desc *ds;
1409 	struct ieee80211_frame *wh;
1410 	u_int subtype, flags;
1411 	HAL_PKT_TYPE atype;
1412 	const HAL_RATE_TABLE *rt;
1413 	HAL_BOOL shortPreamble;
1414 	struct ath_node *an;
1415 	u_int pri;
1416 
1417 	/*
1418 	 * To ensure that both sequence number and CCMP PN assignment are
1419 	 * "correct", make sure that the relevant TID queue is locked.
1420 	 * Otherwise the CCMP PN and seqno may appear out of order, causing
1421 	 * re-ordered frames to have out of order CCMP PN's, resulting
1422 	 * in many, many frame drops.
1423 	 */
1424 	ATH_TXQ_LOCK_ASSERT(txq);
1425 
1426 	wh = mtod(m0, struct ieee80211_frame *);
1427 	iswep = wh->i_fc[1] & IEEE80211_FC1_WEP;
1428 	ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1);
1429 	isfrag = m0->m_flags & M_FRAG;
1430 	hdrlen = ieee80211_anyhdrsize(wh);
1431 	/*
1432 	 * Packet length must not include any
1433 	 * pad bytes; deduct them here.
1434 	 */
1435 	pktlen = m0->m_pkthdr.len - (hdrlen & 3);
1436 
1437 	/* Handle encryption twiddling if needed */
1438 	if (! ath_tx_tag_crypto(sc, ni, m0, iswep, isfrag, &hdrlen,
1439 	    &pktlen, &keyix)) {
1440 		ath_freetx(m0);
1441 		return EIO;
1442 	}
1443 
1444 	/* packet header may have moved, reset our local pointer */
1445 	wh = mtod(m0, struct ieee80211_frame *);
1446 
1447 	pktlen += IEEE80211_CRC_LEN;
1448 
1449 	/*
1450 	 * Load the DMA map so any coalescing is done.  This
1451 	 * also calculates the number of descriptors we need.
1452 	 */
1453 	error = ath_tx_dmasetup(sc, bf, m0);
1454 	if (error != 0)
1455 		return error;
1456 	bf->bf_node = ni;			/* NB: held reference */
1457 	m0 = bf->bf_m;				/* NB: may have changed */
1458 	wh = mtod(m0, struct ieee80211_frame *);
1459 
1460 	/* setup descriptors */
1461 	ds = bf->bf_desc;
1462 	rt = sc->sc_currates;
1463 	KASSERT(rt != NULL, ("no rate table, mode %u", sc->sc_curmode));
1464 
1465 	/*
1466 	 * NB: the 802.11 layer marks whether or not we should
1467 	 * use short preamble based on the current mode and
1468 	 * negotiated parameters.
1469 	 */
1470 	if ((ic->ic_flags & IEEE80211_F_SHPREAMBLE) &&
1471 	    (ni->ni_capinfo & IEEE80211_CAPINFO_SHORT_PREAMBLE)) {
1472 		shortPreamble = AH_TRUE;
1473 		sc->sc_stats.ast_tx_shortpre++;
1474 	} else {
1475 		shortPreamble = AH_FALSE;
1476 	}
1477 
1478 	an = ATH_NODE(ni);
1479 	//flags = HAL_TXDESC_CLRDMASK;		/* XXX needed for crypto errs */
1480 	flags = 0;
1481 	ismrr = 0;				/* default no multi-rate retry*/
1482 	pri = M_WME_GETAC(m0);			/* honor classification */
1483 	/* XXX use txparams instead of fixed values */
1484 	/*
1485 	 * Calculate Atheros packet type from IEEE80211 packet header,
1486 	 * setup for rate calculations, and select h/w transmit queue.
1487 	 */
1488 	switch (wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) {
1489 	case IEEE80211_FC0_TYPE_MGT:
1490 		subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
1491 		if (subtype == IEEE80211_FC0_SUBTYPE_BEACON)
1492 			atype = HAL_PKT_TYPE_BEACON;
1493 		else if (subtype == IEEE80211_FC0_SUBTYPE_PROBE_RESP)
1494 			atype = HAL_PKT_TYPE_PROBE_RESP;
1495 		else if (subtype == IEEE80211_FC0_SUBTYPE_ATIM)
1496 			atype = HAL_PKT_TYPE_ATIM;
1497 		else
1498 			atype = HAL_PKT_TYPE_NORMAL;	/* XXX */
1499 		rix = an->an_mgmtrix;
1500 		txrate = rt->info[rix].rateCode;
1501 		if (shortPreamble)
1502 			txrate |= rt->info[rix].shortPreamble;
1503 		try0 = ATH_TXMGTTRY;
1504 		flags |= HAL_TXDESC_INTREQ;	/* force interrupt */
1505 		break;
1506 	case IEEE80211_FC0_TYPE_CTL:
1507 		atype = HAL_PKT_TYPE_PSPOLL;	/* stop setting of duration */
1508 		rix = an->an_mgmtrix;
1509 		txrate = rt->info[rix].rateCode;
1510 		if (shortPreamble)
1511 			txrate |= rt->info[rix].shortPreamble;
1512 		try0 = ATH_TXMGTTRY;
1513 		flags |= HAL_TXDESC_INTREQ;	/* force interrupt */
1514 		break;
1515 	case IEEE80211_FC0_TYPE_DATA:
1516 		atype = HAL_PKT_TYPE_NORMAL;		/* default */
1517 		/*
1518 		 * Data frames: multicast frames go out at a fixed rate,
1519 		 * EAPOL frames use the mgmt frame rate; otherwise consult
1520 		 * the rate control module for the rate to use.
1521 		 */
1522 		if (ismcast) {
1523 			rix = an->an_mcastrix;
1524 			txrate = rt->info[rix].rateCode;
1525 			if (shortPreamble)
1526 				txrate |= rt->info[rix].shortPreamble;
1527 			try0 = 1;
1528 		} else if (m0->m_flags & M_EAPOL) {
1529 			/* XXX? maybe always use long preamble? */
1530 			rix = an->an_mgmtrix;
1531 			txrate = rt->info[rix].rateCode;
1532 			if (shortPreamble)
1533 				txrate |= rt->info[rix].shortPreamble;
1534 			try0 = ATH_TXMAXTRY;	/* XXX?too many? */
1535 		} else {
1536 			/*
1537 			 * Do rate lookup on each TX, rather than using
1538 			 * the hard-coded TX information decided here.
1539 			 */
1540 			ismrr = 1;
1541 			bf->bf_state.bfs_doratelookup = 1;
1542 		}
1543 		if (cap->cap_wmeParams[pri].wmep_noackPolicy)
1544 			flags |= HAL_TXDESC_NOACK;
1545 		break;
1546 	default:
1547 		if_printf(ifp, "bogus frame type 0x%x (%s)\n",
1548 			wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK, __func__);
1549 		/* XXX statistic */
1550 		ath_freetx(m0);
1551 		return EIO;
1552 	}
1553 
1554 	/*
1555 	 * There are two known scenarios where the frame AC doesn't match
1556 	 * what the destination TXQ is.
1557 	 *
1558 	 * + non-QoS frames (eg management?) that the net80211 stack has
1559 	 *   assigned a higher AC to, but since it's a non-QoS TID, it's
1560 	 *   being thrown into TID 16.  TID 16 gets the ATH_NONQOS_TID_AC queue.
1561 	 *   It's quite possible that management frames should just be
1562 	 *   direct dispatched to hardware rather than go via the software
1563 	 *   queue; that should be investigated in the future.  There are
1564 	 *   some specific scenarios where this doesn't make sense, mostly
1565 	 *   surrounding ADDBA request/response - hence why that is special
1566 	 *   cased.
1567 	 *
1568 	 * + Multicast frames going into the VAP mcast queue.  That shows up
1569 	 *   as "TXQ 11".
1570 	 *
1571 	 * This driver should eventually support separate TID and TXQ locking,
1572 	 * allowing for arbitrary AC frames to appear on arbitrary software
1573 	 * queues, being queued to the "correct" hardware queue when needed.
1574 	 */
1575 #if 0
1576 	if (txq != sc->sc_ac2q[pri]) {
1577 		device_printf(sc->sc_dev,
1578 		    "%s: txq=%p (%d), pri=%d, pri txq=%p (%d)\n",
1579 		    __func__,
1580 		    txq,
1581 		    txq->axq_qnum,
1582 		    pri,
1583 		    sc->sc_ac2q[pri],
1584 		    sc->sc_ac2q[pri]->axq_qnum);
1585 	}
1586 #endif
1587 
1588 	/*
1589 	 * Calculate miscellaneous flags.
1590 	 */
1591 	if (ismcast) {
1592 		flags |= HAL_TXDESC_NOACK;	/* no ack on broad/multicast */
1593 	} else if (pktlen > vap->iv_rtsthreshold &&
1594 	    (ni->ni_ath_flags & IEEE80211_NODE_FF) == 0) {
1595 		flags |= HAL_TXDESC_RTSENA;	/* RTS based on frame length */
1596 		sc->sc_stats.ast_tx_rts++;
1597 	}
1598 	if (flags & HAL_TXDESC_NOACK)		/* NB: avoid double counting */
1599 		sc->sc_stats.ast_tx_noack++;
1600 #ifdef IEEE80211_SUPPORT_TDMA
1601 	if (sc->sc_tdma && (flags & HAL_TXDESC_NOACK) == 0) {
1602 		DPRINTF(sc, ATH_DEBUG_TDMA,
1603 		    "%s: discard frame, ACK required w/ TDMA\n", __func__);
1604 		sc->sc_stats.ast_tdma_ack++;
1605 		ath_freetx(m0);
1606 		return EIO;
1607 	}
1608 #endif
1609 
1610 	/*
1611 	 * Determine if a tx interrupt should be generated for
1612 	 * this descriptor.  We take a tx interrupt to reap
1613 	 * descriptors when the h/w hits an EOL condition or
1614 	 * when the descriptor is specifically marked to generate
1615 	 * an interrupt.  We periodically mark descriptors in this
1616 	 * way to ensure timely replenishing of the supply needed
1617 	 * for sending frames.  Deferring interrupts reduces system
1618 	 * load and potentially allows more concurrent work to be
1619 	 * done, but if done too aggressively it can cause senders to
1620 	 * back up.
1621 	 *
1622 	 * NB: use >= to deal with sc_txintrperiod changing
1623 	 *     dynamically through sysctl.
1624 	 */
1625 	if (flags & HAL_TXDESC_INTREQ) {
1626 		txq->axq_intrcnt = 0;
1627 	} else if (++txq->axq_intrcnt >= sc->sc_txintrperiod) {
1628 		flags |= HAL_TXDESC_INTREQ;
1629 		txq->axq_intrcnt = 0;
1630 	}
1631 
1632 	/* This point forward is actual TX bits */
1633 
1634 	/*
1635 	 * At this point we are committed to sending the frame
1636 	 * and we don't need to look at m_nextpkt; clear it in
1637 	 * case this frame is part of frag chain.
1638 	 */
1639 	m0->m_nextpkt = NULL;
1640 
1641 	if (IFF_DUMPPKTS(sc, ATH_DEBUG_XMIT))
1642 		ieee80211_dump_pkt(ic, mtod(m0, const uint8_t *), m0->m_len,
1643 		    sc->sc_hwmap[rix].ieeerate, -1);
1644 
1645 	if (ieee80211_radiotap_active_vap(vap)) {
1646 		u_int64_t tsf = ath_hal_gettsf64(ah);
1647 
1648 		sc->sc_tx_th.wt_tsf = htole64(tsf);
1649 		sc->sc_tx_th.wt_flags = sc->sc_hwmap[rix].txflags;
1650 		if (iswep)
1651 			sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_WEP;
1652 		if (isfrag)
1653 			sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_FRAG;
1654 		sc->sc_tx_th.wt_rate = sc->sc_hwmap[rix].ieeerate;
1655 		sc->sc_tx_th.wt_txpower = ni->ni_txpower;
1656 		sc->sc_tx_th.wt_antenna = sc->sc_txantenna;
1657 
1658 		ieee80211_radiotap_tx(vap, m0);
1659 	}
1660 
1661 	/* Blank the legacy rate array */
1662 	bzero(&bf->bf_state.bfs_rc, sizeof(bf->bf_state.bfs_rc));
1663 
1664 	/*
1665 	 * ath_buf_set_rate needs at least one rate/try to setup
1666 	 * the rate scenario.
1667 	 */
1668 	bf->bf_state.bfs_rc[0].rix = rix;
1669 	bf->bf_state.bfs_rc[0].tries = try0;
1670 	bf->bf_state.bfs_rc[0].ratecode = txrate;
1671 
1672 	/* Store the decided rate index values away */
1673 	bf->bf_state.bfs_pktlen = pktlen;
1674 	bf->bf_state.bfs_hdrlen = hdrlen;
1675 	bf->bf_state.bfs_atype = atype;
1676 	bf->bf_state.bfs_txpower = ni->ni_txpower;
1677 	bf->bf_state.bfs_txrate0 = txrate;
1678 	bf->bf_state.bfs_try0 = try0;
1679 	bf->bf_state.bfs_keyix = keyix;
1680 	bf->bf_state.bfs_txantenna = sc->sc_txantenna;
1681 	bf->bf_state.bfs_txflags = flags;
1682 	bf->bf_state.bfs_shpream = shortPreamble;
1683 
1684 	/* XXX this should be done in ath_tx_setrate() */
1685 	bf->bf_state.bfs_ctsrate0 = 0;	/* ie, no hard-coded ctsrate */
1686 	bf->bf_state.bfs_ctsrate = 0;	/* calculated later */
1687 	bf->bf_state.bfs_ctsduration = 0;
1688 	bf->bf_state.bfs_ismrr = ismrr;
1689 
1690 	return 0;
1691 }
1692 
1693 /*
1694  * Queue a frame to the hardware or software queue.
1695  *
1696  * This can be called by the net80211 code.
1697  *
1698  * XXX what about locking? Or, push the seqno assign into the
1699  * XXX aggregate scheduler so it's serialised?
1700  *
1701  * XXX When sending management frames via ath_raw_xmit(),
1702  *     should CLRDMASK be set unconditionally?
1703  */
1704 int
1705 ath_tx_start(struct ath_softc *sc, struct ieee80211_node *ni,
1706     struct ath_buf *bf, struct mbuf *m0)
1707 {
1708 	struct ieee80211vap *vap = ni->ni_vap;
1709 	struct ath_vap *avp = ATH_VAP(vap);
1710 	int r = 0;
1711 	u_int pri;
1712 	int tid;
1713 	struct ath_txq *txq;
1714 	int ismcast;
1715 	const struct ieee80211_frame *wh;
1716 	int is_ampdu, is_ampdu_tx, is_ampdu_pending;
1717 	ieee80211_seq seqno;
1718 	uint8_t type, subtype;
1719 
1720 	/*
1721 	 * Determine the target hardware queue.
1722 	 *
1723 	 * For multicast frames, the txq gets overridden appropriately
1724 	 * depending upon the state of PS.
1725 	 *
1726 	 * For any other frame, we do a TID/QoS lookup inside the frame
1727 	 * to see what the TID should be. If it's a non-QoS frame, the
1728 	 * AC and TID are overridden. The TID/TXQ code assumes the
1729 	 * TID is on a predictable hardware TXQ, so we don't support
1730 	 * having a node TID queued to multiple hardware TXQs.
1731 	 * This may change in the future but would require some locking
1732 	 * fudgery.
1733 	 */
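	/*
	 * For example (illustrative only): a QoS data frame carrying
	 * TID 5 is mapped to its WME access category by ath_tx_getac()
	 * and lands on the hardware TXQ configured for that AC, whilst
	 * a non-QoS frame is assigned the default TID (16) and has its
	 * AC overridden as described above.
	 */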
1734 	pri = ath_tx_getac(sc, m0);
1735 	tid = ath_tx_gettid(sc, m0);
1736 
1737 	txq = sc->sc_ac2q[pri];
1738 	wh = mtod(m0, struct ieee80211_frame *);
1739 	ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1);
1740 	type = wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK;
1741 	subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
1742 
1743 	/*
1744 	 * Enforce how deep the multicast queue can grow.
1745 	 *
1746 	 * XXX duplicated in ath_raw_xmit().
1747 	 */
1748 	if (IEEE80211_IS_MULTICAST(wh->i_addr1)) {
1749 		ATH_TXQ_LOCK(sc->sc_cabq);
1750 
1751 		if (sc->sc_cabq->axq_depth > sc->sc_txq_mcastq_maxdepth) {
1752 			sc->sc_stats.ast_tx_mcastq_overflow++;
1753 			r = ENOBUFS;
1754 		}
1755 
1756 		ATH_TXQ_UNLOCK(sc->sc_cabq);
1757 
1758 		if (r != 0) {
1759 			m_freem(m0);
1760 			return r;
1761 		}
1762 	}
1763 
1764 	/* A-MPDU TX */
1765 	is_ampdu_tx = ath_tx_ampdu_running(sc, ATH_NODE(ni), tid);
1766 	is_ampdu_pending = ath_tx_ampdu_pending(sc, ATH_NODE(ni), tid);
1767 	is_ampdu = is_ampdu_tx | is_ampdu_pending;
1768 
1769 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: tid=%d, ac=%d, is_ampdu=%d\n",
1770 	    __func__, tid, pri, is_ampdu);
1771 
1772 	/* Set local packet state, used to queue packets to hardware */
1773 	bf->bf_state.bfs_tid = tid;
1774 	bf->bf_state.bfs_txq = txq;
1775 	bf->bf_state.bfs_pri = pri;
1776 
1777 	/*
1778 	 * When servicing one or more stations in power-save mode,
1779 	 * or if there is some mcast data waiting on the mcast
1780 	 * queue (to prevent out-of-order delivery), multicast frames
1781 	 * must be buffered until after the beacon.
1782 	 *
1783 	 * TODO: we should lock the mcastq before we check the length.
1784 	 */
1785 	if (ismcast && (vap->iv_ps_sta || avp->av_mcastq.axq_depth)) {
1786 		txq = &avp->av_mcastq;
1787 		/*
1788 		 * Mark the frame as eventually belonging on the CAB
1789 		 * queue, so the descriptor setup functions will
1790 		 * correctly initialise the descriptor 'qcuId' field.
1791 		 */
1792 		bf->bf_state.bfs_txq = sc->sc_cabq;
1793 	}
1794 
1795 	/* Do the generic frame setup */
1796 	/* XXX should just bzero the bf_state? */
1797 	bf->bf_state.bfs_dobaw = 0;
1798 
1799 	/*
1800 	 * Acquire the TXQ lock early, so both the encap and seqno
1801 	 * are allocated together.
1802 	 *
1803 	 * XXX should TXQ for CABQ traffic be the multicast queue,
1804 	 * or the TXQ the given PRI would allocate from? (eg for
1805 	 * sequence number allocation locking.)
1806 	 */
1807 	ATH_TXQ_LOCK(txq);
1808 
1809 	/* A-MPDU TX? Manually set sequence number */
1810 	/*
1811 	 * Don't do it whilst pending; the net80211 layer still
1812 	 * assigns them.
1813 	 */
1814 	if (is_ampdu_tx) {
1815 		/*
1816 		 * Always call; this function will
1817 		 * handle making sure that null data frames
1818 		 * don't get a sequence number from the current
1819 		 * TID and thus mess with the BAW.
1820 		 */
1821 		seqno = ath_tx_tid_seqno_assign(sc, ni, bf, m0);
1822 
1823 		/*
1824 		 * Don't add QoS NULL frames to the BAW.
1825 		 */
1826 		if (IEEE80211_QOS_HAS_SEQ(wh) &&
1827 		    subtype != IEEE80211_FC0_SUBTYPE_QOS_NULL) {
1828 			bf->bf_state.bfs_dobaw = 1;
1829 		}
1830 	}
1831 
1832 	/*
1833 	 * If needed, the sequence number has been assigned.
1834 	 * Squirrel it away somewhere easy to get to.
1835 	 */
1836 	bf->bf_state.bfs_seqno = M_SEQNO_GET(m0) << IEEE80211_SEQ_SEQ_SHIFT;
1837 
1838 	/* Is ampdu pending? fetch the seqno and print it out */
1839 	if (is_ampdu_pending)
1840 		DPRINTF(sc, ATH_DEBUG_SW_TX,
1841 		    "%s: tid %d: ampdu pending, seqno %d\n",
1842 		    __func__, tid, M_SEQNO_GET(m0));
1843 
1844 	/* This also sets up the DMA map */
1845 	r = ath_tx_normal_setup(sc, ni, bf, m0, txq);
1846 
1847 	if (r != 0)
1848 		goto done;
1849 
1850 	/* At this point m0 could have changed! */
1851 	m0 = bf->bf_m;
1852 
1853 #if 1
1854 	/*
1855 	 * If it's a multicast frame, do a direct-dispatch to the
1856 	 * destination hardware queue. Don't bother software
1857 	 * queuing it.
1858 	 */
1859 	/*
1860 	 * If it's a BAR frame, do a direct dispatch to the
1861 	 * destination hardware queue. Don't bother software
1862 	 * queuing it, as the TID will now be paused.
1863 	 * Sending a BAR frame can occur from the net80211 txa timer
1864 	 * (ie, retries) or from the ath txtask (completion call.)
1865 	 * It queues directly to hardware because the TID is paused
1866 	 * at this point (and won't be unpaused until the BAR has
1867 	 * either been TXed successfully or max retries has been
1868 	 * reached.)
1869 	 */
1870 	if (txq == &avp->av_mcastq) {
1871 		DPRINTF(sc, ATH_DEBUG_SW_TX,
1872 		    "%s: bf=%p: mcastq: TX'ing\n", __func__, bf);
1873 		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
1874 		ath_tx_xmit_normal(sc, txq, bf);
1875 	} else if (type == IEEE80211_FC0_TYPE_CTL &&
1876 		    subtype == IEEE80211_FC0_SUBTYPE_BAR) {
1877 		DPRINTF(sc, ATH_DEBUG_SW_TX,
1878 		    "%s: BAR: TX'ing direct\n", __func__);
1879 		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
1880 		ath_tx_xmit_normal(sc, txq, bf);
1881 	} else {
1882 		/* add to software queue */
1883 		DPRINTF(sc, ATH_DEBUG_SW_TX,
1884 		    "%s: bf=%p: swq: TX'ing\n", __func__, bf);
1885 		ath_tx_swq(sc, ni, txq, bf);
1886 	}
1887 #else
1888 	/*
1889 	 * For now, since there's no software queue,
1890 	 * direct-dispatch to the hardware.
1891 	 */
1892 	bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
1893 	ath_tx_xmit_normal(sc, txq, bf);
1894 #endif
1895 done:
1896 	ATH_TXQ_UNLOCK(txq);
1897 
1898 	return 0;
1899 }
1900 
1901 static int
1902 ath_tx_raw_start(struct ath_softc *sc, struct ieee80211_node *ni,
1903 	struct ath_buf *bf, struct mbuf *m0,
1904 	const struct ieee80211_bpf_params *params)
1905 {
1906 	struct ifnet *ifp = sc->sc_ifp;
1907 	struct ieee80211com *ic = ifp->if_l2com;
1908 	struct ath_hal *ah = sc->sc_ah;
1909 	struct ieee80211vap *vap = ni->ni_vap;
1910 	int error, ismcast, ismrr;
1911 	int keyix, hdrlen, pktlen, try0, txantenna;
1912 	u_int8_t rix, txrate;
1913 	struct ieee80211_frame *wh;
1914 	u_int flags;
1915 	HAL_PKT_TYPE atype;
1916 	const HAL_RATE_TABLE *rt;
1917 	struct ath_desc *ds;
1918 	u_int pri;
1919 	int o_tid = -1;
1920 	int do_override;
1921 
1922 	wh = mtod(m0, struct ieee80211_frame *);
1923 	ismcast = IEEE80211_IS_MULTICAST(wh->i_addr1);
1924 	hdrlen = ieee80211_anyhdrsize(wh);
1925 	/*
1926 	 * Packet length must not include any
1927 	 * pad bytes; deduct them here.
1928 	 */
1929 	/* XXX honor IEEE80211_BPF_DATAPAD */
1930 	pktlen = m0->m_pkthdr.len - (hdrlen & 3) + IEEE80211_CRC_LEN;
1931 
1932 	ATH_KTR(sc, ATH_KTR_TX, 2,
1933 	     "ath_tx_raw_start: ni=%p, bf=%p, raw", ni, bf);
1934 
1935 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: ismcast=%d\n",
1936 	    __func__, ismcast);
1937 
1938 	pri = params->ibp_pri & 3;
1939 	/* Override pri if the frame isn't a QoS one */
1940 	if (! IEEE80211_QOS_HAS_SEQ(wh))
1941 		pri = ath_tx_getac(sc, m0);
1942 
1943 	/* XXX If it's an ADDBA, override the correct queue */
1944 	do_override = ath_tx_action_frame_override_queue(sc, ni, m0, &o_tid);
1945 
1946 	/* Map ADDBA to the correct priority */
1947 	if (do_override) {
1948 #if 0
1949 		device_printf(sc->sc_dev,
1950 		    "%s: overriding tid %d pri %d -> %d\n",
1951 		    __func__, o_tid, pri, TID_TO_WME_AC(o_tid));
1952 #endif
1953 		pri = TID_TO_WME_AC(o_tid);
1954 	}
1955 
1956 	ATH_TXQ_LOCK(sc->sc_ac2q[pri]);
1957 
1958 	/* Handle encryption twiddling if needed */
1959 	if (! ath_tx_tag_crypto(sc, ni,
1960 	    m0, params->ibp_flags & IEEE80211_BPF_CRYPTO, 0,
1961 	    &hdrlen, &pktlen, &keyix)) {
1962 		ath_freetx(m0);
1963 		return EIO;
1964 	}
1965 	/* packet header may have moved, reset our local pointer */
1966 	wh = mtod(m0, struct ieee80211_frame *);
1967 
1968 	/* Do the generic frame setup */
1969 	/* XXX should just bzero the bf_state? */
1970 	bf->bf_state.bfs_dobaw = 0;
1971 
1972 	error = ath_tx_dmasetup(sc, bf, m0);
1973 	if (error != 0)
1974 		return error;
1975 	m0 = bf->bf_m;				/* NB: may have changed */
1976 	wh = mtod(m0, struct ieee80211_frame *);
1977 	bf->bf_node = ni;			/* NB: held reference */
1978 
1979 	/* Always enable CLRDMASK for raw frames for now.. */
1980 	flags = HAL_TXDESC_CLRDMASK;		/* XXX needed for crypto errs */
1981 	flags |= HAL_TXDESC_INTREQ;		/* force interrupt */
1982 	if (params->ibp_flags & IEEE80211_BPF_RTS)
1983 		flags |= HAL_TXDESC_RTSENA;
1984 	else if (params->ibp_flags & IEEE80211_BPF_CTS) {
1985 		/* XXX assume 11g/11n protection? */
1986 		bf->bf_state.bfs_doprot = 1;
1987 		flags |= HAL_TXDESC_CTSENA;
1988 	}
1989 	/* XXX leave ismcast to injector? */
1990 	if ((params->ibp_flags & IEEE80211_BPF_NOACK) || ismcast)
1991 		flags |= HAL_TXDESC_NOACK;
1992 
1993 	rt = sc->sc_currates;
1994 	KASSERT(rt != NULL, ("no rate table, mode %u", sc->sc_curmode));
1995 	rix = ath_tx_findrix(sc, params->ibp_rate0);
1996 	txrate = rt->info[rix].rateCode;
1997 	if (params->ibp_flags & IEEE80211_BPF_SHORTPRE)
1998 		txrate |= rt->info[rix].shortPreamble;
1999 	sc->sc_txrix = rix;
2000 	try0 = params->ibp_try0;
2001 	ismrr = (params->ibp_try1 != 0);
2002 	txantenna = params->ibp_pri >> 2;
2003 	if (txantenna == 0)			/* XXX? */
2004 		txantenna = sc->sc_txantenna;
2005 
2006 	/*
2007 	 * Since ctsrate is fixed, store it away for later
2008 	 * use when the descriptor fields are being set.
2009 	 */
2010 	if (flags & (HAL_TXDESC_RTSENA|HAL_TXDESC_CTSENA))
2011 		bf->bf_state.bfs_ctsrate0 = params->ibp_ctsrate;
2012 
2013 	/*
2014 	 * NB: we mark all packets as type PSPOLL so the h/w won't
2015 	 * set the sequence number, duration, etc.
2016 	 */
2017 	atype = HAL_PKT_TYPE_PSPOLL;
2018 
2019 	if (IFF_DUMPPKTS(sc, ATH_DEBUG_XMIT))
2020 		ieee80211_dump_pkt(ic, mtod(m0, caddr_t), m0->m_len,
2021 		    sc->sc_hwmap[rix].ieeerate, -1);
2022 
2023 	if (ieee80211_radiotap_active_vap(vap)) {
2024 		u_int64_t tsf = ath_hal_gettsf64(ah);
2025 
2026 		sc->sc_tx_th.wt_tsf = htole64(tsf);
2027 		sc->sc_tx_th.wt_flags = sc->sc_hwmap[rix].txflags;
2028 		if (wh->i_fc[1] & IEEE80211_FC1_WEP)
2029 			sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_WEP;
2030 		if (m0->m_flags & M_FRAG)
2031 			sc->sc_tx_th.wt_flags |= IEEE80211_RADIOTAP_F_FRAG;
2032 		sc->sc_tx_th.wt_rate = sc->sc_hwmap[rix].ieeerate;
2033 		sc->sc_tx_th.wt_txpower = ni->ni_txpower;
2034 		sc->sc_tx_th.wt_antenna = sc->sc_txantenna;
2035 
2036 		ieee80211_radiotap_tx(vap, m0);
2037 	}
2038 
2039 	/*
2040 	 * Formulate first tx descriptor with tx controls.
2041 	 */
2042 	ds = bf->bf_desc;
2043 	/* XXX check return value? */
2044 
2045 	/* Store the decided rate index values away */
2046 	bf->bf_state.bfs_pktlen = pktlen;
2047 	bf->bf_state.bfs_hdrlen = hdrlen;
2048 	bf->bf_state.bfs_atype = atype;
2049 	bf->bf_state.bfs_txpower = params->ibp_power;
2050 	bf->bf_state.bfs_txrate0 = txrate;
2051 	bf->bf_state.bfs_try0 = try0;
2052 	bf->bf_state.bfs_keyix = keyix;
2053 	bf->bf_state.bfs_txantenna = txantenna;
2054 	bf->bf_state.bfs_txflags = flags;
2055 	bf->bf_state.bfs_shpream =
2056 	    !! (params->ibp_flags & IEEE80211_BPF_SHORTPRE);
2057 
2058 	/* Set local packet state, used to queue packets to hardware */
2059 	bf->bf_state.bfs_tid = WME_AC_TO_TID(pri);
2060 	bf->bf_state.bfs_txq = sc->sc_ac2q[pri];
2061 	bf->bf_state.bfs_pri = pri;
2062 
2063 	/* XXX this should be done in ath_tx_setrate() */
2064 	bf->bf_state.bfs_ctsrate = 0;
2065 	bf->bf_state.bfs_ctsduration = 0;
2066 	bf->bf_state.bfs_ismrr = ismrr;
2067 
2068 	/* Blank the legacy rate array */
2069 	bzero(&bf->bf_state.bfs_rc, sizeof(bf->bf_state.bfs_rc));
2070 
2071 	bf->bf_state.bfs_rc[0].rix =
2072 	    ath_tx_findrix(sc, params->ibp_rate0);
2073 	bf->bf_state.bfs_rc[0].tries = try0;
2074 	bf->bf_state.bfs_rc[0].ratecode = txrate;
2075 
2076 	if (ismrr) {
2077 		int rix;
2078 
2079 		rix = ath_tx_findrix(sc, params->ibp_rate1);
2080 		bf->bf_state.bfs_rc[1].rix = rix;
2081 		bf->bf_state.bfs_rc[1].tries = params->ibp_try1;
2082 
2083 		rix = ath_tx_findrix(sc, params->ibp_rate2);
2084 		bf->bf_state.bfs_rc[2].rix = rix;
2085 		bf->bf_state.bfs_rc[2].tries = params->ibp_try2;
2086 
2087 		rix = ath_tx_findrix(sc, params->ibp_rate3);
2088 		bf->bf_state.bfs_rc[3].rix = rix;
2089 		bf->bf_state.bfs_rc[3].tries = params->ibp_try3;
2090 	}
2091 	/*
2092 	 * All the required rate control decisions have been made;
2093 	 * fill in the rc flags.
2094 	 */
2095 	ath_tx_rate_fill_rcflags(sc, bf);
2096 
2097 	/* NB: no buffered multicast in power save support */
2098 
2099 	/*
2100 	 * If we're overriding the ADDBA destination, dispatch directly
2101 	 * to the hardware queue so the frame goes out after any
2102 	 * frames already pending to that node.
2103 	 */
2104 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: dooverride=%d\n",
2105 	    __func__, do_override);
2106 
2107 #if 1
2108 	if (do_override) {
2109 		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
2110 		ath_tx_xmit_normal(sc, sc->sc_ac2q[pri], bf);
2111 	} else {
2112 		/* Queue to software queue */
2113 		ath_tx_swq(sc, ni, sc->sc_ac2q[pri], bf);
2114 	}
2115 #else
2116 	/* Direct-dispatch to the hardware */
2117 	bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
2118 	ath_tx_xmit_normal(sc, sc->sc_ac2q[pri], bf);
2119 #endif
2120 	ATH_TXQ_UNLOCK(sc->sc_ac2q[pri]);
2121 
2122 	return 0;
2123 }
2124 
2125 /*
2126  * Send a raw frame.
2127  *
2128  * This can be called by net80211.
2129  */
2130 int
2131 ath_raw_xmit(struct ieee80211_node *ni, struct mbuf *m,
2132 	const struct ieee80211_bpf_params *params)
2133 {
2134 	struct ieee80211com *ic = ni->ni_ic;
2135 	struct ifnet *ifp = ic->ic_ifp;
2136 	struct ath_softc *sc = ifp->if_softc;
2137 	struct ath_buf *bf;
2138 	struct ieee80211_frame *wh = mtod(m, struct ieee80211_frame *);
2139 	int error = 0;
2140 
2141 	ATH_PCU_LOCK(sc);
2142 	if (sc->sc_inreset_cnt > 0) {
2143 		device_printf(sc->sc_dev, "%s: sc_inreset_cnt > 0; bailing\n",
2144 		    __func__);
2145 		error = EIO;
2146 		ATH_PCU_UNLOCK(sc);
2147 		goto bad0;
2148 	}
2149 	sc->sc_txstart_cnt++;
2150 	ATH_PCU_UNLOCK(sc);
2151 
2152 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || sc->sc_invalid) {
2153 		DPRINTF(sc, ATH_DEBUG_XMIT, "%s: discard frame, %s", __func__,
2154 		    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ?
2155 			"!running" : "invalid");
2156 		m_freem(m);
2157 		error = ENETDOWN;
2158 		goto bad;
2159 	}
2160 
2161 	/*
2162 	 * Enforce how deep the multicast queue can grow.
2163 	 *
2164 	 * XXX duplicated in ath_tx_start().
2165 	 */
2166 	if (IEEE80211_IS_MULTICAST(wh->i_addr1)) {
2167 		ATH_TXQ_LOCK(sc->sc_cabq);
2168 
2169 		if (sc->sc_cabq->axq_depth > sc->sc_txq_mcastq_maxdepth) {
2170 			sc->sc_stats.ast_tx_mcastq_overflow++;
2171 			error = ENOBUFS;
2172 		}
2173 
2174 		ATH_TXQ_UNLOCK(sc->sc_cabq);
2175 
2176 		if (error != 0) {
2177 			m_freem(m);
2178 			goto bad;
2179 		}
2180 	}
2181 
2182 	/*
2183 	 * Grab a TX buffer and associated resources.
2184 	 */
2185 	bf = ath_getbuf(sc, ATH_BUFTYPE_MGMT);
2186 	if (bf == NULL) {
2187 		sc->sc_stats.ast_tx_nobuf++;
2188 		m_freem(m);
2189 		error = ENOBUFS;
2190 		goto bad;
2191 	}
2192 	ATH_KTR(sc, ATH_KTR_TX, 3, "ath_raw_xmit: m=%p, params=%p, bf=%p\n",
2193 	    m, params,  bf);
2194 
2195 	if (params == NULL) {
2196 		/*
2197 		 * Legacy path; interpret frame contents to decide
2198 		 * precisely how to send the frame.
2199 		 */
2200 		if (ath_tx_start(sc, ni, bf, m)) {
2201 			error = EIO;		/* XXX */
2202 			goto bad2;
2203 		}
2204 	} else {
2205 		/*
2206 		 * Caller supplied explicit parameters to use in
2207 		 * sending the frame.
2208 		 */
2209 		if (ath_tx_raw_start(sc, ni, bf, m, params)) {
2210 			error = EIO;		/* XXX */
2211 			goto bad2;
2212 		}
2213 	}
2214 	sc->sc_wd_timer = 5;
2215 	ifp->if_opackets++;
2216 	sc->sc_stats.ast_tx_raw++;
2217 
2218 	ATH_PCU_LOCK(sc);
2219 	sc->sc_txstart_cnt--;
2220 	ATH_PCU_UNLOCK(sc);
2221 
2222 	return 0;
2223 bad2:
2224 	ATH_KTR(sc, ATH_KTR_TX, 3, "ath_raw_xmit: bad2: m=%p, params=%p, "
2225 	    "bf=%p",
2226 	    m,
2227 	    params,
2228 	    bf);
2229 	ATH_TXBUF_LOCK(sc);
2230 	ath_returnbuf_head(sc, bf);
2231 	ATH_TXBUF_UNLOCK(sc);
2232 bad:
2233 	ATH_PCU_LOCK(sc);
2234 	sc->sc_txstart_cnt--;
2235 	ATH_PCU_UNLOCK(sc);
2236 bad0:
2237 	ATH_KTR(sc, ATH_KTR_TX, 2, "ath_raw_xmit: bad0: m=%p, params=%p",
2238 	    m, params);
2239 	ifp->if_oerrors++;
2240 	sc->sc_stats.ast_tx_raw_fail++;
2241 	ieee80211_free_node(ni);
2242 
2243 	return error;
2244 }
2245 
2246 /* Some helper functions */
2247 
2248 /*
2249  * ADDBA (and potentially others) need to be placed in the same
2250  * hardware queue as the TID/node it's relating to. This is so
2251  * it goes out after any pending non-aggregate frames to the
2252  * same node/TID.
2253  *
2254  * If this isn't done, the ADDBA can go out before the frames
2255  * already queued in hardware. Even though those frames have
2256  * sequence numbers -earlier- than the ADDBA (and no frames whose
2257  * sequence numbers are after the ADDBA should be transmitted
2258  * before it!), they'll arrive after the ADDBA - and the receiving
2259  * end will simply drop them as being out of the BAW.
2260  *
2261  * The ADDBA frame can't simply be appended to the TID software
2262  * queue - it would never be sent out. So these frames have to be
2263  * directly dispatched to the hardware, rather than queued in
2264  * software. So if this function returns true, the TXQ has to be
2265  * overridden and the frame has to be directly dispatched.
2266  *
2267  * It's a dirty hack, but someone's gotta do it.
2268  */
2269 
2270 /*
2271  * XXX doesn't belong here!
2272  */
2273 static int
2274 ieee80211_is_action(struct ieee80211_frame *wh)
2275 {
2276 	/* Type: Management frame? */
2277 	if ((wh->i_fc[0] & IEEE80211_FC0_TYPE_MASK) !=
2278 	    IEEE80211_FC0_TYPE_MGT)
2279 		return 0;
2280 
2281 	/* Subtype: Action frame? */
2282 	if ((wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK) !=
2283 	    IEEE80211_FC0_SUBTYPE_ACTION)
2284 		return 0;
2285 
2286 	return 1;
2287 }
2288 
2289 #define	MS(_v, _f)	(((_v) & _f) >> _f##_S)
2290 /*
2291  * Return an alternate TID for ADDBA request frames.
2292  *
2293  * Yes, this likely should be done in the net80211 layer.
2294  */
2295 static int
2296 ath_tx_action_frame_override_queue(struct ath_softc *sc,
2297     struct ieee80211_node *ni,
2298     struct mbuf *m0, int *tid)
2299 {
2300 	struct ieee80211_frame *wh = mtod(m0, struct ieee80211_frame *);
2301 	struct ieee80211_action_ba_addbarequest *ia;
2302 	uint8_t *frm;
2303 	uint16_t baparamset;
2304 
2305 	/* Not action frame? Bail */
2306 	if (! ieee80211_is_action(wh))
2307 		return 0;
2308 
2309 	/* XXX Not needed for frames we send? */
2310 #if 0
2311 	/* Correct length? */
2312 	if (! ieee80211_parse_action(ni, m))
2313 		return 0;
2314 #endif
2315 
2316 	/* Extract out action frame */
2317 	frm = (u_int8_t *)&wh[1];
2318 	ia = (struct ieee80211_action_ba_addbarequest *) frm;
2319 
2320 	/* Not ADDBA? Bail */
2321 	if (ia->rq_header.ia_category != IEEE80211_ACTION_CAT_BA)
2322 		return 0;
2323 	if (ia->rq_header.ia_action != IEEE80211_ACTION_BA_ADDBA_REQUEST)
2324 		return 0;
2325 
2326 	/* Extract TID, return it */
2327 	baparamset = le16toh(ia->rq_baparamset);
2328 	*tid = (int) MS(baparamset, IEEE80211_BAPS_TID);
2329 
2330 	return 1;
2331 }
2332 #undef	MS
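
/*
 * Worked example for the TID extraction above (illustrative only; the
 * exact IEEE80211_BAPS_TID mask/shift values are assumed here to be
 * 0x003c / 2, as defined by net80211's ieee80211.h):
 *
 *	baparamset = 0x1010  ->  (0x1010 & 0x003c) >> 2 == 4
 *
 * ie, an ADDBA request with a BA parameter set of 0x1010 is steered
 * towards TID 4, which the caller then maps to a hardware queue via
 * TID_TO_WME_AC().
 */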
2333 
2334 /* Per-node software queue operations */
2335 
2336 /*
2337  * Add the current packet to the given BAW.
2338  * It is assumed that the current packet
2339  *
2340  * + fits inside the BAW;
2341  * + already has had a sequence number allocated.
2342  *
2343  * Since the BAW status may be modified by both the ath task and
2344  * the net80211/ifnet contexts, the TID must be locked.
2345  */
2346 void
2347 ath_tx_addto_baw(struct ath_softc *sc, struct ath_node *an,
2348     struct ath_tid *tid, struct ath_buf *bf)
2349 {
2350 	int index, cindex;
2351 	struct ieee80211_tx_ampdu *tap;
2352 
2353 	ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[tid->ac]);
2354 	ATH_TID_LOCK_ASSERT(sc, tid);
2355 
2356 	if (bf->bf_state.bfs_isretried)
2357 		return;
2358 
2359 	tap = ath_tx_get_tx_tid(an, tid->tid);
2360 
2361 	if (! bf->bf_state.bfs_dobaw) {
2362 		device_printf(sc->sc_dev,
2363 		    "%s: dobaw=0, seqno=%d, window %d:%d\n",
2364 		    __func__,
2365 		    SEQNO(bf->bf_state.bfs_seqno),
2366 		    tap->txa_start,
2367 		    tap->txa_wnd);
2368 	}
2369 
2370 	if (bf->bf_state.bfs_addedbaw)
2371 		device_printf(sc->sc_dev,
2372 		    "%s: re-added? tid=%d, seqno %d; window %d:%d; "
2373 		    "baw head=%d tail=%d\n",
2374 		    __func__, tid->tid, SEQNO(bf->bf_state.bfs_seqno),
2375 		    tap->txa_start, tap->txa_wnd, tid->baw_head,
2376 		    tid->baw_tail);
2377 
2378 	/*
2379 	 * Verify that the given sequence number is not outside of the
2380 	 * BAW.  Complain loudly if that's the case.
2381 	 */
2382 	if (! BAW_WITHIN(tap->txa_start, tap->txa_wnd,
2383 	    SEQNO(bf->bf_state.bfs_seqno))) {
2384 		device_printf(sc->sc_dev,
2385 		    "%s: bf=%p: outside of BAW?? tid=%d, seqno %d; window %d:%d; "
2386 		    "baw head=%d tail=%d\n",
2387 		    __func__, bf, tid->tid, SEQNO(bf->bf_state.bfs_seqno),
2388 		    tap->txa_start, tap->txa_wnd, tid->baw_head,
2389 		    tid->baw_tail);
2390 	}
2391 
2392 	/*
2393 	 * ni->ni_txseqs[] is the currently allocated seqno;
2394 	 * the txa state contains the current BAW start.
2395 	 */
2396 	index  = ATH_BA_INDEX(tap->txa_start, SEQNO(bf->bf_state.bfs_seqno));
2397 	cindex = (tid->baw_head + index) & (ATH_TID_MAX_BUFS - 1);
2398 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
2399 	    "%s: tid=%d, seqno %d; window %d:%d; index=%d cindex=%d "
2400 	    "baw head=%d tail=%d\n",
2401 	    __func__, tid->tid, SEQNO(bf->bf_state.bfs_seqno),
2402 	    tap->txa_start, tap->txa_wnd, index, cindex, tid->baw_head,
2403 	    tid->baw_tail);
2404 
2405 
2406 #if 0
2407 	assert(tid->tx_buf[cindex] == NULL);
2408 #endif
2409 	if (tid->tx_buf[cindex] != NULL) {
2410 		device_printf(sc->sc_dev,
2411 		    "%s: ba packet dup (index=%d, cindex=%d, "
2412 		    "head=%d, tail=%d)\n",
2413 		    __func__, index, cindex, tid->baw_head, tid->baw_tail);
2414 		device_printf(sc->sc_dev,
2415 		    "%s: BA bf: %p; seqno=%d ; new bf: %p; seqno=%d\n",
2416 		    __func__,
2417 		    tid->tx_buf[cindex],
2418 		    SEQNO(tid->tx_buf[cindex]->bf_state.bfs_seqno),
2419 		    bf,
2420 		    SEQNO(bf->bf_state.bfs_seqno)
2421 		);
2422 	}
2423 	tid->tx_buf[cindex] = bf;
2424 
2425 	if (index >= ((tid->baw_tail - tid->baw_head) &
2426 	    (ATH_TID_MAX_BUFS - 1))) {
2427 		tid->baw_tail = cindex;
2428 		INCR(tid->baw_tail, ATH_TID_MAX_BUFS);
2429 	}
2430 }
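
/*
 * Worked example of the slot arithmetic above (illustrative only,
 * assuming ATH_BA_INDEX() returns the modulo-4096 distance from the
 * BAW left edge and ATH_TID_MAX_BUFS is a power of two):
 *
 *	txa_start = 4094, seqno = 1  ->  index  = (1 - 4094) & 4095 = 3
 *	baw_head = 10                ->  cindex = (10 + 3) &
 *						(ATH_TID_MAX_BUFS - 1) = 13
 *
 * ie, a frame three positions into the window occupies the slot three
 * entries past baw_head in tid->tx_buf[], with the sequence space and
 * the slot ring wrapping independently.
 */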
2431 
2432 /*
2433  * Flip the BAW buffer entry over from the existing one to the new one.
2434  *
2435  * When software retransmitting a (sub-)frame, it is entirely possible that
2436  * the frame ath_buf is marked as BUSY and can't be immediately reused.
2437  * In that instance the buffer is cloned and the new buffer is used for
2438  * retransmit. We thus need to update the ath_buf slot in the BAW buf
2439  * tracking array to maintain consistency.
2440  */
2441 static void
2442 ath_tx_switch_baw_buf(struct ath_softc *sc, struct ath_node *an,
2443     struct ath_tid *tid, struct ath_buf *old_bf, struct ath_buf *new_bf)
2444 {
2445 	int index, cindex;
2446 	struct ieee80211_tx_ampdu *tap;
2447 	int seqno = SEQNO(old_bf->bf_state.bfs_seqno);
2448 
2449 	ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[tid->ac]);
2450 	ATH_TID_LOCK_ASSERT(sc, tid);
2451 
2452 	tap = ath_tx_get_tx_tid(an, tid->tid);
2453 	index  = ATH_BA_INDEX(tap->txa_start, seqno);
2454 	cindex = (tid->baw_head + index) & (ATH_TID_MAX_BUFS - 1);
2455 
2456 	/*
2457 	 * Just warn for now; if it happens then we should find out
2458 	 * about it. It's highly likely the aggregation session will
2459 	 * soon hang.
2460 	 */
2461 	if (old_bf->bf_state.bfs_seqno != new_bf->bf_state.bfs_seqno) {
2462 		device_printf(sc->sc_dev, "%s: retransmitted buffer"
2463 		    " has mismatching seqnos; the BA session may hang.\n",
2464 		    __func__);
2465 		device_printf(sc->sc_dev, "%s: old seqno=%d, new_seqno=%d\n",
2466 		    __func__,
2467 		    old_bf->bf_state.bfs_seqno,
2468 		    new_bf->bf_state.bfs_seqno);
2469 	}
2470 
2471 	if (tid->tx_buf[cindex] != old_bf) {
2472 		device_printf(sc->sc_dev, "%s: ath_buf pointer incorrect; "
2473 		    "the BA session may hang.\n",
2474 		    __func__);
2475 		device_printf(sc->sc_dev, "%s: old bf=%p, new bf=%p\n",
2476 		    __func__,
2477 		    old_bf, new_bf);
2478 	}
2479 
2480 	tid->tx_buf[cindex] = new_bf;
2481 }
2482 
2483 /*
2484  * seq_start - left edge of BAW
2485  * seq_next - current/next sequence number to allocate
2486  *
2487  * Since the BAW status may be modified by both the ath task and
2488  * the net80211/ifnet contexts, the TID must be locked.
2489  */
2490 static void
2491 ath_tx_update_baw(struct ath_softc *sc, struct ath_node *an,
2492     struct ath_tid *tid, const struct ath_buf *bf)
2493 {
2494 	int index, cindex;
2495 	struct ieee80211_tx_ampdu *tap;
2496 	int seqno = SEQNO(bf->bf_state.bfs_seqno);
2497 
2498 	ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[tid->ac]);
2499 	ATH_TID_LOCK_ASSERT(sc, tid);
2500 
2501 	tap = ath_tx_get_tx_tid(an, tid->tid);
2502 	index  = ATH_BA_INDEX(tap->txa_start, seqno);
2503 	cindex = (tid->baw_head + index) & (ATH_TID_MAX_BUFS - 1);
2504 
2505 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
2506 	    "%s: tid=%d, baw=%d:%d, seqno=%d, index=%d, cindex=%d, "
2507 	    "baw head=%d, tail=%d\n",
2508 	    __func__, tid->tid, tap->txa_start, tap->txa_wnd, seqno, index,
2509 	    cindex, tid->baw_head, tid->baw_tail);
2510 
2511 	/*
2512 	 * If this occurs then we have a big problem - something else
2513 	 * has slid tap->txa_start along without updating the BAW
2514 	 * tracking start/end pointers. Thus the TX BAW state is now
2515 	 * completely busted.
2516 	 *
2517 	 * But for now, since I haven't yet fixed TDMA and buffer cloning,
2518 	 * it's quite possible that a cloned buffer is making its way
2519 	 * here and causing it to fire off. Disable TDMA for now.
2520 	 */
2521 	if (tid->tx_buf[cindex] != bf) {
2522 		device_printf(sc->sc_dev,
2523 		    "%s: comp bf=%p, seq=%d; slot bf=%p, seqno=%d\n",
2524 		    __func__,
2525 		    bf, SEQNO(bf->bf_state.bfs_seqno),
2526 		    tid->tx_buf[cindex],
2527 		    SEQNO(tid->tx_buf[cindex]->bf_state.bfs_seqno));
2528 	}
2529 
2530 	tid->tx_buf[cindex] = NULL;
2531 
2532 	while (tid->baw_head != tid->baw_tail &&
2533 	    !tid->tx_buf[tid->baw_head]) {
2534 		INCR(tap->txa_start, IEEE80211_SEQ_RANGE);
2535 		INCR(tid->baw_head, ATH_TID_MAX_BUFS);
2536 	}
2537 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
2538 	    "%s: baw is now %d:%d, baw head=%d\n",
2539 	    __func__, tap->txa_start, tap->txa_wnd, tid->baw_head);
2540 }
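
/*
 * A small illustration of the window slide above (illustrative only):
 * if the slots at baw_head, baw_head + 1 and baw_head + 2 have all
 * completed (their tx_buf[] entries are NULL) but baw_head + 3 is
 * still pending, the while loop advances both tap->txa_start and
 * tid->baw_head by three.  The BAW left edge therefore only ever
 * moves across a contiguous run of completed frames.
 */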
2541 
2542 /*
2543  * Mark the current node/TID as ready to TX.
2544  *
2545  * This is done to make it easy for the software scheduler to
2546  * find which nodes have data to send.
2547  *
2548  * The TXQ lock must be held.
2549  */
2550 static void
2551 ath_tx_tid_sched(struct ath_softc *sc, struct ath_tid *tid)
2552 {
2553 	struct ath_txq *txq = sc->sc_ac2q[tid->ac];
2554 
2555 	ATH_TXQ_LOCK_ASSERT(txq);
2556 
2557 	if (tid->paused)
2558 		return;		/* paused, can't schedule yet */
2559 
2560 	if (tid->sched)
2561 		return;		/* already scheduled */
2562 
2563 	tid->sched = 1;
2564 
2565 	TAILQ_INSERT_TAIL(&txq->axq_tidq, tid, axq_qelem);
2566 }
2567 
2568 /*
2569  * Mark the current node/TID as no longer needing to be polled for
2570  * TX packets.
2571  *
2572  * The TXQ lock must be held.
2573  */
2574 static void
2575 ath_tx_tid_unsched(struct ath_softc *sc, struct ath_tid *tid)
2576 {
2577 	struct ath_txq *txq = sc->sc_ac2q[tid->ac];
2578 
2579 	ATH_TXQ_LOCK_ASSERT(txq);
2580 
2581 	if (tid->sched == 0)
2582 		return;
2583 
2584 	tid->sched = 0;
2585 	TAILQ_REMOVE(&txq->axq_tidq, tid, axq_qelem);
2586 }
2587 
2588 /*
2589  * Assign a sequence number manually to the given frame.
2590  *
2591  * This should only be called for A-MPDU TX frames.
2592  */
2593 static ieee80211_seq
2594 ath_tx_tid_seqno_assign(struct ath_softc *sc, struct ieee80211_node *ni,
2595     struct ath_buf *bf, struct mbuf *m0)
2596 {
2597 	struct ieee80211_frame *wh;
2598 	int tid, pri;
2599 	ieee80211_seq seqno;
2600 	uint8_t subtype;
2601 
2602 	/* TID lookup */
2603 	wh = mtod(m0, struct ieee80211_frame *);
2604 	pri = M_WME_GETAC(m0);			/* honor classification */
2605 	tid = WME_AC_TO_TID(pri);
2606 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: pri=%d, tid=%d, qos has seq=%d\n",
2607 	    __func__, pri, tid, IEEE80211_QOS_HAS_SEQ(wh));
2608 
2609 	/* XXX Is it a control frame? Ignore */
2610 
2611 	/* Does the packet require a sequence number? */
2612 	if (! IEEE80211_QOS_HAS_SEQ(wh))
2613 		return -1;
2614 
2615 	ATH_TID_LOCK_ASSERT(sc, &(ATH_NODE(ni)->an_tid[tid]));
2616 
2617 	/*
2618 	 * Is it a QOS NULL Data frame? Give it a sequence number from
2619 	 * the default TID (IEEE80211_NONQOS_TID.)
2620 	 *
2621 	 * The RX path of everything I've looked at doesn't include the NULL
2622 	 * data frame sequence number in the aggregation state updates, so
2623 	 * assigning it a sequence number there will cause a BAW hole on the
2624 	 * RX side.
2625 	 */
2626 	subtype = wh->i_fc[0] & IEEE80211_FC0_SUBTYPE_MASK;
2627 	if (subtype == IEEE80211_FC0_SUBTYPE_QOS_NULL) {
2628 		/* XXX no locking for this TID? This is a bit of a problem. */
2629 		seqno = ni->ni_txseqs[IEEE80211_NONQOS_TID];
2630 		INCR(ni->ni_txseqs[IEEE80211_NONQOS_TID], IEEE80211_SEQ_RANGE);
2631 	} else {
2632 		/* Manually assign sequence number */
2633 		seqno = ni->ni_txseqs[tid];
2634 		INCR(ni->ni_txseqs[tid], IEEE80211_SEQ_RANGE);
2635 	}
2636 	*(uint16_t *)&wh->i_seq[0] = htole16(seqno << IEEE80211_SEQ_SEQ_SHIFT);
2637 	M_SEQNO_SET(m0, seqno);
2638 
2639 	/* Return so caller can do something with it if needed */
2640 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s:  -> seqno=%d\n", __func__, seqno);
2641 	return seqno;
2642 }
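
/*
 * For example (illustrative only, assuming the usual net80211 values
 * IEEE80211_SEQ_SEQ_SHIFT == 4 and IEEE80211_SEQ_RANGE == 4096):
 * if ni->ni_txseqs[tid] is 2050 then the frame above is stamped with
 *
 *	*(uint16_t *)&wh->i_seq[0] = htole16(2050 << 4);
 *
 * ni->ni_txseqs[tid] is bumped to 2051 (wrapping to 0 at 4096), and
 * ath_tx_start() later recovers the value via M_SEQNO_GET(m0).
 */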
2643 
2644 /*
2645  * Attempt to direct dispatch an aggregate frame to hardware.
2646  * If the frame is out of BAW, queue.
2647  * Otherwise, schedule it as a single frame.
2648  */
2649 static void
2650 ath_tx_xmit_aggr(struct ath_softc *sc, struct ath_node *an,
2651     struct ath_txq *txq, struct ath_buf *bf)
2652 {
2653 	struct ath_tid *tid = &an->an_tid[bf->bf_state.bfs_tid];
2654 //	struct ath_txq *txq = bf->bf_state.bfs_txq;
2655 	struct ieee80211_tx_ampdu *tap;
2656 
2657 	if (txq != bf->bf_state.bfs_txq) {
2658 		device_printf(sc->sc_dev, "%s: txq %d != bfs_txq %d!\n",
2659 		    __func__,
2660 		    txq->axq_qnum,
2661 		    bf->bf_state.bfs_txq->axq_qnum);
2662 	}
2663 
2664 	ATH_TXQ_LOCK_ASSERT(txq);
2665 	ATH_TID_LOCK_ASSERT(sc, tid);
2666 
2667 	tap = ath_tx_get_tx_tid(an, tid->tid);
2668 
2669 	/* paused? queue */
2670 	if (tid->paused) {
2671 		ATH_TID_INSERT_HEAD(tid, bf, bf_list);
2672 		/* XXX don't sched - we're paused! */
2673 		return;
2674 	}
2675 
2676 	/* outside baw? queue */
2677 	if (bf->bf_state.bfs_dobaw &&
2678 	    (! BAW_WITHIN(tap->txa_start, tap->txa_wnd,
2679 	    SEQNO(bf->bf_state.bfs_seqno)))) {
2680 		ATH_TID_INSERT_HEAD(tid, bf, bf_list);
2681 		ath_tx_tid_sched(sc, tid);
2682 		return;
2683 	}
2684 
2685 	/*
2686 	 * This is a temporary check and should be removed once
2687 	 * all the relevant code paths have been fixed.
2688 	 *
2689 	 * During aggregate retries, it's possible that the head
2690 	 * frame will fail (which has the bfs_aggr and bfs_nframes
2691 	 * fields set for said aggregate) and will be retried as
2692 	 * a single frame.  In this instance, the values should
2693 	 * be reset or the completion code will get upset with you.
2694 	 */
2695 	if (bf->bf_state.bfs_aggr != 0 || bf->bf_state.bfs_nframes > 1) {
2696 		device_printf(sc->sc_dev, "%s: bfs_aggr=%d, bfs_nframes=%d\n",
2697 		    __func__,
2698 		    bf->bf_state.bfs_aggr,
2699 		    bf->bf_state.bfs_nframes);
2700 		bf->bf_state.bfs_aggr = 0;
2701 		bf->bf_state.bfs_nframes = 1;
2702 	}
2703 
2704 	/* Update CLRDMASK just before this frame is queued */
2705 	ath_tx_update_clrdmask(sc, tid, bf);
2706 
2707 	/* Direct dispatch to hardware */
2708 	ath_tx_do_ratelookup(sc, bf);
2709 	ath_tx_calc_duration(sc, bf);
2710 	ath_tx_calc_protection(sc, bf);
2711 	ath_tx_set_rtscts(sc, bf);
2712 	ath_tx_rate_fill_rcflags(sc, bf);
2713 	ath_tx_setds(sc, bf);
2714 
2715 	/* Statistics */
2716 	sc->sc_aggr_stats.aggr_low_hwq_single_pkt++;
2717 
2718 	/* Track per-TID hardware queue depth correctly */
2719 	tid->hwq_depth++;
2720 
2721 	/* Add to BAW */
2722 	if (bf->bf_state.bfs_dobaw) {
2723 		ath_tx_addto_baw(sc, an, tid, bf);
2724 		bf->bf_state.bfs_addedbaw = 1;
2725 	}
2726 
2727 	/* Set completion handler, multi-frame aggregate or not */
2728 	bf->bf_comp = ath_tx_aggr_comp;
2729 
2730 	/* Hand off to hardware */
2731 	ath_tx_handoff(sc, txq, bf);
2732 }
2733 
2734 /*
2735  * Attempt to send the packet.
2736  * If the queue isn't busy, direct-dispatch.
2737  * If the queue is busy enough, queue the given packet on the
2738  *  relevant software queue.
2739  */
2740 void
2741 ath_tx_swq(struct ath_softc *sc, struct ieee80211_node *ni, struct ath_txq *txq,
2742     struct ath_buf *bf)
2743 {
2744 	struct ath_node *an = ATH_NODE(ni);
2745 	struct ieee80211_frame *wh;
2746 	struct ath_tid *atid;
2747 	int pri, tid;
2748 	struct mbuf *m0 = bf->bf_m;
2749 
2750 	ATH_TXQ_LOCK_ASSERT(txq);
2751 
2752 	/* Fetch the TID - non-QoS frames get assigned to TID 16 */
2753 	wh = mtod(m0, struct ieee80211_frame *);
2754 	pri = ath_tx_getac(sc, m0);
2755 	tid = ath_tx_gettid(sc, m0);
2756 	atid = &an->an_tid[tid];
2757 
2758 	ATH_TID_LOCK_ASSERT(sc, atid);
2759 
2760 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: bf=%p, pri=%d, tid=%d, qos=%d\n",
2761 	    __func__, bf, pri, tid, IEEE80211_QOS_HAS_SEQ(wh));
2762 
2763 	/* Set local packet state, used to queue packets to hardware */
2764 	/* XXX potentially duplicate info, re-check */
2765 	/* XXX remember, txq must be the hardware queue, not the av_mcastq */
2766 	bf->bf_state.bfs_tid = tid;
2767 	bf->bf_state.bfs_txq = txq;
2768 	bf->bf_state.bfs_pri = pri;
2769 
2770 	/*
2771 	 * If the hardware queue isn't busy, direct-dispatch the frame.
2772 	 * If the hardware queue is busy, software queue it.
2773 	 * If the TID is paused or the traffic is outside the BAW, software
2774 	 * queue it.
2775 	 */
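	/*
	 * In other words (a summary of the branches below): a paused or
	 * A-MPDU-pending TID always tail-queues; an A-MPDU-running TID
	 * tail-queues and then either direct-dispatches the head frame
	 * (hardware queue shallow) or schedules the TID (hardware queue
	 * busy); otherwise the frame is direct-dispatched when the
	 * hardware queue is below sc_hwq_limit and tail-queued and
	 * scheduled when it isn't.
	 */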
2776 	if (atid->paused) {
2777 		/* TID is paused, queue */
2778 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: paused\n", __func__);
2779 		ATH_TID_INSERT_TAIL(atid, bf, bf_list);
2780 	} else if (ath_tx_ampdu_pending(sc, an, tid)) {
2781 		/* AMPDU pending; queue */
2782 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: pending\n", __func__);
2783 		ATH_TID_INSERT_TAIL(atid, bf, bf_list);
2784 		/* XXX sched? */
2785 	} else if (ath_tx_ampdu_running(sc, an, tid)) {
2786 		/* AMPDU running, attempt direct dispatch if possible */
2787 
2788 		/*
2789 		 * Always queue the frame to the tail of the list.
2790 		 */
2791 		ATH_TID_INSERT_TAIL(atid, bf, bf_list);
2792 
2793 		/*
2794 		 * If the hardware queue isn't busy, direct dispatch
2795 		 * the head frame in the list.  Don't schedule the
2796 		 * TID - let it build some more frames first?
2797 		 *
2798 		 * Otherwise, schedule the TID.
2799 		 */
2800 		if (txq->axq_depth < sc->sc_hwq_limit) {
2801 			bf = ATH_TID_FIRST(atid);
2802 			ATH_TID_REMOVE(atid, bf, bf_list);
2803 
2804 			/*
2805 			 * Ensure it's definitely treated as a non-AMPDU
2806 			 * frame - this information may have been left
2807 			 * over from a previous attempt.
2808 			 */
2809 			bf->bf_state.bfs_aggr = 0;
2810 			bf->bf_state.bfs_nframes = 1;
2811 
2812 			/* Queue to the hardware */
2813 			ath_tx_xmit_aggr(sc, an, txq, bf);
2814 			DPRINTF(sc, ATH_DEBUG_SW_TX,
2815 			    "%s: xmit_aggr\n",
2816 			    __func__);
2817 		} else {
2818 			DPRINTF(sc, ATH_DEBUG_SW_TX,
2819 			    "%s: ampdu; swq'ing\n",
2820 			    __func__);
2821 
2822 			ath_tx_tid_sched(sc, atid);
2823 		}
2824 	} else if (txq->axq_depth < sc->sc_hwq_limit) {
2825 		/* AMPDU not running, attempt direct dispatch */
2826 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: xmit_normal\n", __func__);
2827 		/* See if clrdmask needs to be set */
2828 		ath_tx_update_clrdmask(sc, atid, bf);
2829 		ath_tx_xmit_normal(sc, txq, bf);
2830 	} else {
2831 		/* Busy; queue */
2832 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: swq'ing\n", __func__);
2833 		ATH_TID_INSERT_TAIL(atid, bf, bf_list);
2834 		ath_tx_tid_sched(sc, atid);
2835 	}
2836 }
2837 
2838 /*
2839  * Configure the per-TID node state.
2840  *
2841  * This likely belongs in if_ath_node.c but I can't think of anywhere
2842  * else to put it just yet.
2843  *
2844  * This sets up the TAILQs and the per-TID state as appropriate.
2845  */
2846 void
2847 ath_tx_tid_init(struct ath_softc *sc, struct ath_node *an)
2848 {
2849 	int i, j;
2850 	struct ath_tid *atid;
2851 
2852 	for (i = 0; i < IEEE80211_TID_SIZE; i++) {
2853 		atid = &an->an_tid[i];
2854 
2855 		/* XXX now with this bzero(), is the field 0'ing needed? */
2856 		bzero(atid, sizeof(*atid));
2857 
2858 		TAILQ_INIT(&atid->tid_q);
2859 		TAILQ_INIT(&atid->filtq.tid_q);
2860 		atid->tid = i;
2861 		atid->an = an;
2862 		for (j = 0; j < ATH_TID_MAX_BUFS; j++)
2863 			atid->tx_buf[j] = NULL;
2864 		atid->baw_head = atid->baw_tail = 0;
2865 		atid->paused = 0;
2866 		atid->sched = 0;
2867 		atid->hwq_depth = 0;
2868 		atid->cleanup_inprogress = 0;
2869 		atid->clrdmask = 1;	/* Always start by setting this bit */
2870 		if (i == IEEE80211_NONQOS_TID)
2871 			atid->ac = ATH_NONQOS_TID_AC;
2872 		else
2873 			atid->ac = TID_TO_WME_AC(i);
2874 	}
2875 }
2876 
2877 /*
2878  * Pause the current TID. This stops packets from being transmitted
2879  * on it.
2880  *
2881  * This is called from upper layers as well as the driver;
2882  * either way the TID lock must be held by the caller.
2883  */
2884 static void
2885 ath_tx_tid_pause(struct ath_softc *sc, struct ath_tid *tid)
2886 {
2887 
2888 	ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[tid->ac]);
2889 	tid->paused++;
2890 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, "%s: paused = %d\n",
2891 	    __func__, tid->paused);
2892 }
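
/*
 * Note that tid->paused is a counter rather than a flag: each call to
 * ath_tx_tid_pause() must eventually be matched by a call to
 * ath_tx_tid_resume() before the TID can be scheduled again.  For
 * example, a TID paused once for a filtered-frame transition and once
 * for a pending BAR stays paused until both conditions have been
 * resumed.
 */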
2893 
2894 /*
2895  * Unpause the current TID, and schedule it if needed.
2896  */
2897 static void
2898 ath_tx_tid_resume(struct ath_softc *sc, struct ath_tid *tid)
2899 {
2900 	ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[tid->ac]);
2901 
2902 	tid->paused--;
2903 
2904 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, "%s: unpaused = %d\n",
2905 	    __func__, tid->paused);
2906 
2907 	if (tid->paused)
2908 		return;
2909 
2910 	/*
2911 	 * Override the clrdmask configuration for the next frame
2912 	 * from this TID, just to get the ball rolling.
2913 	 */
2914 	tid->clrdmask = 1;
2915 
2916 	if (tid->axq_depth == 0)
2917 		return;
2918 
2919 	/* XXX isfiltered shouldn't ever be 1 at this point */
2920 	if (tid->isfiltered == 1) {
2921 		device_printf(sc->sc_dev, "%s: filtered?!\n", __func__);
2922 		return;
2923 	}
2924 
2925 	ath_tx_tid_sched(sc, tid);
2926 	/* Punt some frames to the hardware if needed */
2927 	//ath_txq_sched(sc, sc->sc_ac2q[tid->ac]);
2928 	taskqueue_enqueue(sc->sc_tq, &sc->sc_txqtask);
2929 }
2930 
2931 /*
2932  * Add the given ath_buf to the TID filtered frame list.
2933  * This requires the TID be filtered.
2934  */
2935 static void
2936 ath_tx_tid_filt_addbuf(struct ath_softc *sc, struct ath_tid *tid,
2937     struct ath_buf *bf)
2938 {
2939 
2940 	ATH_TID_LOCK_ASSERT(sc, tid);
2941 	if (! tid->isfiltered)
2942 		device_printf(sc->sc_dev, "%s: not filtered?!\n", __func__);
2943 
2944 	DPRINTF(sc, ATH_DEBUG_SW_TX_FILT, "%s: bf=%p\n", __func__, bf);
2945 
2946 	/* Set the retry bit and bump the retry counter */
2947 	ath_tx_set_retry(sc, bf);
2948 	sc->sc_stats.ast_tx_swfiltered++;
2949 
2950 	ATH_TID_FILT_INSERT_TAIL(tid, bf, bf_list);
2951 }
2952 
2953 /*
2954  * Handle a completed filtered frame from the given TID.
2955  * This just enables/pauses the filtered frame state if required
2956  * and appends the filtered frame to the filtered queue.
2957  */
2958 static void
2959 ath_tx_tid_filt_comp_buf(struct ath_softc *sc, struct ath_tid *tid,
2960     struct ath_buf *bf)
2961 {
2962 
2963 	ATH_TID_LOCK_ASSERT(sc, tid);
2964 
2965 	if (! tid->isfiltered) {
2966 		DPRINTF(sc, ATH_DEBUG_SW_TX_FILT, "%s: filter transition\n",
2967 		    __func__);
2968 		tid->isfiltered = 1;
2969 		ath_tx_tid_pause(sc, tid);
2970 	}
2971 
2972 	/* Add the frame to the filter queue */
2973 	ath_tx_tid_filt_addbuf(sc, tid, bf);
2974 }
2975 
2976 /*
2977  * Complete the filtered frame TX completion.
2978  *
2979  * If there are no more frames in the hardware queue, unpause/unfilter
2980  * the TID if applicable.  Otherwise we will wait for a node PS transition
2981  * to unfilter.
2982  */
2983 static void
2984 ath_tx_tid_filt_comp_complete(struct ath_softc *sc, struct ath_tid *tid)
2985 {
2986 	struct ath_buf *bf;
2987 
2988 	ATH_TID_LOCK_ASSERT(sc, tid);
2989 
2990 	if (tid->hwq_depth != 0)
2991 		return;
2992 
2993 	DPRINTF(sc, ATH_DEBUG_SW_TX_FILT, "%s: hwq=0, transition back\n",
2994 	    __func__);
2995 	tid->isfiltered = 0;
2996 	tid->clrdmask = 1;
2997 
2998 	/* XXX this is really quite inefficient */
2999 	while ((bf = ATH_TID_FILT_LAST(tid, ath_bufhead_s)) != NULL) {
3000 		ATH_TID_FILT_REMOVE(tid, bf, bf_list);
3001 		ATH_TID_INSERT_HEAD(tid, bf, bf_list);
3002 	}
3003 
3004 	ath_tx_tid_resume(sc, tid);
3005 }
3006 
3007 /*
3008  * Called when a single (aggregate or otherwise) frame is completed.
3009  *
3010  * Returns 0 if the buffer could be added to the filtered list
3011  * (cloned or otherwise), 1 if the buffer couldn't be added to the
3012  * filtered list (failed clone; expired retry) and the caller should
3013  * free it and handle it like a failure (eg by sending a BAR.)
3014  */
3015 static int
3016 ath_tx_tid_filt_comp_single(struct ath_softc *sc, struct ath_tid *tid,
3017     struct ath_buf *bf)
3018 {
3019 	struct ath_buf *nbf;
3020 	int retval;
3021 
3022 	ATH_TID_LOCK_ASSERT(sc, tid);
3023 
3024 	/*
3025 	 * Don't allow a filtered frame to live forever.
3026 	 */
3027 	if (bf->bf_state.bfs_retries > SWMAX_RETRIES) {
3028 		sc->sc_stats.ast_tx_swretrymax++;
3029 		DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
3030 		    "%s: bf=%p, seqno=%d, exceeded retries\n",
3031 		    __func__,
3032 		    bf,
3033 		    bf->bf_state.bfs_seqno);
3034 		return (1);	/* caller frees it and handles the failure */
3035 	}
3036 
3037 	/*
3038 	 * A busy buffer can't be added to the retry list.
3039 	 * It needs to be cloned.
3040 	 */
3041 	if (bf->bf_flags & ATH_BUF_BUSY) {
3042 		nbf = ath_tx_retry_clone(sc, tid->an, tid, bf);
3043 		DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
3044 		    "%s: busy buffer clone: %p -> %p\n",
3045 		    __func__, bf, nbf);
3046 	} else {
3047 		nbf = bf;
3048 	}
3049 
3050 	if (nbf == NULL) {
3051 		DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
3052 		    "%s: busy buffer couldn't be cloned (%p)!\n",
3053 		    __func__, bf);
3054 		retval = 1;
3055 	} else {
3056 		ath_tx_tid_filt_comp_buf(sc, tid, nbf);
3057 		retval = 0;
3058 	}
3059 	ath_tx_tid_filt_comp_complete(sc, tid);
3060 
3061 	return (retval);
3062 }
3063 
3064 static void
3065 ath_tx_tid_filt_comp_aggr(struct ath_softc *sc, struct ath_tid *tid,
3066     struct ath_buf *bf_first, ath_bufhead *bf_q)
3067 {
3068 	struct ath_buf *bf, *bf_next, *nbf;
3069 
3070 	ATH_TID_LOCK_ASSERT(sc, tid);
3071 
3072 	bf = bf_first;
3073 	while (bf) {
3074 		bf_next = bf->bf_next;
3075 		bf->bf_next = NULL;	/* Remove it from the aggr list */
3076 
3077 		/*
3078 		 * Don't allow a filtered frame to live forever.
3079 		 */
3080 		if (bf->bf_state.bfs_retries > SWMAX_RETRIES) {
3081 			sc->sc_stats.ast_tx_swretrymax++;
3082 			DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
3083 			    "%s: bf=%p, seqno=%d, exceeded retries\n",
3084 			    __func__,
3085 			    bf,
3086 			    bf->bf_state.bfs_seqno);
3087 			TAILQ_INSERT_TAIL(bf_q, bf, bf_list);
3088 			goto next;
3089 		}
3090 
3091 		if (bf->bf_flags & ATH_BUF_BUSY) {
3092 			nbf = ath_tx_retry_clone(sc, tid->an, tid, bf);
3093 			DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
3094 			    "%s: busy buffer cloned: %p -> %p\n",
3095 			    __func__, bf, nbf);
3096 		} else {
3097 			nbf = bf;
3098 		}
3099 
3100 		/*
3101 		 * If the buffer couldn't be cloned, add it to bf_q;
3102 		 * the caller will free the buffer(s) as required.
3103 		 */
3104 		if (nbf == NULL) {
3105 			DPRINTF(sc, ATH_DEBUG_SW_TX_FILT,
3106 			    "%s: buffer couldn't be cloned! (%p)\n",
3107 			    __func__, bf);
3108 			TAILQ_INSERT_TAIL(bf_q, bf, bf_list);
3109 		} else {
3110 			ath_tx_tid_filt_comp_buf(sc, tid, nbf);
3111 		}
3112 next:
3113 		bf = bf_next;
3114 	}
3115 
3116 	ath_tx_tid_filt_comp_complete(sc, tid);
3117 }
3118 
3119 /*
3120  * Suspend the queue because we need to TX a BAR.
3121  */
3122 static void
3123 ath_tx_tid_bar_suspend(struct ath_softc *sc, struct ath_tid *tid)
3124 {
3125 	ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[tid->ac]);
3126 
3127 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
3128 	    "%s: tid=%p, bar_wait=%d, bar_tx=%d, called\n",
3129 	    __func__,
3130 	    tid,
3131 	    tid->bar_wait,
3132 	    tid->bar_tx);
3133 
3134 	/* We shouldn't be called when bar_tx is 1 */
3135 	if (tid->bar_tx) {
3136 		device_printf(sc->sc_dev, "%s: bar_tx is 1?!\n",
3137 		    __func__);
3138 	}
3139 
3140 	/* If we've already been called, just be patient. */
3141 	if (tid->bar_wait)
3142 		return;
3143 
3144 	/* Wait! */
3145 	tid->bar_wait = 1;
3146 
3147 	/* Only one pause, no matter how many frames fail */
3148 	ath_tx_tid_pause(sc, tid);
3149 }
3150 
3151 /*
3152  * We've finished with BAR handling - either we succeeded or
3153  * failed. Either way, unsuspend TX.
3154  */
3155 static void
3156 ath_tx_tid_bar_unsuspend(struct ath_softc *sc, struct ath_tid *tid)
3157 {
3158 	ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[tid->ac]);
3159 
3160 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
3161 	    "%s: tid=%p, called\n",
3162 	    __func__,
3163 	    tid);
3164 
3165 	if (tid->bar_tx == 0 || tid->bar_wait == 0) {
3166 		device_printf(sc->sc_dev, "%s: bar_tx=%d, bar_wait=%d: ?\n",
3167 		    __func__, tid->bar_tx, tid->bar_wait);
3168 	}
3169 
3170 	tid->bar_tx = tid->bar_wait = 0;
3171 	ath_tx_tid_resume(sc, tid);
3172 }
3173 
3174 /*
3175  * Return whether we're ready to TX a BAR frame.
3176  *
3177  * Requires the TID lock be held.
3178  */
3179 static int
3180 ath_tx_tid_bar_tx_ready(struct ath_softc *sc, struct ath_tid *tid)
3181 {
3182 
3183 	ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[tid->ac]);
3184 
3185 	if (tid->bar_wait == 0 || tid->hwq_depth > 0)
3186 		return (0);
3187 
3188 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR, "%s: tid=%p (%d), bar ready\n",
3189 	    __func__, tid, tid->tid);
3190 
3191 	return (1);
3192 }
3193 
3194 /*
3195  * Check whether the current TID is ready to have a BAR
3196  * TXed and if so, do the TX.
3197  *
3198  * Since the TID/TXQ lock can't be held during a call to
3199  * ieee80211_send_bar(), we have to do the dirty thing of unlocking it,
3200  * sending the BAR and locking it again.
3201  *
3202  * Eventually, the code to send the BAR should be broken out
3203  * from this routine so the lock doesn't have to be reacquired
3204  * just to be immediately dropped by the caller.
3205  */
3206 static void
3207 ath_tx_tid_bar_tx(struct ath_softc *sc, struct ath_tid *tid)
3208 {
3209 	struct ieee80211_tx_ampdu *tap;
3210 
3211 	ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[tid->ac]);
3212 
3213 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
3214 	    "%s: tid=%p, called\n",
3215 	    __func__,
3216 	    tid);
3217 
3218 	tap = ath_tx_get_tx_tid(tid->an, tid->tid);
3219 
3220 	/*
3221 	 * This is an error condition!
3222 	 */
3223 	if (tid->bar_wait == 0 || tid->bar_tx == 1) {
3224 		device_printf(sc->sc_dev,
3225 		    "%s: tid=%p, bar_tx=%d, bar_wait=%d: ?\n",
3226 		    __func__,
3227 		    tid,
3228 		    tid->bar_tx,
3229 		    tid->bar_wait);
3230 		return;
3231 	}
3232 
3233 	/* Don't do anything if we still have pending frames */
3234 	if (tid->hwq_depth > 0) {
3235 		DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
3236 		    "%s: tid=%p, hwq_depth=%d, waiting\n",
3237 		    __func__,
3238 		    tid,
3239 		    tid->hwq_depth);
3240 		return;
3241 	}
3242 
3243 	/* We're now about to TX */
3244 	tid->bar_tx = 1;
3245 
3246 	/*
3247 	 * Override the clrdmask configuration for the next frame,
3248 	 * just to get the ball rolling.
3249 	 */
3250 	tid->clrdmask = 1;
3251 
3252 	/*
3253 	 * Calculate new BAW left edge, now that all frames have either
3254 	 * succeeded or failed.
3255 	 *
3256 	 * XXX verify this is _actually_ the valid value to begin at!
3257 	 */
3258 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
3259 	    "%s: tid=%p, new BAW left edge=%d\n",
3260 	    __func__,
3261 	    tid,
3262 	    tap->txa_start);
3263 
3264 	/* Try sending the BAR frame */
3265 	/* We can't hold the lock here! */
3266 
3267 	ATH_TXQ_UNLOCK(sc->sc_ac2q[tid->ac]);
3268 	if (ieee80211_send_bar(&tid->an->an_node, tap, tap->txa_start) == 0) {
3269 		/* Success? Now we wait for notification that it's done */
3270 		ATH_TXQ_LOCK(sc->sc_ac2q[tid->ac]);
3271 		return;
3272 	}
3273 
3274 	/* Failure? For now, warn loudly and continue */
3275 	ATH_TXQ_LOCK(sc->sc_ac2q[tid->ac]);
3276 	device_printf(sc->sc_dev, "%s: tid=%p, failed to TX BAR, continue!\n",
3277 	    __func__, tid);
3278 	ath_tx_tid_bar_unsuspend(sc, tid);
3279 }
3280 
3281 static void
3282 ath_tx_tid_drain_pkt(struct ath_softc *sc, struct ath_node *an,
3283     struct ath_tid *tid, ath_bufhead *bf_cq, struct ath_buf *bf)
3284 {
3285 
3286 	ATH_TID_LOCK_ASSERT(sc, tid);
3287 
3288 	/*
3289 	 * If the current TID is running AMPDU, update
3290 	 * the BAW.
3291 	 */
3292 	if (ath_tx_ampdu_running(sc, an, tid->tid) &&
3293 	    bf->bf_state.bfs_dobaw) {
3294 		/*
3295 		 * Only remove the frame from the BAW if it's
3296 		 * been transmitted at least once; this means
3297 		 * the frame was in the BAW to begin with.
3298 		 */
3299 		if (bf->bf_state.bfs_retries > 0) {
3300 			ath_tx_update_baw(sc, an, tid, bf);
3301 			bf->bf_state.bfs_dobaw = 0;
3302 		}
3303 		/*
3304 		 * This has become a non-fatal error now
3305 		 */
3306 		if (! bf->bf_state.bfs_addedbaw)
3307 			device_printf(sc->sc_dev,
3308 			    "%s: wasn't added: seqno %d\n",
3309 			    __func__, SEQNO(bf->bf_state.bfs_seqno));
3310 	}
3311 	TAILQ_INSERT_TAIL(bf_cq, bf, bf_list);
3312 }
3313 
3314 static void
3315 ath_tx_tid_drain_print(struct ath_softc *sc, struct ath_node *an,
3316     const char *pfx, struct ath_tid *tid, struct ath_buf *bf)
3317 {
3318 	struct ieee80211_node *ni = &an->an_node;
3319 	struct ath_txq *txq = sc->sc_ac2q[tid->ac];
3320 	struct ieee80211_tx_ampdu *tap;
3321 
3322 	tap = ath_tx_get_tx_tid(an, tid->tid);
3323 
3324 	device_printf(sc->sc_dev,
3325 	    "%s: %s: node %p: bf=%p: addbaw=%d, dobaw=%d, "
3326 	    "seqno=%d, retry=%d\n",
3327 	    __func__, pfx, ni, bf,
3328 	    bf->bf_state.bfs_addedbaw,
3329 	    bf->bf_state.bfs_dobaw,
3330 	    SEQNO(bf->bf_state.bfs_seqno),
3331 	    bf->bf_state.bfs_retries);
3332 	device_printf(sc->sc_dev,
3333 	    "%s: node %p: bf=%p: txq[%d] axq_depth=%d, axq_aggr_depth=%d\n",
3334 	        __func__, ni, bf,
3335 	    txq->axq_qnum,
3336 	    txq->axq_depth,
3337 	    txq->axq_aggr_depth);
3338 
3339 	device_printf(sc->sc_dev,
3340 	    "%s: node %p: bf=%p: tid txq_depth=%d hwq_depth=%d, bar_wait=%d, isfiltered=%d\n",
3341 	    __func__, ni, bf,
3342 	    tid->axq_depth,
3343 	    tid->hwq_depth,
3344 	    tid->bar_wait,
3345 	    tid->isfiltered);
3346 	device_printf(sc->sc_dev,
3347 	    "%s: node %p: tid %d: "
3348 	    "sched=%d, paused=%d, "
3349 	    "incomp=%d, baw_head=%d, "
3350 	    "baw_tail=%d txa_start=%d, ni_txseqs=%d\n",
3351 	     __func__, ni, tid->tid,
3352 	     tid->sched, tid->paused,
3353 	     tid->incomp, tid->baw_head,
3354 	     tid->baw_tail, tap == NULL ? -1 : tap->txa_start,
3355 	     ni->ni_txseqs[tid->tid]);
3356 
3357 	/* XXX Dump the frame, see what it is? */
3358 	ieee80211_dump_pkt(ni->ni_ic,
3359 	    mtod(bf->bf_m, const uint8_t *),
3360 	    bf->bf_m->m_len, 0, -1);
3361 }
3362 
3363 /*
3364  * Free any packets currently pending in the software TX queue.
3365  *
3366  * This will be called when a node is being deleted.
3367  *
3368  * It can also be called on an active node during an interface
3369  * reset or state transition.
3370  *
3371  * (From Linux/reference):
3372  *
3373  * TODO: For frame(s) that are in the retry state, we will reuse the
3374  * sequence number(s) without setting the retry bit. The
3375  * alternative is to give up on these and BAR the receiver's window
3376  * forward.
3377  */
3378 static void
3379 ath_tx_tid_drain(struct ath_softc *sc, struct ath_node *an,
3380     struct ath_tid *tid, ath_bufhead *bf_cq)
3381 {
3382 	struct ath_buf *bf;
3383 	struct ieee80211_tx_ampdu *tap;
3384 	struct ieee80211_node *ni = &an->an_node;
3385 	int t;
3386 
3387 	tap = ath_tx_get_tx_tid(an, tid->tid);
3388 
3389 	ATH_TID_LOCK_ASSERT(sc, tid);
3390 
3391 	/* Walk the queue, free frames */
3392 	t = 0;
3393 	for (;;) {
3394 		bf = ATH_TID_FIRST(tid);
3395 		if (bf == NULL) {
3396 			break;
3397 		}
3398 
3399 		if (t == 0) {
3400 			ath_tx_tid_drain_print(sc, an, "norm", tid, bf);
3401 			t = 1;
3402 		}
3403 
3404 		ATH_TID_REMOVE(tid, bf, bf_list);
3405 		ath_tx_tid_drain_pkt(sc, an, tid, bf_cq, bf);
3406 	}
3407 
3408 	/* And now, drain the filtered frame queue */
3409 	t = 0;
3410 	for (;;) {
3411 		bf = ATH_TID_FILT_FIRST(tid);
3412 		if (bf == NULL)
3413 			break;
3414 
3415 		if (t == 0) {
3416 			ath_tx_tid_drain_print(sc, an, "filt", tid, bf);
3417 			t = 1;
3418 		}
3419 
3420 		ATH_TID_FILT_REMOVE(tid, bf, bf_list);
3421 		ath_tx_tid_drain_pkt(sc, an, tid, bf_cq, bf);
3422 	}
3423 
3424 	/*
3425 	 * Override the clrdmask configuration for the next frame
3426 	 * in case there is some future transmission, just to get
3427 	 * the ball rolling.
3428 	 *
3429 	 * This won't hurt things if the TID is about to be freed.
3430 	 */
3431 	tid->clrdmask = 1;
3432 
3433 	/*
3434 	 * Now that it's completed, update the sequence number and
3435 	 * BAW window (the TID lock is already held here.)
3436 	 * Because sequence numbers have been assigned to frames
3437 	 * that haven't been sent yet, it's entirely possible
3438 	 * we'll be called with some pending frames that have not
3439 	 * been transmitted.
3440 	 *
3441 	 * The cleaner solution is to do the sequence number allocation
3442 	 * when the packet is first transmitted - and thus the "retries"
3443 	 * check above would be enough to update the BAW/seqno.
3444 	 */
3445 
3446 	/* But don't do it for non-QoS TIDs */
3447 	if (tap) {
3448 #if 0
3449 		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
3450 		    "%s: node %p: TID %d: sliding BAW left edge to %d\n",
3451 		    __func__, an, tid->tid, tap->txa_start);
3452 #endif
3453 		ni->ni_txseqs[tid->tid] = tap->txa_start;
3454 		tid->baw_tail = tid->baw_head;
3455 	}
3456 }
3457 
3458 /*
3459  * Flush all software queued packets for the given node.
3460  *
3461  * This occurs when a completion handler frees the last buffer
3462  * for a node, and the node is thus freed. This causes the node
3463  * to be cleaned up, which ends up calling ath_tx_node_flush.
3464  */
3465 void
3466 ath_tx_node_flush(struct ath_softc *sc, struct ath_node *an)
3467 {
3468 	int tid;
3469 	ath_bufhead bf_cq;
3470 	struct ath_buf *bf;
3471 
3472 	TAILQ_INIT(&bf_cq);
3473 
3474 	ATH_KTR(sc, ATH_KTR_NODE, 1, "ath_tx_node_flush: flush node; ni=%p",
3475 	    &an->an_node);
3476 
3477 	for (tid = 0; tid < IEEE80211_TID_SIZE; tid++) {
3478 		struct ath_tid *atid = &an->an_tid[tid];
3479 		struct ath_txq *txq = sc->sc_ac2q[atid->ac];
3480 
3481 		ATH_TXQ_LOCK(txq);
3482 		/* Free packets */
3483 		ath_tx_tid_drain(sc, an, atid, &bf_cq);
3484 		/* Remove this tid from the list of active tids */
3485 		ath_tx_tid_unsched(sc, atid);
3486 		ATH_TXQ_UNLOCK(txq);
3487 	}
3488 
3489 	/* Handle completed frames */
3490 	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
3491 		TAILQ_REMOVE(&bf_cq, bf, bf_list);
3492 		ath_tx_default_comp(sc, bf, 0);
3493 	}
3494 }
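
/*
 * Editorial sketch, not driver code: ath_tx_node_flush() above uses a
 * pattern that recurs throughout this file - buffers are collected onto
 * a local completion list while the TXQ lock is held, and the completion
 * callbacks only run after the lock has been dropped (to avoid lock
 * recursion in the completion path).  The ex_* types and ex_complete()
 * below are hypothetical stand-ins, not driver APIs.
 */
#if 0
struct ex_buf {
	TAILQ_ENTRY(ex_buf) eb_list;
};
TAILQ_HEAD(ex_bufhead, ex_buf);

static void
ex_drain_and_complete(struct mtx *lock, struct ex_bufhead *srcq)
{
	struct ex_bufhead cq;
	struct ex_buf *b;

	TAILQ_INIT(&cq);

	/* Phase 1: collect under the lock; no callbacks yet. */
	mtx_lock(lock);
	while ((b = TAILQ_FIRST(srcq)) != NULL) {
		TAILQ_REMOVE(srcq, b, eb_list);
		TAILQ_INSERT_TAIL(&cq, b, eb_list);
	}
	mtx_unlock(lock);

	/* Phase 2: run completions outside the lock. */
	while ((b = TAILQ_FIRST(&cq)) != NULL) {
		TAILQ_REMOVE(&cq, b, eb_list);
		ex_complete(b);		/* hypothetical completion hook */
	}
}
#endif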
3495 
3496 /*
3497  * Drain all the software TXQs currently with traffic queued.
3498  */
3499 void
3500 ath_tx_txq_drain(struct ath_softc *sc, struct ath_txq *txq)
3501 {
3502 	struct ath_tid *tid;
3503 	ath_bufhead bf_cq;
3504 	struct ath_buf *bf;
3505 
3506 	TAILQ_INIT(&bf_cq);
3507 	ATH_TXQ_LOCK(txq);
3508 
3509 	/*
3510 	 * Iterate over all active tids for the given txq,
3511 	 * flushing and unsched'ing them
3512 	 */
3513 	while (! TAILQ_EMPTY(&txq->axq_tidq)) {
3514 		tid = TAILQ_FIRST(&txq->axq_tidq);
3515 		ath_tx_tid_drain(sc, tid->an, tid, &bf_cq);
3516 		ath_tx_tid_unsched(sc, tid);
3517 	}
3518 
3519 	ATH_TXQ_UNLOCK(txq);
3520 
3521 	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
3522 		TAILQ_REMOVE(&bf_cq, bf, bf_list);
3523 		ath_tx_default_comp(sc, bf, 0);
3524 	}
3525 }
3526 
3527 /*
3528  * Handle completion of non-aggregate session frames.
3529  *
3530  * This (currently) doesn't implement software retransmission of
3531  * non-aggregate frames!
3532  *
3533  * Software retransmission of non-aggregate frames needs to obey
3534  * the strict sequence number ordering, and drop any frames that
3535  * will fail this.
3536  *
3537  * For now, filtered frames and frame retransmission will cause
3538  * all kinds of issues.  So we don't support them.
3539  *
3540  * So anyone queuing frames via ath_tx_normal_xmit() or
3541  * ath_tx_hw_queue_norm() must override and set CLRDMASK.
3542  */
3543 void
3544 ath_tx_normal_comp(struct ath_softc *sc, struct ath_buf *bf, int fail)
3545 {
3546 	struct ieee80211_node *ni = bf->bf_node;
3547 	struct ath_node *an = ATH_NODE(ni);
3548 	int tid = bf->bf_state.bfs_tid;
3549 	struct ath_tid *atid = &an->an_tid[tid];
3550 	struct ath_tx_status *ts = &bf->bf_status.ds_txstat;
3551 
3552 	/* The TID state is protected behind the TXQ lock */
3553 	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
3554 
3555 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: bf=%p: fail=%d, hwq_depth now %d\n",
3556 	    __func__, bf, fail, atid->hwq_depth - 1);
3557 
3558 	atid->hwq_depth--;
3559 
3560 #if 0
3561 	/*
3562 	 * If the frame was filtered, stick it on the filter frame
3563 	 * queue and complain about it.  It shouldn't happen!
3564 	 */
3565 	if ((ts->ts_status & HAL_TXERR_FILT) ||
3566 	    (ts->ts_status != 0 && atid->isfiltered)) {
3567 		device_printf(sc->sc_dev,
3568 		    "%s: isfiltered=%d, ts_status=%d: huh?\n",
3569 		    __func__,
3570 		    atid->isfiltered,
3571 		    ts->ts_status);
3572 		ath_tx_tid_filt_comp_buf(sc, atid, bf);
3573 	}
3574 #endif
3575 	if (atid->isfiltered)
3576 		device_printf(sc->sc_dev, "%s: filtered?!\n", __func__);
3577 	if (atid->hwq_depth < 0)
3578 		device_printf(sc->sc_dev, "%s: hwq_depth < 0: %d\n",
3579 		    __func__, atid->hwq_depth);
3580 
3581 	/*
3582 	 * If the queue is filtered, potentially mark it as complete
3583 	 * and reschedule it as needed.
3584 	 *
3585 	 * This is required as there may be a subsequent TX descriptor
3586 	 * for this end-node that has CLRDMASK set, so it's quite possible
3587 	 * that a filtered frame will be followed by a non-filtered
3588 	 * (complete or otherwise) frame.
3589 	 *
3590 	 * XXX should we do this before we complete the frame?
3591 	 */
3592 	if (atid->isfiltered)
3593 		ath_tx_tid_filt_comp_complete(sc, atid);
3594 	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
3595 
3596 	/*
3597 	 * punt to rate control if we're not being cleaned up
3598 	 * during a hw queue drain and the frame wanted an ACK.
3599 	 */
3600 	if (fail == 0 && ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0))
3601 		ath_tx_update_ratectrl(sc, ni, bf->bf_state.bfs_rc,
3602 		    ts, bf->bf_state.bfs_pktlen,
3603 		    1, (ts->ts_status == 0) ? 0 : 1);
3604 
3605 	ath_tx_default_comp(sc, bf, fail);
3606 }
3607 
3608 /*
3609  * Handle cleanup of aggregate session packets that aren't
3610  * an A-MPDU.
3611  *
3612  * There's no need to update the BAW here - the session is being
3613  * torn down.
3614  */
3615 static void
3616 ath_tx_comp_cleanup_unaggr(struct ath_softc *sc, struct ath_buf *bf)
3617 {
3618 	struct ieee80211_node *ni = bf->bf_node;
3619 	struct ath_node *an = ATH_NODE(ni);
3620 	int tid = bf->bf_state.bfs_tid;
3621 	struct ath_tid *atid = &an->an_tid[tid];
3622 
3623 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, "%s: TID %d: incomp=%d\n",
3624 	    __func__, tid, atid->incomp);
3625 
3626 	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
3627 	atid->incomp--;
3628 	if (atid->incomp == 0) {
3629 		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
3630 		    "%s: TID %d: cleaned up! resume!\n",
3631 		    __func__, tid);
3632 		atid->cleanup_inprogress = 0;
3633 		ath_tx_tid_resume(sc, atid);
3634 	}
3635 	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
3636 
3637 	ath_tx_default_comp(sc, bf, 0);
3638 }
3639 
3640 /*
3641  * Performs transmit side cleanup when TID changes from aggregated to
3642  * unaggregated.
3643  *
3644  * - Discard all retry frames from the s/w queue.
3645  * - Fix the tx completion function for all buffers in s/w queue.
3646  * - Count the number of unacked frames, and let transmit completion
3647  *   handle it later.
3648  *
3649  * The caller is responsible for pausing the TID.
3650  */
3651 static void
3652 ath_tx_tid_cleanup(struct ath_softc *sc, struct ath_node *an, int tid)
3653 {
3654 	struct ath_tid *atid = &an->an_tid[tid];
3655 	struct ieee80211_tx_ampdu *tap;
3656 	struct ath_buf *bf, *bf_next;
3657 	ath_bufhead bf_cq;
3658 
3659 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAW,
3660 	    "%s: TID %d: called\n", __func__, tid);
3661 
3662 	TAILQ_INIT(&bf_cq);
3663 	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
3664 
3665 	/*
3666 	 * Move the filtered frames to the TX queue, before
3667 	 * we run off and discard/process things.
3668 	 */
3669 	/* XXX this is really quite inefficient */
3670 	while ((bf = ATH_TID_FILT_LAST(atid, ath_bufhead_s)) != NULL) {
3671 		ATH_TID_FILT_REMOVE(atid, bf, bf_list);
3672 		ATH_TID_INSERT_HEAD(atid, bf, bf_list);
3673 	}
3674 
3675 	/*
3676 	 * Update the frames in the software TX queue:
3677 	 *
3678 	 * + Discard retry frames in the queue
3679 	 * + Fix the completion function to be non-aggregate
3680 	 */
3681 	bf = ATH_TID_FIRST(atid);
3682 	while (bf) {
3683 		if (bf->bf_state.bfs_isretried) {
3684 			bf_next = TAILQ_NEXT(bf, bf_list);
3685 			ATH_TID_REMOVE(atid, bf, bf_list);
3686 			atid->axq_depth--;
3687 			if (bf->bf_state.bfs_dobaw) {
3688 				ath_tx_update_baw(sc, an, atid, bf);
3689 				if (! bf->bf_state.bfs_addedbaw)
3690 					device_printf(sc->sc_dev,
3691 					    "%s: wasn't added: seqno %d\n",
3692 					    __func__,
3693 					    SEQNO(bf->bf_state.bfs_seqno));
3694 			}
3695 			bf->bf_state.bfs_dobaw = 0;
3696 			/*
3697 			 * Call the default completion handler with "fail" just
3698 			 * so upper levels are suitably notified about this.
3699 			 */
3700 			TAILQ_INSERT_TAIL(&bf_cq, bf, bf_list);
3701 			bf = bf_next;
3702 			continue;
3703 		}
3704 		/* Give these the default completion handler */
3705 		bf->bf_comp = ath_tx_normal_comp;
3706 		bf = TAILQ_NEXT(bf, bf_list);
3707 	}
3708 
3709 	/* The caller is required to pause the TID */
3710 #if 0
3711 	/* Pause the TID */
3712 	ath_tx_tid_pause(sc, atid);
3713 #endif
3714 
3715 	/*
3716 	 * Calculate what hardware-queued frames exist based
3717 	 * on the current BAW size, i.e. what frames have been
3718 	 * added to the TX hardware queue for this TID but
3719 	 * not yet ACKed.
3720 	 */
3721 	tap = ath_tx_get_tx_tid(an, tid);
3722 	/* Need the lock - fiddling with BAW */
3723 	while (atid->baw_head != atid->baw_tail) {
3724 		if (atid->tx_buf[atid->baw_head]) {
3725 			atid->incomp++;
3726 			atid->cleanup_inprogress = 1;
3727 			atid->tx_buf[atid->baw_head] = NULL;
3728 		}
3729 		INCR(atid->baw_head, ATH_TID_MAX_BUFS);
3730 		INCR(tap->txa_start, IEEE80211_SEQ_RANGE);
3731 	}
3732 
3733 	/*
3734 	 * If cleanup is required, defer TID scheduling
3735 	 * until all the HW queued packets have been
3736 	 * sent.
3737 	 */
3738 	if (! atid->cleanup_inprogress)
3739 		ath_tx_tid_resume(sc, atid);
3740 
3741 	if (atid->cleanup_inprogress)
3742 		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
3743 		    "%s: TID %d: cleanup needed: %d packets\n",
3744 		    __func__, tid, atid->incomp);
3745 	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
3746 
3747 	/* Handle completing frames and fail them */
3748 	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
3749 		TAILQ_REMOVE(&bf_cq, bf, bf_list);
3750 		ath_tx_default_comp(sc, bf, 1);
3751 	}
3752 }
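
/*
 * Editorial sketch, not driver code: the BAW bookkeeping in
 * ath_tx_tid_cleanup() above treats tx_buf[] as a ring, advancing
 * baw_head/baw_tail with a wrapping increment (the driver's INCR()
 * macro) and advancing txa_start modulo the 802.11 sequence space.
 * The macro/helper names and constants below are local illustrations
 * only, not the driver's definitions.
 */
#if 0
#define	EX_INCR(_v, _m)		do { (_v) = ((_v) + 1) % (_m); } while (0)
#define	EX_SEQ_RANGE		4096	/* 12-bit 802.11 sequence space */

/* Number of occupied slots between tail and head in a ring of size m. */
static int
ex_ring_occupancy(int head, int tail, int m)
{
	return ((head - tail + m) % m);
}
#endif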
3753 
3754 static struct ath_buf *
3755 ath_tx_retry_clone(struct ath_softc *sc, struct ath_node *an,
3756     struct ath_tid *tid, struct ath_buf *bf)
3757 {
3758 	struct ath_buf *nbf;
3759 	int error;
3760 
3761 	nbf = ath_buf_clone(sc, bf);
3762 
3763 #if 0
3764 	device_printf(sc->sc_dev, "%s: ATH_BUF_BUSY; cloning\n",
3765 	    __func__);
3766 #endif
3767 
3768 	if (nbf == NULL) {
3769 		/* Failed to clone */
3770 		device_printf(sc->sc_dev,
3771 		    "%s: failed to clone a busy buffer\n",
3772 		    __func__);
3773 		return NULL;
3774 	}
3775 
3776 	/* Setup the dma for the new buffer */
3777 	error = ath_tx_dmasetup(sc, nbf, nbf->bf_m);
3778 	if (error != 0) {
3779 		device_printf(sc->sc_dev,
3780 		    "%s: failed to setup dma for clone\n",
3781 		    __func__);
3782 		/*
3783 		 * Put this at the head of the list, not tail;
3784 		 * that way it doesn't interfere with the
3785 		 * busy buffer logic (which uses the tail of
3786 		 * the list.)
3787 		 */
3788 		ATH_TXBUF_LOCK(sc);
3789 		ath_returnbuf_head(sc, nbf);
3790 		ATH_TXBUF_UNLOCK(sc);
3791 		return NULL;
3792 	}
3793 
3794 	/* Update BAW if required, before we free the original buf */
3795 	if (bf->bf_state.bfs_dobaw)
3796 		ath_tx_switch_baw_buf(sc, an, tid, bf, nbf);
3797 
3798 	/* Free current buffer; return the older buffer */
3799 	bf->bf_m = NULL;
3800 	bf->bf_node = NULL;
3801 	ath_freebuf(sc, bf);
3802 
3803 	return nbf;
3804 }
3805 
3806 /*
3807  * Handle retrying an unaggregate frame in an aggregate
3808  * session.
3809  *
3810  * If too many retries occur, pause the TID, wait for
3811  * any further retransmits (as there's no requirement that
3812  * non-aggregate frames in an aggregate session be transmitted
3813  * in order; they just have to be within the BAW)
3814  * and then queue a BAR.
3815  */
3816 static void
3817 ath_tx_aggr_retry_unaggr(struct ath_softc *sc, struct ath_buf *bf)
3818 {
3819 	struct ieee80211_node *ni = bf->bf_node;
3820 	struct ath_node *an = ATH_NODE(ni);
3821 	int tid = bf->bf_state.bfs_tid;
3822 	struct ath_tid *atid = &an->an_tid[tid];
3823 	struct ieee80211_tx_ampdu *tap;
3824 
3825 	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
3826 
3827 	tap = ath_tx_get_tx_tid(an, tid);
3828 
3829 	/*
3830 	 * If the buffer is marked as busy, we can't directly
3831 	 * reuse it. Instead, try to clone the buffer.
3832 	 * If the clone is successful, recycle the old buffer.
3833 	 * If the clone is unsuccessful, set bfs_retries to max
3834 	 * to force the next bit of code to free the buffer
3835 	 * for us.
3836 	 */
3837 	if ((bf->bf_state.bfs_retries < SWMAX_RETRIES) &&
3838 	    (bf->bf_flags & ATH_BUF_BUSY)) {
3839 		struct ath_buf *nbf;
3840 		nbf = ath_tx_retry_clone(sc, an, atid, bf);
3841 		if (nbf)
3842 			/* bf has been freed at this point */
3843 			bf = nbf;
3844 		else
3845 			bf->bf_state.bfs_retries = SWMAX_RETRIES + 1;
3846 	}
3847 
3848 	if (bf->bf_state.bfs_retries >= SWMAX_RETRIES) {
3849 		DPRINTF(sc, ATH_DEBUG_SW_TX_RETRIES,
3850 		    "%s: exceeded retries; seqno %d\n",
3851 		    __func__, SEQNO(bf->bf_state.bfs_seqno));
3852 		sc->sc_stats.ast_tx_swretrymax++;
3853 
3854 		/* Update BAW anyway */
3855 		if (bf->bf_state.bfs_dobaw) {
3856 			ath_tx_update_baw(sc, an, atid, bf);
3857 			if (! bf->bf_state.bfs_addedbaw)
3858 				device_printf(sc->sc_dev,
3859 				    "%s: wasn't added: seqno %d\n",
3860 				    __func__, SEQNO(bf->bf_state.bfs_seqno));
3861 		}
3862 		bf->bf_state.bfs_dobaw = 0;
3863 
3864 		/* Suspend the TX queue and get ready to send the BAR */
3865 		ath_tx_tid_bar_suspend(sc, atid);
3866 
3867 		/* Send the BAR if there are no other frames waiting */
3868 		if (ath_tx_tid_bar_tx_ready(sc, atid))
3869 			ath_tx_tid_bar_tx(sc, atid);
3870 
3871 		ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
3872 
3873 		/* Free buffer, bf is free after this call */
3874 		ath_tx_default_comp(sc, bf, 0);
3875 		return;
3876 	}
3877 
3878 	/*
3879 	 * This increments the retry counter as well as
3880 	 * sets the retry flag in the ath_buf and packet
3881 	 * body.
3882 	 */
3883 	ath_tx_set_retry(sc, bf);
3884 	sc->sc_stats.ast_tx_swretries++;
3885 
3886 	/*
3887 	 * Insert this at the head of the queue, so it's
3888 	 * retried before any current/subsequent frames.
3889 	 */
3890 	ATH_TID_INSERT_HEAD(atid, bf, bf_list);
3891 	ath_tx_tid_sched(sc, atid);
3892 	/* Send the BAR if there are no other frames waiting */
3893 	if (ath_tx_tid_bar_tx_ready(sc, atid))
3894 		ath_tx_tid_bar_tx(sc, atid);
3895 
3896 	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
3897 }
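
/*
 * Editorial sketch, not driver code: the retry policy implemented by
 * ath_tx_aggr_retry_unaggr() above, in outline.  A buffer still marked
 * busy can only be retried if it can be cloned; once the software retry
 * budget (SWMAX_RETRIES) is exhausted the frame is dropped, the BAW is
 * updated and a BAR is scheduled.  The enum and helper name below are
 * hypothetical.
 */
#if 0
enum ex_retry_action { EX_RETRY, EX_DROP_AND_BAR };

static enum ex_retry_action
ex_retry_decision(int retries, int max_retries, int busy, int clone_ok)
{
	/* A busy buffer that can't be cloned can't be re-queued. */
	if (busy && !clone_ok)
		return (EX_DROP_AND_BAR);
	/* Out of retry budget: give up and let the caller send a BAR. */
	if (retries >= max_retries)
		return (EX_DROP_AND_BAR);
	return (EX_RETRY);
}
#endif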
3898 
3899 /*
3900  * Common code for aggregate excessive retry/subframe retry.
3901  * If retrying, queues buffers to bf_q.  If not, returns non-zero
3902  * so the caller can complete/free the buffers.
3903  *
3904  * XXX should unify this with ath_tx_aggr_retry_unaggr()
3905  */
3906 static int
3907 ath_tx_retry_subframe(struct ath_softc *sc, struct ath_buf *bf,
3908     ath_bufhead *bf_q)
3909 {
3910 	struct ieee80211_node *ni = bf->bf_node;
3911 	struct ath_node *an = ATH_NODE(ni);
3912 	int tid = bf->bf_state.bfs_tid;
3913 	struct ath_tid *atid = &an->an_tid[tid];
3914 
3915 	ATH_TXQ_LOCK_ASSERT(sc->sc_ac2q[atid->ac]);
3916 
3917 	/* XXX clr11naggr should be done for all subframes */
3918 	ath_hal_clr11n_aggr(sc->sc_ah, bf->bf_desc);
3919 	ath_hal_set11nburstduration(sc->sc_ah, bf->bf_desc, 0);
3920 
3921 	/* ath_hal_set11n_virtualmorefrag(sc->sc_ah, bf->bf_desc, 0); */
3922 
3923 	/*
3924 	 * If the buffer is marked as busy, we can't directly
3925 	 * reuse it. Instead, try to clone the buffer.
3926 	 * If the clone is successful, recycle the old buffer.
3927 	 * If the clone is unsuccessful, set bfs_retries to max
3928 	 * to force the next bit of code to free the buffer
3929 	 * for us.
3930 	 */
3931 	if ((bf->bf_state.bfs_retries < SWMAX_RETRIES) &&
3932 	    (bf->bf_flags & ATH_BUF_BUSY)) {
3933 		struct ath_buf *nbf;
3934 		nbf = ath_tx_retry_clone(sc, an, atid, bf);
3935 		if (nbf)
3936 			/* bf has been freed at this point */
3937 			bf = nbf;
3938 		else
3939 			bf->bf_state.bfs_retries = SWMAX_RETRIES + 1;
3940 	}
3941 
3942 	if (bf->bf_state.bfs_retries >= SWMAX_RETRIES) {
3943 		sc->sc_stats.ast_tx_swretrymax++;
3944 		DPRINTF(sc, ATH_DEBUG_SW_TX_RETRIES,
3945 		    "%s: max retries: seqno %d\n",
3946 		    __func__, SEQNO(bf->bf_state.bfs_seqno));
3947 		ath_tx_update_baw(sc, an, atid, bf);
3948 		if (! bf->bf_state.bfs_addedbaw)
3949 			device_printf(sc->sc_dev,
3950 			    "%s: wasn't added: seqno %d\n",
3951 			    __func__, SEQNO(bf->bf_state.bfs_seqno));
3952 		bf->bf_state.bfs_dobaw = 0;
3953 		return 1;
3954 	}
3955 
3956 	ath_tx_set_retry(sc, bf);
3957 	sc->sc_stats.ast_tx_swretries++;
3958 	bf->bf_next = NULL;		/* Just to make sure */
3959 
3960 	/* Clear the aggregate state */
3961 	bf->bf_state.bfs_aggr = 0;
3962 	bf->bf_state.bfs_ndelim = 0;	/* ??? needed? */
3963 	bf->bf_state.bfs_nframes = 1;
3964 
3965 	TAILQ_INSERT_TAIL(bf_q, bf, bf_list);
3966 	return 0;
3967 }
3968 
3969 /*
3970  * error pkt completion for an aggregate destination
3971  */
3972 static void
3973 ath_tx_comp_aggr_error(struct ath_softc *sc, struct ath_buf *bf_first,
3974     struct ath_tid *tid)
3975 {
3976 	struct ieee80211_node *ni = bf_first->bf_node;
3977 	struct ath_node *an = ATH_NODE(ni);
3978 	struct ath_buf *bf_next, *bf;
3979 	ath_bufhead bf_q;
3980 	int drops = 0;
3981 	struct ieee80211_tx_ampdu *tap;
3982 	ath_bufhead bf_cq;
3983 
3984 	TAILQ_INIT(&bf_q);
3985 	TAILQ_INIT(&bf_cq);
3986 
3987 	/*
3988 	 * Update rate control - all frames have failed.
3989 	 *
3990 	 * XXX use the length in the first frame in the series;
3991 	 * XXX just so things are consistent for now.
3992 	 */
3993 	ath_tx_update_ratectrl(sc, ni, bf_first->bf_state.bfs_rc,
3994 	    &bf_first->bf_status.ds_txstat,
3995 	    bf_first->bf_state.bfs_pktlen,
3996 	    bf_first->bf_state.bfs_nframes, bf_first->bf_state.bfs_nframes);
3997 
3998 	ATH_TXQ_LOCK(sc->sc_ac2q[tid->ac]);
3999 	tap = ath_tx_get_tx_tid(an, tid->tid);
4000 	sc->sc_stats.ast_tx_aggr_failall++;
4001 
4002 	/* Retry all subframes */
4003 	bf = bf_first;
4004 	while (bf) {
4005 		bf_next = bf->bf_next;
4006 		bf->bf_next = NULL;	/* Remove it from the aggr list */
4007 		sc->sc_stats.ast_tx_aggr_fail++;
4008 		if (ath_tx_retry_subframe(sc, bf, &bf_q)) {
4009 			drops++;
4010 			bf->bf_next = NULL;
4011 			TAILQ_INSERT_TAIL(&bf_cq, bf, bf_list);
4012 		}
4013 		bf = bf_next;
4014 	}
4015 
4016 	/* Prepend all frames to the beginning of the queue */
4017 	while ((bf = TAILQ_LAST(&bf_q, ath_bufhead_s)) != NULL) {
4018 		TAILQ_REMOVE(&bf_q, bf, bf_list);
4019 		ATH_TID_INSERT_HEAD(tid, bf, bf_list);
4020 	}
4021 
4022 	/*
4023 	 * Schedule the TID to be re-tried.
4024 	 */
4025 	ath_tx_tid_sched(sc, tid);
4026 
4027 	/*
4028 	 * send bar if we dropped any frames
4029 	 *
4030 	 * Keep the txq lock held for now, as we need to ensure
4031 	 * that ni_txseqs[] is consistent (as it's being updated
4032 	 * in the ifnet TX context or raw TX context.)
4033 	 */
4034 	if (drops) {
4035 		/* Suspend the TX queue and get ready to send the BAR */
4036 		ath_tx_tid_bar_suspend(sc, tid);
4037 	}
4038 
4039 	/*
4040 	 * Send BAR if required
4041 	 */
4042 	if (ath_tx_tid_bar_tx_ready(sc, tid))
4043 		ath_tx_tid_bar_tx(sc, tid);
4044 
4045 	ATH_TXQ_UNLOCK(sc->sc_ac2q[tid->ac]);
4046 
4047 	/* Complete frames which errored out */
4048 	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
4049 		TAILQ_REMOVE(&bf_cq, bf, bf_list);
4050 		ath_tx_default_comp(sc, bf, 0);
4051 	}
4052 }
4053 
4054 /*
4055  * Handle clean-up of packets from an aggregate list.
4056  *
4057  * There's no need to update the BAW here - the session is being
4058  * torn down.
4059  */
4060 static void
4061 ath_tx_comp_cleanup_aggr(struct ath_softc *sc, struct ath_buf *bf_first)
4062 {
4063 	struct ath_buf *bf, *bf_next;
4064 	struct ieee80211_node *ni = bf_first->bf_node;
4065 	struct ath_node *an = ATH_NODE(ni);
4066 	int tid = bf_first->bf_state.bfs_tid;
4067 	struct ath_tid *atid = &an->an_tid[tid];
4068 
4069 	bf = bf_first;
4070 
4071 	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
4072 
4073 	/* update incomp */
4074 	while (bf) {
4075 		atid->incomp--;
4076 		bf = bf->bf_next;
4077 	}
4078 
4079 	if (atid->incomp == 0) {
4080 		DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
4081 		    "%s: TID %d: cleaned up! resume!\n",
4082 		    __func__, tid);
4083 		atid->cleanup_inprogress = 0;
4084 		ath_tx_tid_resume(sc, atid);
4085 	}
4086 
4087 	/* Send BAR if required */
4088 	/* XXX why would we send a BAR when transitioning to non-aggregation? */
4089 	if (ath_tx_tid_bar_tx_ready(sc, atid))
4090 		ath_tx_tid_bar_tx(sc, atid);
4091 
4092 	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
4093 
4094 	/* Handle frame completion */
4095 	while (bf) {
4096 		bf_next = bf->bf_next;
4097 		ath_tx_default_comp(sc, bf, 1);
4098 		bf = bf_next;
4099 	}
4100 }
4101 
4102 /*
4103  * Handle completion of a set of aggregate frames.
4104  *
4105  * XXX for now, simply complete each sub-frame.
4106  *
4107  * Note: the completion handler takes its TX status from the last
4108  * descriptor in the aggregate, not the last descriptor in the first frame.
4109  */
4110 static void
4111 ath_tx_aggr_comp_aggr(struct ath_softc *sc, struct ath_buf *bf_first,
4112     int fail)
4113 {
4114 	//struct ath_desc *ds = bf->bf_lastds;
4115 	struct ieee80211_node *ni = bf_first->bf_node;
4116 	struct ath_node *an = ATH_NODE(ni);
4117 	int tid = bf_first->bf_state.bfs_tid;
4118 	struct ath_tid *atid = &an->an_tid[tid];
4119 	struct ath_tx_status ts;
4120 	struct ieee80211_tx_ampdu *tap;
4121 	ath_bufhead bf_q;
4122 	ath_bufhead bf_cq;
4123 	int seq_st, tx_ok;
4124 	int hasba, isaggr;
4125 	uint32_t ba[2];
4126 	struct ath_buf *bf, *bf_next;
4127 	int ba_index;
4128 	int drops = 0;
4129 	int nframes = 0, nbad = 0, nf;
4130 	int pktlen;
4131 	/* XXX there's too much on the stack? */
4132 	struct ath_rc_series rc[ATH_RC_NUM];
4133 	int txseq;
4134 
4135 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR, "%s: called; hwq_depth=%d\n",
4136 	    __func__, atid->hwq_depth);
4137 
4138 	/*
4139 	 * Take a copy; this may be needed -after- bf_first
4140 	 * has been completed and freed.
4141 	 */
4142 	ts = bf_first->bf_status.ds_txstat;
4143 
4144 	TAILQ_INIT(&bf_q);
4145 	TAILQ_INIT(&bf_cq);
4146 
4147 	/* The TID state is kept behind the TXQ lock */
4148 	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
4149 
4150 	atid->hwq_depth--;
4151 	if (atid->hwq_depth < 0)
4152 		device_printf(sc->sc_dev, "%s: hwq_depth < 0: %d\n",
4153 		    __func__, atid->hwq_depth);
4154 
4155 	/*
4156 	 * If the TID is filtered, handle completing the filter
4157 	 * transition before potentially kicking it to the cleanup
4158 	 * function.
4159 	 *
4160 	 * XXX this is duplicate work, ew.
4161 	 */
4162 	if (atid->isfiltered)
4163 		ath_tx_tid_filt_comp_complete(sc, atid);
4164 
4165 	/*
4166 	 * Punt cleanup to the relevant function, not our problem now
4167 	 */
4168 	if (atid->cleanup_inprogress) {
4169 		if (atid->isfiltered)
4170 			device_printf(sc->sc_dev,
4171 			    "%s: isfiltered=1, normal_comp?\n",
4172 			    __func__);
4173 		ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
4174 		ath_tx_comp_cleanup_aggr(sc, bf_first);
4175 		return;
4176 	}
4177 
4178 	/*
4179 	 * If the frame is filtered, transition to filtered frame
4180 	 * mode and add this to the filtered frame list.
4181 	 *
4182 	 * XXX TODO: figure out how this interoperates with
4183 	 * BAR, pause and cleanup states.
4184 	 */
4185 	if ((ts.ts_status & HAL_TXERR_FILT) ||
4186 	    (ts.ts_status != 0 && atid->isfiltered)) {
4187 		if (fail != 0)
4188 			device_printf(sc->sc_dev,
4189 			    "%s: isfiltered=1, fail=%d\n", __func__, fail);
4190 		ath_tx_tid_filt_comp_aggr(sc, atid, bf_first, &bf_cq);
4191 
4192 		/* Remove from BAW */
4193 		TAILQ_FOREACH_SAFE(bf, &bf_cq, bf_list, bf_next) {
4194 			if (bf->bf_state.bfs_addedbaw)
4195 				drops++;
4196 			if (bf->bf_state.bfs_dobaw) {
4197 				ath_tx_update_baw(sc, an, atid, bf);
4198 				if (! bf->bf_state.bfs_addedbaw)
4199 					device_printf(sc->sc_dev,
4200 					    "%s: wasn't added: seqno %d\n",
4201 					    __func__,
4202 					    SEQNO(bf->bf_state.bfs_seqno));
4203 			}
4204 			bf->bf_state.bfs_dobaw = 0;
4205 		}
4206 		/*
4207 		 * If any intermediate frames in the BAW were dropped when
4208 		 * handling filtering things, send a BAR.
4209 		 */
4210 		if (drops)
4211 			ath_tx_tid_bar_suspend(sc, atid);
4212 
4213 		/*
4214 		 * Finish up by sending a BAR if required and freeing
4215 		 * the frames outside of the TX lock.
4216 		 */
4217 		goto finish_send_bar;
4218 	}
4219 
4220 	/*
4221 	 * XXX for now, use the first frame in the aggregate for
4222 	 * XXX rate control completion; it's at least consistent.
4223 	 */
4224 	pktlen = bf_first->bf_state.bfs_pktlen;
4225 
4226 	/*
4227 	 * Handle errors first!
4228 	 *
4229 	 * Here, handle _any_ error as a "exceeded retries" error.
4229 	 * Here, handle _any_ error as an "exceeded retries" error.
4231 	 * it'll have to be expanded.
4232 	 */
4233 #if 0
4234 	if (ts.ts_status & HAL_TXERR_XRETRY) {
4235 #endif
4236 	if (ts.ts_status != 0) {
4237 		ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
4238 		ath_tx_comp_aggr_error(sc, bf_first, atid);
4239 		return;
4240 	}
4241 
4242 	tap = ath_tx_get_tx_tid(an, tid);
4243 
4244 	/*
4245 	 * extract starting sequence and block-ack bitmap
4246 	 */
4247 	/* XXX endian-ness of seq_st, ba? */
4248 	seq_st = ts.ts_seqnum;
4249 	hasba = !! (ts.ts_flags & HAL_TX_BA);
4250 	tx_ok = (ts.ts_status == 0);
4251 	isaggr = bf_first->bf_state.bfs_aggr;
4252 	ba[0] = ts.ts_ba_low;
4253 	ba[1] = ts.ts_ba_high;
4254 
4255 	/*
4256 	 * Copy the TX completion status and the rate control
4257 	 * series from the first descriptor, as it may be freed
4258 	 * before the rate control code can get its grubby fingers
4259 	 * into things.
4260 	 */
4261 	memcpy(rc, bf_first->bf_state.bfs_rc, sizeof(rc));
4262 
4263 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
4264 	    "%s: txa_start=%d, tx_ok=%d, status=%.8x, flags=%.8x, "
4265 	    "isaggr=%d, seq_st=%d, hasba=%d, ba=%.8x, %.8x\n",
4266 	    __func__, tap->txa_start, tx_ok, ts.ts_status, ts.ts_flags,
4267 	    isaggr, seq_st, hasba, ba[0], ba[1]);
4268 
4269 	/* Occasionally, the MAC sends a tx status for the wrong TID. */
4270 	if (tid != ts.ts_tid) {
4271 		device_printf(sc->sc_dev, "%s: tid %d != hw tid %d\n",
4272 		    __func__, tid, ts.ts_tid);
4273 		tx_ok = 0;
4274 	}
4275 
4276 	/* AR5416 BA bug; this requires an interface reset */
4277 	if (isaggr && tx_ok && (! hasba)) {
4278 		device_printf(sc->sc_dev,
4279 		    "%s: AR5416 bug: hasba=%d; txok=%d, isaggr=%d, "
4280 		    "seq_st=%d\n",
4281 		    __func__, hasba, tx_ok, isaggr, seq_st);
4282 		/* XXX TODO: schedule an interface reset */
4283 #ifdef ATH_DEBUG
4284 		ath_printtxbuf(sc, bf_first,
4285 		    sc->sc_ac2q[atid->ac]->axq_qnum, 0, 0);
4286 #endif
4287 	}
4288 
4289 	/*
4290 	 * Walk the list of frames, figure out which ones were correctly
4291 	 * sent and which weren't.
4292 	 */
4293 	bf = bf_first;
4294 	nf = bf_first->bf_state.bfs_nframes;
4295 
4296 	/* bf_first is going to be invalid once this list is walked */
4297 	bf_first = NULL;
4298 
4299 	/*
4300 	 * Walk the list of completed frames and determine
4301 	 * which need to be completed and which need to be
4302 	 * retransmitted.
4303 	 *
4304 	 * For completed frames, the completion functions need
4305 	 * to be called at the end of this function as the last
4306 	 * node reference may free the node.
4307 	 *
4308 	 * Finally, since the TXQ lock can't be held during the
4309 	 * completion callback (to avoid lock recursion),
4310 	 * the completion calls have to be done outside of the
4311 	 * lock.
4312 	 */
4313 	while (bf) {
4314 		nframes++;
4315 		ba_index = ATH_BA_INDEX(seq_st,
4316 		    SEQNO(bf->bf_state.bfs_seqno));
4317 		bf_next = bf->bf_next;
4318 		bf->bf_next = NULL;	/* Remove it from the aggr list */
4319 
4320 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
4321 		    "%s: checking bf=%p seqno=%d; ack=%d\n",
4322 		    __func__, bf, SEQNO(bf->bf_state.bfs_seqno),
4323 		    ATH_BA_ISSET(ba, ba_index));
4324 
4325 		if (tx_ok && ATH_BA_ISSET(ba, ba_index)) {
4326 			sc->sc_stats.ast_tx_aggr_ok++;
4327 			ath_tx_update_baw(sc, an, atid, bf);
4328 			bf->bf_state.bfs_dobaw = 0;
4329 			if (! bf->bf_state.bfs_addedbaw)
4330 				device_printf(sc->sc_dev,
4331 				    "%s: wasn't added: seqno %d\n",
4332 				    __func__, SEQNO(bf->bf_state.bfs_seqno));
4333 			bf->bf_next = NULL;
4334 			TAILQ_INSERT_TAIL(&bf_cq, bf, bf_list);
4335 		} else {
4336 			sc->sc_stats.ast_tx_aggr_fail++;
4337 			if (ath_tx_retry_subframe(sc, bf, &bf_q)) {
4338 				drops++;
4339 				bf->bf_next = NULL;
4340 				TAILQ_INSERT_TAIL(&bf_cq, bf, bf_list);
4341 			}
4342 			nbad++;
4343 		}
4344 		bf = bf_next;
4345 	}
4346 
4347 	/*
4348 	 * Now that the BAW updates have been done, unlock
4349 	 *
4350 	 * txseq is grabbed before the lock is released so we
4351 	 * have a consistent view of what -was- in the BAW.
4352 	 * Anything after this point will not yet have been
4353 	 * TXed.
4354 	 */
4355 	txseq = tap->txa_start;
4356 	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
4357 
4358 	if (nframes != nf)
4359 		device_printf(sc->sc_dev,
4360 		    "%s: num frames seen=%d; bf nframes=%d\n",
4361 		    __func__, nframes, nf);
4362 
4363 	/*
4364 	 * Now we know how many frames were bad, call the rate
4365 	 * control code.
4366 	 */
4367 	if (fail == 0)
4368 		ath_tx_update_ratectrl(sc, ni, rc, &ts, pktlen, nframes,
4369 		    nbad);
4370 
4371 	/*
4372 	 * send bar if we dropped any frames
4373 	 */
4374 	if (drops) {
4375 		/* Suspend the TX queue and get ready to send the BAR */
4376 		ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
4377 		ath_tx_tid_bar_suspend(sc, atid);
4378 		ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
4379 	}
4380 
4381 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
4382 	    "%s: txa_start now %d\n", __func__, tap->txa_start);
4383 
4384 	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
4385 
4386 	/* Prepend all frames to the beginning of the queue */
4387 	while ((bf = TAILQ_LAST(&bf_q, ath_bufhead_s)) != NULL) {
4388 		TAILQ_REMOVE(&bf_q, bf, bf_list);
4389 		ATH_TID_INSERT_HEAD(atid, bf, bf_list);
4390 	}
4391 
4392 	/*
4393 	 * Reschedule to grab some further frames.
4394 	 */
4395 	ath_tx_tid_sched(sc, atid);
4396 
4397 	/*
4398 	 * If the queue is filtered, re-schedule as required.
4399 	 *
4400 	 * This is required as there may be a subsequent TX descriptor
4401 	 * for this end-node that has CLRDMASK set, so it's quite possible
4402 	 * that a filtered frame will be followed by a non-filtered
4403 	 * (complete or otherwise) frame.
4404 	 *
4405 	 * XXX should we do this before we complete the frame?
4406 	 */
4407 	if (atid->isfiltered)
4408 		ath_tx_tid_filt_comp_complete(sc, atid);
4409 
4410 finish_send_bar:
4411 
4412 	/*
4413 	 * Send BAR if required
4414 	 */
4415 	if (ath_tx_tid_bar_tx_ready(sc, atid))
4416 		ath_tx_tid_bar_tx(sc, atid);
4417 
4418 	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
4419 
4420 	/* Do deferred completion */
4421 	while ((bf = TAILQ_FIRST(&bf_cq)) != NULL) {
4422 		TAILQ_REMOVE(&bf_cq, bf, bf_list);
4423 		ath_tx_default_comp(sc, bf, 0);
4424 	}
4425 }
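
/*
 * Editorial sketch, not driver code: how the block-ack walk in
 * ath_tx_aggr_comp_aggr() above maps a sub-frame's sequence number onto
 * the 64-bit BA bitmap reported by the hardware.  The bit offset is the
 * sequence-space distance from the BA starting sequence (seq_st),
 * wrapped to the 12-bit range; a set bit means the receiver acknowledged
 * that sub-frame.  The function name and the two-word layout are
 * simplified assumptions matching the ba[0]/ba[1] split used above.
 */
#if 0
static int
ex_ba_is_acked(uint32_t ba_low, uint32_t ba_high, int seq_st, int seqno)
{
	int idx;

	/* Distance from the BA starting sequence, modulo 4096. */
	idx = (seqno - seq_st) & 4095;
	if (idx >= 64)
		return (0);	/* outside the reported bitmap */
	if (idx < 32)
		return ((ba_low >> idx) & 1);
	return ((ba_high >> (idx - 32)) & 1);
}
#endif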
4426 
4427 /*
4428  * Handle completion of unaggregated frames in an ADDBA
4429  * session.
4430  *
4431  * Fail is set to 1 if the entry is being freed via a call to
4432  * ath_tx_draintxq().
4433  */
4434 static void
4435 ath_tx_aggr_comp_unaggr(struct ath_softc *sc, struct ath_buf *bf, int fail)
4436 {
4437 	struct ieee80211_node *ni = bf->bf_node;
4438 	struct ath_node *an = ATH_NODE(ni);
4439 	int tid = bf->bf_state.bfs_tid;
4440 	struct ath_tid *atid = &an->an_tid[tid];
4441 	struct ath_tx_status ts;
4442 	int drops = 0;
4443 
4444 	/*
4445 	 * Take a copy of this; filtering/cloning the frame may free the
4446 	 * bf pointer.
4447 	 */
4448 	ts = bf->bf_status.ds_txstat;
4449 
4450 	/*
4451 	 * Update rate control status here, before we possibly
4452 	 * punt to retry or cleanup.
4453 	 *
4454 	 * Do it outside of the TXQ lock.
4455 	 */
4456 	if (fail == 0 && ((bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) == 0))
4457 		ath_tx_update_ratectrl(sc, ni, bf->bf_state.bfs_rc,
4458 		    &bf->bf_status.ds_txstat,
4459 		    bf->bf_state.bfs_pktlen,
4460 		    1, (ts.ts_status == 0) ? 0 : 1);
4461 
4462 	/*
4463 	 * This is called early so atid->hwq_depth can be tracked.
4464 	 * This unfortunately means that the lock is released and re-grabbed
4465 	 * during retry and cleanup.  That's rather inefficient.
4466 	 */
4467 	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
4468 
4469 	if (tid == IEEE80211_NONQOS_TID)
4470 		device_printf(sc->sc_dev, "%s: TID=16!\n", __func__);
4471 
4472 	DPRINTF(sc, ATH_DEBUG_SW_TX,
4473 	    "%s: bf=%p: tid=%d, hwq_depth=%d, seqno=%d\n",
4474 	    __func__, bf, bf->bf_state.bfs_tid, atid->hwq_depth,
4475 	    SEQNO(bf->bf_state.bfs_seqno));
4476 
4477 	atid->hwq_depth--;
4478 	if (atid->hwq_depth < 0)
4479 		device_printf(sc->sc_dev, "%s: hwq_depth < 0: %d\n",
4480 		    __func__, atid->hwq_depth);
4481 
4482 	/*
4483 	 * If the TID is filtered, handle completing the filter
4484 	 * transition before potentially kicking it to the cleanup
4485 	 * function.
4486 	 */
4487 	if (atid->isfiltered)
4488 		ath_tx_tid_filt_comp_complete(sc, atid);
4489 
4490 	/*
4491 	 * If a cleanup is in progress, punt to comp_cleanup
4492 	 * rather than handling it here.  It is then responsible
4493 	 * for cleaning up, calling the completion
4494 	 * function in net80211, etc.
4495 	 */
4496 	if (atid->cleanup_inprogress) {
4497 		if (atid->isfiltered)
4498 			device_printf(sc->sc_dev,
4499 			    "%s: isfiltered=1, normal_comp?\n",
4500 			    __func__);
4501 		ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
4502 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: cleanup_unaggr\n",
4503 		    __func__);
4504 		ath_tx_comp_cleanup_unaggr(sc, bf);
4505 		return;
4506 	}
4507 
4508 	/*
4509 	 * XXX TODO: how does cleanup, BAR and filtered frame handling
4510 	 * overlap?
4511 	 *
4512 	 * If the frame is filtered OR if it's any failure but
4513 	 * the TID is filtered, the frame must be added to the
4514 	 * filtered frame list.
4515 	 *
4516 	 * However - a busy buffer can't be added to the filtered
4517 	 * list as it will end up being recycled without having
4518 	 * been made available for the hardware.
4519 	 */
4520 	if ((ts.ts_status & HAL_TXERR_FILT) ||
4521 	    (ts.ts_status != 0 && atid->isfiltered)) {
4522 		int freeframe;
4523 
4524 		if (fail != 0)
4525 			device_printf(sc->sc_dev,
4526 			    "%s: isfiltered=1, fail=%d\n",
4527 			    __func__,
4528 			    fail);
4529 		freeframe = ath_tx_tid_filt_comp_single(sc, atid, bf);
4530 		if (freeframe) {
4531 			/* Remove from BAW */
4532 			if (bf->bf_state.bfs_addedbaw)
4533 				drops++;
4534 			if (bf->bf_state.bfs_dobaw) {
4535 				ath_tx_update_baw(sc, an, atid, bf);
4536 				if (! bf->bf_state.bfs_addedbaw)
4537 					device_printf(sc->sc_dev,
4538 					    "%s: wasn't added: seqno %d\n",
4539 					    __func__, SEQNO(bf->bf_state.bfs_seqno));
4540 			}
4541 			bf->bf_state.bfs_dobaw = 0;
4542 		}
4543 
4544 		/*
4545 		 * If the frame couldn't be filtered, treat it as a drop and
4546 		 * prepare to send a BAR.
4547 		 */
4548 		if (freeframe && drops)
4549 			ath_tx_tid_bar_suspend(sc, atid);
4550 
4551 		/*
4552 		 * Send BAR if required
4553 		 */
4554 		if (ath_tx_tid_bar_tx_ready(sc, atid))
4555 			ath_tx_tid_bar_tx(sc, atid);
4556 
4557 		ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
4558 		/*
4559 		 * If freeframe is set, then the frame couldn't be
4560 		 * cloned and bf is still valid.  Just complete/free it.
4561 		 */
4562 		if (freeframe)
4563 			ath_tx_default_comp(sc, bf, fail);
4564 
4565 
4566 		return;
4567 	}
4568 	/*
4569 	 * Don't bother with the retry check if all frames
4570 	 * are being failed (e.g. during queue deletion).
4571 	 */
4572 #if 0
4573 	if (fail == 0 && ts->ts_status & HAL_TXERR_XRETRY) {
4574 #endif
4575 	if (fail == 0 && ts.ts_status != 0) {
4576 		ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
4577 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: retry_unaggr\n",
4578 		    __func__);
4579 		ath_tx_aggr_retry_unaggr(sc, bf);
4580 		return;
4581 	}
4582 
4583 	/* Success? Complete */
4584 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: TID=%d, seqno %d\n",
4585 	    __func__, tid, SEQNO(bf->bf_state.bfs_seqno));
4586 	if (bf->bf_state.bfs_dobaw) {
4587 		ath_tx_update_baw(sc, an, atid, bf);
4588 		bf->bf_state.bfs_dobaw = 0;
4589 		if (! bf->bf_state.bfs_addedbaw)
4590 			device_printf(sc->sc_dev,
4591 			    "%s: wasn't added: seqno %d\n",
4592 			    __func__, SEQNO(bf->bf_state.bfs_seqno));
4593 	}
4594 
4595 	/*
4596 	 * If the queue is filtered, re-schedule as required.
4597 	 *
4598 	 * This is required as there may be a subsequent TX descriptor
4599 	 * for this end-node that has CLRDMASK set, so it's quite possible
4600 	 * that a filtered frame will be followed by a non-filtered
4601 	 * (complete or otherwise) frame.
4602 	 *
4603 	 * XXX should we do this before we complete the frame?
4604 	 */
4605 	if (atid->isfiltered)
4606 		ath_tx_tid_filt_comp_complete(sc, atid);
4607 
4608 	/*
4609 	 * Send BAR if required
4610 	 */
4611 	if (ath_tx_tid_bar_tx_ready(sc, atid))
4612 		ath_tx_tid_bar_tx(sc, atid);
4613 
4614 	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
4615 
4616 	ath_tx_default_comp(sc, bf, fail);
4617 	/* bf is freed at this point */
4618 }
4619 
4620 void
4621 ath_tx_aggr_comp(struct ath_softc *sc, struct ath_buf *bf, int fail)
4622 {
4623 	if (bf->bf_state.bfs_aggr)
4624 		ath_tx_aggr_comp_aggr(sc, bf, fail);
4625 	else
4626 		ath_tx_aggr_comp_unaggr(sc, bf, fail);
4627 }
4628 
4629 /*
4630  * Schedule some packets from the given node/TID to the hardware.
4631  *
4632  * This is the aggregate version.
4633  */
4634 void
4635 ath_tx_tid_hw_queue_aggr(struct ath_softc *sc, struct ath_node *an,
4636     struct ath_tid *tid)
4637 {
4638 	struct ath_buf *bf;
4639 	struct ath_txq *txq = sc->sc_ac2q[tid->ac];
4640 	struct ieee80211_tx_ampdu *tap;
4641 	ATH_AGGR_STATUS status;
4642 	ath_bufhead bf_q;
4643 
4644 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: tid=%d\n", __func__, tid->tid);
4645 	ATH_TXQ_LOCK_ASSERT(txq);
4646 
4647 	tap = ath_tx_get_tx_tid(an, tid->tid);
4648 
4649 	if (tid->tid == IEEE80211_NONQOS_TID)
4650 		device_printf(sc->sc_dev, "%s: called for TID=NONQOS_TID?\n",
4651 		    __func__);
4652 
4653 	for (;;) {
4654 		status = ATH_AGGR_DONE;
4655 
4656 		/*
4657 		 * If the upper layer has paused the TID, don't
4658 		 * queue any further packets.
4659 		 *
4660 		 * This can also occur from the completion task because
4661 		 * of packet loss; but as it's serialised with this code,
4662 		 * it won't "appear" halfway through queuing packets.
4663 		 */
4664 		if (tid->paused)
4665 			break;
4666 
4667 		bf = ATH_TID_FIRST(tid);
4668 		if (bf == NULL) {
4669 			break;
4670 		}
4671 
4672 		/*
4673 		 * If the packet doesn't fall within the BAW (e.g. a NULL
4674 		 * data frame), schedule it directly; continue.
4675 		 */
4676 		if (! bf->bf_state.bfs_dobaw) {
4677 			DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
4678 			    "%s: non-baw packet\n",
4679 			    __func__);
4680 			ATH_TID_REMOVE(tid, bf, bf_list);
4681 
4682 			if (bf->bf_state.bfs_nframes > 1)
4683 				device_printf(sc->sc_dev,
4684 				    "%s: aggr=%d, nframes=%d\n",
4685 				    __func__,
4686 				    bf->bf_state.bfs_aggr,
4687 				    bf->bf_state.bfs_nframes);
4688 
4689 			/*
4690 			 * This shouldn't happen - such frames shouldn't
4691 			 * ever have been queued as an aggregate in the
4692 			 * first place.  However, make sure the fields
4693 			 * are correctly setup just to be totally sure.
4694 			 */
4695 			bf->bf_state.bfs_aggr = 0;
4696 			bf->bf_state.bfs_nframes = 1;
4697 
4698 			/* Update CLRDMASK just before this frame is queued */
4699 			ath_tx_update_clrdmask(sc, tid, bf);
4700 
4701 			ath_tx_do_ratelookup(sc, bf);
4702 			ath_tx_calc_duration(sc, bf);
4703 			ath_tx_calc_protection(sc, bf);
4704 			ath_tx_set_rtscts(sc, bf);
4705 			ath_tx_rate_fill_rcflags(sc, bf);
4706 			ath_tx_setds(sc, bf);
4707 			ath_hal_clr11n_aggr(sc->sc_ah, bf->bf_desc);
4708 
4709 			sc->sc_aggr_stats.aggr_nonbaw_pkt++;
4710 
4711 			/* Queue the packet; continue */
4712 			goto queuepkt;
4713 		}
4714 
4715 		TAILQ_INIT(&bf_q);
4716 
4717 		/*
4718 		 * Do a rate control lookup on the first frame in the
4719 		 * list. The rate control code needs that to occur
4720 		 * before it can determine whether to TX.
4721 		 * It's inaccurate because the rate control code doesn't
4722 		 * really "do" aggregate lookups, so it only considers
4723 		 * the size of the first frame.
4724 		 */
4725 		ath_tx_do_ratelookup(sc, bf);
4726 		bf->bf_state.bfs_rc[3].rix = 0;
4727 		bf->bf_state.bfs_rc[3].tries = 0;
4728 
4729 		ath_tx_calc_duration(sc, bf);
4730 		ath_tx_calc_protection(sc, bf);
4731 
4732 		ath_tx_set_rtscts(sc, bf);
4733 		ath_tx_rate_fill_rcflags(sc, bf);
4734 
4735 		status = ath_tx_form_aggr(sc, an, tid, &bf_q);
4736 
4737 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
4738 		    "%s: ath_tx_form_aggr() status=%d\n", __func__, status);
4739 
4740 		/*
4741 		 * No frames to be picked up - out of BAW
4742 		 */
4743 		if (TAILQ_EMPTY(&bf_q))
4744 			break;
4745 
4746 		/*
4747 		 * This assumes that the descriptor list in the ath_bufhead
4748 		 * are already linked together via bf_next pointers.
4749 		 */
4750 		bf = TAILQ_FIRST(&bf_q);
4751 
4752 		if (status == ATH_AGGR_8K_LIMITED)
4753 			sc->sc_aggr_stats.aggr_rts_aggr_limited++;
4754 
4755 		/*
4756 		 * If it's the only frame, send it as a non-aggregate;
4757 		 * assume that ath_tx_form_aggr() has checked
4758 		 * whether it's in the BAW and added it appropriately.
4759 		 */
4760 		if (bf->bf_state.bfs_nframes == 1) {
4761 			DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
4762 			    "%s: single-frame aggregate\n", __func__);
4763 
4764 			/* Update CLRDMASK just before this frame is queued */
4765 			ath_tx_update_clrdmask(sc, tid, bf);
4766 
4767 			bf->bf_state.bfs_aggr = 0;
4768 			bf->bf_state.bfs_ndelim = 0;
4769 			ath_tx_setds(sc, bf);
4770 			ath_hal_clr11n_aggr(sc->sc_ah, bf->bf_desc);
4771 			if (status == ATH_AGGR_BAW_CLOSED)
4772 				sc->sc_aggr_stats.aggr_baw_closed_single_pkt++;
4773 			else
4774 				sc->sc_aggr_stats.aggr_single_pkt++;
4775 		} else {
4776 			DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
4777 			    "%s: multi-frame aggregate: %d frames, "
4778 			    "length %d\n",
4779 			     __func__, bf->bf_state.bfs_nframes,
4780 			    bf->bf_state.bfs_al);
4781 			bf->bf_state.bfs_aggr = 1;
4782 			sc->sc_aggr_stats.aggr_pkts[bf->bf_state.bfs_nframes]++;
4783 			sc->sc_aggr_stats.aggr_aggr_pkt++;
4784 
4785 			/* Update CLRDMASK just before this frame is queued */
4786 			ath_tx_update_clrdmask(sc, tid, bf);
4787 
4788 			/*
4789 			 * Calculate the duration/protection as required.
4790 			 */
4791 			ath_tx_calc_duration(sc, bf);
4792 			ath_tx_calc_protection(sc, bf);
4793 
4794 			/*
4795 			 * Update the rate and rtscts information based on the
4796 			 * rate decision made by the rate control code;
4797 			 * the first frame in the aggregate needs it.
4798 			 */
4799 			ath_tx_set_rtscts(sc, bf);
4800 
4801 			/*
4802 			 * Setup the relevant descriptor fields
4803 			 * for aggregation. The first descriptor
4804 			 * already points to the rest in the chain.
4805 			 */
4806 			ath_tx_setds_11n(sc, bf);
4807 
4808 		}
4809 	queuepkt:
4810 		//txq = bf->bf_state.bfs_txq;
4811 
4812 		/* Set completion handler, multi-frame aggregate or not */
4813 		bf->bf_comp = ath_tx_aggr_comp;
4814 
4815 		if (bf->bf_state.bfs_tid == IEEE80211_NONQOS_TID)
4816 		    device_printf(sc->sc_dev, "%s: TID=16?\n", __func__);
4817 
4818 		/* Punt to txq */
4819 		ath_tx_handoff(sc, txq, bf);
4820 
4821 		/* Track outstanding buffer count to hardware */
4822 		/* aggregates are "one" buffer */
4823 		tid->hwq_depth++;
4824 
4825 		/*
4826 		 * Break out if ath_tx_form_aggr() indicated
4827 		 * there can't be any further progress (eg BAW is full.)
4828 		 * Checking for an empty txq is done above.
4829 		 *
4830 		 * XXX locking on txq here?
4831 		 */
4832 		if (txq->axq_aggr_depth >= sc->sc_hwq_limit ||
4833 		    status == ATH_AGGR_BAW_CLOSED)
4834 			break;
4835 	}
4836 }
4837 
4838 /*
4839  * Schedule some packets from the given node/TID to the hardware.
4840  */
4841 void
4842 ath_tx_tid_hw_queue_norm(struct ath_softc *sc, struct ath_node *an,
4843     struct ath_tid *tid)
4844 {
4845 	struct ath_buf *bf;
4846 	struct ath_txq *txq = sc->sc_ac2q[tid->ac];
4847 
4848 	DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: node %p: TID %d: called\n",
4849 	    __func__, an, tid->tid);
4850 
4851 	ATH_TID_LOCK_ASSERT(sc, tid);
4852 
4853 	/* Check whether AMPDU is pending or running; if so, complain */
4854 	if (ath_tx_ampdu_pending(sc, an, tid->tid))
4855 		device_printf(sc->sc_dev, "%s: tid=%d, ampdu pending?\n",
4856 		    __func__, tid->tid);
4857 	if (ath_tx_ampdu_running(sc, an, tid->tid))
4858 		device_printf(sc->sc_dev, "%s: tid=%d, ampdu running?\n",
4859 		    __func__, tid->tid);
4860 
4861 	for (;;) {
4862 
4863 		/*
4864 		 * If the upper layers have paused the TID, don't
4865 		 * queue any further packets.
4866 		 */
4867 		if (tid->paused)
4868 			break;
4869 
4870 		bf = ATH_TID_FIRST(tid);
4871 		if (bf == NULL) {
4872 			break;
4873 		}
4874 
4875 		ATH_TID_REMOVE(tid, bf, bf_list);
4876 
4877 		KASSERT(txq == bf->bf_state.bfs_txq, ("txqs not equal!\n"));
4878 
4879 		/* Sanity check! */
4880 		if (tid->tid != bf->bf_state.bfs_tid) {
4881 			device_printf(sc->sc_dev, "%s: bfs_tid %d !="
4882 			    " tid %d\n",
4883 			    __func__, bf->bf_state.bfs_tid, tid->tid);
4884 		}
4885 		/* Normal completion handler */
4886 		bf->bf_comp = ath_tx_normal_comp;
4887 
4888 		/*
4889 		 * Override this for now, until the non-aggregate
4890 		 * completion handler correctly handles software retransmits.
4891 		 */
4892 		bf->bf_state.bfs_txflags |= HAL_TXDESC_CLRDMASK;
4893 
4894 		/* Update CLRDMASK just before this frame is queued */
4895 		ath_tx_update_clrdmask(sc, tid, bf);
4896 
4897 		/* Program descriptors + rate control */
4898 		ath_tx_do_ratelookup(sc, bf);
4899 		ath_tx_calc_duration(sc, bf);
4900 		ath_tx_calc_protection(sc, bf);
4901 		ath_tx_set_rtscts(sc, bf);
4902 		ath_tx_rate_fill_rcflags(sc, bf);
4903 		ath_tx_setds(sc, bf);
4904 
4905 		/* Track outstanding buffer count to hardware */
4906 		/* aggregates are "one" buffer */
4907 		tid->hwq_depth++;
4908 
4909 		/* Punt to hardware or software txq */
4910 		ath_tx_handoff(sc, txq, bf);
4911 	}
4912 }
4913 
4914 /*
4915  * Schedule some packets to the given hardware queue.
4916  *
4917  * This function walks the list of TIDs (ie, ath_node TIDs
4918  * with queued traffic) and attempts to schedule traffic
4919  * from them.
4920  *
4921  * TID scheduling is implemented as a FIFO, with TIDs being
4922  * added to the end of the queue after some frames have been
4923  * scheduled.
4924  */
4925 void
4926 ath_txq_sched(struct ath_softc *sc, struct ath_txq *txq)
4927 {
4928 	struct ath_tid *tid, *next, *last;
4929 
4930 	ATH_TXQ_LOCK_ASSERT(txq);
4931 
4932 	/*
4933 	 * Don't schedule if the hardware queue is busy.
4934 	 * This (hopefully) gives some more time to aggregate
4935 	 * some packets in the aggregation queue.
4936 	 */
4937 	if (txq->axq_aggr_depth >= sc->sc_hwq_limit) {
4938 		sc->sc_aggr_stats.aggr_sched_nopkt++;
4939 		return;
4940 	}
4941 
4942 	last = TAILQ_LAST(&txq->axq_tidq, axq_t_s);
4943 
4944 	TAILQ_FOREACH_SAFE(tid, &txq->axq_tidq, axq_qelem, next) {
4945 		/*
4946 		 * Suspend paused queues here; they'll be resumed
4947 		 * once the addba completes or times out.
4948 		 */
4949 		DPRINTF(sc, ATH_DEBUG_SW_TX, "%s: tid=%d, paused=%d\n",
4950 		    __func__, tid->tid, tid->paused);
4951 		ath_tx_tid_unsched(sc, tid);
4952 		if (tid->paused) {
4953 			continue;
4954 		}
4955 		if (ath_tx_ampdu_running(sc, tid->an, tid->tid))
4956 			ath_tx_tid_hw_queue_aggr(sc, tid->an, tid);
4957 		else
4958 			ath_tx_tid_hw_queue_norm(sc, tid->an, tid);
4959 
4960 		/* Not empty? Re-schedule */
4961 		if (tid->axq_depth != 0)
4962 			ath_tx_tid_sched(sc, tid);
4963 
4964 		/* Give the software queue time to aggregate more packets */
4965 		if (txq->axq_aggr_depth >= sc->sc_hwq_limit) {
4966 			break;
4967 		}
4968 
4969 		/*
4970 		 * If this was the last entry on the original list, stop.
4971 		 * Otherwise nodes that have been rescheduled onto the end
4972 		 * of the TID FIFO list will just keep being rescheduled.
4973 		 */
4974 		if (tid == last)
4975 			break;
4976 	}
4977 }
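
/*
 * Editorial sketch, not driver code: the FIFO TID scheduling described
 * above, in miniature.  TIDs are taken from the front of the active
 * list, serviced, and re-appended if they still have traffic; the pass
 * stops at whichever entry was last when the pass began, so rescheduled
 * TIDs can't keep the loop spinning.  All ex_* names below are
 * hypothetical.
 */
#if 0
struct ex_tid {
	TAILQ_ENTRY(ex_tid) et_link;
};
TAILQ_HEAD(ex_tidq, ex_tid);

static void
ex_sched_pass(struct ex_tidq *q)
{
	struct ex_tid *t, *next, *last;

	last = TAILQ_LAST(q, ex_tidq);
	TAILQ_FOREACH_SAFE(t, q, et_link, next) {
		TAILQ_REMOVE(q, t, et_link);
		ex_service_tid(t);		/* hypothetical */
		if (ex_tid_has_traffic(t))	/* hypothetical */
			TAILQ_INSERT_TAIL(q, t, et_link);
		if (t == last)
			break;
	}
}
#endif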
4978 
4979 /*
4980  * TX addba handling
4981  */
4982 
4983 /*
4984  * Return net80211 TID struct pointer, or NULL for none
4985  */
4986 struct ieee80211_tx_ampdu *
4987 ath_tx_get_tx_tid(struct ath_node *an, int tid)
4988 {
4989 	struct ieee80211_node *ni = &an->an_node;
4990 	struct ieee80211_tx_ampdu *tap;
4991 
4992 	if (tid == IEEE80211_NONQOS_TID)
4993 		return NULL;
4994 
4995 	tap = &ni->ni_tx_ampdu[tid];
4996 	return tap;
4997 }
4998 
4999 /*
5000  * Is AMPDU-TX running?
5001  */
5002 static int
5003 ath_tx_ampdu_running(struct ath_softc *sc, struct ath_node *an, int tid)
5004 {
5005 	struct ieee80211_tx_ampdu *tap;
5006 
5007 	if (tid == IEEE80211_NONQOS_TID)
5008 		return 0;
5009 
5010 	tap = ath_tx_get_tx_tid(an, tid);
5011 	if (tap == NULL)
5012 		return 0;	/* Not valid; default to not running */
5013 
5014 	return !! (tap->txa_flags & IEEE80211_AGGR_RUNNING);
5015 }
5016 
5017 /*
5018  * Is AMPDU-TX negotiation pending?
5019  */
5020 static int
5021 ath_tx_ampdu_pending(struct ath_softc *sc, struct ath_node *an, int tid)
5022 {
5023 	struct ieee80211_tx_ampdu *tap;
5024 
5025 	if (tid == IEEE80211_NONQOS_TID)
5026 		return 0;
5027 
5028 	tap = ath_tx_get_tx_tid(an, tid);
5029 	if (tap == NULL)
5030 		return 0;	/* Not valid; default to not pending */
5031 
5032 	return !! (tap->txa_flags & IEEE80211_AGGR_XCHGPEND);
5033 }
5034 
5035 /*
5036  * Is AMPDU-TX pending for the given TID?
5037  */
5038 
5039 
5040 /*
5041  * Method to handle sending an ADDBA request.
5042  *
5043  * We tap this so the relevant flags can be set to pause the TID
5044  * whilst waiting for the response.
5045  *
5046  * XXX there's no timeout handler we can override?
5047  */
5048 int
5049 ath_addba_request(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap,
5050     int dialogtoken, int baparamset, int batimeout)
5051 {
5052 	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
5053 	int tid = tap->txa_tid;
5054 	struct ath_node *an = ATH_NODE(ni);
5055 	struct ath_tid *atid = &an->an_tid[tid];
5056 
5057 	/*
5058 	 * XXX danger Will Robinson!
5059 	 *
5060 	 * Although the taskqueue may be running and scheduling some more
5061 	 * packets, these should all be _before_ the addba sequence number.
5062 	 * However, net80211 will keep self-assigning sequence numbers
5063 	 * until addba has been negotiated.
5064 	 *
5065 	 * In the past, these packets would be "paused" (which still works
5066 	 * fine, as they're being scheduled to the driver in the same
5067 	 * serialised method which is calling the addba request routine)
5068 	 * and when the aggregation session begins, they'll be dequeued
5069 	 * as aggregate packets and added to the BAW. However, now there's
5070 	 * a "bf->bf_state.bfs_dobaw" flag, and this isn't set for these
5071 	 * packets. Thus they never get included in the BAW tracking and
5072 	 * this can cause the initial burst of packets after the addba
5073 	 * negotiation to "hang", as they quickly fall outside the BAW.
5074 	 *
5075 	 * The "eventual" solution should be to tag these packets with
5076 	 * dobaw. Although net80211 has given us a sequence number,
5077 	 * it'll be "after" the left edge of the BAW and thus it'll
5078 	 * fall within it.
5079 	 */
5080 	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
5081 	/*
5082 	 * This is a bit annoying.  Until net80211 HT code inherits some
5083 	 * (any) locking, we may have this called in parallel BUT only
5084 	 * one response/timeout will be called.  Grr.
5085 	 */
5086 	if (atid->addba_tx_pending == 0) {
5087 		ath_tx_tid_pause(sc, atid);
5088 		atid->addba_tx_pending = 1;
5089 	}
5090 	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
5091 
5092 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
5093 	    "%s: called; dialogtoken=%d, baparamset=%d, batimeout=%d\n",
5094 	    __func__, dialogtoken, baparamset, batimeout);
5095 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
5096 	    "%s: txa_start=%d, ni_txseqs=%d\n",
5097 	    __func__, tap->txa_start, ni->ni_txseqs[tid]);
5098 
5099 	return sc->sc_addba_request(ni, tap, dialogtoken, baparamset,
5100 	    batimeout);
5101 }
5102 
5103 /*
5104  * Handle an ADDBA response.
5105  *
5106  * We unpause the queue so TX'ing can resume.
5107  *
5108  * Any packets TX'ed from this point should be "aggregate" (whether
5109  * aggregate or not) so the BAW is updated.
5110  *
5111  * Note! net80211 keeps self-assigning sequence numbers until
5112  * ampdu is negotiated. This means the initially-negotiated BAW left
5113  * edge won't match the ni->ni_txseq.
5114  *
5115  * So, being very dirty, the BAW left edge is "slid" here to match
5116  * ni->ni_txseq.
5117  *
5118  * What likely SHOULD happen is that all packets subsequent to the
5119  * addba request should be tagged as aggregate and queued as non-aggregate
5120  * frames; thus updating the BAW. For now though, I'll just slide the
5121  * window.
5122  */
5123 int
5124 ath_addba_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap,
5125     int status, int code, int batimeout)
5126 {
5127 	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
5128 	int tid = tap->txa_tid;
5129 	struct ath_node *an = ATH_NODE(ni);
5130 	struct ath_tid *atid = &an->an_tid[tid];
5131 	int r;
5132 
5133 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
5134 	    "%s: called; status=%d, code=%d, batimeout=%d\n", __func__,
5135 	    status, code, batimeout);
5136 
5137 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
5138 	    "%s: txa_start=%d, ni_txseqs=%d\n",
5139 	    __func__, tap->txa_start, ni->ni_txseqs[tid]);
5140 
5141 	/*
5142 	 * Call this first, so the interface flags get updated
5143 	 * before the TID is unpaused. Otherwise a race condition
5144 	 * exists where the unpaused TID still doesn't yet have
5145 	 * IEEE80211_AGGR_RUNNING set.
5146 	 */
5147 	r = sc->sc_addba_response(ni, tap, status, code, batimeout);
5148 
5149 	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
5150 	atid->addba_tx_pending = 0;
5151 	/*
5152 	 * XXX dirty!
5153 	 * Slide the BAW left edge to wherever net80211 left it for us.
5154 	 * Read above for more information.
5155 	 */
5156 	tap->txa_start = ni->ni_txseqs[tid];
5157 	ath_tx_tid_resume(sc, atid);
5158 	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
5159 	return r;
5160 }
5161 
5162 
5163 /*
5164  * Stop ADDBA on a queue.
5165  *
5166  * This can be called whilst BAR TX is currently active on the queue,
5167  * so make sure this is unblocked before continuing.
5168  */
5169 void
5170 ath_addba_stop(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap)
5171 {
5172 	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
5173 	int tid = tap->txa_tid;
5174 	struct ath_node *an = ATH_NODE(ni);
5175 	struct ath_tid *atid = &an->an_tid[tid];
5176 
5177 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL, "%s: called\n", __func__);
5178 
5179 	/*
5180 	 * Pause TID traffic early, so there aren't any races.
5181 	 * Unblock the pending BAR-held traffic, if it's currently paused.
5182 	 */
5183 	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
5184 	ath_tx_tid_pause(sc, atid);
5185 	if (atid->bar_wait) {
5186 		/*
5187 		 * bar_unsuspend() expects bar_tx == 1, as it should be
5188 		 * called from the TX completion path.  This quietens
5189 		 * the warning.  It's cleared for us anyway.
5190 		 */
5191 		atid->bar_tx = 1;
5192 		ath_tx_tid_bar_unsuspend(sc, atid);
5193 	}
5194 	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
5195 
5196 	/* There's no need to hold the TXQ lock here */
5197 	sc->sc_addba_stop(ni, tap);
5198 
5199 	/*
5200 	 * ath_tx_tid_cleanup will resume the TID if possible, otherwise
5201 	 * it'll set the cleanup flag, and it'll be unpaused once
5202 	 * things have been cleaned up.
5203 	 */
5204 	ath_tx_tid_cleanup(sc, an, tid);
5205 }
5206 
5207 /*
5208  * Note: net80211 bar_timeout() doesn't call this function on BAR failure;
5209  * it simply tears down the aggregation session. Ew.
5210  *
5211  * It however will call ieee80211_ampdu_stop() which will call
5212  * ic->ic_addba_stop().
5213  *
5214  * XXX This uses a hard-coded max BAR count value; the whole
5215  * XXX BAR TX success or failure should be better handled!
5216  */
5217 void
5218 ath_bar_response(struct ieee80211_node *ni, struct ieee80211_tx_ampdu *tap,
5219     int status)
5220 {
5221 	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
5222 	int tid = tap->txa_tid;
5223 	struct ath_node *an = ATH_NODE(ni);
5224 	struct ath_tid *atid = &an->an_tid[tid];
5225 	int attempts = tap->txa_attempts;
5226 
5227 	DPRINTF(sc, ATH_DEBUG_SW_TX_BAR,
5228 	    "%s: called; tap=%p, atid=%p, txa_tid=%d, atid->tid=%d, status=%d, attempts=%d\n",
5229 	    __func__,
5230 	    tap,
5231 	    atid,
5232 	    tap->txa_tid,
5233 	    atid->tid,
5234 	    status,
5235 	    attempts);
5236 
5237 	/* Note: This may update the BAW details */
5238 	sc->sc_bar_response(ni, tap, status);
5239 
5240 	/* Unpause the TID */
5241 	/*
5242 	 * XXX if this is attempt=50, the TID will be downgraded
5243 	 * XXX to a non-aggregate session. So we must unpause the
5244 	 * XXX TID here or it'll never be done.
5245 	 *
5246 	 * Also, don't call it if bar_tx/bar_wait are 0; something
5247 	 * has beaten us to the punch? (XXX figure out what?)
5248 	 */
5249 	if (status == 0 || attempts == 50) {
5250 		ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
5251 		if (atid->bar_tx == 0 || atid->bar_wait == 0)
5252 			device_printf(sc->sc_dev,
5253 			    "%s: huh? bar_tx=%d, bar_wait=%d\n",
5254 			    __func__,
5255 			    atid->bar_tx, atid->bar_wait);
5256 		else
5257 			ath_tx_tid_bar_unsuspend(sc, atid);
5258 		ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
5259 	}
5260 }
5261 
5262 /*
5263  * This is called whenever the pending ADDBA request times out.
5264  * Unpause and reschedule the TID.
5265  */
5266 void
5267 ath_addba_response_timeout(struct ieee80211_node *ni,
5268     struct ieee80211_tx_ampdu *tap)
5269 {
5270 	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
5271 	int tid = tap->txa_tid;
5272 	struct ath_node *an = ATH_NODE(ni);
5273 	struct ath_tid *atid = &an->an_tid[tid];
5274 
5275 	DPRINTF(sc, ATH_DEBUG_SW_TX_CTRL,
5276 	    "%s: called; resuming\n", __func__);
5277 
5278 	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
5279 	atid->addba_tx_pending = 0;
5280 	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
5281 
5282 	/* Note: This updates the aggregation state back to 'pending' */
5283 	sc->sc_addba_response_timeout(ni, tap);
5284 
5285 	/* Unpause the TID, which reschedules it */
5286 	ATH_TXQ_LOCK(sc->sc_ac2q[atid->ac]);
5287 	ath_tx_tid_resume(sc, atid);
5288 	ATH_TXQ_UNLOCK(sc->sc_ac2q[atid->ac]);
5289 }
5290 
5291 #if 0
5292 /*
5293  * Check if a node is asleep or not.
5294  */
5295 static int
5296 ath_tx_node_is_asleep(struct ath_softc *sc, struct ath_node *an)
5297 {
5298 
5299 	ATH_NODE_LOCK_ASSERT(an);
5300 
5301 	return (an->an_is_powersave);
5302 }
5303 #endif
5304 
5305 /*
5306  * Mark a node as currently "in powersaving."
5307  * This suspends all traffic on the node.
5308  *
5309  * This must be called with the node/tx locks free.
5310  *
5311  * XXX TODO: the locking silliness below is due to how the node
5312  * locking currently works.  Right now, the node lock is grabbed
5313  * to do rate control lookups and these are done with the TX
5314  * queue lock held.  This means the node lock can't be grabbed
5315  * first here or a LOR will occur.
5316  *
5317  * Eventually (hopefully!) the TX path code will only grab
5318  * the TXQ lock when transmitting and the ath_node lock when
5319  * doing node/TID operations.  There are other complications -
5320  * the sched/unsched operations involve walking the per-txq
5321  * 'active tid' list and this requires both locks to be held.
5322  */
5323 void
5324 ath_tx_node_sleep(struct ath_softc *sc, struct ath_node *an)
5325 {
5326 	struct ath_tid *atid;
5327 	struct ath_txq *txq;
5328 	int tid;
5329 
5330 	ATH_NODE_UNLOCK_ASSERT(an);
5331 
5332 	/*
5333 	 * It's possible that a parallel call to ath_tx_node_wakeup()
5334 	 * will unpause these queues.
5335 	 *
5336 	 * The node lock can't just be grabbed here, as there are places
5337 	 * in the driver where the node lock is grabbed _within_ a
5338 	 * TXQ lock.
5339 	 * So, we do this delicately and unwind state if needed:
5340 	 *
5341 	 * + Pause all the queues
5342 	 * + Grab the node lock
5343 	 * + If the node is already asleep, unpause the queues and return
5344 	 * + else just mark it as asleep.
5345 	 *
5346 	 * A parallel sleep() call will pause the queues, then find
5347 	 * the node is already marked asleep, and undo its pauses.
5348 	 *
5349 	 * A parallel wakeup() call will check if asleep is 1
5350 	 * and if it's not (ie, it's 0), it'll treat the node as already
5351 	 * being awake. If it's 1, it'll mark it as 0 and then
5352 	 * unpause everything.
5353 	 *
5354 	 * (Talk about a delicate hack.)
5355 	 */
5356 
5357 	/* Suspend all traffic on the node */
5358 	for (tid = 0; tid < IEEE80211_TID_SIZE; tid++) {
5359 		atid = &an->an_tid[tid];
5360 		txq = sc->sc_ac2q[atid->ac];
5361 
5362 		ATH_TXQ_LOCK(txq);
5363 		ath_tx_tid_pause(sc, atid);
5364 		ATH_TXQ_UNLOCK(txq);
5365 	}
5366 
5367 	ATH_NODE_LOCK(an);
5368 
5369 	/* In case of concurrency races from net80211... */
5370 	if (an->an_is_powersave == 1) {
5371 		ATH_NODE_UNLOCK(an);
5372 		device_printf(sc->sc_dev,
5373 		    "%s: an=%p: node was already asleep\n",
5374 		    __func__, an);
5375 		for (tid = 0; tid < IEEE80211_TID_SIZE; tid++) {
5376 			atid = &an->an_tid[tid];
5377 			txq = sc->sc_ac2q[atid->ac];
5378 
5379 			ATH_TXQ_LOCK(txq);
5380 			ath_tx_tid_resume(sc, atid);
5381 			ATH_TXQ_UNLOCK(txq);
5382 		}
5383 		return;
5384 	}
5385 
5386 	/* Mark the node as asleep (in power save) */
5387 	an->an_is_powersave = 1;
5388 
5389 	ATH_NODE_UNLOCK(an);
5390 }
5391 
5392 /*
5393  * Mark a node as currently "awake."
5394  * This resumes all traffic to the node.
5395  */
5396 void
5397 ath_tx_node_wakeup(struct ath_softc *sc, struct ath_node *an)
5398 {
5399 	struct ath_tid *atid;
5400 	struct ath_txq *txq;
5401 	int tid;
5402 
5403 	ATH_NODE_UNLOCK_ASSERT(an);
5404 	ATH_NODE_LOCK(an);
5405 
5406 	/* In case of concurrency races from net80211... */
5407 	if (an->an_is_powersave == 0) {
5408 		ATH_NODE_UNLOCK(an);
5409 		device_printf(sc->sc_dev,
5410 		    "%s: an=%p: node was already awake\n",
5411 		    __func__, an);
5412 		return;
5413 	}
5414 
5415 	/* Mark node as awake */
5416 	an->an_is_powersave = 0;
5417 
5418 	ATH_NODE_UNLOCK(an);
5419 
5420 	for (tid = 0; tid < IEEE80211_TID_SIZE; tid++) {
5421 		atid = &an->an_tid[tid];
5422 		txq = sc->sc_ac2q[atid->ac];
5423 
5424 		ATH_TXQ_LOCK(txq);
5425 		ath_tx_tid_resume(sc, atid);
5426 		ATH_TXQ_UNLOCK(txq);
5427 	}
5428 }
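
#if 0
/*
 * Illustrative sketch only (not compiled): one way the two routines
 * above could be driven from a node power save notification.  The
 * function name below is hypothetical; the real hook is installed
 * elsewhere in the driver.
 */
static void
ath_node_powersave_sketch(struct ieee80211_node *ni, int enable)
{
	struct ath_softc *sc = ni->ni_ic->ic_ifp->if_softc;
	struct ath_node *an = ATH_NODE(ni);

	/* Both routines expect the node/TXQ locks to be free. */
	if (enable)
		ath_tx_node_sleep(sc, an);	/* pause all TIDs, mark asleep */
	else
		ath_tx_node_wakeup(sc, an);	/* mark awake, resume all TIDs */
}
#endif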
5429 
5430 static int
5431 ath_legacy_dma_txsetup(struct ath_softc *sc)
5432 {
5433 
5434 	/* nothing new needed */
5435 	return (0);
5436 }
5437 
5438 static int
5439 ath_legacy_dma_txteardown(struct ath_softc *sc)
5440 {
5441 
5442 	/* nothing new needed */
5443 	return (0);
5444 }
5445 
5446 void
5447 ath_xmit_setup_legacy(struct ath_softc *sc)
5448 {
5449 	/*
5450 	 * For now, just set the descriptor length to sizeof(struct ath_desc);
5451 	 * worry about extracting the real length out of the HAL later.
5452 	 */
5453 	sc->sc_tx_desclen = sizeof(struct ath_desc);
5454 	sc->sc_tx_statuslen = 0;
5455 	sc->sc_tx_nmaps = 1;	/* only one buffer per TX desc */
5456 
5457 	sc->sc_tx.xmit_setup = ath_legacy_dma_txsetup;
5458 	sc->sc_tx.xmit_teardown = ath_legacy_dma_txteardown;
5459 	sc->sc_tx.xmit_attach_comp_func = ath_legacy_attach_comp_func;
5460 
5461 	sc->sc_tx.xmit_dma_restart = ath_legacy_tx_dma_restart;
5462 	sc->sc_tx.xmit_handoff = ath_legacy_xmit_handoff;
5463 
5464 	sc->sc_tx.xmit_drain = ath_legacy_tx_drain;
5465 }
5466
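#if 0
/*
 * Illustrative sketch only (not compiled): attach-time code is
 * expected to install a TX method table like the one above and
 * then invoke its setup hook.  The function name below is
 * hypothetical and for illustration only.
 */
static int
ath_tx_methods_attach_sketch(struct ath_softc *sc)
{

	/* Use the legacy (non-EDMA) descriptor handling. */
	ath_xmit_setup_legacy(sc);

	/* Give the method table a chance to allocate what it needs. */
	return (sc->sc_tx.xmit_setup(sc));
}
#endif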