xref: /freebsd/sys/dev/ath/if_ath_tx_ht.c (revision b3512b30dbec579da28028e29d8b33ec7242af68)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 Adrian Chadd, Xenion Pty Ltd.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer,
12  *    without modification.
13  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
14  *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
15  *    redistribution must be conditioned upon including a substantially
16  *    similar Disclaimer requirement for further binary redistribution.
17  *
18  * NO WARRANTY
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
22  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23  * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
24  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
27  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
29  * THE POSSIBILITY OF SUCH DAMAGES.
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_inet.h"
36 #include "opt_ath.h"
37 #include "opt_wlan.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/sysctl.h>
42 #include <sys/mbuf.h>
43 #include <sys/malloc.h>
44 #include <sys/lock.h>
45 #include <sys/mutex.h>
46 #include <sys/kernel.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
49 #include <sys/errno.h>
50 #include <sys/callout.h>
51 #include <sys/bus.h>
52 #include <sys/endian.h>
53 #include <sys/kthread.h>
54 #include <sys/taskqueue.h>
55 #include <sys/priv.h>
56 
57 #include <machine/bus.h>
58 
59 #include <net/if.h>
60 #include <net/if_dl.h>
61 #include <net/if_media.h>
62 #include <net/if_types.h>
63 #include <net/if_arp.h>
64 #include <net/ethernet.h>
65 #include <net/if_llc.h>
66 
67 #include <net80211/ieee80211_var.h>
68 #include <net80211/ieee80211_regdomain.h>
69 #ifdef IEEE80211_SUPPORT_SUPERG
70 #include <net80211/ieee80211_superg.h>
71 #endif
72 #ifdef IEEE80211_SUPPORT_TDMA
73 #include <net80211/ieee80211_tdma.h>
74 #endif
75 
76 #include <net/bpf.h>
77 
78 #ifdef INET
79 #include <netinet/in.h>
80 #include <netinet/if_ether.h>
81 #endif
82 
83 #include <dev/ath/if_athvar.h>
84 #include <dev/ath/ath_hal/ah_devid.h>		/* XXX for softled */
85 #include <dev/ath/ath_hal/ah_diagcodes.h>
86 
87 #ifdef ATH_TX99_DIAG
88 #include <dev/ath/ath_tx99/ath_tx99.h>
89 #endif
90 
91 #include <dev/ath/if_ath_tx.h>		/* XXX for some support functions */
92 #include <dev/ath/if_ath_tx_ht.h>
93 #include <dev/ath/if_athrate.h>
94 #include <dev/ath/if_ath_debug.h>
95 
/*
 * Upper bound on the number of subframes placed in one A-MPDU.
 * XXX probably belongs in net80211.
 */
#define	IEEE80211_AMPDU_SUBFRAME_DEFAULT		32

/* Size in bytes of one A-MPDU delimiter. */
#define	ATH_AGGR_DELIM_SZ	4
/* Minimum packet length in bytes; shorter subframes get delimiter padding. */
#define	ATH_AGGR_MINPLEN	256
/* Number of extra delimiters added as encryption padding. */
#define	ATH_AGGR_ENCRYPTDELIM	10

/*
 * Number of pad delimiters needed so that a subframe of _len bytes plus
 * its own delimiter reaches ATH_AGGR_MINPLEN.  The byte shortfall is
 * divided by four (">> 2"), i.e. by ATH_AGGR_DELIM_SZ, rounding down.
 * Evaluates to 0 once _len + ATH_AGGR_DELIM_SZ is at least ATH_AGGR_MINPLEN.
 *
 * Note that _len is evaluated more than once.
 */
#define	ATH_AGGR_GET_NDELIM(_len)					\
	((((_len) + ATH_AGGR_DELIM_SZ) < ATH_AGGR_MINPLEN) ?		\
	    ((ATH_AGGR_MINPLEN - (_len) - ATH_AGGR_DELIM_SZ) >> 2) : 0)

/* Bytes required to round _len up to the next multiple of four (0..3). */
#define	PADBYTES(_len)		((4 - ((_len) & 3)) & 3)
114 
115 int ath_max_4ms_framelen[4][32] = {
116 	[MCS_HT20] = {
117 		3212,  6432,  9648,  12864,  19300,  25736,  28952,  32172,
118 		6424,  12852, 19280, 25708,  38568,  51424,  57852,  64280,
119 		9628,  19260, 28896, 38528,  57792,  65532,  65532,  65532,
120 		12828, 25656, 38488, 51320,  65532,  65532,  65532,  65532,
121 	},
122 	[MCS_HT20_SGI] = {
123 		3572,  7144,  10720,  14296,  21444,  28596,  32172,  35744,
124 		7140,  14284, 21428,  28568,  42856,  57144,  64288,  65532,
125 		10700, 21408, 32112,  42816,  64228,  65532,  65532,  65532,
126 		14256, 28516, 42780,  57040,  65532,  65532,  65532,  65532,
127 	},
128 	[MCS_HT40] = {
129 		6680,  13360,  20044,  26724,  40092,  53456,  60140,  65532,
130 		13348, 26700,  40052,  53400,  65532,  65532,  65532,  65532,
131 		20004, 40008,  60016,  65532,  65532,  65532,  65532,  65532,
132 		26644, 53292,  65532,  65532,  65532,  65532,  65532,  65532,
133 	},
134 	[MCS_HT40_SGI] = {
135 		7420,  14844,  22272,  29696,  44544,  59396,  65532,  65532,
136 		14832, 29668,  44504,  59340,  65532,  65532,  65532,  65532,
137 		22232, 44464,  65532,  65532,  65532,  65532,  65532,  65532,
138 		29616, 59232,  65532,  65532,  65532,  65532,  65532,  65532,
139 	}
140 };
141 
/*
 * A-MPDU density in units of 1/100th of a microsecond, indexed by the
 * net80211 IEEE80211_HTCAP_MPDUDENSITY_* value.
 *
 * XXX should be in net80211
 */
static int ieee80211_mpdudensity_map[] = {
	[0] = 0,	/* IEEE80211_HTCAP_MPDUDENSITY_NA */
	[1] = 25,	/* IEEE80211_HTCAP_MPDUDENSITY_025 */
	[2] = 50,	/* IEEE80211_HTCAP_MPDUDENSITY_05 */
	[3] = 100,	/* IEEE80211_HTCAP_MPDUDENSITY_1 */
	[4] = 200,	/* IEEE80211_HTCAP_MPDUDENSITY_2 */
	[5] = 400,	/* IEEE80211_HTCAP_MPDUDENSITY_4 */
	[6] = 800,	/* IEEE80211_HTCAP_MPDUDENSITY_8 */
	[7] = 1600,	/* IEEE80211_HTCAP_MPDUDENSITY_16 */
};
155 
/*
 * HT rate code and symbol timing helpers.
 *
 * XXX should be in the HAL/net80211 ?
 *
 * Every macro argument is parenthesised in the expansion so that an
 * expression argument (e.g. "a + b" or "x | y") is evaluated as a unit.
 */
#define	BITS_PER_BYTE		8
#define	OFDM_PLCP_BITS		22
/* Low seven bits of a rate code. */
#define	HT_RC_2_MCS(_rc)	((_rc) & 0x7f)
/* Stream count encoded in bits 3..6 of a rate code, plus one. */
#define	HT_RC_2_STREAMS(_rc)	((((_rc) & 0x78) >> 3) + 1)
#define	L_STF			8
#define	L_LTF			8
#define	L_SIG			4
#define	HT_SIG			8
#define	HT_STF			4
#define	HT_LTF(_ns)		(4 * (_ns))
/* Duration in microseconds of _ns symbols at 4 us per symbol. */
#define	SYMBOL_TIME(_ns)	((_ns) << 2)
/* Duration in microseconds of _ns symbols at 3.6 us per symbol, rounded up. */
#define	SYMBOL_TIME_HALFGI(_ns)	(((_ns) * 18 + 4) / 5)
/* Number of whole 4 us symbols in _usec. */
#define	NUM_SYMBOLS_PER_USEC(_usec)	((_usec) >> 2)
/* Number of 3.6 us symbols in _usec (integer approximation). */
#define	NUM_SYMBOLS_PER_USEC_HALFGI(_usec)	((((_usec) * 5) - 4) / 18)
/* Non-zero (0x80, not 1) when the rate code has the HT bit set. */
#define	IS_HT_RATE(_rate)	((_rate) & 0x80)
174 
/*
 * Bits carried per symbol, indexed by [MCS][0 = 20MHz, 1 = 40MHz].
 * ath_compute_num_delims() uses this to turn a symbol count into a
 * minimum subframe length.
 */
const uint32_t bits_per_symbol[][2] = {
	/*          20MHz  40MHz */
	[0]  = {    26,    54 },	/* BPSK */
	[1]  = {    52,   108 },	/* QPSK 1/2 */
	[2]  = {    78,   162 },	/* QPSK 3/4 */
	[3]  = {   104,   216 },	/* 16-QAM 1/2 */
	[4]  = {   156,   324 },	/* 16-QAM 3/4 */
	[5]  = {   208,   432 },	/* 64-QAM 2/3 */
	[6]  = {   234,   486 },	/* 64-QAM 3/4 */
	[7]  = {   260,   540 },	/* 64-QAM 5/6 */
	[8]  = {    52,   108 },	/* BPSK */
	[9]  = {   104,   216 },	/* QPSK 1/2 */
	[10] = {   156,   324 },	/* QPSK 3/4 */
	[11] = {   208,   432 },	/* 16-QAM 1/2 */
	[12] = {   312,   648 },	/* 16-QAM 3/4 */
	[13] = {   416,   864 },	/* 64-QAM 2/3 */
	[14] = {   468,   972 },	/* 64-QAM 3/4 */
	[15] = {   520,  1080 },	/* 64-QAM 5/6 */
	[16] = {    78,   162 },	/* BPSK */
	[17] = {   156,   324 },	/* QPSK 1/2 */
	[18] = {   234,   486 },	/* QPSK 3/4 */
	[19] = {   312,   648 },	/* 16-QAM 1/2 */
	[20] = {   468,   972 },	/* 16-QAM 3/4 */
	[21] = {   624,  1296 },	/* 64-QAM 2/3 */
	[22] = {   702,  1458 },	/* 64-QAM 3/4 */
	[23] = {   780,  1620 },	/* 64-QAM 5/6 */
	[24] = {   104,   216 },	/* BPSK */
	[25] = {   208,   432 },	/* QPSK 1/2 */
	[26] = {   312,   648 },	/* QPSK 3/4 */
	[27] = {   416,   864 },	/* 16-QAM 1/2 */
	[28] = {   624,  1296 },	/* 16-QAM 3/4 */
	[29] = {   832,  1728 },	/* 64-QAM 2/3 */
	[30] = {   936,  1944 },	/* 64-QAM 3/4 */
	[31] = {  1040,  2160 },	/* 64-QAM 5/6 */
};
210 
211 /*
212  * Fill in the rate array information based on the current
213  * node configuration and the choices made by the rate
214  * selection code and ath_buf setup code.
215  *
216  * Later on, this may end up also being made by the
217  * rate control code, but for now it can live here.
218  *
219  * This needs to be called just before the packet is
220  * queued to the software queue or hardware queue,
221  * so all of the needed fields in bf_state are setup.
222  */
223 void
224 ath_tx_rate_fill_rcflags(struct ath_softc *sc, struct ath_buf *bf)
225 {
226 	struct ieee80211_node *ni = bf->bf_node;
227 	struct ieee80211vap *vap = ni->ni_vap;
228 	struct ieee80211com *ic = ni->ni_ic;
229 	const HAL_RATE_TABLE *rt = sc->sc_currates;
230 	struct ath_rc_series *rc = bf->bf_state.bfs_rc;
231 	uint8_t rate;
232 	int i;
233 	int do_ldpc;
234 	int do_stbc;
235 
236 	/*
237 	 * We only do LDPC if the rate is 11n, both we and the
238 	 * receiver support LDPC and it's enabled.
239 	 *
240 	 * It's a global flag, not a per-try flag, so we clear
241 	 * it if any of the rate entries aren't 11n.
242 	 */
243 	do_ldpc = 0;
244 	if ((ni->ni_vap->iv_flags_ht & IEEE80211_FHT_LDPC_TX) &&
245 	    (ni->ni_htcap & IEEE80211_HTCAP_LDPC))
246 		do_ldpc = 1;
247 
248 	/*
249 	 * The 11n duration calculation doesn't know about LDPC,
250 	 * so don't enable it for positioning.
251 	 */
252 	if (bf->bf_flags & ATH_BUF_TOA_PROBE)
253 		do_ldpc = 0;
254 
255 	do_stbc = 0;
256 
257 	for (i = 0; i < ATH_RC_NUM; i++) {
258 		rc[i].flags = 0;
259 		if (rc[i].tries == 0)
260 			continue;
261 
262 		rate = rt->info[rc[i].rix].rateCode;
263 
264 		/*
265 		 * Only enable short preamble for legacy rates
266 		 */
267 		if ((! IS_HT_RATE(rate)) && bf->bf_state.bfs_shpream)
268 			rate |= rt->info[rc[i].rix].shortPreamble;
269 
270 		/*
271 		 * Save this, used by the TX and completion code
272 		 */
273 		rc[i].ratecode = rate;
274 
275 		if (bf->bf_state.bfs_txflags &
276 		    (HAL_TXDESC_RTSENA | HAL_TXDESC_CTSENA))
277 			rc[i].flags |= ATH_RC_RTSCTS_FLAG;
278 
279 		/*
280 		 * If we can't do LDPC, don't.
281 		 */
282 		if (! IS_HT_RATE(rate))
283 			do_ldpc = 0;
284 
285 		/* Only enable shortgi, 2040, dual-stream if HT is set */
286 		if (IS_HT_RATE(rate)) {
287 			rc[i].flags |= ATH_RC_HT_FLAG;
288 
289 			if (ni->ni_chw == 40)
290 				rc[i].flags |= ATH_RC_CW40_FLAG;
291 
292 			/*
293 			 * NOTE: Don't do short-gi for positioning frames.
294 			 *
295 			 * For now, the ath_hal and net80211 HT duration
296 			 * calculation rounds up the 11n data txtime
297 			 * to the nearest multiple of 3.6 microseconds
298 			 * and doesn't return the fractional part, so
299 			 * we are always "out" by some amount.
300 			 */
301 			if (ni->ni_chw == 40 &&
302 			    ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI40 &&
303 			    ni->ni_htcap & IEEE80211_HTCAP_SHORTGI40 &&
304 			    vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40 &&
305 			    (bf->bf_flags & ATH_BUF_TOA_PROBE) == 0) {
306 				rc[i].flags |= ATH_RC_SGI_FLAG;
307 			}
308 
309 			if (ni->ni_chw == 20 &&
310 			    ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI20 &&
311 			    ni->ni_htcap & IEEE80211_HTCAP_SHORTGI20 &&
312 			    vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20 &&
313 			    (bf->bf_flags & ATH_BUF_TOA_PROBE) == 0) {
314 				rc[i].flags |= ATH_RC_SGI_FLAG;
315 			}
316 
317 			/*
318 			 * If we have STBC TX enabled and the receiver
319 			 * can receive (at least) 1 stream STBC, AND it's
320 			 * MCS 0-7, AND we have at least two chains enabled,
321 			 * and we're not doing positioning, enable STBC.
322 			 */
323 			if (ic->ic_htcaps & IEEE80211_HTCAP_TXSTBC &&
324 			    (ni->ni_vap->iv_flags_ht & IEEE80211_FHT_STBC_TX) &&
325 			    (ni->ni_htcap & IEEE80211_HTCAP_RXSTBC) &&
326 			    (sc->sc_cur_txchainmask > 1) &&
327 			    (HT_RC_2_STREAMS(rate) == 1) &&
328 			    (bf->bf_flags & ATH_BUF_TOA_PROBE) == 0) {
329 				rc[i].flags |= ATH_RC_STBC_FLAG;
330 				do_stbc = 1;
331 			}
332 
333 			/*
334 			 * Dual / Triple stream rate?
335 			 */
336 			if (HT_RC_2_STREAMS(rate) == 2)
337 				rc[i].flags |= ATH_RC_DS_FLAG;
338 			else if (HT_RC_2_STREAMS(rate) == 3)
339 				rc[i].flags |= ATH_RC_TS_FLAG;
340 		}
341 
342 		/*
343 		 * Calculate the maximum TX power cap for the current
344 		 * node.
345 		 */
346 		rc[i].tx_power_cap = ieee80211_get_node_txpower(ni);
347 
348 		/*
349 		 * Calculate the maximum 4ms frame length based
350 		 * on the MCS rate, SGI and channel width flags.
351 		 */
352 		if ((rc[i].flags & ATH_RC_HT_FLAG) &&
353 		    (HT_RC_2_MCS(rate) < 32)) {
354 			int j;
355 			if (rc[i].flags & ATH_RC_CW40_FLAG) {
356 				if (rc[i].flags & ATH_RC_SGI_FLAG)
357 					j = MCS_HT40_SGI;
358 				else
359 					j = MCS_HT40;
360 			} else {
361 				if (rc[i].flags & ATH_RC_SGI_FLAG)
362 					j = MCS_HT20_SGI;
363 				else
364 					j = MCS_HT20;
365 			}
366 			rc[i].max4msframelen =
367 			    ath_max_4ms_framelen[j][HT_RC_2_MCS(rate)];
368 		} else
369 			rc[i].max4msframelen = 0;
370 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
371 		    "%s: i=%d, rate=0x%x, flags=0x%x, max4ms=%d\n",
372 		    __func__, i, rate, rc[i].flags, rc[i].max4msframelen);
373 	}
374 
375 	/*
376 	 * LDPC is a global flag, so ...
377 	 */
378 	if (do_ldpc) {
379 		bf->bf_state.bfs_txflags |= HAL_TXDESC_LDPC;
380 		sc->sc_stats.ast_tx_ldpc++;
381 	}
382 
383 	if (do_stbc) {
384 		sc->sc_stats.ast_tx_stbc++;
385 	}
386 }
387 
388 /*
389  * Return the number of delimiters to be added to
390  * meet the minimum required mpdudensity.
391  *
392  * Caller should make sure that the rate is HT.
393  *
394  * TODO: is this delimiter calculation supposed to be the
395  * total frame length, the hdr length, the data length (including
396  * delimiters, padding, CRC, etc) or ?
397  *
398  * TODO: this should ensure that the rate control information
399  * HAS been setup for the first rate.
400  *
401  * TODO: ensure this is only called for MCS rates.
402  *
403  * TODO: enforce MCS < 31
404  */
405 static int
406 ath_compute_num_delims(struct ath_softc *sc, struct ath_buf *first_bf,
407     uint16_t pktlen, int is_first)
408 {
409 #define	MS(_v, _f)	(((_v) & _f) >> _f##_S)
410 	const HAL_RATE_TABLE *rt = sc->sc_currates;
411 	struct ieee80211_node *ni = first_bf->bf_node;
412 	struct ieee80211vap *vap = ni->ni_vap;
413 	int ndelim, mindelim = 0;
414 	int mpdudensity;	/* in 1/100'th of a microsecond */
415 	int peer_mpdudensity;	/* net80211 value */
416 	uint8_t rc, rix, flags;
417 	int width, half_gi;
418 	uint32_t nsymbits, nsymbols;
419 	uint16_t minlen;
420 
421 	/*
422 	 * Get the advertised density from the node.
423 	 */
424 	peer_mpdudensity = MS(ni->ni_htparam, IEEE80211_HTCAP_MPDUDENSITY);
425 
426 	/*
427 	 * vap->iv_ampdu_density is a net80211 value, rather than the actual
428 	 * density.  Larger values are longer A-MPDU density spacing values,
429 	 * and we want to obey larger configured / negotiated density values
430 	 * per station if we get it.
431 	 */
432 	if (vap->iv_ampdu_density > peer_mpdudensity)
433 		peer_mpdudensity = vap->iv_ampdu_density;
434 
435 	/*
436 	 * Convert the A-MPDU density net80211 value to a 1/100 microsecond
437 	 * value for subsequent calculations.
438 	 */
439 	if (peer_mpdudensity > IEEE80211_HTCAP_MPDUDENSITY_16)
440 		mpdudensity = 1600;		/* maximum density */
441 	else
442 		mpdudensity = ieee80211_mpdudensity_map[peer_mpdudensity];
443 
444 	/* Select standard number of delimiters based on frame length */
445 	ndelim = ATH_AGGR_GET_NDELIM(pktlen);
446 
447 	/*
448 	 * If encryption is enabled, add extra delimiters to let the
449 	 * crypto hardware catch up. This could be tuned per-MAC and
450 	 * per-rate, but for now we'll simply assume encryption is
451 	 * always enabled.
452 	 *
453 	 * Also note that the Atheros reference driver inserts two
454 	 * delimiters by default for pre-AR9380 peers.  This will
455 	 * include "that" required delimiter.
456 	 */
457 	ndelim += ATH_AGGR_ENCRYPTDELIM;
458 
459 	/*
460 	 * For AR9380, there's a minimum number of delimeters
461 	 * required when doing RTS.
462 	 *
463 	 * XXX TODO: this is only needed if (a) RTS/CTS is enabled for
464 	 * this exchange, and (b) (done) this is the first sub-frame
465 	 * in the aggregate.
466 	 */
467 	if (sc->sc_use_ent && (sc->sc_ent_cfg & AH_ENT_RTSCTS_DELIM_WAR)
468 	    && ndelim < AH_FIRST_DESC_NDELIMS && is_first)
469 		ndelim = AH_FIRST_DESC_NDELIMS;
470 
471 	/*
472 	 * If sc_delim_min_pad is non-zero, enforce it as the minimum
473 	 * pad delimiter count.
474 	 */
475 	if (sc->sc_delim_min_pad != 0)
476 		ndelim = MAX(ndelim, sc->sc_delim_min_pad);
477 
478 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
479 	    "%s: pktlen=%d, ndelim=%d, mpdudensity=%d\n",
480 	    __func__, pktlen, ndelim, mpdudensity);
481 
482 	/*
483 	 * If the MPDU density is 0, we can return here.
484 	 * Otherwise, we need to convert the desired mpdudensity
485 	 * into a byte length, based on the rate in the subframe.
486 	 */
487 	if (mpdudensity == 0)
488 		return ndelim;
489 
490 	/*
491 	 * Convert desired mpdu density from microeconds to bytes based
492 	 * on highest rate in rate series (i.e. first rate) to determine
493 	 * required minimum length for subframe. Take into account
494 	 * whether high rate is 20 or 40Mhz and half or full GI.
495 	 */
496 	rix = first_bf->bf_state.bfs_rc[0].rix;
497 	rc = rt->info[rix].rateCode;
498 	flags = first_bf->bf_state.bfs_rc[0].flags;
499 	width = !! (flags & ATH_RC_CW40_FLAG);
500 	half_gi = !! (flags & ATH_RC_SGI_FLAG);
501 
502 	/*
503 	 * mpdudensity is in 1/100th of a usec, so divide by 100
504 	 */
505 	if (half_gi)
506 		nsymbols = NUM_SYMBOLS_PER_USEC_HALFGI(mpdudensity);
507 	else
508 		nsymbols = NUM_SYMBOLS_PER_USEC(mpdudensity);
509 	nsymbols /= 100;
510 
511 	if (nsymbols == 0)
512 		nsymbols = 1;
513 
514 	nsymbits = bits_per_symbol[HT_RC_2_MCS(rc)][width];
515 	minlen = (nsymbols * nsymbits) / BITS_PER_BYTE;
516 
517 	/*
518 	 * Min length is the minimum frame length for the
519 	 * required MPDU density.
520 	 */
521 	if (pktlen < minlen) {
522 		mindelim = (minlen - pktlen) / ATH_AGGR_DELIM_SZ;
523 		ndelim = MAX(mindelim, ndelim);
524 	}
525 
526 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
527 	    "%s: pktlen=%d, minlen=%d, rix=%x, rc=%x, width=%d, hgi=%d, ndelim=%d\n",
528 	    __func__, pktlen, minlen, rix, rc, width, half_gi, ndelim);
529 
530 	return ndelim;
531 #undef	MS
532 }
533 
534 /*
535  * XXX TODO: put into net80211
536  */
537 static int
538 ath_rx_ampdu_to_byte(char a)
539 {
540 	switch (a) {
541 	case IEEE80211_HTCAP_MAXRXAMPDU_16K:
542 		return 16384;
543 		break;
544 	case IEEE80211_HTCAP_MAXRXAMPDU_32K:
545 		return 32768;
546 		break;
547 	case IEEE80211_HTCAP_MAXRXAMPDU_64K:
548 		return 65536;
549 		break;
550 	case IEEE80211_HTCAP_MAXRXAMPDU_8K:
551 	default:
552 		return 8192;
553 		break;
554 	}
555 }
556 
557 /*
558  * Fetch the aggregation limit.
559  *
560  * It's the lowest of the four rate series 4ms frame length.
561  *
562  * Also take into account the hardware specific limits (8KiB on AR5416)
563  * and per-peer limits in non-STA mode.
564  */
565 static int
566 ath_get_aggr_limit(struct ath_softc *sc, struct ieee80211_node *ni,
567     struct ath_buf *bf)
568 {
569 	struct ieee80211vap *vap = ni->ni_vap;
570 
571 #define	MS(_v, _f)	(((_v) & _f) >> _f##_S)
572 	int amin = ATH_AGGR_MAXSIZE;
573 	int i;
574 
575 	/* Extract out the maximum configured driver A-MPDU limit */
576 	if (sc->sc_aggr_limit > 0 && sc->sc_aggr_limit < ATH_AGGR_MAXSIZE)
577 		amin = sc->sc_aggr_limit;
578 
579 	/* Check the vap configured transmit limit */
580 	amin = MIN(amin, ath_rx_ampdu_to_byte(vap->iv_ampdu_limit));
581 
582 	/*
583 	 * Check the HTCAP field for the maximum size the node has
584 	 * negotiated.  If it's smaller than what we have, cap it there.
585 	 */
586 	amin = MIN(amin, ath_rx_ampdu_to_byte(MS(ni->ni_htparam,
587 	    IEEE80211_HTCAP_MAXRXAMPDU)));
588 
589 	for (i = 0; i < ATH_RC_NUM; i++) {
590 		if (bf->bf_state.bfs_rc[i].tries == 0)
591 			continue;
592 		amin = MIN(amin, bf->bf_state.bfs_rc[i].max4msframelen);
593 	}
594 
595 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
596 	    "%s: aggr_limit=%d, iv_ampdu_limit=%d, "
597 	    "peer maxrxampdu=%d, max frame len=%d\n",
598 	    __func__,
599 	    sc->sc_aggr_limit,
600 	    vap->iv_ampdu_limit,
601 	    MS(ni->ni_htparam, IEEE80211_HTCAP_MAXRXAMPDU),
602 	    amin);
603 
604 	return amin;
605 #undef	MS
606 }
607 
608 /*
609  * Setup a 11n rate series structure
610  *
611  * This should be called for both legacy and MCS rates.
612  *
613  * This uses the rate series stuf from ath_tx_rate_fill_rcflags().
614  *
615  * It, along with ath_buf_set_rate, must be called -after- a burst
616  * or aggregate is setup.
617  */
618 static void
619 ath_rateseries_setup(struct ath_softc *sc, struct ieee80211_node *ni,
620     struct ath_buf *bf, HAL_11N_RATE_SERIES *series)
621 {
622 	struct ieee80211com *ic = ni->ni_ic;
623 	struct ath_hal *ah = sc->sc_ah;
624 	HAL_BOOL shortPreamble = AH_FALSE;
625 	const HAL_RATE_TABLE *rt = sc->sc_currates;
626 	int i;
627 	int pktlen;
628 	struct ath_rc_series *rc = bf->bf_state.bfs_rc;
629 
630 	if ((ic->ic_flags & IEEE80211_F_SHPREAMBLE) &&
631 	    (ni->ni_capinfo & IEEE80211_CAPINFO_SHORT_PREAMBLE))
632 		shortPreamble = AH_TRUE;
633 
634 	/*
635 	 * If this is the first frame in an aggregate series,
636 	 * use the aggregate length.
637 	 */
638 	if (bf->bf_state.bfs_aggr)
639 		pktlen = bf->bf_state.bfs_al;
640 	else
641 		pktlen = bf->bf_state.bfs_pktlen;
642 
643 	/*
644 	 * XXX TODO: modify this routine to use the bfs_rc[x].flags
645 	 * XXX fields.
646 	 */
647 	memset(series, 0, sizeof(HAL_11N_RATE_SERIES) * 4);
648 	for (i = 0; i < ATH_RC_NUM;  i++) {
649 		/* Only set flags for actual TX attempts */
650 		if (rc[i].tries == 0)
651 			continue;
652 
653 		series[i].Tries = rc[i].tries;
654 
655 		/*
656 		 * XXX TODO: When the NIC is capable of three stream TX,
657 		 * transmit 1/2 stream rates on two streams.
658 		 *
659 		 * This reduces the power consumption of the NIC and
660 		 * keeps it within the PCIe slot power limits.
661 		 */
662 		series[i].ChSel = sc->sc_cur_txchainmask;
663 
664 		/*
665 		 * Setup rate and TX power cap for this series.
666 		 */
667 		series[i].Rate = rt->info[rc[i].rix].rateCode;
668 		series[i].RateIndex = rc[i].rix;
669 		series[i].tx_power_cap = rc[i].tx_power_cap;
670 
671 		/*
672 		 * Enable RTS/CTS as appropriate.
673 		 */
674 		if (rc[i].flags & ATH_RC_RTSCTS_FLAG)
675 			series[i].RateFlags |= HAL_RATESERIES_RTS_CTS;
676 
677 		/*
678 		 * 11n rate? Update 11n flags.
679 		 */
680 		if (rc[i].flags & ATH_RC_HT_FLAG) {
681 			if (rc[i].flags & ATH_RC_CW40_FLAG)
682 				series[i].RateFlags |= HAL_RATESERIES_2040;
683 
684 			if (rc[i].flags & ATH_RC_SGI_FLAG)
685 				series[i].RateFlags |= HAL_RATESERIES_HALFGI;
686 
687 			if (rc[i].flags & ATH_RC_STBC_FLAG)
688 				series[i].RateFlags |= HAL_RATESERIES_STBC;
689 		}
690 
691 		/*
692 		 * TODO: If we're all doing 11n rates then we can set LDPC.
693 		 * If we've been asked to /do/ LDPC but we are handed a
694 		 * legacy rate, then we should complain.  Loudly.
695 		 */
696 
697 		/*
698 		 * PktDuration doesn't include slot, ACK, RTS, etc timing -
699 		 * it's just the packet duration
700 		 */
701 		if (rc[i].flags & ATH_RC_HT_FLAG) {
702 			series[i].PktDuration =
703 			    ath_computedur_ht(pktlen
704 				, series[i].Rate
705 				, HT_RC_2_STREAMS(series[i].Rate)
706 				, series[i].RateFlags & HAL_RATESERIES_2040
707 				, series[i].RateFlags & HAL_RATESERIES_HALFGI);
708 		} else {
709 			if (shortPreamble)
710 				series[i].Rate |=
711 				    rt->info[rc[i].rix].shortPreamble;
712 			/* XXX TODO: don't include SIFS */
713 			series[i].PktDuration = ath_hal_computetxtime(ah,
714 			    rt, pktlen, rc[i].rix, shortPreamble, AH_TRUE);
715 		}
716 	}
717 }
718 
#ifdef	ATH_DEBUG
/*
 * Dump every entry of an 11n rate series array to the device log.
 */
static void
ath_rateseries_print(struct ath_softc *sc, HAL_11N_RATE_SERIES *series)
{
	const HAL_11N_RATE_SERIES *s;
	int n;

	for (n = 0; n < ATH_RC_NUM; n++) {
		s = &series[n];
		device_printf(sc->sc_dev,
		    "series %d: rate %x; tries %d; pktDuration %d; "
		    "chSel %d; txpowcap %d, rateFlags %x\n",
		    n, s->Rate, s->Tries, s->PktDuration,
		    s->ChSel, s->tx_power_cap, s->RateFlags);
	}
}
#endif
737 
738 /*
739  * Setup the 11n rate scenario and burst duration for the given TX descriptor
740  * list.
741  *
742  * This isn't useful for sending beacon frames, which has different needs
743  * wrt what's passed into the rate scenario function.
744  */
745 void
746 ath_buf_set_rate(struct ath_softc *sc, struct ieee80211_node *ni,
747     struct ath_buf *bf)
748 {
749 	HAL_11N_RATE_SERIES series[4];
750 	struct ath_desc *ds = bf->bf_desc;
751 	struct ath_hal *ah = sc->sc_ah;
752 	int is_pspoll = (bf->bf_state.bfs_atype == HAL_PKT_TYPE_PSPOLL);
753 	int ctsrate = bf->bf_state.bfs_ctsrate;
754 	int flags = bf->bf_state.bfs_txflags;
755 
756 	/* Setup rate scenario */
757 	memset(&series, 0, sizeof(series));
758 
759 	ath_rateseries_setup(sc, ni, bf, series);
760 
761 #ifdef	ATH_DEBUG
762 	if (sc->sc_debug & ATH_DEBUG_XMIT)
763 		ath_rateseries_print(sc, series);
764 #endif
765 
766 	/* Set rate scenario */
767 	/*
768 	 * Note: Don't allow hardware to override the duration on
769 	 * ps-poll packets.
770 	 */
771 	ath_hal_set11nratescenario(ah, ds,
772 	    !is_pspoll,	/* whether to override the duration or not */
773 	    ctsrate,	/* rts/cts rate */
774 	    series,	/* 11n rate series */
775 	    4,		/* number of series */
776 	    flags);
777 
778 	/* Set burst duration */
779 	/*
780 	 * This is only required when doing 11n burst, not aggregation
781 	 * ie, if there's a second frame in a RIFS or A-MPDU burst
782 	 * w/ >1 A-MPDU frame bursting back to back.
783 	 * Normal A-MPDU doesn't do bursting -between- aggregates.
784 	 *
785 	 * .. and it's highly likely this won't ever be implemented
786 	 */
787 	//ath_hal_set11nburstduration(ah, ds, 8192);
788 }
789 
790 /*
791  * Form an aggregate packet list.
792  *
793  * This function enforces the aggregate restrictions/requirements.
794  *
795  * These are:
796  *
797  * + The aggregate size maximum (64k for AR9160 and later, 8K for
798  *   AR5416 when doing RTS frame protection.)
799  * + Maximum number of sub-frames for an aggregate
800  * + The aggregate delimiter size, giving MACs time to do whatever is
801  *   needed before each frame
802  * + Enforce the BAW limit
803  *
804  * Each descriptor queued should have the DMA setup.
805  * The rate series, descriptor setup, linking, etc is all done
806  * externally. This routine simply chains them together.
807  * ath_tx_setds_11n() will take care of configuring the per-
808  * descriptor setup, and ath_buf_set_rate() will configure the
809  * rate control.
810  *
811  * The TID lock is required for the entirety of this function.
812  *
813  * If some code in another thread adds to the head of this
814  * list, very strange behaviour will occur. Since retransmission is the
815  * only reason this will occur, and this routine is designed to be called
816  * from within the scheduler task, it won't ever clash with the completion
817  * task.
818  *
819  * So if you want to call this from an upper layer context (eg, to direct-
820  * dispatch aggregate frames to the hardware), please keep this in mind.
821  */
822 ATH_AGGR_STATUS
823 ath_tx_form_aggr(struct ath_softc *sc, struct ath_node *an,
824     struct ath_tid *tid, ath_bufhead *bf_q)
825 {
826 	//struct ieee80211_node *ni = &an->an_node;
827 	struct ath_buf *bf, *bf_first = NULL, *bf_prev = NULL;
828 	int nframes = 0;
829 	uint16_t aggr_limit = 0, al = 0, bpad = 0, al_delta, h_baw;
830 	struct ieee80211_tx_ampdu *tap;
831 	int status = ATH_AGGR_DONE;
832 	int prev_frames = 0;	/* XXX for AR5416 burst, not done here */
833 	int prev_al = 0;	/* XXX also for AR5416 burst */
834 
835 	ATH_TX_LOCK_ASSERT(sc);
836 
837 	tap = ath_tx_get_tx_tid(an, tid->tid);
838 	if (tap == NULL) {
839 		status = ATH_AGGR_ERROR;
840 		goto finish;
841 	}
842 
843 	/*
844 	 * Limit the maximum number of frames in this A-MPDU
845 	 * to half of the window size.  This is done to prevent
846 	 * sending a LOT of frames that may fail in one batch
847 	 * when operating in higher MCS rates.  If there are more
848 	 * frames available to send then up to two A-MPDUs will
849 	 * be queued per hardware queue, so we'll "just" get
850 	 * a second A-MPDU.
851 	 */
852 	h_baw = tap->txa_wnd / 2;
853 
854 	for (;;) {
855 		bf = ATH_TID_FIRST(tid);
856 		if (bf == NULL) {
857 			status = ATH_AGGR_DONE;
858 			break;
859 		}
860 		if (bf_first == NULL) {
861 			bf_first = bf;
862 			/*
863 			 * It's the first frame;
864 			 * set the aggregation limit based on the
865 			 * rate control decision that has been made.
866 			 */
867 			aggr_limit = ath_get_aggr_limit(sc, &an->an_node,
868 			    bf_first);
869 			if (bf_first->bf_state.bfs_rc_maxpktlen > 0) {
870 				aggr_limit = MIN(aggr_limit,
871 				    bf_first->bf_state.bfs_rc_maxpktlen);
872 			}
873 		}
874 
875 		/* Set this early just so things don't get confused */
876 		bf->bf_next = NULL;
877 
878 		/*
879 		 * If the frame doesn't have a sequence number that we're
880 		 * tracking in the BAW (eg NULL QOS data frame), we can't
881 		 * aggregate it. Stop the aggregation process; the sender
882 		 * can then TX what's in the list thus far and then
883 		 * TX the frame individually.
884 		 */
885 		if (! bf->bf_state.bfs_dobaw) {
886 			status = ATH_AGGR_NONAGGR;
887 			break;
888 		}
889 
890 		/*
891 		 * If any of the rates are non-HT, this packet
892 		 * can't be aggregated.
893 		 * XXX TODO: add a bf_state flag which gets marked
894 		 * if any active rate is non-HT.
895 		 */
896 
897 		/*
898 		 * do not exceed aggregation limit
899 		 */
900 		al_delta = ATH_AGGR_DELIM_SZ + bf->bf_state.bfs_pktlen;
901 		if (nframes &&
902 		    (aggr_limit < (al + bpad + al_delta + prev_al))) {
903 			status = ATH_AGGR_LIMITED;
904 			break;
905 		}
906 
907 		/*
908 		 * If RTS/CTS is set on the first frame, enforce
909 		 * the RTS aggregate limit.
910 		 */
911 		if (bf_first->bf_state.bfs_txflags &
912 		    (HAL_TXDESC_CTSENA | HAL_TXDESC_RTSENA)) {
913 			if (nframes &&
914 			   (sc->sc_rts_aggr_limit <
915 			     (al + bpad + al_delta + prev_al))) {
916 				status = ATH_AGGR_8K_LIMITED;
917 				break;
918 			}
919 		}
920 
921 		/*
922 		 * Do not exceed subframe limit.
923 		 */
924 		if ((nframes + prev_frames) >= MIN((h_baw),
925 		    IEEE80211_AMPDU_SUBFRAME_DEFAULT)) {
926 			status = ATH_AGGR_LIMITED;
927 			break;
928 		}
929 
930 		/*
931 		 * If the current frame has an RTS/CTS configuration
932 		 * that differs from the first frame, override the
933 		 * subsequent frame with this config.
934 		 */
935 		if (bf != bf_first) {
936 			bf->bf_state.bfs_txflags &=
937 			    ~ (HAL_TXDESC_RTSENA | HAL_TXDESC_CTSENA);
938 			bf->bf_state.bfs_txflags |=
939 			    bf_first->bf_state.bfs_txflags &
940 			    (HAL_TXDESC_RTSENA | HAL_TXDESC_CTSENA);
941 		}
942 
943 		/*
944 		 * If the packet has a sequence number, do not
945 		 * step outside of the block-ack window.
946 		 */
947 		if (! BAW_WITHIN(tap->txa_start, tap->txa_wnd,
948 		    SEQNO(bf->bf_state.bfs_seqno))) {
949 			status = ATH_AGGR_BAW_CLOSED;
950 			break;
951 		}
952 
953 		/*
954 		 * this packet is part of an aggregate.
955 		 */
956 		ATH_TID_REMOVE(tid, bf, bf_list);
957 
958 		/* The TID lock is required for the BAW update */
959 		ath_tx_addto_baw(sc, an, tid, bf);
960 		bf->bf_state.bfs_addedbaw = 1;
961 
962 		/*
963 		 * XXX enforce ACK for aggregate frames (this needs to be
964 		 * XXX handled more gracefully?
965 		 */
966 		if (bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) {
967 			device_printf(sc->sc_dev,
968 			    "%s: HAL_TXDESC_NOACK set for an aggregate frame?\n",
969 			    __func__);
970 			bf->bf_state.bfs_txflags &= (~HAL_TXDESC_NOACK);
971 		}
972 
973 		/*
974 		 * Add the now owned buffer (which isn't
975 		 * on the software TXQ any longer) to our
976 		 * aggregate frame list.
977 		 */
978 		TAILQ_INSERT_TAIL(bf_q, bf, bf_list);
979 		nframes ++;
980 
981 		/* Completion handler */
982 		bf->bf_comp = ath_tx_aggr_comp;
983 
984 		/*
985 		 * add padding for previous frame to aggregation length
986 		 */
987 		al += bpad + al_delta;
988 
989 		/*
990 		 * Calculate delimiters needed for the current frame
991 		 */
992 		bf->bf_state.bfs_ndelim =
993 		    ath_compute_num_delims(sc, bf_first,
994 		    bf->bf_state.bfs_pktlen, (bf_first == bf));
995 
996 		/*
997 		 * Calculate the padding needed from this set of delimiters,
998 		 * used when calculating if the next frame will fit in
999 		 * the aggregate.
1000 		 */
1001 		bpad = PADBYTES(al_delta) + (bf->bf_state.bfs_ndelim << 2);
1002 
1003 		/*
1004 		 * Chain the buffers together
1005 		 */
1006 		if (bf_prev)
1007 			bf_prev->bf_next = bf;
1008 		bf_prev = bf;
1009 
1010 		/*
1011 		 * If we're leaking frames, just return at this point;
1012 		 * we've queued a single frame and we don't want to add
1013 		 * any more.
1014 		 */
1015 		if (tid->an->an_leak_count) {
1016 			status = ATH_AGGR_LEAK_CLOSED;
1017 			break;
1018 		}
1019 
1020 #if 0
1021 		/*
1022 		 * terminate aggregation on a small packet boundary
1023 		 */
1024 		if (bf->bf_state.bfs_pktlen < ATH_AGGR_MINPLEN) {
1025 			status = ATH_AGGR_SHORTPKT;
1026 			break;
1027 		}
1028 #endif
1029 
1030 	}
1031 
1032 finish:
1033 	/*
1034 	 * Just in case the list was empty when we tried to
1035 	 * dequeue a packet ..
1036 	 */
1037 	if (bf_first) {
1038 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
1039 		"%s: al=%d bytes; requested %d bytes\n",
1040 		__func__, al, bf_first->bf_state.bfs_rc_maxpktlen);
1041 
1042 		bf_first->bf_state.bfs_al = al;
1043 		bf_first->bf_state.bfs_nframes = nframes;
1044 	}
1045 	return status;
1046 }
1047