xref: /freebsd/sys/dev/ath/if_ath_tx_ht.c (revision 514fb3872166a361faa81d89909f65913e90b1b2)
1 /*-
2  * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3  *
4  * Copyright (c) 2011 Adrian Chadd, Xenion Pty Ltd.
5  * All rights reserved.
6  *
7  * Redistribution and use in source and binary forms, with or without
8  * modification, are permitted provided that the following conditions
9  * are met:
10  * 1. Redistributions of source code must retain the above copyright
11  *    notice, this list of conditions and the following disclaimer,
12  *    without modification.
13  * 2. Redistributions in binary form must reproduce at minimum a disclaimer
14  *    similar to the "NO WARRANTY" disclaimer below ("Disclaimer") and any
15  *    redistribution must be conditioned upon including a substantially
16  *    similar Disclaimer requirement for further binary redistribution.
17  *
18  * NO WARRANTY
19  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
21  * LIMITED TO, THE IMPLIED WARRANTIES OF NONINFRINGEMENT, MERCHANTIBILITY
22  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL
23  * THE COPYRIGHT HOLDERS OR CONTRIBUTORS BE LIABLE FOR SPECIAL, EXEMPLARY,
24  * OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER
27  * IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
29  * THE POSSIBILITY OF SUCH DAMAGES.
30  */
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include "opt_inet.h"
36 #include "opt_ath.h"
37 #include "opt_wlan.h"
38 
39 #include <sys/param.h>
40 #include <sys/systm.h>
41 #include <sys/sysctl.h>
42 #include <sys/mbuf.h>
43 #include <sys/malloc.h>
44 #include <sys/lock.h>
45 #include <sys/mutex.h>
46 #include <sys/kernel.h>
47 #include <sys/socket.h>
48 #include <sys/sockio.h>
49 #include <sys/errno.h>
50 #include <sys/callout.h>
51 #include <sys/bus.h>
52 #include <sys/endian.h>
53 #include <sys/kthread.h>
54 #include <sys/taskqueue.h>
55 #include <sys/priv.h>
56 
57 #include <machine/bus.h>
58 
59 #include <net/if.h>
60 #include <net/if_dl.h>
61 #include <net/if_media.h>
62 #include <net/if_types.h>
63 #include <net/if_arp.h>
64 #include <net/ethernet.h>
65 #include <net/if_llc.h>
66 
67 #include <net80211/ieee80211_var.h>
68 #include <net80211/ieee80211_regdomain.h>
69 #ifdef IEEE80211_SUPPORT_SUPERG
70 #include <net80211/ieee80211_superg.h>
71 #endif
72 #ifdef IEEE80211_SUPPORT_TDMA
73 #include <net80211/ieee80211_tdma.h>
74 #endif
75 
76 #include <net/bpf.h>
77 
78 #ifdef INET
79 #include <netinet/in.h>
80 #include <netinet/if_ether.h>
81 #endif
82 
83 #include <dev/ath/if_athvar.h>
84 #include <dev/ath/ath_hal/ah_devid.h>		/* XXX for softled */
85 #include <dev/ath/ath_hal/ah_diagcodes.h>
86 
87 #ifdef ATH_TX99_DIAG
88 #include <dev/ath/ath_tx99/ath_tx99.h>
89 #endif
90 
91 #include <dev/ath/if_ath_tx.h>		/* XXX for some support functions */
92 #include <dev/ath/if_ath_tx_ht.h>
93 #include <dev/ath/if_athrate.h>
94 #include <dev/ath/if_ath_debug.h>
95 
/*
 * Default upper bound on the number of sub-frames placed in one A-MPDU.
 *
 * XXX net80211?
 */
#define	IEEE80211_AMPDU_SUBFRAME_DEFAULT		32

/* Size of a single A-MPDU delimiter, in bytes. */
#define	ATH_AGGR_DELIM_SZ	4
/* Minimum packet length, in bytes. */
#define	ATH_AGGR_MINPLEN	256
/* Number of delimiters for encryption padding. */
#define	ATH_AGGR_ENCRYPTDELIM	10

/*
 * Number of pad delimiters needed so that a packet of _len bytes plus
 * one delimiter reaches ATH_AGGR_MINPLEN; zero once it is long enough.
 */
#define	ATH_AGGR_GET_NDELIM(_len)					\
	((((_len) + ATH_AGGR_DELIM_SZ) >= ATH_AGGR_MINPLEN) ? 0 :	\
	    ((ATH_AGGR_MINPLEN - ATH_AGGR_DELIM_SZ - (_len)) >> 2))

/* Bytes needed to round _len up to the next multiple of four. */
#define	PADBYTES(_len)		((4 - ((_len) & 3)) & 3)
114 
115 int ath_max_4ms_framelen[4][32] = {
116 	[MCS_HT20] = {
117 		3212,  6432,  9648,  12864,  19300,  25736,  28952,  32172,
118 		6424,  12852, 19280, 25708,  38568,  51424,  57852,  64280,
119 		9628,  19260, 28896, 38528,  57792,  65532,  65532,  65532,
120 		12828, 25656, 38488, 51320,  65532,  65532,  65532,  65532,
121 	},
122 	[MCS_HT20_SGI] = {
123 		3572,  7144,  10720,  14296,  21444,  28596,  32172,  35744,
124 		7140,  14284, 21428,  28568,  42856,  57144,  64288,  65532,
125 		10700, 21408, 32112,  42816,  64228,  65532,  65532,  65532,
126 		14256, 28516, 42780,  57040,  65532,  65532,  65532,  65532,
127 	},
128 	[MCS_HT40] = {
129 		6680,  13360,  20044,  26724,  40092,  53456,  60140,  65532,
130 		13348, 26700,  40052,  53400,  65532,  65532,  65532,  65532,
131 		20004, 40008,  60016,  65532,  65532,  65532,  65532,  65532,
132 		26644, 53292,  65532,  65532,  65532,  65532,  65532,  65532,
133 	},
134 	[MCS_HT40_SGI] = {
135 		7420,  14844,  22272,  29696,  44544,  59396,  65532,  65532,
136 		14832, 29668,  44504,  59340,  65532,  65532,  65532,  65532,
137 		22232, 44464,  65532,  65532,  65532,  65532,  65532,  65532,
138 		29616, 59232,  65532,  65532,  65532,  65532,  65532,  65532,
139 	}
140 };
141 
/*
 * Map from the net80211 A-MPDU density field value (the index) to the
 * minimum MPDU start spacing in 1/100ths of a microsecond.
 *
 * The table is only ever read, so it is const.
 *
 * XXX should be in net80211
 */
static const int ieee80211_mpdudensity_map[] = {
	0,		/* IEEE80211_HTCAP_MPDUDENSITY_NA */
	25,		/* IEEE80211_HTCAP_MPDUDENSITY_025 */
	50,		/* IEEE80211_HTCAP_MPDUDENSITY_05 */
	100,		/* IEEE80211_HTCAP_MPDUDENSITY_1 */
	200,		/* IEEE80211_HTCAP_MPDUDENSITY_2 */
	400,		/* IEEE80211_HTCAP_MPDUDENSITY_4 */
	800,		/* IEEE80211_HTCAP_MPDUDENSITY_8 */
	1600,		/* IEEE80211_HTCAP_MPDUDENSITY_16 */
};
155 
/*
 * 11n rate code and symbol timing helpers.
 *
 * Every macro argument is parenthesized so that expression arguments
 * (eg "a + b") expand with the intended precedence.
 *
 * XXX should be in the HAL/net80211 ?
 */
#define	BITS_PER_BYTE		8
#define	OFDM_PLCP_BITS		22
/* Strip the HT bit from a rate code, leaving the MCS index. */
#define	HT_RC_2_MCS(_rc)	((_rc) & 0x7f)
/* Number of spatial streams encoded in an HT rate code (1-based). */
#define	HT_RC_2_STREAMS(_rc)	((((_rc) & 0x78) >> 3) + 1)
/* Preamble field constants (presumably durations in microseconds). */
#define	L_STF			8
#define	L_LTF			8
#define	L_SIG			4
#define	HT_SIG			8
#define	HT_STF			4
#define	HT_LTF(_ns)		(4 * (_ns))
#define	SYMBOL_TIME(_ns)	((_ns) << 2)		/* ns * 4 us */
#define	SYMBOL_TIME_HALFGI(_ns)	(((_ns) * 18 + 4) / 5)	/* ns * 3.6 us */
/* Whole symbols that fit in _usec at 4 us per symbol. */
#define	NUM_SYMBOLS_PER_USEC(_usec)	((_usec) >> 2)
/* Whole symbols that fit in _usec at 3.6 us per symbol (short GI). */
#define	NUM_SYMBOLS_PER_USEC_HALFGI(_usec)	((((_usec) * 5) - 4) / 18)
/* Non-zero when the rate code has the HT (MCS) bit set. */
#define	IS_HT_RATE(_rate)	((_rate) & 0x80)
174 
/*
 * Data bits carried per symbol, indexed by MCS number (row) and by
 * channel width (column 0: 20MHz, column 1: 40MHz).
 */
const uint32_t bits_per_symbol[][2] = {
	/* One stream (MCS 0-7) */
	[0]  = {   26,   54 },	/* BPSK */
	[1]  = {   52,  108 },	/* QPSK 1/2 */
	[2]  = {   78,  162 },	/* QPSK 3/4 */
	[3]  = {  104,  216 },	/* 16-QAM 1/2 */
	[4]  = {  156,  324 },	/* 16-QAM 3/4 */
	[5]  = {  208,  432 },	/* 64-QAM 2/3 */
	[6]  = {  234,  486 },	/* 64-QAM 3/4 */
	[7]  = {  260,  540 },	/* 64-QAM 5/6 */

	/* Two streams (MCS 8-15) */
	[8]  = {   52,  108 },	/* BPSK */
	[9]  = {  104,  216 },	/* QPSK 1/2 */
	[10] = {  156,  324 },	/* QPSK 3/4 */
	[11] = {  208,  432 },	/* 16-QAM 1/2 */
	[12] = {  312,  648 },	/* 16-QAM 3/4 */
	[13] = {  416,  864 },	/* 64-QAM 2/3 */
	[14] = {  468,  972 },	/* 64-QAM 3/4 */
	[15] = {  520, 1080 },	/* 64-QAM 5/6 */

	/* Three streams (MCS 16-23) */
	[16] = {   78,  162 },	/* BPSK */
	[17] = {  156,  324 },	/* QPSK 1/2 */
	[18] = {  234,  486 },	/* QPSK 3/4 */
	[19] = {  312,  648 },	/* 16-QAM 1/2 */
	[20] = {  468,  972 },	/* 16-QAM 3/4 */
	[21] = {  624, 1296 },	/* 64-QAM 2/3 */
	[22] = {  702, 1458 },	/* 64-QAM 3/4 */
	[23] = {  780, 1620 },	/* 64-QAM 5/6 */

	/* Four streams (MCS 24-31) */
	[24] = {  104,  216 },	/* BPSK */
	[25] = {  208,  432 },	/* QPSK 1/2 */
	[26] = {  312,  648 },	/* QPSK 3/4 */
	[27] = {  416,  864 },	/* 16-QAM 1/2 */
	[28] = {  624, 1296 },	/* 16-QAM 3/4 */
	[29] = {  832, 1728 },	/* 64-QAM 2/3 */
	[30] = {  936, 1944 },	/* 64-QAM 3/4 */
	[31] = { 1040, 2160 },	/* 64-QAM 5/6 */
};
210 
211 /*
212  * Fill in the rate array information based on the current
213  * node configuration and the choices made by the rate
214  * selection code and ath_buf setup code.
215  *
216  * Later on, this may end up also being made by the
217  * rate control code, but for now it can live here.
218  *
219  * This needs to be called just before the packet is
220  * queued to the software queue or hardware queue,
221  * so all of the needed fields in bf_state are setup.
222  */
223 void
224 ath_tx_rate_fill_rcflags(struct ath_softc *sc, struct ath_buf *bf)
225 {
226 	struct ieee80211_node *ni = bf->bf_node;
227 	struct ieee80211vap *vap = ni->ni_vap;
228 	struct ieee80211com *ic = ni->ni_ic;
229 	const HAL_RATE_TABLE *rt = sc->sc_currates;
230 	struct ath_rc_series *rc = bf->bf_state.bfs_rc;
231 	uint8_t rate;
232 	int i;
233 	int do_ldpc;
234 	int do_stbc;
235 
236 	/*
237 	 * We only do LDPC if the rate is 11n, both we and the
238 	 * receiver support LDPC and it's enabled.
239 	 *
240 	 * It's a global flag, not a per-try flag, so we clear
241 	 * it if any of the rate entries aren't 11n.
242 	 */
243 	do_ldpc = 0;
244 	if ((ni->ni_vap->iv_flags_ht & IEEE80211_FHT_LDPC_TX) &&
245 	    (ni->ni_htcap & IEEE80211_HTCAP_LDPC))
246 		do_ldpc = 1;
247 
248 	/*
249 	 * The 11n duration calculation doesn't know about LDPC,
250 	 * so don't enable it for positioning.
251 	 */
252 	if (bf->bf_flags & ATH_BUF_TOA_PROBE)
253 		do_ldpc = 0;
254 
255 	do_stbc = 0;
256 
257 	for (i = 0; i < ATH_RC_NUM; i++) {
258 		rc[i].flags = 0;
259 		if (rc[i].tries == 0)
260 			continue;
261 
262 		rate = rt->info[rc[i].rix].rateCode;
263 
264 		/*
265 		 * Only enable short preamble for legacy rates
266 		 */
267 		if ((! IS_HT_RATE(rate)) && bf->bf_state.bfs_shpream)
268 			rate |= rt->info[rc[i].rix].shortPreamble;
269 
270 		/*
271 		 * Save this, used by the TX and completion code
272 		 */
273 		rc[i].ratecode = rate;
274 
275 		if (bf->bf_state.bfs_txflags &
276 		    (HAL_TXDESC_RTSENA | HAL_TXDESC_CTSENA))
277 			rc[i].flags |= ATH_RC_RTSCTS_FLAG;
278 
279 		/*
280 		 * If we can't do LDPC, don't.
281 		 */
282 		if (! IS_HT_RATE(rate))
283 			do_ldpc = 0;
284 
285 		/* Only enable shortgi, 2040, dual-stream if HT is set */
286 		if (IS_HT_RATE(rate)) {
287 			rc[i].flags |= ATH_RC_HT_FLAG;
288 
289 			if (ni->ni_chw == 40)
290 				rc[i].flags |= ATH_RC_CW40_FLAG;
291 
292 			/*
293 			 * NOTE: Don't do short-gi for positioning frames.
294 			 *
295 			 * For now, the ath_hal and net80211 HT duration
296 			 * calculation rounds up the 11n data txtime
297 			 * to the nearest multiple of 3.6 microseconds
298 			 * and doesn't return the fractional part, so
299 			 * we are always "out" by some amount.
300 			 */
301 			if (ni->ni_chw == 40 &&
302 			    ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI40 &&
303 			    ni->ni_htcap & IEEE80211_HTCAP_SHORTGI40 &&
304 			    vap->iv_flags_ht & IEEE80211_FHT_SHORTGI40 &&
305 			    (bf->bf_flags & ATH_BUF_TOA_PROBE) == 0) {
306 				rc[i].flags |= ATH_RC_SGI_FLAG;
307 			}
308 
309 			if (ni->ni_chw == 20 &&
310 			    ic->ic_htcaps & IEEE80211_HTCAP_SHORTGI20 &&
311 			    ni->ni_htcap & IEEE80211_HTCAP_SHORTGI20 &&
312 			    vap->iv_flags_ht & IEEE80211_FHT_SHORTGI20 &&
313 			    (bf->bf_flags & ATH_BUF_TOA_PROBE) == 0) {
314 				rc[i].flags |= ATH_RC_SGI_FLAG;
315 			}
316 
317 			/*
318 			 * If we have STBC TX enabled and the receiver
319 			 * can receive (at least) 1 stream STBC, AND it's
320 			 * MCS 0-7, AND we have at least two chains enabled,
321 			 * and we're not doing positioning, enable STBC.
322 			 */
323 			if (ic->ic_htcaps & IEEE80211_HTCAP_TXSTBC &&
324 			    (ni->ni_vap->iv_flags_ht & IEEE80211_FHT_STBC_TX) &&
325 			    (ni->ni_htcap & IEEE80211_HTCAP_RXSTBC) &&
326 			    (sc->sc_cur_txchainmask > 1) &&
327 			    (HT_RC_2_STREAMS(rate) == 1) &&
328 			    (bf->bf_flags & ATH_BUF_TOA_PROBE) == 0) {
329 				rc[i].flags |= ATH_RC_STBC_FLAG;
330 				do_stbc = 1;
331 			}
332 
333 			/*
334 			 * Dual / Triple stream rate?
335 			 */
336 			if (HT_RC_2_STREAMS(rate) == 2)
337 				rc[i].flags |= ATH_RC_DS_FLAG;
338 			else if (HT_RC_2_STREAMS(rate) == 3)
339 				rc[i].flags |= ATH_RC_TS_FLAG;
340 		}
341 
342 		/*
343 		 * Calculate the maximum TX power cap for the current
344 		 * node.
345 		 */
346 		rc[i].tx_power_cap = ieee80211_get_node_txpower(ni);
347 
348 		/*
349 		 * Calculate the maximum 4ms frame length based
350 		 * on the MCS rate, SGI and channel width flags.
351 		 */
352 		if ((rc[i].flags & ATH_RC_HT_FLAG) &&
353 		    (HT_RC_2_MCS(rate) < 32)) {
354 			int j;
355 			if (rc[i].flags & ATH_RC_CW40_FLAG) {
356 				if (rc[i].flags & ATH_RC_SGI_FLAG)
357 					j = MCS_HT40_SGI;
358 				else
359 					j = MCS_HT40;
360 			} else {
361 				if (rc[i].flags & ATH_RC_SGI_FLAG)
362 					j = MCS_HT20_SGI;
363 				else
364 					j = MCS_HT20;
365 			}
366 			rc[i].max4msframelen =
367 			    ath_max_4ms_framelen[j][HT_RC_2_MCS(rate)];
368 		} else
369 			rc[i].max4msframelen = 0;
370 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
371 		    "%s: i=%d, rate=0x%x, flags=0x%x, max4ms=%d\n",
372 		    __func__, i, rate, rc[i].flags, rc[i].max4msframelen);
373 	}
374 
375 	/*
376 	 * LDPC is a global flag, so ...
377 	 */
378 	if (do_ldpc) {
379 		bf->bf_state.bfs_txflags |= HAL_TXDESC_LDPC;
380 		sc->sc_stats.ast_tx_ldpc++;
381 	}
382 
383 	if (do_stbc) {
384 		sc->sc_stats.ast_tx_stbc++;
385 	}
386 }
387 
388 /*
389  * Return the number of delimiters to be added to
390  * meet the minimum required mpdudensity.
391  *
392  * Caller should make sure that the rate is HT.
393  *
394  * TODO: is this delimiter calculation supposed to be the
395  * total frame length, the hdr length, the data length (including
396  * delimiters, padding, CRC, etc) or ?
397  *
398  * TODO: this should ensure that the rate control information
399  * HAS been setup for the first rate.
400  *
401  * TODO: ensure this is only called for MCS rates.
402  *
403  * TODO: enforce MCS < 31
404  */
405 static int
406 ath_compute_num_delims(struct ath_softc *sc, struct ath_buf *first_bf,
407     uint16_t pktlen, int is_first)
408 {
409 	const HAL_RATE_TABLE *rt = sc->sc_currates;
410 	struct ieee80211_node *ni = first_bf->bf_node;
411 	struct ieee80211vap *vap = ni->ni_vap;
412 	int ndelim, mindelim = 0;
413 	int mpdudensity;	/* in 1/100'th of a microsecond */
414 	int peer_mpdudensity;	/* net80211 value */
415 	uint8_t rc, rix, flags;
416 	int width, half_gi;
417 	uint32_t nsymbits, nsymbols;
418 	uint16_t minlen;
419 
420 	/*
421 	 * Get the advertised density from the node.
422 	 */
423 	peer_mpdudensity =
424 	    _IEEE80211_MASKSHIFT(ni->ni_htparam, IEEE80211_HTCAP_MPDUDENSITY);
425 
426 	/*
427 	 * vap->iv_ampdu_density is a net80211 value, rather than the actual
428 	 * density.  Larger values are longer A-MPDU density spacing values,
429 	 * and we want to obey larger configured / negotiated density values
430 	 * per station if we get it.
431 	 */
432 	if (vap->iv_ampdu_density > peer_mpdudensity)
433 		peer_mpdudensity = vap->iv_ampdu_density;
434 
435 	/*
436 	 * Convert the A-MPDU density net80211 value to a 1/100 microsecond
437 	 * value for subsequent calculations.
438 	 */
439 	if (peer_mpdudensity > IEEE80211_HTCAP_MPDUDENSITY_16)
440 		mpdudensity = 1600;		/* maximum density */
441 	else
442 		mpdudensity = ieee80211_mpdudensity_map[peer_mpdudensity];
443 
444 	/* Select standard number of delimiters based on frame length */
445 	ndelim = ATH_AGGR_GET_NDELIM(pktlen);
446 
447 	/*
448 	 * If encryption is enabled, add extra delimiters to let the
449 	 * crypto hardware catch up. This could be tuned per-MAC and
450 	 * per-rate, but for now we'll simply assume encryption is
451 	 * always enabled.
452 	 *
453 	 * Also note that the Atheros reference driver inserts two
454 	 * delimiters by default for pre-AR9380 peers.  This will
455 	 * include "that" required delimiter.
456 	 */
457 	ndelim += ATH_AGGR_ENCRYPTDELIM;
458 
459 	/*
460 	 * For AR9380, there's a minimum number of delimeters
461 	 * required when doing RTS.
462 	 *
463 	 * XXX TODO: this is only needed if (a) RTS/CTS is enabled for
464 	 * this exchange, and (b) (done) this is the first sub-frame
465 	 * in the aggregate.
466 	 */
467 	if (sc->sc_use_ent && (sc->sc_ent_cfg & AH_ENT_RTSCTS_DELIM_WAR)
468 	    && ndelim < AH_FIRST_DESC_NDELIMS && is_first)
469 		ndelim = AH_FIRST_DESC_NDELIMS;
470 
471 	/*
472 	 * If sc_delim_min_pad is non-zero, enforce it as the minimum
473 	 * pad delimiter count.
474 	 */
475 	if (sc->sc_delim_min_pad != 0)
476 		ndelim = MAX(ndelim, sc->sc_delim_min_pad);
477 
478 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
479 	    "%s: pktlen=%d, ndelim=%d, mpdudensity=%d\n",
480 	    __func__, pktlen, ndelim, mpdudensity);
481 
482 	/*
483 	 * If the MPDU density is 0, we can return here.
484 	 * Otherwise, we need to convert the desired mpdudensity
485 	 * into a byte length, based on the rate in the subframe.
486 	 */
487 	if (mpdudensity == 0)
488 		return ndelim;
489 
490 	/*
491 	 * Convert desired mpdu density from microeconds to bytes based
492 	 * on highest rate in rate series (i.e. first rate) to determine
493 	 * required minimum length for subframe. Take into account
494 	 * whether high rate is 20 or 40Mhz and half or full GI.
495 	 */
496 	rix = first_bf->bf_state.bfs_rc[0].rix;
497 	rc = rt->info[rix].rateCode;
498 	flags = first_bf->bf_state.bfs_rc[0].flags;
499 	width = !! (flags & ATH_RC_CW40_FLAG);
500 	half_gi = !! (flags & ATH_RC_SGI_FLAG);
501 
502 	/*
503 	 * mpdudensity is in 1/100th of a usec, so divide by 100
504 	 */
505 	if (half_gi)
506 		nsymbols = NUM_SYMBOLS_PER_USEC_HALFGI(mpdudensity);
507 	else
508 		nsymbols = NUM_SYMBOLS_PER_USEC(mpdudensity);
509 	nsymbols /= 100;
510 
511 	if (nsymbols == 0)
512 		nsymbols = 1;
513 
514 	nsymbits = bits_per_symbol[HT_RC_2_MCS(rc)][width];
515 	minlen = (nsymbols * nsymbits) / BITS_PER_BYTE;
516 
517 	/*
518 	 * Min length is the minimum frame length for the
519 	 * required MPDU density.
520 	 */
521 	if (pktlen < minlen) {
522 		mindelim = (minlen - pktlen) / ATH_AGGR_DELIM_SZ;
523 		ndelim = MAX(mindelim, ndelim);
524 	}
525 
526 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
527 	    "%s: pktlen=%d, minlen=%d, rix=%x, rc=%x, width=%d, hgi=%d, ndelim=%d\n",
528 	    __func__, pktlen, minlen, rix, rc, width, half_gi, ndelim);
529 
530 	return ndelim;
531 }
532 
533 /*
534  * XXX TODO: put into net80211
535  */
536 static int
537 ath_rx_ampdu_to_byte(char a)
538 {
539 	switch (a) {
540 	case IEEE80211_HTCAP_MAXRXAMPDU_16K:
541 		return 16384;
542 		break;
543 	case IEEE80211_HTCAP_MAXRXAMPDU_32K:
544 		return 32768;
545 		break;
546 	case IEEE80211_HTCAP_MAXRXAMPDU_64K:
547 		return 65536;
548 		break;
549 	case IEEE80211_HTCAP_MAXRXAMPDU_8K:
550 	default:
551 		return 8192;
552 		break;
553 	}
554 }
555 
556 /*
557  * Fetch the aggregation limit.
558  *
559  * It's the lowest of the four rate series 4ms frame length.
560  *
561  * Also take into account the hardware specific limits (8KiB on AR5416)
562  * and per-peer limits in non-STA mode.
563  */
564 static int
565 ath_get_aggr_limit(struct ath_softc *sc, struct ieee80211_node *ni,
566     struct ath_buf *bf)
567 {
568 	struct ieee80211vap *vap = ni->ni_vap;
569 
570 	int amin = ATH_AGGR_MAXSIZE;
571 	int i;
572 
573 	/* Extract out the maximum configured driver A-MPDU limit */
574 	if (sc->sc_aggr_limit > 0 && sc->sc_aggr_limit < ATH_AGGR_MAXSIZE)
575 		amin = sc->sc_aggr_limit;
576 
577 	/* Check the vap configured transmit limit */
578 	amin = MIN(amin, ath_rx_ampdu_to_byte(vap->iv_ampdu_limit));
579 
580 	/*
581 	 * Check the HTCAP field for the maximum size the node has
582 	 * negotiated.  If it's smaller than what we have, cap it there.
583 	 */
584 	amin = MIN(amin, ath_rx_ampdu_to_byte(
585 	    _IEEE80211_MASKSHIFT(ni->ni_htparam, IEEE80211_HTCAP_MAXRXAMPDU)));
586 
587 	for (i = 0; i < ATH_RC_NUM; i++) {
588 		if (bf->bf_state.bfs_rc[i].tries == 0)
589 			continue;
590 		amin = MIN(amin, bf->bf_state.bfs_rc[i].max4msframelen);
591 	}
592 
593 	DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
594 	    "%s: aggr_limit=%d, iv_ampdu_limit=%d, "
595 	    "peer maxrxampdu=%d, max frame len=%d\n",
596 	    __func__,
597 	    sc->sc_aggr_limit,
598 	    vap->iv_ampdu_limit,
599 	    _IEEE80211_MASKSHIFT(ni->ni_htparam, IEEE80211_HTCAP_MAXRXAMPDU),
600 	    amin);
601 
602 	return amin;
603 }
604 
605 /*
606  * Setup a 11n rate series structure
607  *
608  * This should be called for both legacy and MCS rates.
609  *
610  * This uses the rate series stuf from ath_tx_rate_fill_rcflags().
611  *
612  * It, along with ath_buf_set_rate, must be called -after- a burst
613  * or aggregate is setup.
614  */
615 static void
616 ath_rateseries_setup(struct ath_softc *sc, struct ieee80211_node *ni,
617     struct ath_buf *bf, HAL_11N_RATE_SERIES *series)
618 {
619 	struct ieee80211com *ic = ni->ni_ic;
620 	struct ath_hal *ah = sc->sc_ah;
621 	HAL_BOOL shortPreamble = AH_FALSE;
622 	const HAL_RATE_TABLE *rt = sc->sc_currates;
623 	int i;
624 	int pktlen;
625 	struct ath_rc_series *rc = bf->bf_state.bfs_rc;
626 
627 	if ((ic->ic_flags & IEEE80211_F_SHPREAMBLE) &&
628 	    (ni->ni_capinfo & IEEE80211_CAPINFO_SHORT_PREAMBLE))
629 		shortPreamble = AH_TRUE;
630 
631 	/*
632 	 * If this is the first frame in an aggregate series,
633 	 * use the aggregate length.
634 	 */
635 	if (bf->bf_state.bfs_aggr)
636 		pktlen = bf->bf_state.bfs_al;
637 	else
638 		pktlen = bf->bf_state.bfs_pktlen;
639 
640 	/*
641 	 * XXX TODO: modify this routine to use the bfs_rc[x].flags
642 	 * XXX fields.
643 	 */
644 	memset(series, 0, sizeof(HAL_11N_RATE_SERIES) * 4);
645 	for (i = 0; i < ATH_RC_NUM;  i++) {
646 		/* Only set flags for actual TX attempts */
647 		if (rc[i].tries == 0)
648 			continue;
649 
650 		series[i].Tries = rc[i].tries;
651 
652 		/*
653 		 * XXX TODO: When the NIC is capable of three stream TX,
654 		 * transmit 1/2 stream rates on two streams.
655 		 *
656 		 * This reduces the power consumption of the NIC and
657 		 * keeps it within the PCIe slot power limits.
658 		 */
659 		series[i].ChSel = sc->sc_cur_txchainmask;
660 
661 		/*
662 		 * Setup rate and TX power cap for this series.
663 		 */
664 		series[i].Rate = rt->info[rc[i].rix].rateCode;
665 		series[i].RateIndex = rc[i].rix;
666 		series[i].tx_power_cap = rc[i].tx_power_cap;
667 
668 		/*
669 		 * Enable RTS/CTS as appropriate.
670 		 */
671 		if (rc[i].flags & ATH_RC_RTSCTS_FLAG)
672 			series[i].RateFlags |= HAL_RATESERIES_RTS_CTS;
673 
674 		/*
675 		 * 11n rate? Update 11n flags.
676 		 */
677 		if (rc[i].flags & ATH_RC_HT_FLAG) {
678 			if (rc[i].flags & ATH_RC_CW40_FLAG)
679 				series[i].RateFlags |= HAL_RATESERIES_2040;
680 
681 			if (rc[i].flags & ATH_RC_SGI_FLAG)
682 				series[i].RateFlags |= HAL_RATESERIES_HALFGI;
683 
684 			if (rc[i].flags & ATH_RC_STBC_FLAG)
685 				series[i].RateFlags |= HAL_RATESERIES_STBC;
686 		}
687 
688 		/*
689 		 * TODO: If we're all doing 11n rates then we can set LDPC.
690 		 * If we've been asked to /do/ LDPC but we are handed a
691 		 * legacy rate, then we should complain.  Loudly.
692 		 */
693 
694 		/*
695 		 * PktDuration doesn't include slot, ACK, RTS, etc timing -
696 		 * it's just the packet duration
697 		 */
698 		if (rc[i].flags & ATH_RC_HT_FLAG) {
699 			series[i].PktDuration =
700 			    ath_computedur_ht(pktlen
701 				, series[i].Rate
702 				, HT_RC_2_STREAMS(series[i].Rate)
703 				, series[i].RateFlags & HAL_RATESERIES_2040
704 				, series[i].RateFlags & HAL_RATESERIES_HALFGI);
705 		} else {
706 			if (shortPreamble)
707 				series[i].Rate |=
708 				    rt->info[rc[i].rix].shortPreamble;
709 			/* XXX TODO: don't include SIFS */
710 			series[i].PktDuration = ath_hal_computetxtime(ah,
711 			    rt, pktlen, rc[i].rix, shortPreamble, AH_TRUE);
712 		}
713 	}
714 }
715 
#ifdef	ATH_DEBUG
/*
 * Debug helper: print every entry of an 11n rate series array
 * (ATH_RC_NUM entries) via device_printf().
 */
static void
ath_rateseries_print(struct ath_softc *sc, HAL_11N_RATE_SERIES *series)
{
	const HAL_11N_RATE_SERIES *entry = series;
	int n;

	for (n = 0; n < ATH_RC_NUM; n++, entry++) {
		device_printf(sc->sc_dev, "series %d: rate %x; tries %d; "
		    "pktDuration %d; chSel %d; txpowcap %d, rateFlags %x\n",
		    n,
		    entry->Rate,
		    entry->Tries,
		    entry->PktDuration,
		    entry->ChSel,
		    entry->tx_power_cap,
		    entry->RateFlags);
	}
}
#endif
734 
735 /*
736  * Setup the 11n rate scenario and burst duration for the given TX descriptor
737  * list.
738  *
739  * This isn't useful for sending beacon frames, which has different needs
740  * wrt what's passed into the rate scenario function.
741  */
742 void
743 ath_buf_set_rate(struct ath_softc *sc, struct ieee80211_node *ni,
744     struct ath_buf *bf)
745 {
746 	HAL_11N_RATE_SERIES series[4];
747 	struct ath_desc *ds = bf->bf_desc;
748 	struct ath_hal *ah = sc->sc_ah;
749 	int is_pspoll = (bf->bf_state.bfs_atype == HAL_PKT_TYPE_PSPOLL);
750 	int ctsrate = bf->bf_state.bfs_ctsrate;
751 	int flags = bf->bf_state.bfs_txflags;
752 
753 	/* Setup rate scenario */
754 	memset(&series, 0, sizeof(series));
755 
756 	ath_rateseries_setup(sc, ni, bf, series);
757 
758 #ifdef	ATH_DEBUG
759 	if (sc->sc_debug & ATH_DEBUG_XMIT)
760 		ath_rateseries_print(sc, series);
761 #endif
762 
763 	/* Set rate scenario */
764 	/*
765 	 * Note: Don't allow hardware to override the duration on
766 	 * ps-poll packets.
767 	 */
768 	ath_hal_set11nratescenario(ah, ds,
769 	    !is_pspoll,	/* whether to override the duration or not */
770 	    ctsrate,	/* rts/cts rate */
771 	    series,	/* 11n rate series */
772 	    4,		/* number of series */
773 	    flags);
774 
775 	/* Set burst duration */
776 	/*
777 	 * This is only required when doing 11n burst, not aggregation
778 	 * ie, if there's a second frame in a RIFS or A-MPDU burst
779 	 * w/ >1 A-MPDU frame bursting back to back.
780 	 * Normal A-MPDU doesn't do bursting -between- aggregates.
781 	 *
782 	 * .. and it's highly likely this won't ever be implemented
783 	 */
784 	//ath_hal_set11nburstduration(ah, ds, 8192);
785 }
786 
787 /*
788  * Form an aggregate packet list.
789  *
790  * This function enforces the aggregate restrictions/requirements.
791  *
792  * These are:
793  *
794  * + The aggregate size maximum (64k for AR9160 and later, 8K for
795  *   AR5416 when doing RTS frame protection.)
796  * + Maximum number of sub-frames for an aggregate
797  * + The aggregate delimiter size, giving MACs time to do whatever is
798  *   needed before each frame
799  * + Enforce the BAW limit
800  *
801  * Each descriptor queued should have the DMA setup.
802  * The rate series, descriptor setup, linking, etc is all done
803  * externally. This routine simply chains them together.
804  * ath_tx_setds_11n() will take care of configuring the per-
805  * descriptor setup, and ath_buf_set_rate() will configure the
806  * rate control.
807  *
808  * The TID lock is required for the entirety of this function.
809  *
810  * If some code in another thread adds to the head of this
811  * list, very strange behaviour will occur. Since retransmission is the
812  * only reason this will occur, and this routine is designed to be called
813  * from within the scheduler task, it won't ever clash with the completion
814  * task.
815  *
816  * So if you want to call this from an upper layer context (eg, to direct-
817  * dispatch aggregate frames to the hardware), please keep this in mind.
818  */
819 ATH_AGGR_STATUS
820 ath_tx_form_aggr(struct ath_softc *sc, struct ath_node *an,
821     struct ath_tid *tid, ath_bufhead *bf_q)
822 {
823 	//struct ieee80211_node *ni = &an->an_node;
824 	struct ath_buf *bf, *bf_first = NULL, *bf_prev = NULL;
825 	int nframes = 0;
826 	uint16_t aggr_limit = 0, al = 0, bpad = 0, al_delta, h_baw;
827 	struct ieee80211_tx_ampdu *tap;
828 	int status = ATH_AGGR_DONE;
829 	int prev_frames = 0;	/* XXX for AR5416 burst, not done here */
830 	int prev_al = 0;	/* XXX also for AR5416 burst */
831 
832 	ATH_TX_LOCK_ASSERT(sc);
833 
834 	tap = ath_tx_get_tx_tid(an, tid->tid);
835 	if (tap == NULL) {
836 		status = ATH_AGGR_ERROR;
837 		goto finish;
838 	}
839 
840 	/*
841 	 * Limit the maximum number of frames in this A-MPDU
842 	 * to half of the window size.  This is done to prevent
843 	 * sending a LOT of frames that may fail in one batch
844 	 * when operating in higher MCS rates.  If there are more
845 	 * frames available to send then up to two A-MPDUs will
846 	 * be queued per hardware queue, so we'll "just" get
847 	 * a second A-MPDU.
848 	 */
849 	h_baw = tap->txa_wnd / 2;
850 
851 	for (;;) {
852 		bf = ATH_TID_FIRST(tid);
853 		if (bf == NULL) {
854 			status = ATH_AGGR_DONE;
855 			break;
856 		}
857 		if (bf_first == NULL) {
858 			bf_first = bf;
859 			/*
860 			 * It's the first frame;
861 			 * set the aggregation limit based on the
862 			 * rate control decision that has been made.
863 			 */
864 			aggr_limit = ath_get_aggr_limit(sc, &an->an_node,
865 			    bf_first);
866 			if (bf_first->bf_state.bfs_rc_maxpktlen > 0) {
867 				aggr_limit = MIN(aggr_limit,
868 				    bf_first->bf_state.bfs_rc_maxpktlen);
869 			}
870 		}
871 
872 		/* Set this early just so things don't get confused */
873 		bf->bf_next = NULL;
874 
875 		/*
876 		 * If the frame doesn't have a sequence number that we're
877 		 * tracking in the BAW (eg NULL QOS data frame), we can't
878 		 * aggregate it. Stop the aggregation process; the sender
879 		 * can then TX what's in the list thus far and then
880 		 * TX the frame individually.
881 		 */
882 		if (! bf->bf_state.bfs_dobaw) {
883 			status = ATH_AGGR_NONAGGR;
884 			break;
885 		}
886 
887 		/*
888 		 * If any of the rates are non-HT, this packet
889 		 * can't be aggregated.
890 		 * XXX TODO: add a bf_state flag which gets marked
891 		 * if any active rate is non-HT.
892 		 */
893 
894 		/*
895 		 * do not exceed aggregation limit
896 		 */
897 		al_delta = ATH_AGGR_DELIM_SZ + bf->bf_state.bfs_pktlen;
898 		if (nframes &&
899 		    (aggr_limit < (al + bpad + al_delta + prev_al))) {
900 			status = ATH_AGGR_LIMITED;
901 			break;
902 		}
903 
904 		/*
905 		 * If RTS/CTS is set on the first frame, enforce
906 		 * the RTS aggregate limit.
907 		 */
908 		if (bf_first->bf_state.bfs_txflags &
909 		    (HAL_TXDESC_CTSENA | HAL_TXDESC_RTSENA)) {
910 			if (nframes &&
911 			   (sc->sc_rts_aggr_limit <
912 			     (al + bpad + al_delta + prev_al))) {
913 				status = ATH_AGGR_8K_LIMITED;
914 				break;
915 			}
916 		}
917 
918 		/*
919 		 * Do not exceed subframe limit.
920 		 */
921 		if ((nframes + prev_frames) >= MIN((h_baw),
922 		    IEEE80211_AMPDU_SUBFRAME_DEFAULT)) {
923 			status = ATH_AGGR_LIMITED;
924 			break;
925 		}
926 
927 		/*
928 		 * If the current frame has an RTS/CTS configuration
929 		 * that differs from the first frame, override the
930 		 * subsequent frame with this config.
931 		 */
932 		if (bf != bf_first) {
933 			bf->bf_state.bfs_txflags &=
934 			    ~ (HAL_TXDESC_RTSENA | HAL_TXDESC_CTSENA);
935 			bf->bf_state.bfs_txflags |=
936 			    bf_first->bf_state.bfs_txflags &
937 			    (HAL_TXDESC_RTSENA | HAL_TXDESC_CTSENA);
938 		}
939 
940 		/*
941 		 * If the packet has a sequence number, do not
942 		 * step outside of the block-ack window.
943 		 */
944 		if (! BAW_WITHIN(tap->txa_start, tap->txa_wnd,
945 		    SEQNO(bf->bf_state.bfs_seqno))) {
946 			status = ATH_AGGR_BAW_CLOSED;
947 			break;
948 		}
949 
950 		/*
951 		 * this packet is part of an aggregate.
952 		 */
953 		ATH_TID_REMOVE(tid, bf, bf_list);
954 
955 		/* The TID lock is required for the BAW update */
956 		ath_tx_addto_baw(sc, an, tid, bf);
957 		bf->bf_state.bfs_addedbaw = 1;
958 
959 		/*
960 		 * XXX enforce ACK for aggregate frames (this needs to be
961 		 * XXX handled more gracefully?
962 		 */
963 		if (bf->bf_state.bfs_txflags & HAL_TXDESC_NOACK) {
964 			device_printf(sc->sc_dev,
965 			    "%s: HAL_TXDESC_NOACK set for an aggregate frame?\n",
966 			    __func__);
967 			bf->bf_state.bfs_txflags &= (~HAL_TXDESC_NOACK);
968 		}
969 
970 		/*
971 		 * Add the now owned buffer (which isn't
972 		 * on the software TXQ any longer) to our
973 		 * aggregate frame list.
974 		 */
975 		TAILQ_INSERT_TAIL(bf_q, bf, bf_list);
976 		nframes ++;
977 
978 		/* Completion handler */
979 		bf->bf_comp = ath_tx_aggr_comp;
980 
981 		/*
982 		 * add padding for previous frame to aggregation length
983 		 */
984 		al += bpad + al_delta;
985 
986 		/*
987 		 * Calculate delimiters needed for the current frame
988 		 */
989 		bf->bf_state.bfs_ndelim =
990 		    ath_compute_num_delims(sc, bf_first,
991 		    bf->bf_state.bfs_pktlen, (bf_first == bf));
992 
993 		/*
994 		 * Calculate the padding needed from this set of delimiters,
995 		 * used when calculating if the next frame will fit in
996 		 * the aggregate.
997 		 */
998 		bpad = PADBYTES(al_delta) + (bf->bf_state.bfs_ndelim << 2);
999 
1000 		/*
1001 		 * Chain the buffers together
1002 		 */
1003 		if (bf_prev)
1004 			bf_prev->bf_next = bf;
1005 		bf_prev = bf;
1006 
1007 		/*
1008 		 * If we're leaking frames, just return at this point;
1009 		 * we've queued a single frame and we don't want to add
1010 		 * any more.
1011 		 */
1012 		if (tid->an->an_leak_count) {
1013 			status = ATH_AGGR_LEAK_CLOSED;
1014 			break;
1015 		}
1016 
1017 #if 0
1018 		/*
1019 		 * terminate aggregation on a small packet boundary
1020 		 */
1021 		if (bf->bf_state.bfs_pktlen < ATH_AGGR_MINPLEN) {
1022 			status = ATH_AGGR_SHORTPKT;
1023 			break;
1024 		}
1025 #endif
1026 	}
1027 
1028 finish:
1029 	/*
1030 	 * Just in case the list was empty when we tried to
1031 	 * dequeue a packet ..
1032 	 */
1033 	if (bf_first) {
1034 		DPRINTF(sc, ATH_DEBUG_SW_TX_AGGR,
1035 		"%s: al=%d bytes; requested %d bytes\n",
1036 		__func__, al, bf_first->bf_state.bfs_rc_maxpktlen);
1037 
1038 		bf_first->bf_state.bfs_al = al;
1039 		bf_first->bf_state.bfs_nframes = nframes;
1040 	}
1041 	return status;
1042 }
1043