xref: /freebsd/sys/dev/cxgb/cxgb_sge.c (revision eb6d21b4ca6d668cf89afd99eef7baeafa712197)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2009, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 
30 #include <sys/cdefs.h>
31 __FBSDID("$FreeBSD$");
32 
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/kernel.h>
36 #include <sys/module.h>
37 #include <sys/bus.h>
38 #include <sys/conf.h>
39 #include <machine/bus.h>
40 #include <machine/resource.h>
41 #include <sys/bus_dma.h>
42 #include <sys/rman.h>
43 #include <sys/queue.h>
44 #include <sys/sysctl.h>
45 #include <sys/taskqueue.h>
46 
47 #include <sys/proc.h>
48 #include <sys/sbuf.h>
49 #include <sys/sched.h>
50 #include <sys/smp.h>
51 #include <sys/systm.h>
52 #include <sys/syslog.h>
53 
54 #include <net/bpf.h>
55 
56 #include <netinet/in_systm.h>
57 #include <netinet/in.h>
58 #include <netinet/ip.h>
59 #include <netinet/tcp.h>
60 
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 
67 #include <cxgb_include.h>
68 #include <sys/mvec.h>
69 
70 int	txq_fills = 0;
71 int	multiq_tx_enable = 1;
72 
73 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
74 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
75 TUNABLE_INT("hw.cxgb.txq_mr_size", &cxgb_txq_buf_ring_size);
76 SYSCTL_UINT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
77     "size of per-queue mbuf ring");
78 
79 static int cxgb_tx_coalesce_force = 0;
80 TUNABLE_INT("hw.cxgb.tx_coalesce_force", &cxgb_tx_coalesce_force);
81 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RW,
82     &cxgb_tx_coalesce_force, 0,
83     "coalesce small packets into a single work request regardless of ring state");
84 
85 #define	COALESCE_START_DEFAULT		(TX_ETH_Q_SIZE>>1)
86 #define	COALESCE_START_MAX		(TX_ETH_Q_SIZE-(TX_ETH_Q_SIZE>>3))
87 #define	COALESCE_STOP_DEFAULT		(TX_ETH_Q_SIZE>>2)
88 #define	COALESCE_STOP_MIN		(TX_ETH_Q_SIZE>>5)
89 #define	TX_RECLAIM_DEFAULT		(TX_ETH_Q_SIZE>>5)
90 #define	TX_RECLAIM_MAX			(TX_ETH_Q_SIZE>>2)
91 #define	TX_RECLAIM_MIN			(TX_ETH_Q_SIZE>>6)
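
/*
 * For reference: with TX_ETH_Q_SIZE at its usual value of 1024 descriptors,
 * these work out to a coalesce-start default of 512 (capped at 896), a
 * coalesce-stop default of 256 (floor of 32), and a reclaim threshold
 * default of 32 (clamped to the 16..256 range).
 */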
92 
93 
94 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
95 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_start",
96     &cxgb_tx_coalesce_enable_start);
97 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RW,
98     &cxgb_tx_coalesce_enable_start, 0,
99     "coalesce enable threshold");
100 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
101 TUNABLE_INT("hw.cxgb.tx_coalesce_enable_stop", &cxgb_tx_coalesce_enable_stop);
102 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RW,
103     &cxgb_tx_coalesce_enable_stop, 0,
104     "coalesce disable threshold");
105 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
106 TUNABLE_INT("hw.cxgb.tx_reclaim_threshold", &cxgb_tx_reclaim_threshold);
107 SYSCTL_UINT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RW,
108     &cxgb_tx_reclaim_threshold, 0,
109     "tx cleaning minimum threshold");
110 
111 /*
112  * XXX don't re-enable this until TOE stops assuming
113  * we have an m_ext
114  */
115 static int recycle_enable = 0;
116 int cxgb_ext_freed = 0;
117 int cxgb_ext_inited = 0;
118 int fl_q_size = 0;
119 int jumbo_q_size = 0;
120 
121 extern int cxgb_use_16k_clusters;
122 extern int nmbjumbo4;
123 extern int nmbjumbo9;
124 extern int nmbjumbo16;
125 
126 #define USE_GTS 0
127 
128 #define SGE_RX_SM_BUF_SIZE	1536
129 #define SGE_RX_DROP_THRES	16
130 #define SGE_RX_COPY_THRES	128
131 
132 /*
133  * Period of the Tx buffer reclaim timer.  This timer does not need to run
134  * frequently as Tx buffers are usually reclaimed by new Tx packets.
135  */
136 #define TX_RECLAIM_PERIOD       (hz >> 1)
137 
138 /*
139  * Values for sge_txq.flags
140  */
141 enum {
142 	TXQ_RUNNING	= 1 << 0,  /* fetch engine is running */
143 	TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
144 };
145 
146 struct tx_desc {
147 	uint64_t	flit[TX_DESC_FLITS];
148 } __packed;
149 
150 struct rx_desc {
151 	uint32_t	addr_lo;
152 	uint32_t	len_gen;
153 	uint32_t	gen2;
154 	uint32_t	addr_hi;
155 } __packed;
156 
157 struct rsp_desc {               /* response queue descriptor */
158 	struct rss_header	rss_hdr;
159 	uint32_t		flags;
160 	uint32_t		len_cq;
161 	uint8_t			imm_data[47];
162 	uint8_t			intr_gen;
163 } __packed;
164 
165 #define RX_SW_DESC_MAP_CREATED	(1 << 0)
166 #define TX_SW_DESC_MAP_CREATED	(1 << 1)
167 #define RX_SW_DESC_INUSE        (1 << 3)
168 #define TX_SW_DESC_MAPPED       (1 << 4)
169 
170 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
171 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
172 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
173 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
174 
175 struct tx_sw_desc {                /* SW state per Tx descriptor */
176 	struct mbuf	*m;
177 	bus_dmamap_t	map;
178 	int		flags;
179 };
180 
181 struct rx_sw_desc {                /* SW state per Rx descriptor */
182 	caddr_t		rxsd_cl;
183 	struct mbuf	*m;
184 	bus_dmamap_t	map;
185 	int		flags;
186 };
187 
188 struct txq_state {
189 	unsigned int	compl;
190 	unsigned int	gen;
191 	unsigned int	pidx;
192 };
193 
194 struct refill_fl_cb_arg {
195 	int               error;
196 	bus_dma_segment_t seg;
197 	int               nseg;
198 };
199 
200 
201 /*
202  * Maps a number of flits to the number of Tx descriptors that can hold them.
203  * The formula is
204  *
205  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
206  *
207  * HW allows up to 4 descriptors to be combined into a WR.
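 *
 * For example, with SGE_NUM_GENBITS == 2 the table below implies
 * WR_FLITS == 15, so a 16-flit request needs
 * 1 + (16 - 2) / (15 - 1) = 2 descriptors, matching the second group of
 * table entries.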
208  */
209 static uint8_t flit_desc_map[] = {
210 	0,
211 #if SGE_NUM_GENBITS == 1
212 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
213 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
214 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
215 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
216 #elif SGE_NUM_GENBITS == 2
217 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
218 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
219 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
220 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
221 #else
222 # error "SGE_NUM_GENBITS must be 1 or 2"
223 #endif
224 };
225 
226 #define	TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
227 #define	TXQ_TRYLOCK(qs)		mtx_trylock(&(qs)->lock)
228 #define	TXQ_LOCK(qs)		mtx_lock(&(qs)->lock)
229 #define	TXQ_UNLOCK(qs)		mtx_unlock(&(qs)->lock)
230 #define	TXQ_RING_EMPTY(qs)	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
231 #define	TXQ_RING_FLUSH(qs)	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
232 #define	TXQ_RING_DEQUEUE_COND(qs, func, arg)				\
233 	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
234 #define	TXQ_RING_DEQUEUE(qs) \
235 	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
236 
237 int cxgb_debug = 0;
238 
239 static void sge_timer_cb(void *arg);
240 static void sge_timer_reclaim(void *arg, int ncount);
241 static void sge_txq_reclaim_handler(void *arg, int ncount);
242 static void cxgb_start_locked(struct sge_qset *qs);
243 
244 /*
245  * XXX need to cope with bursty scheduling by looking at a wider
246  * window than we are now for determining the need for coalescing
247  *
248  */
249 static __inline uint64_t
250 check_pkt_coalesce(struct sge_qset *qs)
251 {
252 	struct adapter *sc;
253 	struct sge_txq *txq;
254 	uint8_t *fill;
255 
256 	if (__predict_false(cxgb_tx_coalesce_force))
257 		return (1);
258 	txq = &qs->txq[TXQ_ETH];
259 	sc = qs->port->adapter;
260 	fill = &sc->tunq_fill[qs->idx];
261 
262 	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
263 		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
264 	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
265 		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
266 	/*
267 	 * Once the hardware transmit queue fills past the coalesce-enable
268 	 * threshold we mark the queue set as coalescing; we drop back out of
269 	 * coalescing only when in_use falls below the disable threshold and
270 	 * no packets are enqueued, which gives us some degree of hysteresis.
271 	 */
272 	if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
273 	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
274 		*fill = 0;
275 	else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
276 		*fill = 1;
277 
278 	return (sc->tunq_coalesce);
279 }
280 
281 #ifdef __LP64__
282 static void
283 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
284 {
285 	uint64_t wr_hilo;
286 #if _BYTE_ORDER == _LITTLE_ENDIAN
287 	wr_hilo = wr_hi;
288 	wr_hilo |= (((uint64_t)wr_lo)<<32);
289 #else
290 	wr_hilo = wr_lo;
291 	wr_hilo |= (((uint64_t)wr_hi)<<32);
292 #endif
293 	wrp->wrh_hilo = wr_hilo;
294 }
295 #else
296 static void
297 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
298 {
299 
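	/*
	 * Write the halves separately on 32-bit platforms: the generation
	 * bit that validates the WR is carried in wrh_lo (see the callers),
	 * so wrh_hi is stored first and the wmb() keeps the SGE from
	 * observing the new low word before the high word is in place.
	 */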
300 	wrp->wrh_hi = wr_hi;
301 	wmb();
302 	wrp->wrh_lo = wr_lo;
303 }
304 #endif
305 
306 struct coalesce_info {
307 	int count;
308 	int nbytes;
309 };
310 
311 static int
312 coalesce_check(struct mbuf *m, void *arg)
313 {
314 	struct coalesce_info *ci = arg;
315 	int *count = &ci->count;
316 	int *nbytes = &ci->nbytes;
317 
318 	if ((*nbytes == 0) || ((*nbytes + m->m_len <= 10500) &&
319 		(*count < 7) && (m->m_next == NULL))) {
320 		*count += 1;
321 		*nbytes += m->m_len;
322 		return (1);
323 	}
324 	return (0);
325 }
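
/*
 * Note: together with cxgb_dequeue() below, coalesce_check() caps a
 * coalesced chain at seven single-mbuf packets totalling at most 10500
 * bytes, matching the up-to-seven cpl_tx_pkt_batch entries that t3_encap()
 * packs into a single work request.
 */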
326 
327 static struct mbuf *
328 cxgb_dequeue(struct sge_qset *qs)
329 {
330 	struct mbuf *m, *m_head, *m_tail;
331 	struct coalesce_info ci;
332 
334 	if (check_pkt_coalesce(qs) == 0)
335 		return TXQ_RING_DEQUEUE(qs);
336 
337 	m_head = m_tail = NULL;
338 	ci.count = ci.nbytes = 0;
339 	do {
340 		m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
341 		if (m_head == NULL) {
342 			m_tail = m_head = m;
343 		} else if (m != NULL) {
344 			m_tail->m_nextpkt = m;
345 			m_tail = m;
346 		}
347 	} while (m != NULL);
348 	if (ci.count > 7)
349 		panic("trying to coalesce %d packets into one WR", ci.count);
350 	return (m_head);
351 }
352 
353 /**
354  *	reclaim_completed_tx - reclaims completed Tx descriptors
355  *	@qs: the queue set owning the Tx queue to reclaim
356  *	@queue: the index of the Tx queue to reclaim descriptors from
357  *
358  *	Reclaims Tx descriptors that the SGE has indicated it has processed and
359  *	frees the associated buffers if possible, provided at least @reclaim_min
360  *	descriptors are reclaimable.  Called with the Tx queue's lock held.
361  */
362 static __inline int
363 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
364 {
365 	struct sge_txq *q = &qs->txq[queue];
366 	int reclaim = desc_reclaimable(q);
367 
368 	if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
369 	    (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
370 		cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
371 
372 	if (reclaim < reclaim_min)
373 		return (0);
374 
375 	mtx_assert(&qs->lock, MA_OWNED);
376 	if (reclaim > 0) {
377 		t3_free_tx_desc(qs, reclaim, queue);
378 		q->cleaned += reclaim;
379 		q->in_use -= reclaim;
380 	}
381 	if (isset(&qs->txq_stopped, TXQ_ETH))
382                 clrbit(&qs->txq_stopped, TXQ_ETH);
383 
384 	return (reclaim);
385 }
386 
387 /**
388  *	should_restart_tx - are there enough resources to restart a Tx queue?
389  *	@q: the Tx queue
390  *
391  *	Checks if there are enough descriptors to restart a suspended Tx queue.
392  */
393 static __inline int
394 should_restart_tx(const struct sge_txq *q)
395 {
396 	unsigned int r = q->processed - q->cleaned;
397 
398 	return q->in_use - r < (q->size >> 1);
399 }
400 
401 /**
402  *	t3_sge_init - initialize SGE
403  *	@adap: the adapter
404  *	@p: the SGE parameters
405  *
406  *	Performs SGE initialization needed every time after a chip reset.
407  *	We do not initialize any of the queue sets here, instead the driver
408  *	top-level must request those individually.  We also do not enable DMA
409  *	here, that should be done after the queues have been set up.
410  */
411 void
412 t3_sge_init(adapter_t *adap, struct sge_params *p)
413 {
414 	u_int ctrl, ups;
415 
416 	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
417 
418 	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
419 	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
420 	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
421 	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
422 #if SGE_NUM_GENBITS == 1
423 	ctrl |= F_EGRGENCTRL;
424 #endif
425 	if (adap->params.rev > 0) {
426 		if (!(adap->flags & (USING_MSIX | USING_MSI)))
427 			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
428 	}
429 	t3_write_reg(adap, A_SG_CONTROL, ctrl);
430 	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
431 		     V_LORCQDRBTHRSH(512));
432 	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
433 	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
434 		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
435 	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
436 		     adap->params.rev < T3_REV_C ? 1000 : 500);
437 	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
438 	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
439 	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
440 	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
441 	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
442 }
443 
444 
445 /**
446  *	sgl_len - calculates the size of an SGL of the given capacity
447  *	@n: the number of SGL entries
448  *
449  *	Calculates the number of flits needed for a scatter/gather list that
450  *	can hold the given number of entries.
451  */
452 static __inline unsigned int
453 sgl_len(unsigned int n)
454 {
455 	return ((3 * n) / 2 + (n & 1));
456 }
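
/*
 * For example, sgl_len(3) == 5: a three-entry list fills one sg_ent
 * completely (two addresses plus their packed lengths, three flits) and
 * half of a second one (one address plus the shared length word, two more
 * flits).
 */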
457 
458 /**
459  *	get_imm_packet - return the next ingress packet buffer from a response
460  *	@resp: the response descriptor containing the packet data
461  *
462  *	Return a packet containing the immediate data of the given response.
463  */
464 static int
465 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
466 {
467 
468 	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
469 	m->m_ext.ext_buf = NULL;
470 	m->m_ext.ext_type = 0;
471 	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
472 	return (0);
473 }
474 
475 static __inline u_int
476 flits_to_desc(u_int n)
477 {
478 	return (flit_desc_map[n]);
479 }
480 
481 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
482 		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
483 		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
484 		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
485 		    F_HIRCQPARITYERROR)
486 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
487 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
488 		      F_RSPQDISABLED)
489 
490 /**
491  *	t3_sge_err_intr_handler - SGE async event interrupt handler
492  *	@adapter: the adapter
493  *
494  *	Interrupt handler for SGE asynchronous (non-data) events.
495  */
496 void
497 t3_sge_err_intr_handler(adapter_t *adapter)
498 {
499 	unsigned int v, status;
500 
501 	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
502 	if (status & SGE_PARERR)
503 		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
504 			 status & SGE_PARERR);
505 	if (status & SGE_FRAMINGERR)
506 		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
507 			 status & SGE_FRAMINGERR);
508 	if (status & F_RSPQCREDITOVERFOW)
509 		CH_ALERT(adapter, "SGE response queue credit overflow\n");
510 
511 	if (status & F_RSPQDISABLED) {
512 		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
513 
514 		CH_ALERT(adapter,
515 			 "packet delivered to disabled response queue (0x%x)\n",
516 			 (v >> S_RSPQ0DISABLED) & 0xff);
517 	}
518 
519 	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
520 	if (status & SGE_FATALERR)
521 		t3_fatal_err(adapter);
522 }
523 
524 void
525 t3_sge_prep(adapter_t *adap, struct sge_params *p)
526 {
527 	int i, nqsets;
528 
529 	nqsets = min(SGE_QSETS, mp_ncpus*4);
530 
531 	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
532 
533 	while (!powerof2(fl_q_size))
534 		fl_q_size--;
535 #if __FreeBSD_version >= 700111
536 	if (cxgb_use_16k_clusters)
537 		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
538 	else
539 		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
540 #else
541 	jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE);
542 #endif
543 	while (!powerof2(jumbo_q_size))
544 		jumbo_q_size--;
545 
546 	/* XXX Does ETHER_ALIGN need to be accounted for here? */
547 	p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);
548 
549 	for (i = 0; i < SGE_QSETS; ++i) {
550 		struct qset_params *q = p->qset + i;
551 
552 		if (adap->params.nports > 2) {
553 			q->coalesce_usecs = 50;
554 		} else {
555 #ifdef INVARIANTS
556 			q->coalesce_usecs = 10;
557 #else
558 			q->coalesce_usecs = 5;
559 #endif
560 		}
561 		q->polling = 0;
562 		q->rspq_size = RSPQ_Q_SIZE;
563 		q->fl_size = fl_q_size;
564 		q->jumbo_size = jumbo_q_size;
565 		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
566 		q->txq_size[TXQ_OFLD] = 1024;
567 		q->txq_size[TXQ_CTRL] = 256;
568 		q->cong_thres = 0;
569 	}
570 }
571 
572 int
573 t3_sge_alloc(adapter_t *sc)
574 {
575 
576 	/* The parent tag. */
577 	if (bus_dma_tag_create( NULL,			/* parent */
578 				1, 0,			/* algnmnt, boundary */
579 				BUS_SPACE_MAXADDR,	/* lowaddr */
580 				BUS_SPACE_MAXADDR,	/* highaddr */
581 				NULL, NULL,		/* filter, filterarg */
582 				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
583 				BUS_SPACE_UNRESTRICTED, /* nsegments */
584 				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
585 				0,			/* flags */
586 				NULL, NULL,		/* lock, lockarg */
587 				&sc->parent_dmat)) {
588 		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
589 		return (ENOMEM);
590 	}
591 
592 	/*
593 	 * DMA tag for normal sized RX frames
594 	 */
595 	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
596 		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
597 		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
598 		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
599 		return (ENOMEM);
600 	}
601 
602 	/*
603 	 * DMA tag for jumbo sized RX frames.
604 	 */
605 	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
606 		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
607 		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
608 		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
609 		return (ENOMEM);
610 	}
611 
612 	/*
613 	 * DMA tag for TX frames.
614 	 */
615 	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
616 		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
617 		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
618 		NULL, NULL, &sc->tx_dmat)) {
619 		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
620 		return (ENOMEM);
621 	}
622 
623 	return (0);
624 }
625 
626 int
627 t3_sge_free(struct adapter * sc)
628 {
629 
630 	if (sc->tx_dmat != NULL)
631 		bus_dma_tag_destroy(sc->tx_dmat);
632 
633 	if (sc->rx_jumbo_dmat != NULL)
634 		bus_dma_tag_destroy(sc->rx_jumbo_dmat);
635 
636 	if (sc->rx_dmat != NULL)
637 		bus_dma_tag_destroy(sc->rx_dmat);
638 
639 	if (sc->parent_dmat != NULL)
640 		bus_dma_tag_destroy(sc->parent_dmat);
641 
642 	return (0);
643 }
644 
645 void
646 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
647 {
648 
649 	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
650 	qs->rspq.polling = 0 /* p->polling */;
651 }
652 
653 #if !defined(__i386__) && !defined(__amd64__)
654 static void
655 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
656 {
657 	struct refill_fl_cb_arg *cb_arg = arg;
658 
659 	cb_arg->error = error;
660 	cb_arg->seg = segs[0];
661 	cb_arg->nseg = nseg;
662 
663 }
664 #endif
665 /**
666  *	refill_fl - refill an SGE free-buffer list
667  *	@sc: the controller softc
668  *	@q: the free-list to refill
669  *	@n: the number of new buffers to allocate
670  *
671  *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
672  *	The caller must assure that @n does not exceed the queue's capacity.
673  */
674 static void
675 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
676 {
677 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
678 	struct rx_desc *d = &q->desc[q->pidx];
679 	struct refill_fl_cb_arg cb_arg;
680 	struct mbuf *m;
681 	caddr_t cl;
682 	int err, count = 0;
683 
684 	cb_arg.error = 0;
685 	while (n--) {
686 		/*
687 		 * We only allocate a cluster, mbuf allocation happens after rx
688 		 */
689 		if (q->zone == zone_pack) {
690 			if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
691 				break;
692 			cl = m->m_ext.ext_buf;
693 		} else {
694 			if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
695 				break;
696 			if ((m = m_gethdr(M_NOWAIT, MT_NOINIT)) == NULL) {
697 				uma_zfree(q->zone, cl);
698 				break;
699 			}
700 		}
701 		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
702 			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
703 				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
704 				uma_zfree(q->zone, cl);
705 				goto done;
706 			}
707 			sd->flags |= RX_SW_DESC_MAP_CREATED;
708 		}
709 #if !defined(__i386__) && !defined(__amd64__)
710 		err = bus_dmamap_load(q->entry_tag, sd->map,
711 		    cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
712 
713 		if (err != 0 || cb_arg.error) {
714 			if (q->zone == zone_pack)
715 				uma_zfree(q->zone, cl);
716 			m_free(m);
717 			goto done;
718 		}
719 #else
720 		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
721 #endif
722 		sd->flags |= RX_SW_DESC_INUSE;
723 		sd->rxsd_cl = cl;
724 		sd->m = m;
725 		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
726 		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
727 		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
728 		d->gen2 = htobe32(V_FLD_GEN2(q->gen));
729 
730 		d++;
731 		sd++;
732 
733 		if (++q->pidx == q->size) {
734 			q->pidx = 0;
735 			q->gen ^= 1;
736 			sd = q->sdesc;
737 			d = q->desc;
738 		}
739 		q->credits++;
740 		count++;
741 	}
742 
743 done:
744 	if (count)
745 		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
746 }
747 
748 
749 /**
750  *	free_rx_bufs - free the Rx buffers on an SGE free list
751  *	@sc: the controle softc
752  *	@sc: the controller softc
753  *
754  *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
755  *	this queue should be stopped before calling this function.
756  */
757 static void
758 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
759 {
760 	u_int cidx = q->cidx;
761 
762 	while (q->credits--) {
763 		struct rx_sw_desc *d = &q->sdesc[cidx];
764 
765 		if (d->flags & RX_SW_DESC_INUSE) {
766 			bus_dmamap_unload(q->entry_tag, d->map);
767 			bus_dmamap_destroy(q->entry_tag, d->map);
768 			if (q->zone == zone_pack) {
769 				m_init(d->m, zone_pack, MCLBYTES,
770 				    M_NOWAIT, MT_DATA, M_EXT);
771 				uma_zfree(zone_pack, d->m);
772 			} else {
773 				m_init(d->m, zone_mbuf, MLEN,
774 				    M_NOWAIT, MT_DATA, 0);
775 				uma_zfree(zone_mbuf, d->m);
776 				uma_zfree(q->zone, d->rxsd_cl);
777 			}
778 		}
779 
780 		d->rxsd_cl = NULL;
781 		d->m = NULL;
782 		if (++cidx == q->size)
783 			cidx = 0;
784 	}
785 }
786 
787 static __inline void
788 __refill_fl(adapter_t *adap, struct sge_fl *fl)
789 {
790 	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
791 }
792 
793 static __inline void
794 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
795 {
796 	if ((fl->size - fl->credits) < max)
797 		refill_fl(adap, fl, min(max, fl->size - fl->credits));
798 }
799 
800 /**
801  *	recycle_rx_buf - recycle a receive buffer
802  *	@adapter: the adapter
803  *	@q: the SGE free list
804  *	@idx: index of buffer to recycle
805  *
806  *	Recycles the specified buffer on the given free list by adding it at
807  *	the next available slot on the list.
808  */
809 static void
810 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
811 {
812 	struct rx_desc *from = &q->desc[idx];
813 	struct rx_desc *to   = &q->desc[q->pidx];
814 
815 	q->sdesc[q->pidx] = q->sdesc[idx];
816 	to->addr_lo = from->addr_lo;        /* already big endian */
817 	to->addr_hi = from->addr_hi;        /* likewise */
818 	wmb();	/* necessary ? */
819 	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
820 	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
821 	q->credits++;
822 
823 	if (++q->pidx == q->size) {
824 		q->pidx = 0;
825 		q->gen ^= 1;
826 	}
827 	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
828 }
829 
830 static void
831 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
832 {
833 	uint32_t *addr;
834 
835 	addr = arg;
836 	*addr = segs[0].ds_addr;
837 }
838 
839 static int
840 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
841     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
842     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
843 {
844 	size_t len = nelem * elem_size;
845 	void *s = NULL;
846 	void *p = NULL;
847 	int err;
848 
849 	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
850 				      BUS_SPACE_MAXADDR_32BIT,
851 				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
852 				      len, 0, NULL, NULL, tag)) != 0) {
853 		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
854 		return (ENOMEM);
855 	}
856 
857 	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
858 				    map)) != 0) {
859 		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
860 		return (ENOMEM);
861 	}
862 
863 	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
864 	bzero(p, len);
865 	*(void **)desc = p;
866 
867 	if (sw_size) {
868 		len = nelem * sw_size;
869 		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
870 		*(void **)sdesc = s;
871 	}
872 	if (parent_entry_tag == NULL)
873 		return (0);
874 
875 	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
876 				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
877 		                      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
878 				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
879 		                      NULL, NULL, entry_tag)) != 0) {
880 		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
881 		return (ENOMEM);
882 	}
883 	return (0);
884 }
885 
886 static void
887 sge_slow_intr_handler(void *arg, int ncount)
888 {
889 	adapter_t *sc = arg;
890 
891 	t3_slow_intr_handler(sc);
892 }
893 
894 /**
895  *	sge_timer_cb - perform periodic maintenance of an SGE qset
896  *	@data: the SGE queue set to maintain
897  *
898  *	Runs periodically from a timer to perform maintenance of an SGE queue
899  *	set.  It performs the following tasks:
900  *
901  *	a) Cleans up any completed Tx descriptors that may still be pending.
902  *	Normal descriptor cleanup happens when new packets are added to a Tx
903  *	queue so this timer is relatively infrequent and does any cleanup only
904  *	if the Tx queue has not seen any new packets in a while.  We make a
905  *	best effort attempt to reclaim descriptors, in that we don't wait
906  *	around if we cannot get a queue's lock (which most likely is because
907  *	someone else is queueing new packets and so will also handle the clean
908  *	up).  Since control queues use immediate data exclusively we don't
909  *	bother cleaning them up here.
910  *
911  *	b) Replenishes Rx queues that have run out due to memory shortage.
912  *	Normally new Rx buffers are added when existing ones are consumed but
913  *	when out of memory a queue can become empty.  We try to add only a few
914  *	buffers here, the queue will be replenished fully as these new buffers
915  *	buffers here; the queue will be replenished fully as these new buffers
916  *
917  *	c) Return coalesced response queue credits in case a response queue is
918  *	starved.
919  *
920  *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
921  *	fifo overflows and the FW doesn't implement any recovery scheme yet.
922  */
923 static void
924 sge_timer_cb(void *arg)
925 {
926 	adapter_t *sc = arg;
927 	if ((sc->flags & USING_MSIX) == 0) {
928 
929 		struct port_info *pi;
930 		struct sge_qset *qs;
931 		struct sge_txq  *txq;
932 		int i, j;
933 		int reclaim_ofl, refill_rx;
934 
935 		if (sc->open_device_map == 0)
936 			return;
937 
938 		for (i = 0; i < sc->params.nports; i++) {
939 			pi = &sc->port[i];
940 			for (j = 0; j < pi->nqsets; j++) {
941 				qs = &sc->sge.qs[pi->first_qset + j];
942 				txq = &qs->txq[0];
943 				reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
944 				refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
945 				    (qs->fl[1].credits < qs->fl[1].size));
946 				if (reclaim_ofl || refill_rx) {
947 					taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
948 					break;
949 				}
950 			}
951 		}
952 	}
953 
954 	if (sc->params.nports > 2) {
955 		int i;
956 
957 		for_each_port(sc, i) {
958 			struct port_info *pi = &sc->port[i];
959 
960 			t3_write_reg(sc, A_SG_KDOORBELL,
961 				     F_SELEGRCNTX |
962 				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
963 		}
964 	}
965 	if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
966 	    sc->open_device_map != 0)
967 		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
968 }
969 
970 /*
971  * This is meant to be a catch-all function to keep sge state private
972  * to sge.c.
973  */
975 int
976 t3_sge_init_adapter(adapter_t *sc)
977 {
978 	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
979 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
980 	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
981 	return (0);
982 }
983 
984 int
985 t3_sge_reset_adapter(adapter_t *sc)
986 {
987 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
988 	return (0);
989 }
990 
991 int
992 t3_sge_init_port(struct port_info *pi)
993 {
994 	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
995 	return (0);
996 }
997 
998 /**
999  *	refill_rspq - replenish an SGE response queue
1000  *	@adapter: the adapter
1001  *	@q: the response queue to replenish
1002  *	@credits: how many new responses to make available
1003  *
1004  *	Replenishes a response queue by making the supplied number of responses
1005  *	available to HW.
1006  */
1007 static __inline void
1008 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
1009 {
1010 
1011 	/* mbufs are allocated on demand when a rspq entry is processed. */
1012 	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
1013 		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
1014 }
1015 
1016 static void
1017 sge_txq_reclaim_handler(void *arg, int ncount)
1018 {
1019 	struct sge_qset *qs = arg;
1020 	int i;
1021 
1022 	for (i = 0; i < 3; i++)
1023 		reclaim_completed_tx(qs, 16, i);
1024 }
1025 
1026 static void
1027 sge_timer_reclaim(void *arg, int ncount)
1028 {
1029 	struct port_info *pi = arg;
1030 	int i, nqsets = pi->nqsets;
1031 	adapter_t *sc = pi->adapter;
1032 	struct sge_qset *qs;
1033 	struct mtx *lock;
1034 
1035 	KASSERT((sc->flags & USING_MSIX) == 0,
1036 	    ("can't call timer reclaim for msi-x"));
1037 
1038 	for (i = 0; i < nqsets; i++) {
1039 		qs = &sc->sge.qs[pi->first_qset + i];
1040 
1041 		reclaim_completed_tx(qs, 16, TXQ_OFLD);
1042 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
1043 			    &sc->sge.qs[0].rspq.lock;
1044 
1045 		if (mtx_trylock(lock)) {
1046 			/* XXX currently assume that we are *NOT* polling */
1047 			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
1048 
1049 			if (qs->fl[0].credits < qs->fl[0].size - 16)
1050 				__refill_fl(sc, &qs->fl[0]);
1051 			if (qs->fl[1].credits < qs->fl[1].size - 16)
1052 				__refill_fl(sc, &qs->fl[1]);
1053 
1054 			if (status & (1 << qs->rspq.cntxt_id)) {
1055 				if (qs->rspq.credits) {
1056 					refill_rspq(sc, &qs->rspq, 1);
1057 					qs->rspq.credits--;
1058 					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
1059 					    1 << qs->rspq.cntxt_id);
1060 				}
1061 			}
1062 			mtx_unlock(lock);
1063 		}
1064 	}
1065 }
1066 
1067 /**
1068  *	init_qset_cntxt - initialize an SGE queue set context info
1069  *	@qs: the queue set
1070  *	@id: the queue set id
1071  *
1072  *	Initializes the TIDs and context ids for the queues of a queue set.
1073  */
1074 static void
1075 init_qset_cntxt(struct sge_qset *qs, u_int id)
1076 {
1077 
1078 	qs->rspq.cntxt_id = id;
1079 	qs->fl[0].cntxt_id = 2 * id;
1080 	qs->fl[1].cntxt_id = 2 * id + 1;
1081 	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
1082 	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
1083 	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
1084 	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
1085 	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
1086 
1087 	mbufq_init(&qs->txq[TXQ_ETH].sendq);
1088 	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
1089 	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
1090 }
1091 
1092 
1093 static void
1094 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
1095 {
1096 	txq->in_use += ndesc;
1097 	/*
1098 	 * XXX we don't handle stopping of queue;
1099 	 * presumably start handles this when we bump against the end
1100 	 */
1101 	txqs->gen = txq->gen;
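	/*
	 * Request a hardware completion (F_WR_COMPL) roughly once every 32
	 * descriptors: when the running unacked count carries into bit 5,
	 * that bit is shifted up into the WR_COMPL position and unacked is
	 * then folded back below 32.
	 */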
1102 	txq->unacked += ndesc;
1103 	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
1104 	txq->unacked &= 31;
1105 	txqs->pidx = txq->pidx;
1106 	txq->pidx += ndesc;
1107 #ifdef INVARIANTS
1108 	if (((txqs->pidx > txq->cidx) &&
1109 		(txq->pidx < txqs->pidx) &&
1110 		(txq->pidx >= txq->cidx)) ||
1111 	    ((txqs->pidx < txq->cidx) &&
1112 		(txq->pidx >= txq->cidx)) ||
1113 	    ((txqs->pidx < txq->cidx) &&
1114 		(txq->cidx < txqs->pidx)))
1115 		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
1116 		    txqs->pidx, txq->pidx, txq->cidx);
1117 #endif
1118 	if (txq->pidx >= txq->size) {
1119 		txq->pidx -= txq->size;
1120 		txq->gen ^= 1;
1121 	}
1122 
1123 }
1124 
1125 /**
1126  *	calc_tx_descs - calculate the number of Tx descriptors for a packet
1127  *	@m: the packet mbufs
1128  *      @nsegs: the number of segments
1129  *
1130  * 	Returns the number of Tx descriptors needed for the given Ethernet
1131  * 	packet.  Ethernet packets require addition of WR and CPL headers.
1132  */
1133 static __inline unsigned int
1134 calc_tx_descs(const struct mbuf *m, int nsegs)
1135 {
1136 	unsigned int flits;
1137 
1138 	if (m->m_pkthdr.len <= PIO_LEN)
1139 		return 1;
1140 
1141 	flits = sgl_len(nsegs) + 2;
1142 #ifdef TSO_SUPPORTED
1143 	if (m->m_pkthdr.csum_flags & CSUM_TSO)
1144 		flits++;
1145 #endif
1146 	return flits_to_desc(flits);
1147 }
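
/*
 * For example, a packet larger than PIO_LEN that maps to three DMA segments
 * needs sgl_len(3) + 2 = 7 flits (the combined WR/CPL_TX_PKT header occupies
 * the two extra flits), which flits_to_desc() folds into a single Tx
 * descriptor.
 */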
1148 
1149 static unsigned int
1150 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
1151     struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
1152 {
1153 	struct mbuf *m0;
1154 	int err, pktlen, pass = 0;
1155 	bus_dma_tag_t tag = txq->entry_tag;
1156 
1157 retry:
1158 	err = 0;
1159 	m0 = *m;
1160 	pktlen = m0->m_pkthdr.len;
1161 #if defined(__i386__) || defined(__amd64__)
1162 	if (busdma_map_sg_collapse(tag, txsd->map, m, segs, nsegs) == 0) {
1163 		goto done;
1164 	} else
1165 #endif
1166 		err = bus_dmamap_load_mbuf_sg(tag, txsd->map, m0, segs, nsegs, 0);
1167 
1168 	if (err == 0) {
1169 		goto done;
1170 	}
1171 	if (err == EFBIG && pass == 0) {
1172 		pass = 1;
1173 		/* Too many segments, try to defrag */
1174 		m0 = m_defrag(m0, M_DONTWAIT);
1175 		if (m0 == NULL) {
1176 			m_freem(*m);
1177 			*m = NULL;
1178 			return (ENOBUFS);
1179 		}
1180 		*m = m0;
1181 		goto retry;
1182 	} else if (err == ENOMEM) {
1183 		return (err);
1184 	} else if (err) {
1185 		if (cxgb_debug)
1186 			printf("map failure err=%d pktlen=%d\n", err, pktlen);
1187 		m_freem(m0);
1188 		*m = NULL;
1189 		return (err);
1190 	}
1191 done:
1192 #if !defined(__i386__) && !defined(__amd64__)
1193 	bus_dmamap_sync(tag, txsd->map, BUS_DMASYNC_PREWRITE);
1194 #endif
1195 	txsd->flags |= TX_SW_DESC_MAPPED;
1196 
1197 	return (0);
1198 }
1199 
1200 /**
1201  *	make_sgl - populate a scatter/gather list for a packet
1202  *	@sgp: the SGL to populate
1203  *	@segs: the packet dma segments
1204  *	@nsegs: the number of segments
1205  *
1206  *	Generates a scatter/gather list for the buffers that make up a packet
1207  *	and writes it at @sgp.  The SGL occupies sgl_len(@nsegs) 8-byte words;
1208  *	the caller must size the destination SGL appropriately.
1209  */
1210 static __inline void
1211 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1212 {
1213 	int i, idx;
1214 
1215 	for (idx = 0, i = 0; i < nsegs; i++) {
1216 		/*
1217 		 * firmware doesn't like empty segments
1218 		 */
1219 		if (segs[i].ds_len == 0)
1220 			continue;
1221 		if (i && idx == 0)
1222 			++sgp;
1223 
1224 		sgp->len[idx] = htobe32(segs[i].ds_len);
1225 		sgp->addr[idx] = htobe64(segs[i].ds_addr);
1226 		idx ^= 1;
1227 	}
1228 
1229 	if (idx) {
1230 		sgp->len[idx] = 0;
1231 		sgp->addr[idx] = 0;
1232 	}
1233 }
1234 
1235 /**
1236  *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1237  *	@adap: the adapter
1238  *	@q: the Tx queue
1239  *
1240  *	Ring the doorbell if a Tx queue is asleep.  There is a natural race
1241  *	where the HW may go to sleep just after we checked; if that happens,
1242  *	the interrupt handler will detect the outstanding TX packet and ring
1243  *	the doorbell for us.
1244  *
1245  *	When GTS is disabled we unconditionally ring the doorbell.
1246  */
1247 static __inline void
1248 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1249 {
1250 #if USE_GTS
1251 	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1252 	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1253 		set_bit(TXQ_LAST_PKT_DB, &q->flags);
1254 #ifdef T3_TRACE
1255 		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1256 			  q->cntxt_id);
1257 #endif
1258 		t3_write_reg(adap, A_SG_KDOORBELL,
1259 			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1260 	}
1261 #else
1262 	wmb();            /* write descriptors before telling HW */
1263 	t3_write_reg(adap, A_SG_KDOORBELL,
1264 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1265 #endif
1266 }
1267 
1268 static __inline void
1269 wr_gen2(struct tx_desc *d, unsigned int gen)
1270 {
1271 #if SGE_NUM_GENBITS == 2
1272 	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1273 #endif
1274 }
1275 
1276 /**
1277  *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
1278  *	@ndesc: number of Tx descriptors spanned by the SGL
1279  *	@txd: first Tx descriptor to be written
1280  *	@txqs: txq state (generation and producer index)
1281  *	@txq: the SGE Tx queue
1282  *	@sgl: the SGL
1283  *	@flits: number of flits to the start of the SGL in the first descriptor
1284  *	@sgl_flits: the SGL size in flits
1285  *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
1286  *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
1287  *
1288  *	Write a work request header and an associated SGL.  If the SGL is
1289  *	small enough to fit into one Tx descriptor it has already been written
1290  *	and we just need to write the WR header.  Otherwise we distribute the
1291  *	SGL across the number of descriptors it spans.
1292  */
1293 static void
1294 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1295     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1296     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1297 {
1298 
1299 	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1300 	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1301 
1302 	if (__predict_true(ndesc == 1)) {
1303 		set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1304 			V_WR_SGLSFLT(flits)) | wr_hi,
1305 		    htonl(V_WR_LEN(flits + sgl_flits) |
1306 			V_WR_GEN(txqs->gen)) | wr_lo);
1307 		/* XXX gen? */
1308 		wr_gen2(txd, txqs->gen);
1309 
1310 	} else {
1311 		unsigned int ogen = txqs->gen;
1312 		const uint64_t *fp = (const uint64_t *)sgl;
1313 		struct work_request_hdr *wp = wrp;
1314 
1315 		wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1316 		    V_WR_SGLSFLT(flits)) | wr_hi;
1317 
1318 		while (sgl_flits) {
1319 			unsigned int avail = WR_FLITS - flits;
1320 
1321 			if (avail > sgl_flits)
1322 				avail = sgl_flits;
1323 			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1324 			sgl_flits -= avail;
1325 			ndesc--;
1326 			if (!sgl_flits)
1327 				break;
1328 
1329 			fp += avail;
1330 			txd++;
1331 			txsd++;
1332 			if (++txqs->pidx == txq->size) {
1333 				txqs->pidx = 0;
1334 				txqs->gen ^= 1;
1335 				txd = txq->desc;
1336 				txsd = txq->sdesc;
1337 			}
1338 
1339 			/*
1340 			 * when the head of the mbuf chain
1341 			 * is freed all clusters will be freed
1342 			 * with it
1343 			 */
1344 			wrp = (struct work_request_hdr *)txd;
1345 			wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
1346 			    V_WR_SGLSFLT(1)) | wr_hi;
1347 			wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
1348 				    sgl_flits + 1)) |
1349 			    V_WR_GEN(txqs->gen)) | wr_lo;
1350 			wr_gen2(txd, txqs->gen);
1351 			flits = 1;
1352 		}
1353 		wrp->wrh_hi |= htonl(F_WR_EOP);
1354 		wmb();
1355 		wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1356 		wr_gen2((struct tx_desc *)wp, ogen);
1357 	}
1358 }
1359 
1360 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1361 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
1362 
1363 #ifdef VLAN_SUPPORTED
1364 #define GET_VTAG(cntrl, m) \
1365 do { \
1366 	if ((m)->m_flags & M_VLANTAG)					            \
1367 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1368 } while (0)
1369 
1370 #else
1371 #define GET_VTAG(cntrl, m)
1372 #endif
1373 
1374 static int
1375 t3_encap(struct sge_qset *qs, struct mbuf **m)
1376 {
1377 	adapter_t *sc;
1378 	struct mbuf *m0;
1379 	struct sge_txq *txq;
1380 	struct txq_state txqs;
1381 	struct port_info *pi;
1382 	unsigned int ndesc, flits, cntrl, mlen;
1383 	int err, nsegs, tso_info = 0;
1384 
1385 	struct work_request_hdr *wrp;
1386 	struct tx_sw_desc *txsd;
1387 	struct sg_ent *sgp, *sgl;
1388 	uint32_t wr_hi, wr_lo, sgl_flits;
1389 	bus_dma_segment_t segs[TX_MAX_SEGS];
1390 
1391 	struct tx_desc *txd;
1392 
1393 	pi = qs->port;
1394 	sc = pi->adapter;
1395 	txq = &qs->txq[TXQ_ETH];
1396 	txd = &txq->desc[txq->pidx];
1397 	txsd = &txq->sdesc[txq->pidx];
1398 	sgl = txq->txq_sgl;
1399 
1400 	prefetch(txd);
1401 	m0 = *m;
1402 
1403 	DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
1404 	DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);
1405 
1406 	mtx_assert(&qs->lock, MA_OWNED);
1407 	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1408 	KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
1409 
1410 #ifdef VLAN_SUPPORTED
1411 	if (m0->m_nextpkt == NULL && m0->m_next != NULL &&
1412 	    m0->m_pkthdr.csum_flags & (CSUM_TSO))
1413 		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1414 #endif
1415 	if (m0->m_nextpkt != NULL) {
1416 		busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
1417 		ndesc = 1;
1418 		mlen = 0;
1419 	} else {
1420 		if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
1421 		    &m0, segs, &nsegs))) {
1422 			if (cxgb_debug)
1423 				printf("failed ... err=%d\n", err);
1424 			return (err);
1425 		}
1426 		mlen = m0->m_pkthdr.len;
1427 		ndesc = calc_tx_descs(m0, nsegs);
1428 	}
1429 	txq_prod(txq, ndesc, &txqs);
1430 
1431 	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
1432 	txsd->m = m0;
1433 
1434 	if (m0->m_nextpkt != NULL) {
1435 		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1436 		int i, fidx;
1437 
1438 		if (nsegs > 7)
1439 			panic("trying to coalesce %d packets into one WR", nsegs);
1440 		txq->txq_coalesced += nsegs;
1441 		wrp = (struct work_request_hdr *)txd;
1442 		flits = nsegs*2 + 1;
1443 
1444 		for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
1445 			struct cpl_tx_pkt_batch_entry *cbe;
1446 			uint64_t flit;
1447 			uint32_t *hflit = (uint32_t *)&flit;
1448 			int cflags = m0->m_pkthdr.csum_flags;
1449 
1450 			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1451 			GET_VTAG(cntrl, m0);
1452 			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1453 			if (__predict_false(!(cflags & CSUM_IP)))
1454 				cntrl |= F_TXPKT_IPCSUM_DIS;
1455 			if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP))))
1456 				cntrl |= F_TXPKT_L4CSUM_DIS;
1457 
1458 			hflit[0] = htonl(cntrl);
1459 			hflit[1] = htonl(segs[i].ds_len | 0x80000000);
1460 			flit |= htobe64(1 << 24);
1461 			cbe = &cpl_batch->pkt_entry[i];
1462 			cbe->cntrl = hflit[0];
1463 			cbe->len = hflit[1];
1464 			cbe->addr = htobe64(segs[i].ds_addr);
1465 		}
1466 
1467 		wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1468 		    V_WR_SGLSFLT(flits)) |
1469 		    htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1470 		wr_lo = htonl(V_WR_LEN(flits) |
1471 		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1472 		set_wr_hdr(wrp, wr_hi, wr_lo);
1473 		wmb();
1474 		wr_gen2(txd, txqs.gen);
1475 		check_ring_tx_db(sc, txq);
1476 		return (0);
1477 	} else if (tso_info) {
1478 		int min_size = TCPPKTHDRSIZE, eth_type, tagged;
1479 		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1480 		struct ip *ip;
1481 		struct tcphdr *tcp;
1482 		char *pkthdr;
1483 
1484 		txd->flit[2] = 0;
1485 		GET_VTAG(cntrl, m0);
1486 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1487 		hdr->cntrl = htonl(cntrl);
1488 		hdr->len = htonl(mlen | 0x80000000);
1489 
1490 		DPRINTF("tso buf len=%d\n", mlen);
1491 
1492 		tagged = m0->m_flags & M_VLANTAG;
1493 		if (!tagged)
1494 			min_size -= ETHER_VLAN_ENCAP_LEN;
1495 
1496 		if (__predict_false(mlen < min_size)) {
1497 			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1498 			    m0, mlen, m0->m_pkthdr.tso_segsz,
1499 			    m0->m_pkthdr.csum_flags, m0->m_flags);
1500 			panic("tx tso packet too small");
1501 		}
1502 
1503 		/* Make sure that ether, ip, tcp headers are all in m0 */
1504 		if (__predict_false(m0->m_len < min_size)) {
1505 			m0 = m_pullup(m0, min_size);
1506 			if (__predict_false(m0 == NULL)) {
1507 				/* XXX panic probably an overreaction */
1508 				panic("couldn't fit header into mbuf");
1509 			}
1510 		}
1511 		pkthdr = m0->m_data;
1512 
1513 		if (tagged) {
1514 			eth_type = CPL_ETH_II_VLAN;
1515 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1516 			    ETHER_VLAN_ENCAP_LEN);
1517 		} else {
1518 			eth_type = CPL_ETH_II;
1519 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1520 		}
1521 		tcp = (struct tcphdr *)((uint8_t *)ip +
1522 		    sizeof(*ip));
1523 
1524 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
1525 			    V_LSO_IPHDR_WORDS(ip->ip_hl) |
1526 			    V_LSO_TCPHDR_WORDS(tcp->th_off);
1527 		hdr->lso_info = htonl(tso_info);
1528 
1529 		if (__predict_false(mlen <= PIO_LEN)) {
1530 			/* pkt not undersized but fits in PIO_LEN
1531 			 * Indicates a TSO bug at the higher levels.
1532 			 *
1533 			 */
1534 			DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1535 			    m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
1536 			txsd->m = NULL;
1537 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1538 			flits = (mlen + 7) / 8 + 3;
1539 			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1540 					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1541 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1542 			wr_lo = htonl(V_WR_LEN(flits) |
1543 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1544 			set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
1545 			wmb();
1546 			wr_gen2(txd, txqs.gen);
1547 			check_ring_tx_db(sc, txq);
1548 			return (0);
1549 		}
1550 		flits = 3;
1551 	} else {
1552 		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1553 
1554 		GET_VTAG(cntrl, m0);
1555 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1556 		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1557 			cntrl |= F_TXPKT_IPCSUM_DIS;
1558 		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1559 			cntrl |= F_TXPKT_L4CSUM_DIS;
1560 		cpl->cntrl = htonl(cntrl);
1561 		cpl->len = htonl(mlen | 0x80000000);
1562 
1563 		if (mlen <= PIO_LEN) {
1564 			txsd->m = NULL;
1565 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1566 			flits = (mlen + 7) / 8 + 2;
1567 
1568 			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1569 			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1570 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1571 			wr_lo = htonl(V_WR_LEN(flits) |
1572 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1573 			set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
1574 			wmb();
1575 			wr_gen2(txd, txqs.gen);
1576 			check_ring_tx_db(sc, txq);
1577 			return (0);
1578 		}
1579 		flits = 2;
1580 	}
1581 	wrp = (struct work_request_hdr *)txd;
1582 	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1583 	make_sgl(sgp, segs, nsegs);
1584 
1585 	sgl_flits = sgl_len(nsegs);
1586 
1587 	KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
1588 	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1589 	wr_lo = htonl(V_WR_TID(txq->token));
1590 	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
1591 	    sgl_flits, wr_hi, wr_lo);
1592 	check_ring_tx_db(pi->adapter, txq);
1593 
1594 	return (0);
1595 }
1596 
1597 void
1598 cxgb_tx_watchdog(void *arg)
1599 {
1600 	struct sge_qset *qs = arg;
1601 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1602 
1603         if (qs->coalescing != 0 &&
1604 	    (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
1605 	    TXQ_RING_EMPTY(qs))
1606                 qs->coalescing = 0;
1607         else if (qs->coalescing == 0 &&
1608 	    (txq->in_use >= cxgb_tx_coalesce_enable_start))
1609                 qs->coalescing = 1;
1610 	if (TXQ_TRYLOCK(qs)) {
1611 		qs->qs_flags |= QS_FLUSHING;
1612 		cxgb_start_locked(qs);
1613 		qs->qs_flags &= ~QS_FLUSHING;
1614 		TXQ_UNLOCK(qs);
1615 	}
1616 	if (qs->port->ifp->if_drv_flags & IFF_DRV_RUNNING)
1617 		callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
1618 		    qs, txq->txq_watchdog.c_cpu);
1619 }
1620 
1621 static void
1622 cxgb_tx_timeout(void *arg)
1623 {
1624 	struct sge_qset *qs = arg;
1625 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1626 
1627 	if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
1628                 qs->coalescing = 1;
1629 	if (TXQ_TRYLOCK(qs)) {
1630 		qs->qs_flags |= QS_TIMEOUT;
1631 		cxgb_start_locked(qs);
1632 		qs->qs_flags &= ~QS_TIMEOUT;
1633 		TXQ_UNLOCK(qs);
1634 	}
1635 }
1636 
1637 static void
1638 cxgb_start_locked(struct sge_qset *qs)
1639 {
1640 	struct mbuf *m_head = NULL;
1641 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1642 	int avail, txmax;
1643 	int in_use_init = txq->in_use;
1644 	struct port_info *pi = qs->port;
1645 	struct ifnet *ifp = pi->ifp;
1646 	avail = txq->size - txq->in_use - 4;
1647 	txmax = min(TX_START_MAX_DESC, avail);
1648 
1649 	if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
1650 		reclaim_completed_tx(qs, 0, TXQ_ETH);
1651 
1652 	if (!pi->link_config.link_ok) {
1653 		TXQ_RING_FLUSH(qs);
1654 		return;
1655 	}
1656 	TXQ_LOCK_ASSERT(qs);
1657 	while ((txq->in_use - in_use_init < txmax) &&
1658 	    !TXQ_RING_EMPTY(qs) &&
1659 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) &&
1660 	    pi->link_config.link_ok) {
1661 		reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1662 
1663 		if ((m_head = cxgb_dequeue(qs)) == NULL)
1664 			break;
1665 		/*
1666 		 *  Encapsulation can modify our pointer, and/or make it
1667 		 *  NULL on failure.  In that event, we can't requeue.
1668 		 */
1669 		if (t3_encap(qs, &m_head) || m_head == NULL)
1670 			break;
1671 
1672 		/* Send a copy of the frame to the BPF listener */
1673 		ETHER_BPF_MTAP(ifp, m_head);
1674 
1675 		/*
1676 		 * We sent via PIO, no longer need a copy
1677 		 */
1678 		if (m_head->m_nextpkt == NULL &&
1679 		    m_head->m_pkthdr.len <= PIO_LEN)
1680 			m_freem(m_head);
1681 
1682 		m_head = NULL;
1683 	}
1684 	if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
1685 	    pi->link_config.link_ok)
1686 		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1687 		    qs, txq->txq_timer.c_cpu);
1688 	if (m_head != NULL)
1689 		m_freem(m_head);
1690 }
1691 
1692 static int
1693 cxgb_transmit_locked(struct ifnet *ifp, struct sge_qset *qs, struct mbuf *m)
1694 {
1695 	struct port_info *pi = qs->port;
1696 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1697 	struct buf_ring *br = txq->txq_mr;
1698 	int error, avail;
1699 
1700 	avail = txq->size - txq->in_use;
1701 	TXQ_LOCK_ASSERT(qs);
1702 
1703 	/*
1704 	 * We can only do a direct transmit if the following are true:
1705 	 * - we aren't coalescing (ring < 3/4 full)
1706 	 * - the link is up -- checked in caller
1707 	 * - there are no packets enqueued already
1708 	 * - there is space in hardware transmit queue
1709 	 */
1710 	if (check_pkt_coalesce(qs) == 0 &&
1711 	    TXQ_RING_EMPTY(qs) && avail > 4) {
1712 		if (t3_encap(qs, &m)) {
1713 			if (m != NULL &&
1714 			    (error = drbr_enqueue(ifp, br, m)) != 0)
1715 				return (error);
1716 		} else {
1717 			/*
1718 			 * We've bypassed the buf ring so we need to update
1719 			 * the stats directly
1720 			 */
1721 			txq->txq_direct_packets++;
1722 			txq->txq_direct_bytes += m->m_pkthdr.len;
1723 			/*
1724 			 * Send a copy of the frame to the BPF listener.
1725 			 */
1727 			ETHER_BPF_MTAP(ifp, m);
1728 			/*
1729 			 * We sent via PIO, no longer need a copy
1730 			 */
1731 			if (m->m_pkthdr.len <= PIO_LEN)
1732 				m_freem(m);
1733 
1734 		}
1735 	} else if ((error = drbr_enqueue(ifp, br, m)) != 0)
1736 		return (error);
1737 
1738 	reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1739 	if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
1740 	    (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
1741 		cxgb_start_locked(qs);
1742 	else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
1743 		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1744 		    qs, txq->txq_timer.c_cpu);
1745 	return (0);
1746 }
1747 
1748 int
1749 cxgb_transmit(struct ifnet *ifp, struct mbuf *m)
1750 {
1751 	struct sge_qset *qs;
1752 	struct port_info *pi = ifp->if_softc;
1753 	int error, qidx = pi->first_qset;
1754 
1755 	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 ||
1756 	    !pi->link_config.link_ok) {
1757 		m_freem(m);
1758 		return (0);
1759 	}
1760 
1761 	if (m->m_flags & M_FLOWID)
1762 		qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
1763 
1764 	qs = &pi->adapter->sge.qs[qidx];
1765 
1766 	if (TXQ_TRYLOCK(qs)) {
1767 		/* XXX running */
1768 		error = cxgb_transmit_locked(ifp, qs, m);
1769 		TXQ_UNLOCK(qs);
1770 	} else
1771 		error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
1772 	return (error);
1773 }

1774 void
1775 cxgb_start(struct ifnet *ifp)
1776 {
1777 	struct port_info *pi = ifp->if_softc;
1778 	struct sge_qset *qs = &pi->adapter->sge.qs[pi->first_qset];
1779 
1780 	if (!pi->link_config.link_ok)
1781 		return;
1782 
1783 	TXQ_LOCK(qs);
1784 	cxgb_start_locked(qs);
1785 	TXQ_UNLOCK(qs);
1786 }
1787 
1788 void
1789 cxgb_qflush(struct ifnet *ifp)
1790 {
1791 	/*
1792 	 * Flush any enqueued mbufs in the buf_rings and in the transmit
1793 	 * queues.  This is currently a no-op.
1795 	 */
1796 	return;
1797 }
1798 
1799 /**
1800  *	write_imm - write a packet into a Tx descriptor as immediate data
1801  *	@d: the Tx descriptor to write
1802  *	@m: the packet
1803  *	@len: the length of packet data to write as immediate data
1804  *	@gen: the generation bit value to write
1805  *
1806  *	Writes a packet as immediate data into a Tx descriptor.  The packet
1807  *	contains a work request at its beginning.  We must write the packet
1808  *	carefully so the SGE doesn't read accidentally before it's written in
1809  *	its entirety.
1810  */
1811 static __inline void
1812 write_imm(struct tx_desc *d, struct mbuf *m,
1813 	  unsigned int len, unsigned int gen)
1814 {
1815 	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1816 	struct work_request_hdr *to = (struct work_request_hdr *)d;
1817 	uint32_t wr_hi, wr_lo;
1818 
1819 	if (len > WR_LEN)
1820 		panic("len too big %d\n", len);
1821 	if (len < sizeof(*from))
1822 		panic("len too small %d", len);
1823 
1824 	memcpy(&to[1], &from[1], len - sizeof(*from));
1825 	wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
1826 					V_WR_BCNTLFLT(len & 7));
1827 	wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) |
1828 					V_WR_LEN((len + 7) / 8));
1829 	set_wr_hdr(to, wr_hi, wr_lo);
1830 	wmb();
1831 	wr_gen2(d, gen);
1832 
1833 	/*
1834 	 * This check is a hack; we should really fix the logic so
1835 	 * that this can't happen.
1836 	 */
1837 	if (m->m_type != MT_DONTFREE)
1838 		m_freem(m);
1839 
1840 }
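
/*
 * Worked example for write_imm() (illustrative only): for an immediate
 * write of len == 44 bytes, V_WR_BCNTLFLT(44 & 7) records 4 valid bytes in
 * the last flit and V_WR_LEN((44 + 7) / 8) == V_WR_LEN(6) gives the total
 * length in 8-byte flits, so the SGE knows exactly how much of the
 * descriptor is payload.  The generation bit is committed last, after the
 * wmb(), so the hardware never sees a partially written descriptor.
 */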
1841 
1842 /**
1843  *	check_desc_avail - check descriptor availability on a send queue
1844  *	@adap: the adapter
1845  *	@q: the TX queue
1846  *	@m: the packet needing the descriptors
1847  *	@ndesc: the number of Tx descriptors needed
1848  *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1849  *
1850  *	Checks if the requested number of Tx descriptors is available on an
1851  *	SGE send queue.  If the queue is already suspended or not enough
1852  *	descriptors are available the packet is queued for later transmission.
1853  *	Must be called with the Tx queue locked.
1854  *
1855  *	Returns 0 if enough descriptors are available, 1 if there aren't
1856  *	enough descriptors and the packet has been queued, and 2 if the caller
1857  *	needs to retry because there weren't enough descriptors at the
1858  *	beginning of the call but some freed up in the mean time.
1859  */
1860 static __inline int
1861 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1862 		 struct mbuf *m, unsigned int ndesc,
1863 		 unsigned int qid)
1864 {
1865 	/*
1866 	 * XXX We currently only use this for checking the control queue;
1867 	 * the control queue is only used for binding qsets, which happens
1868 	 * at init time, so we are guaranteed enough descriptors.
1869 	 */
1870 	if (__predict_false(!mbufq_empty(&q->sendq))) {
1871 addq_exit:	mbufq_tail(&q->sendq, m);
1872 		return 1;
1873 	}
1874 	if (__predict_false(q->size - q->in_use < ndesc)) {
1875 
1876 		struct sge_qset *qs = txq_to_qset(q, qid);
1877 
1878 		setbit(&qs->txq_stopped, qid);
1879 		if (should_restart_tx(q) &&
1880 		    test_and_clear_bit(qid, &qs->txq_stopped))
1881 			return 2;
1882 
1883 		q->stops++;
1884 		goto addq_exit;
1885 	}
1886 	return 0;
1887 }
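
/*
 * Typical caller pattern for check_desc_avail() (a sketch; ctrl_xmit()
 * below is the real thing):
 *
 *	again:	reclaim_completed_tx_imm(q);
 *		ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
 *		if (ret == 1)
 *			return (ENOSPC);	// m was queued on q->sendq
 *		else if (ret == 2)
 *			goto again;		// descriptors freed up, retry
 *		// ret == 0: safe to write the descriptor(s)
 */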
1888 
1889 
1890 /**
1891  *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1892  *	@q: the SGE control Tx queue
1893  *
1894  *	This is a variant of reclaim_completed_tx() that is used for Tx queues
1895  *	that send only immediate data (presently just the control queues) and
1896  *	thus do not have any mbufs
1897  */
1898 static __inline void
1899 reclaim_completed_tx_imm(struct sge_txq *q)
1900 {
1901 	unsigned int reclaim = q->processed - q->cleaned;
1902 
1903 	q->in_use -= reclaim;
1904 	q->cleaned += reclaim;
1905 }
1906 
1907 static __inline int
1908 immediate(const struct mbuf *m)
1909 {
1910 	return (m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN);
1911 }
1912 
1913 /**
1914  *	ctrl_xmit - send a packet through an SGE control Tx queue
1915  *	@adap: the adapter
1916  *	@qs: the queue set containing the control queue
1917  *	@m: the packet
1918  *
1919  *	Send a packet through an SGE control Tx queue.  Packets sent through
1920  *	a control queue must fit entirely as immediate data in a single Tx
1921  *	descriptor and have no page fragments.
1922  */
1923 static int
1924 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
1925 {
1926 	int ret;
1927 	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1928 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1929 
1930 	if (__predict_false(!immediate(m))) {
1931 		m_freem(m);
1932 		return 0;
1933 	}
1934 
1935 	wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
1936 	wrp->wrh_lo = htonl(V_WR_TID(q->token));
1937 
1938 	TXQ_LOCK(qs);
1939 again:	reclaim_completed_tx_imm(q);
1940 
1941 	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1942 	if (__predict_false(ret)) {
1943 		if (ret == 1) {
1944 			TXQ_UNLOCK(qs);
1945 			return (ENOSPC);
1946 		}
1947 		goto again;
1948 	}
1949 	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1950 
1951 	q->in_use++;
1952 	if (++q->pidx >= q->size) {
1953 		q->pidx = 0;
1954 		q->gen ^= 1;
1955 	}
1956 	TXQ_UNLOCK(qs);
1957 	wmb();
1958 	t3_write_reg(adap, A_SG_KDOORBELL,
1959 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1960 	return (0);
1961 }
1962 
1963 
1964 /**
1965  *	restart_ctrlq - restart a suspended control queue
1966  *	@qs: the queue set containing the control queue
1967  *
1968  *	Resumes transmission on a suspended Tx control queue.
1969  */
1970 static void
1971 restart_ctrlq(void *data, int npending)
1972 {
1973 	struct mbuf *m;
1974 	struct sge_qset *qs = (struct sge_qset *)data;
1975 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1976 	adapter_t *adap = qs->port->adapter;
1977 
1978 	TXQ_LOCK(qs);
1979 again:	reclaim_completed_tx_imm(q);
1980 
1981 	while (q->in_use < q->size &&
1982 	       (m = mbufq_dequeue(&q->sendq)) != NULL) {
1983 
1984 		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1985 
1986 		if (++q->pidx >= q->size) {
1987 			q->pidx = 0;
1988 			q->gen ^= 1;
1989 		}
1990 		q->in_use++;
1991 	}
1992 	if (!mbufq_empty(&q->sendq)) {
1993 		setbit(&qs->txq_stopped, TXQ_CTRL);
1994 
1995 		if (should_restart_tx(q) &&
1996 		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1997 			goto again;
1998 		q->stops++;
1999 	}
2000 	TXQ_UNLOCK(qs);
2001 	t3_write_reg(adap, A_SG_KDOORBELL,
2002 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2003 }
2004 
2005 
2006 /*
2007  * Send a management message through control queue 0
2008  */
2009 int
2010 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
2011 {
2012 	return ctrl_xmit(adap, &adap->sge.qs[0], m);
2013 }
2014 
2015 /**
2016  *	free_qset - free the resources of an SGE queue set
2017  *	@sc: the controller owning the queue set
2018  *	@q: the queue set
2019  *
2020  *	Release the HW and SW resources associated with an SGE queue set, such
2021  *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
2022  *	queue set must be quiesced prior to calling this.
2023  */
2024 static void
2025 t3_free_qset(adapter_t *sc, struct sge_qset *q)
2026 {
2027 	int i;
2028 
2029 	reclaim_completed_tx(q, 0, TXQ_ETH);
2030 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2031 		if (q->txq[i].txq_mr != NULL)
2032 			buf_ring_free(q->txq[i].txq_mr, M_DEVBUF);
2033 		if (q->txq[i].txq_ifq != NULL) {
2034 			ifq_delete(q->txq[i].txq_ifq);
2035 			free(q->txq[i].txq_ifq, M_DEVBUF);
2036 		}
2037 	}
2038 
2039 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2040 		if (q->fl[i].desc) {
2041 			mtx_lock_spin(&sc->sge.reg_lock);
2042 			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
2043 			mtx_unlock_spin(&sc->sge.reg_lock);
2044 			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
2045 			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
2046 					q->fl[i].desc_map);
2047 			bus_dma_tag_destroy(q->fl[i].desc_tag);
2048 			bus_dma_tag_destroy(q->fl[i].entry_tag);
2049 		}
2050 		if (q->fl[i].sdesc) {
2051 			free_rx_bufs(sc, &q->fl[i]);
2052 			free(q->fl[i].sdesc, M_DEVBUF);
2053 		}
2054 	}
2055 
2056 	mtx_unlock(&q->lock);
2057 	MTX_DESTROY(&q->lock);
2058 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2059 		if (q->txq[i].desc) {
2060 			mtx_lock_spin(&sc->sge.reg_lock);
2061 			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
2062 			mtx_unlock_spin(&sc->sge.reg_lock);
2063 			bus_dmamap_unload(q->txq[i].desc_tag,
2064 					q->txq[i].desc_map);
2065 			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
2066 					q->txq[i].desc_map);
2067 			bus_dma_tag_destroy(q->txq[i].desc_tag);
2068 			bus_dma_tag_destroy(q->txq[i].entry_tag);
2069 		}
2070 		if (q->txq[i].sdesc) {
2071 			free(q->txq[i].sdesc, M_DEVBUF);
2072 		}
2073 	}
2074 
2075 	if (q->rspq.desc) {
2076 		mtx_lock_spin(&sc->sge.reg_lock);
2077 		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
2078 		mtx_unlock_spin(&sc->sge.reg_lock);
2079 
2080 		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
2081 		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
2082 			        q->rspq.desc_map);
2083 		bus_dma_tag_destroy(q->rspq.desc_tag);
2084 		MTX_DESTROY(&q->rspq.lock);
2085 	}
2086 
2087 #ifdef LRO_SUPPORTED
2088 	tcp_lro_free(&q->lro.ctrl);
2089 #endif
2090 
2091 	bzero(q, sizeof(*q));
2092 }
2093 
2094 /**
2095  *	t3_free_sge_resources - free SGE resources
2096  *	@sc: the adapter softc
2097  *
2098  *	Frees resources used by the SGE queue sets.
2099  */
2100 void
2101 t3_free_sge_resources(adapter_t *sc)
2102 {
2103 	int i, nqsets;
2104 
2105 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
2106 		nqsets += sc->port[i].nqsets;
2107 
2108 	for (i = 0; i < nqsets; ++i) {
2109 		TXQ_LOCK(&sc->sge.qs[i]);
2110 		t3_free_qset(sc, &sc->sge.qs[i]);
2111 	}
2112 
2113 }
2114 
2115 /**
2116  *	t3_sge_start - enable SGE
2117  *	@sc: the controller softc
2118  *
2119  *	Enables the SGE for DMAs.  This is the last step in starting packet
2120  *	transfers.
2121  */
2122 void
2123 t3_sge_start(adapter_t *sc)
2124 {
2125 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2126 }
2127 
2128 /**
2129  *	t3_sge_stop - disable SGE operation
2130  *	@sc: the adapter
2131  *
2132  *	Disables the DMA engine.  This can be called in emergencies (e.g.,
2133  *	from error interrupts) or from normal process context.  In the latter
2134  *	case it also disables any pending queue restart tasklets.  Note that
2135  *	if it is called in interrupt context it cannot disable the restart
2136  *	tasklets, as it cannot wait; however, the tasklets will have no effect
2137  *	since the doorbells are disabled, and the driver will call this again
2138  *	later from process context, at which time the tasklets will be stopped
2139  *	if they are still running.
2140  */
2141 void
2142 t3_sge_stop(adapter_t *sc)
2143 {
2144 	int i, nqsets;
2145 
2146 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
2147 
2148 	if (sc->tq == NULL)
2149 		return;
2150 
2151 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
2152 		nqsets += sc->port[i].nqsets;
2153 #ifdef notyet
2154 	/*
2155 	 *
2156 	 * XXX
2157 	 */
2158 	for (i = 0; i < nqsets; ++i) {
2159 		struct sge_qset *qs = &sc->sge.qs[i];
2160 
2161 		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2162 		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2163 	}
2164 #endif
2165 }
2166 
2167 /**
2168  *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
2169  *	@qs: the queue set that owns the Tx queue
2170  *	@reclaimable: the number of descriptors to reclaim
2171  *	@queue: the Tx queue index within the queue set (TXQ_ETH,
2172  *	    TXQ_OFLD, or TXQ_CTRL)
2173  *
2174  *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2175  *	Tx buffers.  Called with the Tx queue lock held.
2176  *
2177  *	Does not return a value; the queue's consumer index is advanced past
2178  *	the reclaimed descriptors.
2179  */
2180 void
2181 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
2182 {
2183 	struct tx_sw_desc *txsd;
2184 	unsigned int cidx, mask;
2185 	struct sge_txq *q = &qs->txq[queue];
2186 
2187 #ifdef T3_TRACE
2188 	T3_TRACE2(qs->port->adapter->tb[q->cntxt_id & 7],
2189 		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, q->cidx);
2190 #endif
2191 	cidx = q->cidx;
2192 	mask = q->size - 1;
2193 	txsd = &q->sdesc[cidx];
2194 
2195 	mtx_assert(&qs->lock, MA_OWNED);
2196 	while (reclaimable--) {
2197 		prefetch(q->sdesc[(cidx + 1) & mask].m);
2198 		prefetch(q->sdesc[(cidx + 2) & mask].m);
2199 
2200 		if (txsd->m != NULL) {
2201 			if (txsd->flags & TX_SW_DESC_MAPPED) {
2202 				bus_dmamap_unload(q->entry_tag, txsd->map);
2203 				txsd->flags &= ~TX_SW_DESC_MAPPED;
2204 			}
2205 			m_freem_list(txsd->m);
2206 			txsd->m = NULL;
2207 		} else
2208 			q->txq_skipped++;
2209 
2210 		++txsd;
2211 		if (++cidx == q->size) {
2212 			cidx = 0;
2213 			txsd = q->sdesc;
2214 		}
2215 	}
2216 	q->cidx = cidx;
2217 
2218 }
2219 
2220 /**
2221  *	is_new_response - check if a response is newly written
2222  *	@r: the response descriptor
2223  *	@q: the response queue
2224  *
2225  *	Returns true if a response descriptor contains a yet unprocessed
2226  *	response.
2227  */
2228 static __inline int
2229 is_new_response(const struct rsp_desc *r,
2230     const struct sge_rspq *q)
2231 {
2232 	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2233 }
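
/*
 * Generation-bit example (illustrative, assuming the ring memory starts
 * zeroed): with q->gen initialized to 1 (see t3_sge_alloc_qset()),
 * unwritten descriptors fail the test above.  Every time the consumer index
 * wraps, process_responses() flips q->gen, so a descriptor only counts as
 * new if the hardware has rewritten it since the last wrap.  The Tx rings
 * use the same scheme via wr_gen2()/G_WR_GEN().
 */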
2234 
2235 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
2236 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
2237 			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
2238 			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
2239 			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
2240 
2241 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
2242 #define NOMEM_INTR_DELAY 2500
2243 
2244 /**
2245  *	write_ofld_wr - write an offload work request
2246  *	@adap: the adapter
2247  *	@m: the packet to send
2248  *	@q: the Tx queue
2249  *	@pidx: index of the first Tx descriptor to write
2250  *	@gen: the generation value to use
2251  *	@ndesc: number of descriptors the packet will occupy
2252  *
2253  *	Write an offload work request to send the supplied packet.  The packet
2254  *	data already carry the work request with most fields populated.
2255  */
2256 static void
2257 write_ofld_wr(adapter_t *adap, struct mbuf *m,
2258     struct sge_txq *q, unsigned int pidx,
2259     unsigned int gen, unsigned int ndesc,
2260     bus_dma_segment_t *segs, unsigned int nsegs)
2261 {
2262 	unsigned int sgl_flits, flits;
2263 	struct work_request_hdr *from;
2264 	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
2265 	struct tx_desc *d = &q->desc[pidx];
2266 	struct txq_state txqs;
2267 
2268 	if (immediate(m) && nsegs == 0) {
2269 		write_imm(d, m, m->m_len, gen);
2270 		return;
2271 	}
2272 
2273 	/* Only TX_DATA builds SGLs */
2274 	from = mtod(m, struct work_request_hdr *);
2275 	memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
2276 
2277 	flits = m->m_len / 8;
2278 	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
2279 
2280 	make_sgl(sgp, segs, nsegs);
2281 	sgl_flits = sgl_len(nsegs);
2282 
2283 	txqs.gen = gen;
2284 	txqs.pidx = pidx;
2285 	txqs.compl = 0;
2286 
2287 	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
2288 	    from->wrh_hi, from->wrh_lo);
2289 }
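
/*
 * Note on the layout above (illustrative): a "flit" is an 8-byte unit of a
 * Tx descriptor.  For a 48-byte work request header, flits == 48 / 8 == 6;
 * if the request fits in a single descriptor (ndesc == 1) the SGL is built
 * in place starting at d->flit[6], otherwise it is staged in the local
 * sgl[] array and laid out across descriptors by write_wr_hdr_sgl().
 */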
2290 
2291 /**
2292  *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
2293  *	@m: the packet
2294  *
2295  * 	Returns the number of Tx descriptors needed for the given offload
2296  * 	packet.  These packets are already fully constructed.
2297  */
2298 static __inline unsigned int
2299 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
2300 {
2301 	unsigned int flits, cnt = 0;
2302 	int ndescs;
2303 
2304 	if (m->m_len <= WR_LEN && nsegs == 0)
2305 		return (1);                 /* packet fits as immediate data */
2306 
2307 	/*
2308 	 * This needs to be revisited for TOE.
2309 	 */
2310 
2311 	cnt = nsegs;
2312 
2313 	/* headers */
2314 	flits = m->m_len / 8;
2315 
2316 	ndescs = flits_to_desc(flits + sgl_len(cnt));
2317 
2318 	return (ndescs);
2319 }
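
/*
 * Worked example (illustrative): an offload packet with a 40-byte work
 * request header and 3 DMA segments cannot go out as immediate data, so
 * cnt == 3, flits == 40 / 8 == 5, and the descriptor count is
 * flits_to_desc(5 + sgl_len(3)), i.e. however many Tx descriptors are
 * needed for the header flits plus the scatter/gather list.
 */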
2320 
2321 /**
2322  *	ofld_xmit - send a packet through an offload queue
2323  *	@adap: the adapter
2324  *	@q: the Tx offload queue
2325  *	@m: the packet
2326  *
2327  *	Send an offload packet through an SGE offload queue.
2328  */
2329 static int
2330 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
2331 {
2332 	int ret, nsegs;
2333 	unsigned int ndesc;
2334 	unsigned int pidx, gen;
2335 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
2336 	bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
2337 	struct tx_sw_desc *stx;
2338 
2339 	nsegs = m_get_sgllen(m);
2340 	vsegs = m_get_sgl(m);
2341 	ndesc = calc_tx_descs_ofld(m, nsegs);
2342 	busdma_map_sgl(vsegs, segs, nsegs);
2343 
2344 	stx = &q->sdesc[q->pidx];
2345 
2346 	TXQ_LOCK(qs);
2347 again:	reclaim_completed_tx(qs, 16, TXQ_OFLD);
2348 	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2349 	if (__predict_false(ret)) {
2350 		if (ret == 1) {
2351 			printf("no ofld desc avail\n");
2352 
2353 			m_set_priority(m, ndesc);     /* save for restart */
2354 			TXQ_UNLOCK(qs);
2355 			return (EINTR);
2356 		}
2357 		goto again;
2358 	}
2359 
2360 	gen = q->gen;
2361 	q->in_use += ndesc;
2362 	pidx = q->pidx;
2363 	q->pidx += ndesc;
2364 	if (q->pidx >= q->size) {
2365 		q->pidx -= q->size;
2366 		q->gen ^= 1;
2367 	}
2368 #ifdef T3_TRACE
2369 	T3_TRACE5(adap->tb[q->cntxt_id & 7],
2370 		  "ofld_xmit: ndesc %u, pidx %u, len %u, headlen %u, nsegs %u",
2371 		  ndesc, pidx, m->m_pkthdr.len, m->m_len, nsegs);
2373 #endif
2374 	TXQ_UNLOCK(qs);
2375 
2376 	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2377 	check_ring_tx_db(adap, q);
2378 	return (0);
2379 }
2380 
2381 /**
2382  *	restart_offloadq - restart a suspended offload queue
2383  *	@qs: the queue set containing the offload queue
2384  *
2385  *	Resumes transmission on a suspended Tx offload queue.
2386  */
2387 static void
2388 restart_offloadq(void *data, int npending)
2389 {
2390 	struct mbuf *m;
2391 	struct sge_qset *qs = data;
2392 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
2393 	adapter_t *adap = qs->port->adapter;
2394 	bus_dma_segment_t segs[TX_MAX_SEGS];
2395 	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2396 	int nsegs, cleaned;
2397 
2398 	TXQ_LOCK(qs);
2399 again:	cleaned = reclaim_completed_tx(qs, 16, TXQ_OFLD);
2400 
2401 	while ((m = mbufq_peek(&q->sendq)) != NULL) {
2402 		unsigned int gen, pidx;
2403 		unsigned int ndesc = m_get_priority(m);
2404 
2405 		if (__predict_false(q->size - q->in_use < ndesc)) {
2406 			setbit(&qs->txq_stopped, TXQ_OFLD);
2407 			if (should_restart_tx(q) &&
2408 			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2409 				goto again;
2410 			q->stops++;
2411 			break;
2412 		}
2413 
2414 		gen = q->gen;
2415 		q->in_use += ndesc;
2416 		pidx = q->pidx;
2417 		q->pidx += ndesc;
2418 		if (q->pidx >= q->size) {
2419 			q->pidx -= q->size;
2420 			q->gen ^= 1;
2421 		}
2422 
2423 		(void)mbufq_dequeue(&q->sendq);
2424 		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2425 		TXQ_UNLOCK(qs);
2426 		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2427 		TXQ_LOCK(qs);
2428 	}
2429 #if USE_GTS
2430 	set_bit(TXQ_RUNNING, &q->flags);
2431 	set_bit(TXQ_LAST_PKT_DB, &q->flags);
2432 #endif
2433 	TXQ_UNLOCK(qs);
2434 	wmb();
2435 	t3_write_reg(adap, A_SG_KDOORBELL,
2436 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2437 }
2438 
2439 /**
2440  *	queue_set - return the queue set a packet should use
2441  *	@m: the packet
2442  *
2443  *	Maps a packet to the SGE queue set it should use.  The desired queue
2444  *	set is carried in bits 1-3 in the packet's priority.
2445  */
2446 static __inline int
2447 queue_set(const struct mbuf *m)
2448 {
2449 	return m_get_priority(m) >> 1;
2450 }
2451 
2452 /**
2453  *	is_ctrl_pkt - return whether an offload packet is a control packet
2454  *	@m: the packet
2455  *
2456  *	Determines whether an offload packet should use an OFLD or a CTRL
2457  *	Tx queue.  This is indicated by bit 0 in the packet's priority.
2458  */
2459 static __inline int
2460 is_ctrl_pkt(const struct mbuf *m)
2461 {
2462 	return m_get_priority(m) & 1;
2463 }
2464 
2465 /**
2466  *	t3_offload_tx - send an offload packet
2467  *	@tdev: the offload device to send to
2468  *	@m: the packet
2469  *
2470  *	Sends an offload packet.  We use the packet priority to select the
2471  *	appropriate Tx queue as follows: bit 0 indicates whether the packet
2472  *	should be sent as regular or control, bits 1-3 select the queue set.
2473  */
2474 int
2475 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2476 {
2477 	adapter_t *adap = tdev2adap(tdev);
2478 	struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2479 
2480 	if (__predict_false(is_ctrl_pkt(m)))
2481 		return ctrl_xmit(adap, qs, m);
2482 
2483 	return ofld_xmit(adap, qs, m);
2484 }
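
/*
 * Priority encoding example (illustrative): a caller sending a control
 * message on queue set 2 sets the mbuf priority to (2 << 1) | 1 == 5;
 * queue_set() then yields 2 and is_ctrl_pkt() is true, so the packet is
 * handed to ctrl_xmit() on adap->sge.qs[2].  With bit 0 clear the same
 * priority would route the packet to ofld_xmit() instead.
 */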
2485 
2486 /**
2487  *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2488  *	@tdev: the offload device that will be receiving the packets
2489  *	@q: the SGE response queue that assembled the bundle
2490  *	@m: the partial bundle
2491  *	@n: the number of packets in the bundle
2492  *
2493  *	Delivers a (partial) bundle of Rx offload packets to an offload device.
2494  */
2495 static __inline void
2496 deliver_partial_bundle(struct t3cdev *tdev,
2497 			struct sge_rspq *q,
2498 			struct mbuf *mbufs[], int n)
2499 {
2500 	if (n) {
2501 		q->offload_bundles++;
2502 		cxgb_ofld_recv(tdev, mbufs, n);
2503 	}
2504 }
2505 
2506 static __inline int
2507 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2508     struct mbuf *m, struct mbuf *rx_gather[],
2509     unsigned int gather_idx)
2510 {
2511 
2512 	rq->offload_pkts++;
2513 	m->m_pkthdr.header = mtod(m, void *);
2514 	rx_gather[gather_idx++] = m;
2515 	if (gather_idx == RX_BUNDLE_SIZE) {
2516 		cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2517 		gather_idx = 0;
2518 		rq->offload_bundles++;
2519 	}
2520 	return (gather_idx);
2521 }
2522 
2523 static void
2524 restart_tx(struct sge_qset *qs)
2525 {
2526 	struct adapter *sc = qs->port->adapter;
2527 
2528 
2529 	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2530 	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2531 	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2532 		qs->txq[TXQ_OFLD].restarts++;
2533 		DPRINTF("restarting TXQ_OFLD\n");
2534 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2535 	}
2536 	DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2537 	    qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2538 	    qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2539 	    qs->txq[TXQ_CTRL].in_use);
2540 
2541 	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2542 	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2543 	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2544 		qs->txq[TXQ_CTRL].restarts++;
2545 		DPRINTF("restarting TXQ_CTRL\n");
2546 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2547 	}
2548 }
2549 
2550 /**
2551  *	t3_sge_alloc_qset - initialize an SGE queue set
2552  *	@sc: the controller softc
2553  *	@id: the queue set id
2554  *	@nports: how many Ethernet ports will be using this queue set
2555  *	@irq_vec_idx: the IRQ vector index for response queue interrupts
2556  *	@p: configuration parameters for this queue set
2557  *	@ntxq: number of Tx queues for the queue set
2558  *	@pi: port info for queue set
2559  *
2560  *	Allocate resources and initialize an SGE queue set.  A queue set
2561  *	comprises a response queue, two Rx free-buffer queues, and up to 3
2562  *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
2563  *	queue, offload queue, and control queue.
2564  */
2565 int
2566 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2567 		  const struct qset_params *p, int ntxq, struct port_info *pi)
2568 {
2569 	struct sge_qset *q = &sc->sge.qs[id];
2570 	int i, ret = 0;
2571 
2572 	MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
2573 	q->port = pi;
2574 
2575 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2576 
2577 		if ((q->txq[i].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
2578 			    M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
2579 			device_printf(sc->dev, "failed to allocate mbuf ring\n");
2580 			goto err;
2581 		}
2582 		if ((q->txq[i].txq_ifq =
2583 			malloc(sizeof(struct ifaltq), M_DEVBUF, M_NOWAIT|M_ZERO))
2584 		    == NULL) {
2585 			device_printf(sc->dev, "failed to allocate ifq\n");
2586 			goto err;
2587 		}
2588 		ifq_init(q->txq[i].txq_ifq, pi->ifp);
2589 		callout_init(&q->txq[i].txq_timer, 1);
2590 		callout_init(&q->txq[i].txq_watchdog, 1);
2591 		q->txq[i].txq_timer.c_cpu = id % mp_ncpus;
2592 		q->txq[i].txq_watchdog.c_cpu = id % mp_ncpus;
2593 	}
2594 	init_qset_cntxt(q, id);
2595 	q->idx = id;
2596 	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2597 		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2598 		    &q->fl[0].desc, &q->fl[0].sdesc,
2599 		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
2600 		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2601 		printf("error %d from alloc ring fl0\n", ret);
2602 		goto err;
2603 	}
2604 
2605 	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2606 		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2607 		    &q->fl[1].desc, &q->fl[1].sdesc,
2608 		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
2609 		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2610 		printf("error %d from alloc ring fl1\n", ret);
2611 		goto err;
2612 	}
2613 
2614 	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2615 		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
2616 		    &q->rspq.desc_tag, &q->rspq.desc_map,
2617 		    NULL, NULL)) != 0) {
2618 		printf("error %d from alloc ring rspq\n", ret);
2619 		goto err;
2620 	}
2621 
2622 	for (i = 0; i < ntxq; ++i) {
2623 		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2624 
2625 		if ((ret = alloc_ring(sc, p->txq_size[i],
2626 			    sizeof(struct tx_desc), sz,
2627 			    &q->txq[i].phys_addr, &q->txq[i].desc,
2628 			    &q->txq[i].sdesc, &q->txq[i].desc_tag,
2629 			    &q->txq[i].desc_map,
2630 			    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2631 			printf("error %d from alloc ring tx %i\n", ret, i);
2632 			goto err;
2633 		}
2634 		mbufq_init(&q->txq[i].sendq);
2635 		q->txq[i].gen = 1;
2636 		q->txq[i].size = p->txq_size[i];
2637 	}
2638 
2639 	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2640 	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2641 	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2642 	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2643 
2644 	q->fl[0].gen = q->fl[1].gen = 1;
2645 	q->fl[0].size = p->fl_size;
2646 	q->fl[1].size = p->jumbo_size;
2647 
2648 	q->rspq.gen = 1;
2649 	q->rspq.cidx = 0;
2650 	q->rspq.size = p->rspq_size;
2651 
2652 	q->txq[TXQ_ETH].stop_thres = nports *
2653 	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2654 
2655 	q->fl[0].buf_size = MCLBYTES;
2656 	q->fl[0].zone = zone_pack;
2657 	q->fl[0].type = EXT_PACKET;
2658 #if __FreeBSD_version > 800000
2659 	if (cxgb_use_16k_clusters) {
2660 		q->fl[1].buf_size = MJUM16BYTES;
2661 		q->fl[1].zone = zone_jumbo16;
2662 		q->fl[1].type = EXT_JUMBO16;
2663 	} else {
2664 		q->fl[1].buf_size = MJUM9BYTES;
2665 		q->fl[1].zone = zone_jumbo9;
2666 		q->fl[1].type = EXT_JUMBO9;
2667 	}
2668 #else
2669 	q->fl[1].buf_size = MJUMPAGESIZE;
2670 	q->fl[1].zone = zone_jumbop;
2671 	q->fl[1].type = EXT_JUMBOP;
2672 #endif
2673 
2674 #ifdef LRO_SUPPORTED
2675 	/* Allocate and setup the lro_ctrl structure */
2676 	q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2677 	ret = tcp_lro_init(&q->lro.ctrl);
2678 	if (ret) {
2679 		printf("error %d from tcp_lro_init\n", ret);
2680 		goto err;
2681 	}
2682 	q->lro.ctrl.ifp = pi->ifp;
2683 #endif
2684 
2685 	mtx_lock_spin(&sc->sge.reg_lock);
2686 	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2687 				   q->rspq.phys_addr, q->rspq.size,
2688 				   q->fl[0].buf_size, 1, 0);
2689 	if (ret) {
2690 		printf("error %d from t3_sge_init_rspcntxt\n", ret);
2691 		goto err_unlock;
2692 	}
2693 
2694 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2695 		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2696 					  q->fl[i].phys_addr, q->fl[i].size,
2697 					  q->fl[i].buf_size, p->cong_thres, 1,
2698 					  0);
2699 		if (ret) {
2700 			printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2701 			goto err_unlock;
2702 		}
2703 	}
2704 
2705 	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2706 				 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2707 				 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2708 				 1, 0);
2709 	if (ret) {
2710 		printf("error %d from t3_sge_init_ecntxt\n", ret);
2711 		goto err_unlock;
2712 	}
2713 
2714 	if (ntxq > 1) {
2715 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2716 					 USE_GTS, SGE_CNTXT_OFLD, id,
2717 					 q->txq[TXQ_OFLD].phys_addr,
2718 					 q->txq[TXQ_OFLD].size, 0, 1, 0);
2719 		if (ret) {
2720 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2721 			goto err_unlock;
2722 		}
2723 	}
2724 
2725 	if (ntxq > 2) {
2726 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2727 					 SGE_CNTXT_CTRL, id,
2728 					 q->txq[TXQ_CTRL].phys_addr,
2729 					 q->txq[TXQ_CTRL].size,
2730 					 q->txq[TXQ_CTRL].token, 1, 0);
2731 		if (ret) {
2732 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2733 			goto err_unlock;
2734 		}
2735 	}
2736 
2737 	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2738 	    device_get_unit(sc->dev), irq_vec_idx);
2739 	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2740 
2741 	mtx_unlock_spin(&sc->sge.reg_lock);
2742 	t3_update_qset_coalesce(q, p);
2743 	q->port = pi;
2744 
2745 	refill_fl(sc, &q->fl[0], q->fl[0].size);
2746 	refill_fl(sc, &q->fl[1], q->fl[1].size);
2747 	refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2748 
2749 	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2750 		     V_NEWTIMER(q->rspq.holdoff_tmr));
2751 
2752 	return (0);
2753 
2754 err_unlock:
2755 	mtx_unlock_spin(&sc->sge.reg_lock);
2756 err:
2757 	TXQ_LOCK(q);
2758 	t3_free_qset(sc, q);
2759 
2760 	return (ret);
2761 }
2762 
2763 /*
2764  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2765  * ethernet data.  Hardware assistance with various checksums and any vlan tag
2766  * will also be taken into account here.
2767  */
2768 void
2769 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2770 {
2771 	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2772 	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2773 	struct ifnet *ifp = pi->ifp;
2774 
2775 	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2776 
2777 	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2778 	    cpl->csum_valid && cpl->csum == 0xffff) {
2779 		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2780 		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID |
2781 		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2782 		m->m_pkthdr.csum_data = 0xffff;
2783 	}
2784 	/*
2785 	 * XXX need to add VLAN support for 6.x
2786 	 */
2787 #ifdef VLAN_SUPPORTED
2788 	if (__predict_false(cpl->vlan_valid)) {
2789 		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2790 		m->m_flags |= M_VLANTAG;
2791 	}
2792 #endif
2793 
2794 	m->m_pkthdr.rcvif = ifp;
2795 	m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2796 	/*
2797 	 * adjust after conversion to mbuf chain
2798 	 */
2799 	m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2800 	m->m_len -= (sizeof(*cpl) + ethpad);
2801 	m->m_data += (sizeof(*cpl) + ethpad);
2802 }
2803 
2804 /**
2805  *	get_packet - return the next ingress packet buffer from a free list
2806  *	@adap: the adapter that received the packet
2807  *	@drop_thres: # of remaining buffers before we start dropping packets
2808  *	@qs: the qset that the SGE free list holding the packet belongs to
2809  *	@mh: the mbuf header; holds pointers to the head and tail of the mbuf chain
2810  *	@r: the response descriptor
2811  *
2812  *	Get the next packet from a free list and complete setup of the
2813  *	mbuf.  If the packet is small we make a copy and recycle the
2814  *	original buffer, otherwise we use the original buffer itself.  If a
2815  *	positive drop threshold is supplied packets are dropped and their
2816  *	buffers recycled if (a) the number of remaining buffers is under the
2817  *	threshold and the packet is too big to copy, or (b) the packet should
2818  *	be copied but there is no memory for the copy.
2819  */
2820 static int
2821 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2822     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2823 {
2824 
2825 	unsigned int len_cq =  ntohl(r->len_cq);
2826 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2827 	int mask, cidx = fl->cidx;
2828 	struct rx_sw_desc *sd = &fl->sdesc[cidx];
2829 	uint32_t len = G_RSPD_LEN(len_cq);
2830 	uint32_t flags = M_EXT;
2831 	uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
2832 	caddr_t cl;
2833 	struct mbuf *m;
2834 	int ret = 0;
2835 
2836 	mask = fl->size - 1;
2837 	prefetch(fl->sdesc[(cidx + 1) & mask].m);
2838 	prefetch(fl->sdesc[(cidx + 2) & mask].m);
2839 	prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
2840 	prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);
2841 
2842 	fl->credits--;
2843 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2844 
2845 	if (recycle_enable && len <= SGE_RX_COPY_THRES &&
2846 	    sopeop == RSPQ_SOP_EOP) {
2847 		if ((m = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2848 			goto skip_recycle;
2849 		cl = mtod(m, void *);
2850 		memcpy(cl, sd->rxsd_cl, len);
2851 		recycle_rx_buf(adap, fl, fl->cidx);
2852 		m->m_pkthdr.len = m->m_len = len;
2853 		m->m_flags = 0;
2854 		mh->mh_head = mh->mh_tail = m;
2855 		ret = 1;
2856 		goto done;
2857 	} else {
2858 	skip_recycle:
2859 		bus_dmamap_unload(fl->entry_tag, sd->map);
2860 		cl = sd->rxsd_cl;
2861 		m = sd->m;
2862 
2863 		if ((sopeop == RSPQ_SOP_EOP) ||
2864 		    (sopeop == RSPQ_SOP))
2865 			flags |= M_PKTHDR;
2866 		m_init(m, fl->zone, fl->buf_size, M_NOWAIT, MT_DATA, flags);
2867 		if (fl->zone == zone_pack) {
2868 			/*
2869 			 * restore clobbered data pointer
2870 			 */
2871 			m->m_data = m->m_ext.ext_buf;
2872 		} else {
2873 			m_cljset(m, cl, fl->type);
2874 		}
2875 		m->m_len = len;
2876 	}
2877 	switch(sopeop) {
2878 	case RSPQ_SOP_EOP:
2879 		ret = 1;
2880 		/* FALLTHROUGH */
2881 	case RSPQ_SOP:
2882 		mh->mh_head = mh->mh_tail = m;
2883 		m->m_pkthdr.len = len;
2884 		break;
2885 	case RSPQ_EOP:
2886 		ret = 1;
2887 		/* FALLTHROUGH */
2888 	case RSPQ_NSOP_NEOP:
2889 		if (mh->mh_tail == NULL) {
2890 			log(LOG_ERR, "discarding intermediate descriptor entry\n");
2891 			m_freem(m);
2892 			break;
2893 		}
2894 		mh->mh_tail->m_next = m;
2895 		mh->mh_tail = m;
2896 		mh->mh_head->m_pkthdr.len += len;
2897 		break;
2898 	}
2899 	if (cxgb_debug)
2900 		printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
2901 done:
2902 	if (++fl->cidx == fl->size)
2903 		fl->cidx = 0;
2904 
2905 	return (ret);
2906 }
2907 
2908 /**
2909  *	handle_rsp_cntrl_info - handles control information in a response
2910  *	@qs: the queue set corresponding to the response
2911  *	@flags: the response control flags
2912  *
2913  *	Handles the control information of an SGE response, such as GTS
2914  *	indications and completion credits for the queue set's Tx queues.
2915  *	HW coalesces credits; we don't do any extra SW coalescing.
2916  */
2917 static __inline void
2918 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2919 {
2920 	unsigned int credits;
2921 
2922 #if USE_GTS
2923 	if (flags & F_RSPD_TXQ0_GTS)
2924 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2925 #endif
2926 	credits = G_RSPD_TXQ0_CR(flags);
2927 	if (credits)
2928 		qs->txq[TXQ_ETH].processed += credits;
2929 
2930 	credits = G_RSPD_TXQ2_CR(flags);
2931 	if (credits)
2932 		qs->txq[TXQ_CTRL].processed += credits;
2933 
2934 # if USE_GTS
2935 	if (flags & F_RSPD_TXQ1_GTS)
2936 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2937 # endif
2938 	credits = G_RSPD_TXQ1_CR(flags);
2939 	if (credits)
2940 		qs->txq[TXQ_OFLD].processed += credits;
2941 
2942 }
2943 
2944 static void
2945 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2946     unsigned int sleeping)
2947 {
2948 	;
2949 }
2950 
2951 /**
2952  *	process_responses - process responses from an SGE response queue
2953  *	@adap: the adapter
2954  *	@qs: the queue set to which the response queue belongs
2955  *	@budget: how many responses can be processed in this round
2956  *
2957  *	Process responses from an SGE response queue up to the supplied budget.
2958  *	Responses include received packets as well as credits and other events
2959  *	for the queues that belong to the response queue's queue set.
2960  *	A negative budget is effectively unlimited.
2961  *
2962  *	Additionally choose the interrupt holdoff time for the next interrupt
2963  *	on this queue.  If the system is under memory shortage use a fairly
2964  *	long delay to help recovery.
2965  */
2966 static int
2967 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2968 {
2969 	struct sge_rspq *rspq = &qs->rspq;
2970 	struct rsp_desc *r = &rspq->desc[rspq->cidx];
2971 	int budget_left = budget;
2972 	unsigned int sleeping = 0;
2973 #ifdef LRO_SUPPORTED
2974 	int lro_enabled = qs->lro.enabled;
2975 	int skip_lro;
2976 	struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2977 #endif
2978 	struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2979 	int ngathered = 0;
2980 #ifdef DEBUG
2981 	static int last_holdoff = 0;
2982 	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2983 		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2984 		last_holdoff = rspq->holdoff_tmr;
2985 	}
2986 #endif
2987 	rspq->next_holdoff = rspq->holdoff_tmr;
2988 
2989 	while (__predict_true(budget_left && is_new_response(r, rspq))) {
2990 		int eth, eop = 0, ethpad = 0;
2991 		uint32_t flags = ntohl(r->flags);
2992 		uint32_t rss_csum = *(const uint32_t *)r;
2993 		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2994 
2995 		eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2996 
2997 		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2998 			struct mbuf *m;
2999 
3000 			if (cxgb_debug)
3001 				printf("async notification\n");
3002 
3003 			if (rspq->rspq_mh.mh_head == NULL) {
3004 				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
3005 				m = rspq->rspq_mh.mh_head;
3006 			} else {
3007 				m = m_gethdr(M_DONTWAIT, MT_DATA);
3008 			}
3009 			if (m == NULL)
3010 				goto no_mem;
3011 
3012 			memcpy(mtod(m, char *), r, AN_PKT_SIZE);
3013 			m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
3014 			*mtod(m, char *) = CPL_ASYNC_NOTIF;
3015 			rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
3016 			eop = 1;
3017 			rspq->async_notif++;
3018 			goto skip;
3019 		} else if  (flags & F_RSPD_IMM_DATA_VALID) {
3020 			struct mbuf *m = NULL;
3021 
3022 			DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
3023 			    r->rss_hdr.opcode, rspq->cidx);
3024 			if (rspq->rspq_mh.mh_head == NULL)
3025 				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
3026                         else
3027 			else
3028 
3029 			if (rspq->rspq_mh.mh_head == NULL &&  m == NULL) {
3030 		no_mem:
3031 				rspq->next_holdoff = NOMEM_INTR_DELAY;
3032 				budget_left--;
3033 				break;
3034 			}
3035 			get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
3036 			eop = 1;
3037 			rspq->imm_data++;
3038 		} else if (r->len_cq) {
3039 			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
3040 
3041 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
3042 			if (eop) {
3043 				rspq->rspq_mh.mh_head->m_flags |= M_FLOWID;
3044 				rspq->rspq_mh.mh_head->m_pkthdr.flowid = rss_hash;
3045 			}
3046 
3047 			ethpad = 2;
3048 		} else {
3049 			rspq->pure_rsps++;
3050 		}
3051 	skip:
3052 		if (flags & RSPD_CTRL_MASK) {
3053 			sleeping |= flags & RSPD_GTS_MASK;
3054 			handle_rsp_cntrl_info(qs, flags);
3055 		}
3056 
3057 		r++;
3058 		if (__predict_false(++rspq->cidx == rspq->size)) {
3059 			rspq->cidx = 0;
3060 			rspq->gen ^= 1;
3061 			r = rspq->desc;
3062 		}
3063 
3064 		if (++rspq->credits >= (rspq->size / 4)) {
3065 			refill_rspq(adap, rspq, rspq->credits);
3066 			rspq->credits = 0;
3067 		}
3068 		if (!eth && eop) {
3069 			rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
3070 			/*
3071 			 * XXX size mismatch
3072 			 */
3073 			m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
3074 
3075 
3076 			ngathered = rx_offload(&adap->tdev, rspq,
3077 			    rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
3078 			rspq->rspq_mh.mh_head = NULL;
3079 			DPRINTF("received offload packet\n");
3080 
3081 		} else if (eth && eop) {
3082 			struct mbuf *m = rspq->rspq_mh.mh_head;
3083 
3084 			t3_rx_eth(adap, rspq, m, ethpad);
3085 
3086 #ifdef LRO_SUPPORTED
3087 			/*
3088 			 * The T304 sends incoming packets on any qset.  If LRO
3089 			 * is also enabled, we could end up sending packet up
3090 			 * is also enabled, we could end up sending the packet up
3091 			 *
3092 			 * The mbuf's rcvif was derived from the cpl header and
3093 			 * is accurate.  Skip LRO and just use that.
3094 			 */
3095 			skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
3096 
3097 			if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro &&
3098 			    (tcp_lro_rx(lro_ctrl, m, 0) == 0)) {
3099 				/* successfully queued for LRO */
3100 			} else
3101 #endif
3102 			{
3103 				/*
3104 				 * LRO not enabled, packet unsuitable for LRO,
3105 				 * or unable to queue.  Pass it up right now in
3106 				 * either case.
3107 				 */
3108 				struct ifnet *ifp = m->m_pkthdr.rcvif;
3109 				(*ifp->if_input)(ifp, m);
3110 			}
3111 			rspq->rspq_mh.mh_head = NULL;
3112 
3113 		}
3114 		__refill_fl_lt(adap, &qs->fl[0], 32);
3115 		__refill_fl_lt(adap, &qs->fl[1], 32);
3116 		--budget_left;
3117 	}
3118 
3119 	deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
3120 
3121 #ifdef LRO_SUPPORTED
3122 	/* Flush LRO */
3123 	while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
3124 		struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
3125 		SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
3126 		tcp_lro_flush(lro_ctrl, queued);
3127 	}
3128 #endif
3129 
3130 	if (sleeping)
3131 		check_ring_db(adap, qs, sleeping);
3132 
3133 	mb();  /* commit Tx queue processed updates */
3134 	if (__predict_false(qs->txq_stopped > 1))
3135 		restart_tx(qs);
3136 
3137 	__refill_fl_lt(adap, &qs->fl[0], 512);
3138 	__refill_fl_lt(adap, &qs->fl[1], 512);
3139 	budget -= budget_left;
3140 	return (budget);
3141 }
3142 
3143 /*
3144  * A helper function that processes responses and issues GTS.
3145  */
3146 static __inline int
3147 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
3148 {
3149 	int work;
3150 	static int last_holdoff = 0;
3151 
3152 	work = process_responses(adap, rspq_to_qset(rq), -1);
3153 
3154 	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3155 		printf("next_holdoff=%d\n", rq->next_holdoff);
3156 		last_holdoff = rq->next_holdoff;
3157 	}
3158 	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3159 	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3160 
3161 	return (work);
3162 }
3163 
3164 
3165 /*
3166  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3167  * Handles data events from SGE response queues as well as error and other
3168  * async events as they all use the same interrupt pin.  We use one SGE
3169  * response queue per port in this mode and protect all response queues with
3170  * queue 0's lock.
3171  */
3172 void
3173 t3b_intr(void *data)
3174 {
3175 	uint32_t i, map;
3176 	adapter_t *adap = data;
3177 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3178 
3179 	t3_write_reg(adap, A_PL_CLI, 0);
3180 	map = t3_read_reg(adap, A_SG_DATA_INTR);
3181 
3182 	if (!map)
3183 		return;
3184 
3185 	if (__predict_false(map & F_ERRINTR))
3186 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3187 
3188 	mtx_lock(&q0->lock);
3189 	for_each_port(adap, i)
3190 	    if (map & (1 << i))
3191 			process_responses_gts(adap, &adap->sge.qs[i].rspq);
3192 	mtx_unlock(&q0->lock);
3193 }
3194 
3195 /*
3196  * The MSI interrupt handler.  This needs to handle data events from SGE
3197  * response queues as well as error and other async events as they all use
3198  * the same MSI vector.  We use one SGE response queue per port in this mode
3199  * and protect all response queues with queue 0's lock.
3200  */
3201 void
3202 t3_intr_msi(void *data)
3203 {
3204 	adapter_t *adap = data;
3205 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3206 	int i, new_packets = 0;
3207 
3208 	mtx_lock(&q0->lock);
3209 
3210 	for_each_port(adap, i)
3211 	    if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3212 		    new_packets = 1;
3213 	mtx_unlock(&q0->lock);
3214 	if (new_packets == 0)
3215 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3216 }
3217 
3218 void
3219 t3_intr_msix(void *data)
3220 {
3221 	struct sge_qset *qs = data;
3222 	adapter_t *adap = qs->port->adapter;
3223 	struct sge_rspq *rspq = &qs->rspq;
3224 
3225 	if (process_responses_gts(adap, rspq) == 0)
3226 		rspq->unhandled_irqs++;
3227 }
3228 
3229 #define	QDUMP_SBUF_SIZE		(32 * 400)
3230 static int
3231 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3232 {
3233 	struct sge_rspq *rspq;
3234 	struct sge_qset *qs;
3235 	int i, err, dump_end, idx;
3236 	static int multiplier = 1;
3237 	struct sbuf *sb;
3238 	struct rsp_desc *rspd;
3239 	uint32_t data[4];
3240 
3241 	rspq = arg1;
3242 	qs = rspq_to_qset(rspq);
3243 	if (rspq->rspq_dump_count == 0)
3244 		return (0);
3245 	if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3246 		log(LOG_WARNING,
3247 		    "dump count is too large %d\n", rspq->rspq_dump_count);
3248 		rspq->rspq_dump_count = 0;
3249 		return (EINVAL);
3250 	}
3251 	if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3252 		log(LOG_WARNING,
3253 		    "dump start of %d is greater than queue size\n",
3254 		    rspq->rspq_dump_start);
3255 		rspq->rspq_dump_start = 0;
3256 		return (EINVAL);
3257 	}
3258 	err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3259 	if (err)
3260 		return (err);
3261 retry_sbufops:
3262 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3263 
3264 	sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3265 	    (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3266 	    ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3267 	sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3268 	    ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3269 
3270 	sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3271 	    (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3272 
3273 	dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3274 	for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3275 		idx = i & (RSPQ_Q_SIZE-1);
3276 
3277 		rspd = &rspq->desc[idx];
3278 		sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3279 		    idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3280 		    rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3281 		sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3282 		    rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3283 		    be32toh(rspd->len_cq), rspd->intr_gen);
3284 	}
3285 	if (sbuf_overflowed(sb)) {
3286 		sbuf_delete(sb);
3287 		multiplier++;
3288 		goto retry_sbufops;
3289 	}
3290 	sbuf_finish(sb);
3291 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3292 	sbuf_delete(sb);
3293 	return (err);
3294 }
3295 
3296 static int
3297 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3298 {
3299 	struct sge_txq *txq;
3300 	struct sge_qset *qs;
3301 	int i, j, err, dump_end;
3302 	static int multiplier = 1;
3303 	struct sbuf *sb;
3304 	struct tx_desc *txd;
3305 	uint32_t *WR, wr_hi, wr_lo, gen;
3306 	uint32_t data[4];
3307 
3308 	txq = arg1;
3309 	qs = txq_to_qset(txq, TXQ_ETH);
3310 	if (txq->txq_dump_count == 0) {
3311 		return (0);
3312 	}
3313 	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3314 		log(LOG_WARNING,
3315 		    "dump count is too large %d\n", txq->txq_dump_count);
3316 		txq->txq_dump_count = 1;
3317 		return (EINVAL);
3318 	}
3319 	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3320 		log(LOG_WARNING,
3321 		    "dump start of %d is greater than queue size\n",
3322 		    txq->txq_dump_start);
3323 		txq->txq_dump_start = 0;
3324 		return (EINVAL);
3325 	}
3326 	err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3327 	if (err)
3328 		return (err);
3329 
3330 
3331 retry_sbufops:
3332 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3333 
3334 	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3335 	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3336 	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3337 	sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
3338 	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3339 	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3340 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3341 	    txq->txq_dump_start,
3342 	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3343 
3344 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3345 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3346 		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3347 		WR = (uint32_t *)txd->flit;
3348 		wr_hi = ntohl(WR[0]);
3349 		wr_lo = ntohl(WR[1]);
3350 		gen = G_WR_GEN(wr_lo);
3351 
3352 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3353 		    wr_hi, wr_lo, gen);
3354 		for (j = 2; j < 30; j += 4)
3355 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3356 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3357 
3358 	}
3359 	if (sbuf_overflowed(sb)) {
3360 		sbuf_delete(sb);
3361 		multiplier++;
3362 		goto retry_sbufops;
3363 	}
3364 	sbuf_finish(sb);
3365 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3366 	sbuf_delete(sb);
3367 	return (err);
3368 }
3369 
3370 static int
3371 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3372 {
3373 	struct sge_txq *txq;
3374 	struct sge_qset *qs;
3375 	int i, j, err, dump_end;
3376 	static int multiplier = 1;
3377 	struct sbuf *sb;
3378 	struct tx_desc *txd;
3379 	uint32_t *WR, wr_hi, wr_lo, gen;
3380 
3381 	txq = arg1;
3382 	qs = txq_to_qset(txq, TXQ_CTRL);
3383 	if (txq->txq_dump_count == 0) {
3384 		return (0);
3385 	}
3386 	if (txq->txq_dump_count > 256) {
3387 		log(LOG_WARNING,
3388 		    "dump count is too large %d\n", txq->txq_dump_count);
3389 		txq->txq_dump_count = 1;
3390 		return (EINVAL);
3391 	}
3392 	if (txq->txq_dump_start > 255) {
3393 		log(LOG_WARNING,
3394 		    "dump start of %d is greater than queue size\n",
3395 		    txq->txq_dump_start);
3396 		txq->txq_dump_start = 0;
3397 		return (EINVAL);
3398 	}
3399 
3400 retry_sbufops:
3401 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3402 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3403 	    txq->txq_dump_start,
3404 	    (txq->txq_dump_start + txq->txq_dump_count) & 255);
3405 
3406 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3407 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3408 		txd = &txq->desc[i & (255)];
3409 		WR = (uint32_t *)txd->flit;
3410 		wr_hi = ntohl(WR[0]);
3411 		wr_lo = ntohl(WR[1]);
3412 		gen = G_WR_GEN(wr_lo);
3413 
3414 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3415 		    wr_hi, wr_lo, gen);
3416 		for (j = 2; j < 30; j += 4)
3417 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3418 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3419 
3420 	}
3421 	if (sbuf_overflowed(sb)) {
3422 		sbuf_delete(sb);
3423 		multiplier++;
3424 		goto retry_sbufops;
3425 	}
3426 	sbuf_finish(sb);
3427 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3428 	sbuf_delete(sb);
3429 	return (err);
3430 }
3431 
3432 static int
3433 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3434 {
3435 	adapter_t *sc = arg1;
3436 	struct qset_params *qsp = &sc->params.sge.qset[0];
3437 	int coalesce_usecs;
3438 	struct sge_qset *qs;
3439 	int i, j, err, nqsets = 0;
3440 	struct mtx *lock;
3441 
3442 	if ((sc->flags & FULL_INIT_DONE) == 0)
3443 		return (ENXIO);
3444 
3445 	coalesce_usecs = qsp->coalesce_usecs;
3446 	err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3447 
3448 	if (err != 0) {
3449 		return (err);
3450 	}
3451 	if (coalesce_usecs == qsp->coalesce_usecs)
3452 		return (0);
3453 
3454 	for (i = 0; i < sc->params.nports; i++)
3455 		for (j = 0; j < sc->port[i].nqsets; j++)
3456 			nqsets++;
3457 
3458 	coalesce_usecs = max(1, coalesce_usecs);
3459 
3460 	for (i = 0; i < nqsets; i++) {
3461 		qs = &sc->sge.qs[i];
3462 		qsp = &sc->params.sge.qset[i];
3463 		qsp->coalesce_usecs = coalesce_usecs;
3464 
3465 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3466 			    &sc->sge.qs[0].rspq.lock;
3467 
3468 		mtx_lock(lock);
3469 		t3_update_qset_coalesce(qs, qsp);
3470 		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3471 		    V_NEWTIMER(qs->rspq.holdoff_tmr));
3472 		mtx_unlock(lock);
3473 	}
3474 
3475 	return (0);
3476 }
3477 
3478 
3479 void
3480 t3_add_attach_sysctls(adapter_t *sc)
3481 {
3482 	struct sysctl_ctx_list *ctx;
3483 	struct sysctl_oid_list *children;
3484 
3485 	ctx = device_get_sysctl_ctx(sc->dev);
3486 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3487 
3488 	/* random information */
3489 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3490 	    "firmware_version",
3491 	    CTLFLAG_RD, &sc->fw_version,
3492 	    0, "firmware version");
3493 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3494 	    "hw_revision",
3495 	    CTLFLAG_RD, &sc->params.rev,
3496 	    0, "chip model");
3497 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3498 	    "port_types",
3499 	    CTLFLAG_RD, &sc->port_types,
3500 	    0, "type of ports");
3501 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3502 	    "enable_debug",
3503 	    CTLFLAG_RW, &cxgb_debug,
3504 	    0, "enable verbose debugging output");
3505 	SYSCTL_ADD_QUAD(ctx, children, OID_AUTO, "tunq_coalesce",
3506 	    CTLFLAG_RD, &sc->tunq_coalesce,
3507 	    "#tunneled packets freed");
3508 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3509 	    "txq_overrun",
3510 	    CTLFLAG_RD, &txq_fills,
3511 	    0, "#times txq overrun");
3512 }
3513 
3514 
3515 static const char *rspq_name = "rspq";
3516 static const char *txq_names[] =
3517 {
3518 	"txq_eth",
3519 	"txq_ofld",
3520 	"txq_ctrl"
3521 };
3522 
3523 static int
3524 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3525 {
3526 	struct port_info *p = arg1;
3527 	uint64_t *parg;
3528 
3529 	if (!p)
3530 		return (EINVAL);
3531 
3532 	parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3533 	PORT_LOCK(p);
3534 	t3_mac_update_stats(&p->mac);
3535 	PORT_UNLOCK(p);
3536 
3537 	return (sysctl_handle_quad(oidp, parg, 0, req));
3538 }
3539 
3540 void
3541 t3_add_configured_sysctls(adapter_t *sc)
3542 {
3543 	struct sysctl_ctx_list *ctx;
3544 	struct sysctl_oid_list *children;
3545 	int i, j;
3546 
3547 	ctx = device_get_sysctl_ctx(sc->dev);
3548 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3549 
3550 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3551 	    "intr_coal",
3552 	    CTLTYPE_INT|CTLFLAG_RW, sc,
3553 	    0, t3_set_coalesce_usecs,
3554 	    "I", "interrupt coalescing timer (us)");
3555 
3556 	for (i = 0; i < sc->params.nports; i++) {
3557 		struct port_info *pi = &sc->port[i];
3558 		struct sysctl_oid *poid;
3559 		struct sysctl_oid_list *poidlist;
3560 		struct mac_stats *mstats = &pi->mac.stats;
3561 
3562 		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3563 		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3564 		    pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3565 		poidlist = SYSCTL_CHILDREN(poid);
3566 		SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3567 		    "nqsets", CTLFLAG_RD, &pi->nqsets,
3568 		    0, "#queue sets");
3569 
3570 		for (j = 0; j < pi->nqsets; j++) {
3571 			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3572 			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
3573 					  *ctrlqpoid, *lropoid;
3574 			struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
3575 					       *txqpoidlist, *ctrlqpoidlist,
3576 					       *lropoidlist;
3577 			struct sge_txq *txq = &qs->txq[TXQ_ETH];
3578 
3579 			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3580 
3581 			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3582 			    qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3583 			qspoidlist = SYSCTL_CHILDREN(qspoid);
3584 
3585 			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
3586 					CTLFLAG_RD, &qs->fl[0].empty, 0,
3587 					"freelist #0 empty");
3588 			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
3589 					CTLFLAG_RD, &qs->fl[1].empty, 0,
3590 					"freelist #1 empty");
3591 
3592 			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3593 			    rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3594 			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3595 
3596 			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3597 			    txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3598 			txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3599 
3600 			ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3601 			    txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3602 			ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3603 
3604 			lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3605 			    "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
3606 			lropoidlist = SYSCTL_CHILDREN(lropoid);
3607 
3608 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3609 			    CTLFLAG_RD, &qs->rspq.size,
3610 			    0, "#entries in response queue");
3611 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3612 			    CTLFLAG_RD, &qs->rspq.cidx,
3613 			    0, "consumer index");
3614 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3615 			    CTLFLAG_RD, &qs->rspq.credits,
3616 			    0, "#credits");
3617 			SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3618 			    CTLFLAG_RD, &qs->rspq.phys_addr,
3619 			    "physical address of the queue");
3620 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3621 			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3622 			    0, "start rspq dump entry");
3623 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3624 			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3625 			    0, "#rspq entries to dump");
3626 			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3627 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3628 			    0, t3_dump_rspq, "A", "dump of the response queue");
3629 
3630 
3631 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
3632 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
3633 			    0, "#tunneled packets dropped");
3634 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3635 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3636 			    0, "#tunneled packets waiting to be sent");
3637 #if 0
3638 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3639 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3640 			    0, "#tunneled packets queue producer index");
3641 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3642 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3643 			    0, "#tunneled packets queue consumer index");
3644 #endif
3645 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3646 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3647 			    0, "#tunneled packets processed by the card");
3648 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3649 			    CTLFLAG_RD, &txq->cleaned,
3650 			    0, "#tunneled packets cleaned");
3651 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3652 			    CTLFLAG_RD, &txq->in_use,
3653 			    0, "#tunneled packet slots in use");
3654 			SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3655 			    CTLFLAG_RD, &txq->txq_frees,
3656 			    "#tunneled packets freed");
3657 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3658 			    CTLFLAG_RD, &txq->txq_skipped,
3659 			    0, "#tunneled packet descriptors skipped");
3660 			SYSCTL_ADD_QUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
3661 			    CTLFLAG_RD, &txq->txq_coalesced,
3662 			    "#tunneled packets coalesced");
3663 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3664 			    CTLFLAG_RD, &txq->txq_enqueued,
3665 			    0, "#tunneled packets enqueued to hardware");
3666 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3667 			    CTLFLAG_RD, &qs->txq_stopped,
3668 			    0, "tx queues stopped");
3669 			SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3670 			    CTLFLAG_RD, &txq->phys_addr,
3671 			    "physical address of the queue");
3672 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3673 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3674 			    0, "txq generation");
3675 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3676 			    CTLFLAG_RD, &txq->cidx,
3677 			    0, "hardware queue cidx");
3678 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3679 			    CTLFLAG_RD, &txq->pidx,
3680 			    0, "hardware queue pidx");
3681 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3682 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3683 			    0, "txq start idx for dump");
3684 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3685 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3686 			    0, "txq #entries to dump");
3687 			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3688 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3689 			    0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3690 
3691 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3692 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3693 			    0, "ctrlq start idx for dump");
3694 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3695 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3696 			    0, "ctrl #entries to dump");
3697 			SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3698 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3699 			    0, t3_dump_txq_ctrl, "A", "dump of the control queue");
3700 
3701 #ifdef LRO_SUPPORTED
3702 			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
3703 			    CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3704 			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3705 			    CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3706 			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3707 			    CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3708 			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3709 			    CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3710 #endif
3711 		}
3712 
3713 		/* Now add a node for mac stats. */
3714 		poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3715 		    CTLFLAG_RD, NULL, "MAC statistics");
3716 		poidlist = SYSCTL_CHILDREN(poid);
3717 
3718 		/*
3719 		 * We (ab)use the length argument (arg2) to pass on the offset
3720 		 * of the data that we are interested in.  This is only required
3721 		 * for the quad counters that are updated from the hardware (we
3722 		 * make sure that we return the latest value).
3723 		 * sysctl_handle_macstat first updates *all* the counters from
3724 		 * the hardware, and then returns the latest value of the
3725 		 * requested counter.  Best would be to update only the
3726 		 * requested counter from hardware, but t3_mac_update_stats()
3727 		 * hides all the register details and we don't want to dive into
3728 		 * all that here.
3729 		 */
3730 #define CXGB_SYSCTL_ADD_QUAD(a)	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3731     (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
3732     sysctl_handle_macstat, "QU", 0)
3733 		CXGB_SYSCTL_ADD_QUAD(tx_octets);
3734 		CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3735 		CXGB_SYSCTL_ADD_QUAD(tx_frames);
3736 		CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3737 		CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3738 		CXGB_SYSCTL_ADD_QUAD(tx_pause);
3739 		CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3740 		CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3741 		CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3742 		CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3743 		CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3744 		CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3745 		CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3746 		CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3747 		CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3748 		CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3749 		CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3750 		CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3751 		CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3752 		CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3753 		CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3754 		CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3755 		CXGB_SYSCTL_ADD_QUAD(rx_octets);
3756 		CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3757 		CXGB_SYSCTL_ADD_QUAD(rx_frames);
3758 		CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3759 		CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3760 		CXGB_SYSCTL_ADD_QUAD(rx_pause);
3761 		CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
3762 		CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3763 		CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3764 		CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3765 		CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3766 		CXGB_SYSCTL_ADD_QUAD(rx_runt);
3767 		CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3768 		CXGB_SYSCTL_ADD_QUAD(rx_short);
3769 		CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3770 		CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3771 		CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3772 		CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3773 		CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3774 		CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3775 		CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3776 		CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3777 		CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3778 		CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3779 #undef CXGB_SYSCTL_ADD_QUAD
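
/*
 * Worked example (for illustration only): the tx_octets entry above
 * expands to roughly
 *
 *	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, "tx_octets",
 *	    CTLTYPE_QUAD | CTLFLAG_RD, pi,
 *	    offsetof(struct mac_stats, tx_octets),
 *	    sysctl_handle_macstat, "QU", 0);
 *
 * so a read of the mac_stats.tx_octets node enters
 * sysctl_handle_macstat() with arg2 equal to that offset, and the
 * handler returns the freshly updated pi->mac.stats.tx_octets.
 */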
3780 
3781 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3782     CTLFLAG_RD, &mstats->a, 0)
3783 		CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3784 		CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3785 		CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3786 		CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3787 		CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3788 		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3789 		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3790 		CXGB_SYSCTL_ADD_ULONG(num_toggled);
3791 		CXGB_SYSCTL_ADD_ULONG(num_resets);
3792 		CXGB_SYSCTL_ADD_ULONG(link_faults);
3793 #undef CXGB_SYSCTL_ADD_ULONG
3794 	}
3795 }
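
/*
 * Sketch of the resulting layout (node names are taken from the
 * registrations above; the dev.cxgbc.<unit> prefix assumes the
 * controller attached as cxgbc):
 *
 *	dev.cxgbc.0.intr_coal
 *	dev.cxgbc.0.port0.nqsets
 *	dev.cxgbc.0.port0.qs0.rspq.cidx
 *	dev.cxgbc.0.port0.qs0.txq_eth.qdump
 *	dev.cxgbc.0.port0.mac_stats.tx_octets
 */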
3796 
3797 /**
3798  *	t3_get_desc - dump an SGE descriptor for debugging purposes
3799  *	@qs: the queue set
3800  *	@qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3801  *	@idx: the descriptor index in the queue
3802  *	@data: where to dump the descriptor contents
3803  *
3804  *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3805  *	size of the descriptor, or EINVAL on a bad queue or index.
3806  */
3807 int
3808 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3809 		unsigned char *data)
3810 {
3811 	if (qnum >= 6)
3812 		return (EINVAL);
3813 
3814 	if (qnum < 3) {
3815 		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3816 			return (EINVAL);
3817 		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3818 		return (sizeof(struct tx_desc));
3819 	}
3820 
3821 	if (qnum == 3) {
3822 		if (!qs->rspq.desc || idx >= qs->rspq.size)
3823 			return (EINVAL);
3824 		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3825 		return (sizeof(struct rsp_desc));
3826 	}
3827 
3828 	qnum -= 4;
3829 	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3830 		return (EINVAL);
3831 	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3832 		return (sizeof(struct rx_desc));
3833 }
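
/*
 * Illustrative, compile-untested usage sketch of t3_get_desc(): copy
 * the raw contents of the first Ethernet Tx descriptor of a queue set
 * into a caller-supplied buffer.  The helper name and the minimum
 * buffer size check are examples, not part of the driver.
 */
#if 0
static int
example_peek_first_tx_desc(const struct sge_qset *qs, unsigned char *buf,
    size_t buflen)
{
	int len;

	if (buflen < sizeof(struct tx_desc))
		return (EINVAL);
	/* qnum 0..2 selects a Tx queue; 0 is the Ethernet Tx queue. */
	len = t3_get_desc(qs, 0, 0, buf);
	if (len == EINVAL)
		return (EINVAL);
	/* 'len' bytes of raw descriptor data are now in 'buf'. */
	return (0);
}
#endif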
3834