xref: /freebsd/sys/dev/cxgb/cxgb_sge.c (revision 357378bbdedf24ce2b90e9bd831af4a9db3ec70a)
1 /**************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause
3 
4 Copyright (c) 2007-2009, Chelsio Inc.
5 All rights reserved.
6 
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9 
10  1. Redistributions of source code must retain the above copyright notice,
11     this list of conditions and the following disclaimer.
12 
13  2. Neither the name of the Chelsio Corporation nor the names of its
14     contributors may be used to endorse or promote products derived from
15     this software without specific prior written permission.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28 
29 ***************************************************************************/
30 
31 #include <sys/cdefs.h>
32 #include "opt_inet6.h"
33 #include "opt_inet.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/bus.h>
40 #include <sys/conf.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/rman.h>
44 #include <sys/queue.h>
45 #include <sys/sysctl.h>
46 #include <sys/taskqueue.h>
47 
48 #include <sys/proc.h>
49 #include <sys/sbuf.h>
50 #include <sys/sched.h>
51 #include <sys/smp.h>
52 #include <sys/systm.h>
53 #include <sys/syslog.h>
54 #include <sys/socket.h>
55 #include <sys/sglist.h>
56 
57 #include <net/if.h>
58 #include <net/if_var.h>
59 #include <net/bpf.h>
60 #include <net/ethernet.h>
61 #include <net/if_vlan_var.h>
62 
63 #include <netinet/in_systm.h>
64 #include <netinet/in.h>
65 #include <netinet/ip.h>
66 #include <netinet/ip6.h>
67 #include <netinet/tcp.h>
68 
69 #include <dev/pci/pcireg.h>
70 #include <dev/pci/pcivar.h>
71 
72 #include <vm/vm.h>
73 #include <vm/pmap.h>
74 
75 #include <cxgb_include.h>
76 #include <sys/mvec.h>
77 
/* Driver-wide transmit knobs with external linkage. */
int	txq_fills = 0;
int	multiq_tx_enable = 1;

#ifdef TCP_OFFLOAD
/* Build-time check: NUM_CPL_HANDLERS must cover NUM_CPL_CMDS. */
CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
#endif
84 
85 extern struct sysctl_oid_list sysctl__hw_cxgb_children;
86 int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
87 SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
88     "size of per-queue mbuf ring");
89 
90 static int cxgb_tx_coalesce_force = 0;
91 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN,
92     &cxgb_tx_coalesce_force, 0,
93     "coalesce small packets into a single work request regardless of ring state");
94 
/*
 * Limits and defaults for the coalescing and reclaim tunables, all expressed
 * as fractions of the Ethernet Tx queue size.  Every expansion is fully
 * parenthesized so the macros stay correct inside larger expressions
 * (e.g. "X + 1" or "2 * X").
 */
#define	COALESCE_START_DEFAULT		((TX_ETH_Q_SIZE) >> 1)
#define	COALESCE_START_MAX		((TX_ETH_Q_SIZE) - ((TX_ETH_Q_SIZE) >> 3))
#define	COALESCE_STOP_DEFAULT		((TX_ETH_Q_SIZE) >> 2)
#define	COALESCE_STOP_MIN		((TX_ETH_Q_SIZE) >> 5)
#define	TX_RECLAIM_DEFAULT		((TX_ETH_Q_SIZE) >> 5)
#define	TX_RECLAIM_MAX			((TX_ETH_Q_SIZE) >> 2)
#define	TX_RECLAIM_MIN			((TX_ETH_Q_SIZE) >> 6)
102 
103 
104 static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
105 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN,
106     &cxgb_tx_coalesce_enable_start, 0,
107     "coalesce enable threshold");
108 static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
109 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN,
110     &cxgb_tx_coalesce_enable_stop, 0,
111     "coalesce disable threshold");
112 static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
113 SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN,
114     &cxgb_tx_reclaim_threshold, 0,
115     "tx cleaning minimum threshold");
116 
/*
 * XXX Rx buffer recycling must stay disabled until TOE no longer
 * assumes that every mbuf carries an m_ext.
 */
static int recycle_enable = 0;

/* Defined outside this file. */
extern int cxgb_use_16k_clusters;
extern int nmbjumbop;
extern int nmbjumbo9;
extern int nmbjumbo16;
127 
#define	USE_GTS			0

#define	SGE_RX_SM_BUF_SIZE	1536
#define	SGE_RX_DROP_THRES	16
#define	SGE_RX_COPY_THRES	128

/*
 * Interval of the Tx buffer reclaim timer (hz / 2 ticks).  It can be
 * infrequent because new Tx packets normally reclaim the buffers themselves.
 */
#define	TX_RECLAIM_PERIOD	(hz >> 1)
139 
/*
 * Bit flags stored in sge_txq.flags.
 */
enum {
	/* fetch engine is running */
	TXQ_RUNNING	= 0x1,
	/* last packet rang the doorbell */
	TXQ_LAST_PKT_DB	= 0x2,
};
147 
148 struct tx_desc {
149 	uint64_t	flit[TX_DESC_FLITS];
150 } __packed;
151 
152 struct rx_desc {
153 	uint32_t	addr_lo;
154 	uint32_t	len_gen;
155 	uint32_t	gen2;
156 	uint32_t	addr_hi;
157 } __packed;
158 
159 struct rsp_desc {               /* response queue descriptor */
160 	struct rss_header	rss_hdr;
161 	uint32_t		flags;
162 	uint32_t		len_cq;
163 	uint8_t			imm_data[47];
164 	uint8_t			intr_gen;
165 } __packed;
166 
/* Bits kept in rx_sw_desc.flags / tx_sw_desc.flags. */
#define	RX_SW_DESC_MAP_CREATED	(1 << 0)
#define	TX_SW_DESC_MAP_CREATED	(1 << 1)
#define	RX_SW_DESC_INUSE	(1 << 3)
#define	TX_SW_DESC_MAPPED	(1 << 4)

/* The four start-of-packet / end-of-packet combinations of a response. */
#define	RSPQ_NSOP_NEOP		G_RSPD_SOP_EOP(0)
#define	RSPQ_EOP		G_RSPD_SOP_EOP(F_RSPD_EOP)
#define	RSPQ_SOP		G_RSPD_SOP_EOP(F_RSPD_SOP)
#define	RSPQ_SOP_EOP		G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
176 
177 struct tx_sw_desc {                /* SW state per Tx descriptor */
178 	struct mbuf	*m;
179 	bus_dmamap_t	map;
180 	int		flags;
181 };
182 
183 struct rx_sw_desc {                /* SW state per Rx descriptor */
184 	caddr_t		rxsd_cl;
185 	struct mbuf	*m;
186 	bus_dmamap_t	map;
187 	int		flags;
188 };
189 
/* Three unsigned Tx queue values: completion, generation, producer index. */
struct txq_state {
	unsigned int compl;
	unsigned int gen;
	unsigned int pidx;
};
195 
196 struct refill_fl_cb_arg {
197 	int               error;
198 	bus_dma_segment_t seg;
199 	int               nseg;
200 };
201 
202 
203 /*
204  * Maps a number of flits to the number of Tx descriptors that can hold them.
205  * The formula is
206  *
207  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
208  *
209  * HW allows up to 4 descriptors to be combined into a WR.
210  */
211 static uint8_t flit_desc_map[] = {
212 	0,
213 #if SGE_NUM_GENBITS == 1
214 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
215 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
216 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
217 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
218 #elif SGE_NUM_GENBITS == 2
219 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
220 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
221 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
222 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
223 #else
224 # error "SGE_NUM_GENBITS must be 1 or 2"
225 #endif
226 };
227 
/* Queue-set lock helpers; all of them operate on (qs)->lock. */
#define	TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
#define	TXQ_TRYLOCK(qs)		mtx_trylock(&(qs)->lock)
#define	TXQ_LOCK(qs)		mtx_lock(&(qs)->lock)
#define	TXQ_UNLOCK(qs)		mtx_unlock(&(qs)->lock)

/* drbr wrappers acting on the Ethernet Tx queue's mbuf ring. */
#define	TXQ_RING_EMPTY(qs)						\
	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_NEEDS_ENQUEUE(qs)					\
	drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_FLUSH(qs)						\
	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_DEQUEUE_COND(qs, func, arg)				\
	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define	TXQ_RING_DEQUEUE(qs)						\
	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
240 
int cxgb_debug = 0;

/* Forward declarations for handlers referenced before their definitions. */
static void	sge_timer_cb(void *arg);
static void	sge_timer_reclaim(void *arg, int ncount);
static void	sge_txq_reclaim_handler(void *arg, int ncount);
static void	cxgb_start_locked(struct sge_qset *qs);
247 
248 /*
249  * XXX need to cope with bursty scheduling by looking at a wider
250  * window than we are now for determining the need for coalescing
251  *
252  */
253 static __inline uint64_t
254 check_pkt_coalesce(struct sge_qset *qs)
255 {
256         struct adapter *sc;
257         struct sge_txq *txq;
258 	uint8_t *fill;
259 
260 	if (__predict_false(cxgb_tx_coalesce_force))
261 		return (1);
262 	txq = &qs->txq[TXQ_ETH];
263         sc = qs->port->adapter;
264 	fill = &sc->tunq_fill[qs->idx];
265 
266 	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
267 		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
268 	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
269 		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
270 	/*
271 	 * if the hardware transmit queue is more than 1/8 full
272 	 * we mark it as coalescing - we drop back from coalescing
273 	 * when we go below 1/32 full and there are no packets enqueued,
274 	 * this provides us with some degree of hysteresis
275 	 */
276         if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
277 	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
278                 *fill = 0;
279         else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
280                 *fill = 1;
281 
282 	return (sc->tunq_coalesce);
283 }
284 
285 #ifdef __LP64__
286 static void
287 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
288 {
289 	uint64_t wr_hilo;
290 #if _BYTE_ORDER == _LITTLE_ENDIAN
291 	wr_hilo = wr_hi;
292 	wr_hilo |= (((uint64_t)wr_lo)<<32);
293 #else
294 	wr_hilo = wr_lo;
295 	wr_hilo |= (((uint64_t)wr_hi)<<32);
296 #endif
297 	wrp->wrh_hilo = wr_hilo;
298 }
299 #else
300 static void
301 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
302 {
303 
304 	wrp->wrh_hi = wr_hi;
305 	wmb();
306 	wrp->wrh_lo = wr_lo;
307 }
308 #endif
309 
/* Running totals shared between cxgb_dequeue() and coalesce_check(). */
struct coalesce_info {
	int count;	/* packets accepted so far */
	int nbytes;	/* sum of m_len over the accepted packets */
	int noncoal;	/* set once a packet unsuitable for coalescing is seen */
};
315 
316 static int
317 coalesce_check(struct mbuf *m, void *arg)
318 {
319 	struct coalesce_info *ci = arg;
320 
321 	if ((m->m_next != NULL) ||
322 	    ((mtod(m, vm_offset_t) & PAGE_MASK) + m->m_len > PAGE_SIZE))
323 		ci->noncoal = 1;
324 
325 	if ((ci->count == 0) || (ci->noncoal == 0 && (ci->count < 7) &&
326 	    (ci->nbytes + m->m_len <= 10500))) {
327 		ci->count++;
328 		ci->nbytes += m->m_len;
329 		return (1);
330 	}
331 	return (0);
332 }
333 
334 static struct mbuf *
335 cxgb_dequeue(struct sge_qset *qs)
336 {
337 	struct mbuf *m, *m_head, *m_tail;
338 	struct coalesce_info ci;
339 
340 
341 	if (check_pkt_coalesce(qs) == 0)
342 		return TXQ_RING_DEQUEUE(qs);
343 
344 	m_head = m_tail = NULL;
345 	ci.count = ci.nbytes = ci.noncoal = 0;
346 	do {
347 		m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
348 		if (m_head == NULL) {
349 			m_tail = m_head = m;
350 		} else if (m != NULL) {
351 			m_tail->m_nextpkt = m;
352 			m_tail = m;
353 		}
354 	} while (m != NULL);
355 	if (ci.count > 7)
356 		panic("trying to coalesce %d packets in to one WR", ci.count);
357 	return (m_head);
358 }
359 
360 /**
361  *	reclaim_completed_tx - reclaims completed Tx descriptors
362  *	@adapter: the adapter
363  *	@q: the Tx queue to reclaim completed descriptors from
364  *
365  *	Reclaims Tx descriptors that the SGE has indicated it has processed,
366  *	and frees the associated buffers if possible.  Called with the Tx
367  *	queue's lock held.
368  */
369 static __inline int
370 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
371 {
372 	struct sge_txq *q = &qs->txq[queue];
373 	int reclaim = desc_reclaimable(q);
374 
375 	if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
376 	    (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
377 		cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
378 
379 	if (reclaim < reclaim_min)
380 		return (0);
381 
382 	mtx_assert(&qs->lock, MA_OWNED);
383 	if (reclaim > 0) {
384 		t3_free_tx_desc(qs, reclaim, queue);
385 		q->cleaned += reclaim;
386 		q->in_use -= reclaim;
387 	}
388 	if (isset(&qs->txq_stopped, TXQ_ETH))
389                 clrbit(&qs->txq_stopped, TXQ_ETH);
390 
391 	return (reclaim);
392 }
393 
#ifdef DEBUGNET
/*
 * Debugnet Tx poll hook: reclaim completed Ethernet Tx descriptors, using
 * TX_RECLAIM_MAX as the minimum batch.  Returns what
 * reclaim_completed_tx() returns.
 */
int
cxgb_debugnet_poll_tx(struct sge_qset *qs)
{
	int reclaimed;

	reclaimed = reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH);
	return (reclaimed);
}
#endif
402 
403 /**
404  *	should_restart_tx - are there enough resources to restart a Tx queue?
405  *	@q: the Tx queue
406  *
407  *	Checks if there are enough descriptors to restart a suspended Tx queue.
408  */
409 static __inline int
410 should_restart_tx(const struct sge_txq *q)
411 {
412 	unsigned int r = q->processed - q->cleaned;
413 
414 	return q->in_use - r < (q->size >> 1);
415 }
416 
417 /**
418  *	t3_sge_init - initialize SGE
419  *	@adap: the adapter
420  *	@p: the SGE parameters
421  *
422  *	Performs SGE initialization needed every time after a chip reset.
423  *	We do not initialize any of the queue sets here, instead the driver
424  *	top-level must request those individually.  We also do not enable DMA
425  *	here, that should be done after the queues have been set up.
426  */
427 void
428 t3_sge_init(adapter_t *adap, struct sge_params *p)
429 {
430 	u_int ctrl, ups;
431 
432 	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
433 
434 	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
435 	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
436 	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
437 	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
438 #if SGE_NUM_GENBITS == 1
439 	ctrl |= F_EGRGENCTRL;
440 #endif
441 	if (adap->params.rev > 0) {
442 		if (!(adap->flags & (USING_MSIX | USING_MSI)))
443 			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
444 	}
445 	t3_write_reg(adap, A_SG_CONTROL, ctrl);
446 	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
447 		     V_LORCQDRBTHRSH(512));
448 	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
449 	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
450 		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
451 	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
452 		     adap->params.rev < T3_REV_C ? 1000 : 500);
453 	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
454 	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
455 	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
456 	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
457 	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
458 }
459 
460 
/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Returns the number of flits needed for a scatter/gather list with
 *	@n entries: three flits per pair of entries, and two flits for a
 *	trailing unpaired entry.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	unsigned int flits = (3 * n) / 2;

	if (n & 1)
		flits++;
	return (flits);
}
473 
474 /**
475  *	get_imm_packet - return the next ingress packet buffer from a response
476  *	@resp: the response descriptor containing the packet data
477  *
478  *	Return a packet containing the immediate data of the given response.
479  */
480 static int
481 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
482 {
483 
484 	if (resp->rss_hdr.opcode == CPL_RX_DATA) {
485 		const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
486 		m->m_len = sizeof(*cpl) + ntohs(cpl->len);
487 	} else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
488 		const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
489 		m->m_len = sizeof(*cpl) + ntohs(cpl->len);
490 	} else
491 		m->m_len = IMMED_PKT_SIZE;
492 	m->m_ext.ext_buf = NULL;
493 	m->m_ext.ext_type = 0;
494 	memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len);
495 	return (0);
496 }
497 
498 static __inline u_int
499 flits_to_desc(u_int n)
500 {
501 	return (flit_desc_map[n]);
502 }
503 
/* SG_INT_CAUSE bits reported as parity errors. */
#define	SGE_PARERR							\
	(F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR |		\
	 F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) |		\
	 V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR |	\
	 F_HIDRBPARITYERROR | F_LORCQPARITYERROR |			\
	 F_HIRCQPARITYERROR)
/* SG_INT_CAUSE bits reported as framing errors. */
#define	SGE_FRAMINGERR	(F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
/* Any of these bits makes t3_sge_err_intr_handler() call t3_fatal_err(). */
#define	SGE_FATALERR							\
	(SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW |		\
	 F_RSPQDISABLED)
512 
513 /**
514  *	t3_sge_err_intr_handler - SGE async event interrupt handler
515  *	@adapter: the adapter
516  *
517  *	Interrupt handler for SGE asynchronous (non-data) events.
518  */
519 void
520 t3_sge_err_intr_handler(adapter_t *adapter)
521 {
522 	unsigned int v, status;
523 
524 	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
525 	if (status & SGE_PARERR)
526 		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
527 			 status & SGE_PARERR);
528 	if (status & SGE_FRAMINGERR)
529 		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
530 			 status & SGE_FRAMINGERR);
531 	if (status & F_RSPQCREDITOVERFOW)
532 		CH_ALERT(adapter, "SGE response queue credit overflow\n");
533 
534 	if (status & F_RSPQDISABLED) {
535 		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
536 
537 		CH_ALERT(adapter,
538 			 "packet delivered to disabled response queue (0x%x)\n",
539 			 (v >> S_RSPQ0DISABLED) & 0xff);
540 	}
541 
542 	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
543 	if (status & SGE_FATALERR)
544 		t3_fatal_err(adapter);
545 }
546 
547 void
548 t3_sge_prep(adapter_t *adap, struct sge_params *p)
549 {
550 	int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
551 
552 	nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
553 	nqsets *= adap->params.nports;
554 
555 	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
556 	fl_q_size = rounddown_pow_of_two(fl_q_size);
557 
558 	use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
559 	    is_offload(adap);
560 
561 	if (use_16k) {
562 		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
563 		jumbo_buf_size = MJUM16BYTES;
564 	} else {
565 		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
566 		jumbo_buf_size = MJUM9BYTES;
567 	}
568 	jumbo_q_size = rounddown_pow_of_two(jumbo_q_size);
569 
570 	if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
571 		device_printf(adap->dev,
572 		    "Insufficient clusters and/or jumbo buffers.\n");
573 
574 	p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
575 
576 	for (i = 0; i < SGE_QSETS; ++i) {
577 		struct qset_params *q = p->qset + i;
578 
579 		if (adap->params.nports > 2) {
580 			q->coalesce_usecs = 50;
581 		} else {
582 #ifdef INVARIANTS
583 			q->coalesce_usecs = 10;
584 #else
585 			q->coalesce_usecs = 5;
586 #endif
587 		}
588 		q->polling = 0;
589 		q->rspq_size = RSPQ_Q_SIZE;
590 		q->fl_size = fl_q_size;
591 		q->jumbo_size = jumbo_q_size;
592 		q->jumbo_buf_size = jumbo_buf_size;
593 		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
594 		q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
595 		q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
596 		q->cong_thres = 0;
597 	}
598 }
599 
600 int
601 t3_sge_alloc(adapter_t *sc)
602 {
603 
604 	/* The parent tag. */
605 	if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */
606 				1, 0,			/* algnmnt, boundary */
607 				BUS_SPACE_MAXADDR,	/* lowaddr */
608 				BUS_SPACE_MAXADDR,	/* highaddr */
609 				NULL, NULL,		/* filter, filterarg */
610 				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
611 				BUS_SPACE_UNRESTRICTED, /* nsegments */
612 				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
613 				0,			/* flags */
614 				NULL, NULL,		/* lock, lockarg */
615 				&sc->parent_dmat)) {
616 		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
617 		return (ENOMEM);
618 	}
619 
620 	/*
621 	 * DMA tag for normal sized RX frames
622 	 */
623 	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
624 		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
625 		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
626 		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
627 		return (ENOMEM);
628 	}
629 
630 	/*
631 	 * DMA tag for jumbo sized RX frames.
632 	 */
633 	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
634 		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
635 		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
636 		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
637 		return (ENOMEM);
638 	}
639 
640 	/*
641 	 * DMA tag for TX frames.
642 	 */
643 	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
644 		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
645 		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
646 		NULL, NULL, &sc->tx_dmat)) {
647 		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
648 		return (ENOMEM);
649 	}
650 
651 	return (0);
652 }
653 
654 int
655 t3_sge_free(struct adapter * sc)
656 {
657 
658 	if (sc->tx_dmat != NULL)
659 		bus_dma_tag_destroy(sc->tx_dmat);
660 
661 	if (sc->rx_jumbo_dmat != NULL)
662 		bus_dma_tag_destroy(sc->rx_jumbo_dmat);
663 
664 	if (sc->rx_dmat != NULL)
665 		bus_dma_tag_destroy(sc->rx_dmat);
666 
667 	if (sc->parent_dmat != NULL)
668 		bus_dma_tag_destroy(sc->parent_dmat);
669 
670 	return (0);
671 }
672 
673 void
674 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
675 {
676 
677 	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
678 	qs->rspq.polling = 0 /* p->polling */;
679 }
680 
#if !defined(__i386__) && !defined(__amd64__)
/*
 * bus_dmamap_load() callback used by refill_fl().  Records the load result
 * in the struct refill_fl_cb_arg passed as @arg.
 *
 * The segment array is only read when the load succeeded; on error just
 * the error code and segment count are recorded, and the caller must not
 * use cb_arg->seg.
 */
static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->nseg = nseg;
	if (error != 0)
		return;
	cb_arg->seg = segs[0];
}
#endif
693 /**
694  *	refill_fl - refill an SGE free-buffer list
695  *	@sc: the controller softc
696  *	@q: the free-list to refill
697  *	@n: the number of new buffers to allocate
698  *
699  *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
700  *	The caller must assure that @n does not exceed the queue's capacity.
701  */
702 static void
703 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
704 {
705 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
706 	struct rx_desc *d = &q->desc[q->pidx];
707 	struct refill_fl_cb_arg cb_arg;
708 	struct mbuf *m;
709 	caddr_t cl;
710 	int err;
711 
712 	cb_arg.error = 0;
713 	while (n--) {
714 		/*
715 		 * We allocate an uninitialized mbuf + cluster, mbuf is
716 		 * initialized after rx.
717 		 */
718 		if (q->zone == zone_pack) {
719 			if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
720 				break;
721 			cl = m->m_ext.ext_buf;
722 		} else {
723 			if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
724 				break;
725 			if ((m = m_gethdr_raw(M_NOWAIT, 0)) == NULL) {
726 				uma_zfree(q->zone, cl);
727 				break;
728 			}
729 		}
730 		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
731 			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
732 				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
733 				uma_zfree(q->zone, cl);
734 				goto done;
735 			}
736 			sd->flags |= RX_SW_DESC_MAP_CREATED;
737 		}
738 #if !defined(__i386__) && !defined(__amd64__)
739 		err = bus_dmamap_load(q->entry_tag, sd->map,
740 		    cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
741 
742 		if (err != 0 || cb_arg.error) {
743 			if (q->zone != zone_pack)
744 				uma_zfree(q->zone, cl);
745 			m_free(m);
746 			goto done;
747 		}
748 #else
749 		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
750 #endif
751 		sd->flags |= RX_SW_DESC_INUSE;
752 		sd->rxsd_cl = cl;
753 		sd->m = m;
754 		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
755 		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
756 		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
757 		d->gen2 = htobe32(V_FLD_GEN2(q->gen));
758 
759 		d++;
760 		sd++;
761 
762 		if (++q->pidx == q->size) {
763 			q->pidx = 0;
764 			q->gen ^= 1;
765 			sd = q->sdesc;
766 			d = q->desc;
767 		}
768 		q->credits++;
769 		q->db_pending++;
770 	}
771 
772 done:
773 	if (q->db_pending >= 32) {
774 		q->db_pending = 0;
775 		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
776 	}
777 }
778 
779 
780 /**
781  *	free_rx_bufs - free the Rx buffers on an SGE free list
782  *	@sc: the controle softc
783  *	@q: the SGE free list to clean up
784  *
785  *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
786  *	this queue should be stopped before calling this function.
787  */
788 static void
789 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
790 {
791 	u_int cidx = q->cidx;
792 
793 	while (q->credits--) {
794 		struct rx_sw_desc *d = &q->sdesc[cidx];
795 
796 		if (d->flags & RX_SW_DESC_INUSE) {
797 			bus_dmamap_unload(q->entry_tag, d->map);
798 			bus_dmamap_destroy(q->entry_tag, d->map);
799 			if (q->zone == zone_pack) {
800 				m_init(d->m, M_NOWAIT, MT_DATA, M_EXT);
801 				uma_zfree(zone_pack, d->m);
802 			} else {
803 				m_init(d->m, M_NOWAIT, MT_DATA, 0);
804 				m_free_raw(d->m);
805 				uma_zfree(q->zone, d->rxsd_cl);
806 			}
807 		}
808 
809 		d->rxsd_cl = NULL;
810 		d->m = NULL;
811 		if (++cidx == q->size)
812 			cidx = 0;
813 	}
814 }
815 
816 static __inline void
817 __refill_fl(adapter_t *adap, struct sge_fl *fl)
818 {
819 	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
820 }
821 
822 static __inline void
823 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
824 {
825 	uint32_t reclaimable = fl->size - fl->credits;
826 
827 	if (reclaimable > 0)
828 		refill_fl(adap, fl, min(max, reclaimable));
829 }
830 
831 /**
832  *	recycle_rx_buf - recycle a receive buffer
833  *	@adapter: the adapter
834  *	@q: the SGE free list
835  *	@idx: index of buffer to recycle
836  *
837  *	Recycles the specified buffer on the given free list by adding it at
838  *	the next available slot on the list.
839  */
840 static void
841 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
842 {
843 	struct rx_desc *from = &q->desc[idx];
844 	struct rx_desc *to   = &q->desc[q->pidx];
845 
846 	q->sdesc[q->pidx] = q->sdesc[idx];
847 	to->addr_lo = from->addr_lo;        // already big endian
848 	to->addr_hi = from->addr_hi;        // likewise
849 	wmb();	/* necessary ? */
850 	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
851 	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
852 	q->credits++;
853 
854 	if (++q->pidx == q->size) {
855 		q->pidx = 0;
856 		q->gen ^= 1;
857 	}
858 	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
859 }
860 
861 static void
862 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
863 {
864 	uint32_t *addr;
865 
866 	addr = arg;
867 	*addr = segs[0].ds_addr;
868 }
869 
870 static int
871 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
872     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
873     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
874 {
875 	size_t len = nelem * elem_size;
876 	void *s = NULL;
877 	void *p = NULL;
878 	int err;
879 
880 	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
881 				      BUS_SPACE_MAXADDR_32BIT,
882 				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
883 				      len, 0, NULL, NULL, tag)) != 0) {
884 		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
885 		return (ENOMEM);
886 	}
887 
888 	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
889 				    map)) != 0) {
890 		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
891 		return (ENOMEM);
892 	}
893 
894 	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
895 	bzero(p, len);
896 	*(void **)desc = p;
897 
898 	if (sw_size) {
899 		len = nelem * sw_size;
900 		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
901 		*(void **)sdesc = s;
902 	}
903 	if (parent_entry_tag == NULL)
904 		return (0);
905 
906 	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
907 				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
908 		                      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
909 				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
910 		                      NULL, NULL, entry_tag)) != 0) {
911 		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
912 		return (ENOMEM);
913 	}
914 	return (0);
915 }
916 
917 static void
918 sge_slow_intr_handler(void *arg, int ncount)
919 {
920 	adapter_t *sc = arg;
921 
922 	t3_slow_intr_handler(sc);
923 	t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
924 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
925 }
926 
927 /**
928  *	sge_timer_cb - perform periodic maintenance of an SGE qset
929  *	@data: the SGE queue set to maintain
930  *
931  *	Runs periodically from a timer to perform maintenance of an SGE queue
932  *	set.  It performs two tasks:
933  *
934  *	a) Cleans up any completed Tx descriptors that may still be pending.
935  *	Normal descriptor cleanup happens when new packets are added to a Tx
936  *	queue so this timer is relatively infrequent and does any cleanup only
937  *	if the Tx queue has not seen any new packets in a while.  We make a
938  *	best effort attempt to reclaim descriptors, in that we don't wait
939  *	around if we cannot get a queue's lock (which most likely is because
940  *	someone else is queueing new packets and so will also handle the clean
941  *	up).  Since control queues use immediate data exclusively we don't
942  *	bother cleaning them up here.
943  *
944  *	b) Replenishes Rx queues that have run out due to memory shortage.
945  *	Normally new Rx buffers are added when existing ones are consumed but
946  *	when out of memory a queue can become empty.  We try to add only a few
947  *	buffers here, the queue will be replenished fully as these new buffers
948  *	are used up if memory shortage has subsided.
949  *
950  *	c) Return coalesced response queue credits in case a response queue is
951  *	starved.
952  *
953  *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
954  *	fifo overflows and the FW doesn't implement any recovery scheme yet.
955  */
956 static void
957 sge_timer_cb(void *arg)
958 {
959 	adapter_t *sc = arg;
960 	if ((sc->flags & USING_MSIX) == 0) {
961 
962 		struct port_info *pi;
963 		struct sge_qset *qs;
964 		struct sge_txq  *txq;
965 		int i, j;
966 		int reclaim_ofl, refill_rx;
967 
968 		if (sc->open_device_map == 0)
969 			return;
970 
971 		for (i = 0; i < sc->params.nports; i++) {
972 			pi = &sc->port[i];
973 			for (j = 0; j < pi->nqsets; j++) {
974 				qs = &sc->sge.qs[pi->first_qset + j];
975 				txq = &qs->txq[0];
976 				reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
977 				refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
978 				    (qs->fl[1].credits < qs->fl[1].size));
979 				if (reclaim_ofl || refill_rx) {
980 					taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
981 					break;
982 				}
983 			}
984 		}
985 	}
986 
987 	if (sc->params.nports > 2) {
988 		int i;
989 
990 		for_each_port(sc, i) {
991 			struct port_info *pi = &sc->port[i];
992 
993 			t3_write_reg(sc, A_SG_KDOORBELL,
994 				     F_SELEGRCNTX |
995 				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
996 		}
997 	}
998 	if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
999 	    sc->open_device_map != 0)
1000 		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1001 }
1002 
1003 /*
1004  * This is meant to be a catch-all function to keep sge state private
1005  * to sge.c
1006  *
1007  */
1008 int
1009 t3_sge_init_adapter(adapter_t *sc)
1010 {
1011 	callout_init(&sc->sge_timer_ch, 1);
1012 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1013 	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
1014 	return (0);
1015 }
1016 
1017 int
1018 t3_sge_reset_adapter(adapter_t *sc)
1019 {
1020 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1021 	return (0);
1022 }
1023 
1024 int
1025 t3_sge_init_port(struct port_info *pi)
1026 {
1027 	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
1028 	return (0);
1029 }
1030 
1031 /**
1032  *	refill_rspq - replenish an SGE response queue
1033  *	@adapter: the adapter
1034  *	@q: the response queue to replenish
1035  *	@credits: how many new responses to make available
1036  *
1037  *	Replenishes a response queue by making the supplied number of responses
1038  *	available to HW.
1039  */
1040 static __inline void
1041 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
1042 {
1043 
1044 	/* mbufs are allocated on demand when a rspq entry is processed. */
1045 	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
1046 		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
1047 }
1048 
/*
 * Task handler: reclaim completed Tx descriptors on Tx queues 0, 1 and 2
 * of the queue set passed in @arg, using a reclaim threshold of 16.
 * @ncount is unused.
 */
static void
sge_txq_reclaim_handler(void *arg, int ncount)
{
	struct sge_qset *qs = arg;
	int queue = 0;

	while (queue < 3) {
		reclaim_completed_tx(qs, 16, queue);
		queue++;
	}
}
1058 
1059 static void
1060 sge_timer_reclaim(void *arg, int ncount)
1061 {
1062 	struct port_info *pi = arg;
1063 	int i, nqsets = pi->nqsets;
1064 	adapter_t *sc = pi->adapter;
1065 	struct sge_qset *qs;
1066 	struct mtx *lock;
1067 
1068 	KASSERT((sc->flags & USING_MSIX) == 0,
1069 	    ("can't call timer reclaim for msi-x"));
1070 
1071 	for (i = 0; i < nqsets; i++) {
1072 		qs = &sc->sge.qs[pi->first_qset + i];
1073 
1074 		reclaim_completed_tx(qs, 16, TXQ_OFLD);
1075 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
1076 			    &sc->sge.qs[0].rspq.lock;
1077 
1078 		if (mtx_trylock(lock)) {
1079 			/* XXX currently assume that we are *NOT* polling */
1080 			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
1081 
1082 			if (qs->fl[0].credits < qs->fl[0].size - 16)
1083 				__refill_fl(sc, &qs->fl[0]);
1084 			if (qs->fl[1].credits < qs->fl[1].size - 16)
1085 				__refill_fl(sc, &qs->fl[1]);
1086 
1087 			if (status & (1 << qs->rspq.cntxt_id)) {
1088 				if (qs->rspq.credits) {
1089 					refill_rspq(sc, &qs->rspq, 1);
1090 					qs->rspq.credits--;
1091 					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
1092 					    1 << qs->rspq.cntxt_id);
1093 				}
1094 			}
1095 			mtx_unlock(lock);
1096 		}
1097 	}
1098 }
1099 
1100 /**
1101  *	init_qset_cntxt - initialize an SGE queue set context info
1102  *	@qs: the queue set
1103  *	@id: the queue set id
1104  *
1105  *	Initializes the TIDs and context ids for the queues of a queue set.
1106  */
1107 static void
1108 init_qset_cntxt(struct sge_qset *qs, u_int id)
1109 {
1110 
1111 	qs->rspq.cntxt_id = id;
1112 	qs->fl[0].cntxt_id = 2 * id;
1113 	qs->fl[1].cntxt_id = 2 * id + 1;
1114 	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
1115 	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
1116 	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
1117 	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
1118 	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
1119 
1120 	/* XXX: a sane limit is needed instead of INT_MAX */
1121 	mbufq_init(&qs->txq[TXQ_ETH].sendq, INT_MAX);
1122 	mbufq_init(&qs->txq[TXQ_OFLD].sendq, INT_MAX);
1123 	mbufq_init(&qs->txq[TXQ_CTRL].sendq, INT_MAX);
1124 }
1125 
1126 
1127 static void
1128 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
1129 {
1130 	txq->in_use += ndesc;
1131 	/*
1132 	 * XXX we don't handle stopping of queue
1133 	 * presumably start handles this when we bump against the end
1134 	 */
1135 	txqs->gen = txq->gen;
1136 	txq->unacked += ndesc;
1137 	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
1138 	txq->unacked &= 31;
1139 	txqs->pidx = txq->pidx;
1140 	txq->pidx += ndesc;
1141 #ifdef INVARIANTS
1142 	if (((txqs->pidx > txq->cidx) &&
1143 		(txq->pidx < txqs->pidx) &&
1144 		(txq->pidx >= txq->cidx)) ||
1145 	    ((txqs->pidx < txq->cidx) &&
1146 		(txq->pidx >= txq-> cidx)) ||
1147 	    ((txqs->pidx < txq->cidx) &&
1148 		(txq->cidx < txqs->pidx)))
1149 		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
1150 		    txqs->pidx, txq->pidx, txq->cidx);
1151 #endif
1152 	if (txq->pidx >= txq->size) {
1153 		txq->pidx -= txq->size;
1154 		txq->gen ^= 1;
1155 	}
1156 
1157 }
1158 
1159 /**
1160  *	calc_tx_descs - calculate the number of Tx descriptors for a packet
1161  *	@m: the packet mbufs
1162  *      @nsegs: the number of segments
1163  *
1164  * 	Returns the number of Tx descriptors needed for the given Ethernet
1165  * 	packet.  Ethernet packets require addition of WR and CPL headers.
1166  */
1167 static __inline unsigned int
1168 calc_tx_descs(const struct mbuf *m, int nsegs)
1169 {
1170 	unsigned int flits;
1171 
1172 	if (m->m_pkthdr.len <= PIO_LEN)
1173 		return 1;
1174 
1175 	flits = sgl_len(nsegs) + 2;
1176 	if (m->m_pkthdr.csum_flags & CSUM_TSO)
1177 		flits++;
1178 
1179 	return flits_to_desc(flits);
1180 }
1181 
1182 /**
1183  *	make_sgl - populate a scatter/gather list for a packet
1184  *	@sgp: the SGL to populate
1185  *	@segs: the packet dma segments
1186  *	@nsegs: the number of segments
1187  *
1188  *	Generates a scatter/gather list for the buffers that make up a packet
1189  *	and returns the SGL size in 8-byte words.  The caller must size the SGL
1190  *	appropriately.
1191  */
1192 static __inline void
1193 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1194 {
1195 	int i, idx;
1196 
1197 	for (idx = 0, i = 0; i < nsegs; i++) {
1198 		/*
1199 		 * firmware doesn't like empty segments
1200 		 */
1201 		if (segs[i].ds_len == 0)
1202 			continue;
1203 		if (i && idx == 0)
1204 			++sgp;
1205 
1206 		sgp->len[idx] = htobe32(segs[i].ds_len);
1207 		sgp->addr[idx] = htobe64(segs[i].ds_addr);
1208 		idx ^= 1;
1209 	}
1210 
1211 	if (idx) {
1212 		sgp->len[idx] = 0;
1213 		sgp->addr[idx] = 0;
1214 	}
1215 }
1216 
1217 /**
1218  *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1219  *	@adap: the adapter
1220  *	@q: the Tx queue
1221  *
1222  *	Ring the doorbell if a Tx queue is asleep.  There is a natural race,
1223  *	where the HW is going to sleep just after we checked, however,
1224  *	then the interrupt handler will detect the outstanding TX packet
1225  *	and ring the doorbell for us.
1226  *
1227  *	When GTS is disabled we unconditionally ring the doorbell.
1228  */
1229 static __inline void
1230 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
1231 {
1232 #if USE_GTS
1233 	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1234 	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1235 		set_bit(TXQ_LAST_PKT_DB, &q->flags);
1236 #ifdef T3_TRACE
1237 		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1238 			  q->cntxt_id);
1239 #endif
1240 		t3_write_reg(adap, A_SG_KDOORBELL,
1241 			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1242 	}
1243 #else
1244 	if (mustring || ++q->db_pending >= 32) {
1245 		wmb();            /* write descriptors before telling HW */
1246 		t3_write_reg(adap, A_SG_KDOORBELL,
1247 		    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1248 		q->db_pending = 0;
1249 	}
1250 #endif
1251 }
1252 
/*
 * Store the generation value, big-endian, in the last flit of Tx descriptor
 * @d.  Compiles to an empty function unless SGE_NUM_GENBITS is 2.
 */
static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	const uint64_t gen_flit = htobe64(gen);

	d->flit[TX_DESC_FLITS - 1] = gen_flit;
#endif
}
1260 
/**
 *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
 *	@ndesc: number of Tx descriptors spanned by the SGL
 *	@txd: first Tx descriptor to be written
 *	@txqs: txq state (generation and producer index)
 *	@txq: the SGE Tx queue
 *	@sgl: the SGL
 *	@flits: number of flits to the start of the SGL in the first descriptor
 *	@sgl_flits: the SGL size in flits
 *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
 *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
 *
 *	Write a work request header and an associated SGL.  If the SGL is
 *	small enough to fit into one Tx descriptor it has already been written
 *	and we just need to write the WR header.  Otherwise we distribute the
 *	SGL across the number of descriptors it spans.
 *
 *	In the multi-descriptor case @txqs->pidx and @txqs->gen are advanced
 *	as descriptors are consumed (wrapping at @txq->size), and the low
 *	header word of the first descriptor is written last, after a wmb().
 */
static void
write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
    const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
    unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
{

	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;

	if (__predict_true(ndesc == 1)) {
		/* Single descriptor: header carries both SOP and EOP. */
		set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi,
		    htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
		    wr_lo);

		wr_gen2(txd, txqs->gen);

	} else {
		/* Remember the first descriptor and its generation for the end. */
		unsigned int ogen = txqs->gen;
		const uint64_t *fp = (const uint64_t *)sgl;
		struct work_request_hdr *wp = wrp;

		/* Only the high word of the first header is written for now. */
		wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) | wr_hi;

		while (sgl_flits) {
			/* Room left in this descriptor after its header flits. */
			unsigned int avail = WR_FLITS - flits;

			if (avail > sgl_flits)
				avail = sgl_flits;
			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
			sgl_flits -= avail;
			ndesc--;
			if (!sgl_flits)
				break;

			fp += avail;
			txd++;
			/* Wrap to the start of the ring and flip the generation. */
			if (++txqs->pidx == txq->size) {
				txqs->pidx = 0;
				txqs->gen ^= 1;
				txd = txq->desc;
			}

			/*
			 * when the head of the mbuf chain
			 * is freed all clusters will be freed
			 * with it
			 */
			/* Continuation descriptor: one header flit, then SGL data. */
			wrp = (struct work_request_hdr *)txd;
			wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
			    V_WR_SGLSFLT(1)) | wr_hi;
			wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
				    sgl_flits + 1)) |
			    V_WR_GEN(txqs->gen)) | wr_lo;
			wr_gen2(txd, txqs->gen);
			flits = 1;
		}
		/* Mark the last descriptor written as end-of-packet. */
		wrp->wrh_hi |= htonl(F_WR_EOP);
		/*
		 * Complete the first descriptor (length + original generation)
		 * only after everything above has been written; presumably
		 * this keeps the hardware from consuming a partial request.
		 */
		wmb();
		wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
		wr_gen2((struct tx_desc *)wp, ogen);
	}
}
1341 
/*
 * Minimum header bytes expected at the front of a TSO packet:
 * sizeof(*eh) + sizeof(*ip) + sizeof(*tcp), with the IP and TCP headers
 * counted at 20 bytes each.
 */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)

/*
 * If mbuf m has M_VLANTAG set, OR the VLAN-valid flag and the mbuf's
 * ether_vtag into cntrl; otherwise leave cntrl unchanged.  cntrl is expanded
 * unparenthesized as the target of |=, so it must be a plain lvalue.
 */
#define GET_VTAG(cntrl, m) \
do { \
	if ((m)->m_flags & M_VLANTAG)					            \
		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
} while (0)
1350 
/*
 * t3_encap - write one Ethernet work request into a queue set's TXQ_ETH ring
 * @qs: the queue set; qs->lock must be held (asserted below)
 * @m: pointer to the head mbuf of the packet (or of an m_nextpkt batch)
 *
 * Builds the work request at the Tx queue's current producer index and calls
 * check_ring_tx_db() with mustring == 0.  Three paths are taken:
 *
 *  - m_nextpkt != NULL: the batch is mapped with busdma_map_sg_vec() and
 *    written as a single-descriptor cpl_tx_pkt_batch, one entry per segment
 *    (panics if there are more than 7 segments).
 *  - single packet with more than one mbuf and CSUM_TSO set: a
 *    cpl_tx_pkt_lso header is written, with header sizes parsed from the
 *    Ethernet/IP/TCP headers.
 *  - otherwise: a plain cpl_tx_pkt header is written.
 *
 * In the last two paths a packet of at most PIO_LEN bytes is copied into the
 * descriptor as immediate data and the mbuf is freed here before returning;
 * larger packets get an SGL built from the DMA segments and the mbuf stays
 * recorded in the software descriptor.
 *
 * Returns 0 on success, or the error from busdma_map_sg_collapse().
 *
 * NOTE(review): this function reads *m but never stores to it, even though
 * busdma_map_sg_collapse() and m_pullup() may replace the local m0 -- confirm
 * what callers may assume about *m afterwards.
 */
static int
t3_encap(struct sge_qset *qs, struct mbuf **m)
{
	adapter_t *sc;
	struct mbuf *m0;
	struct sge_txq *txq;
	struct txq_state txqs;
	struct port_info *pi;
	unsigned int ndesc, flits, cntrl, mlen;
	int err, nsegs, tso_info = 0;

	struct work_request_hdr *wrp;
	struct tx_sw_desc *txsd;
	struct sg_ent *sgp, *sgl;
	uint32_t wr_hi, wr_lo, sgl_flits;
	bus_dma_segment_t segs[TX_MAX_SEGS];

	struct tx_desc *txd;

	pi = qs->port;
	sc = pi->adapter;
	txq = &qs->txq[TXQ_ETH];
	/* Hardware and software descriptors at the current producer index. */
	txd = &txq->desc[txq->pidx];
	txsd = &txq->sdesc[txq->pidx];
	sgl = txq->txq_sgl;

	prefetch(txd);
	m0 = *m;

	mtx_assert(&qs->lock, MA_OWNED);
	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
	KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));

	/* TSO is only considered for a lone packet made of several mbufs. */
	if  (m0->m_nextpkt == NULL && m0->m_next != NULL &&
	    m0->m_pkthdr.csum_flags & (CSUM_TSO))
		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);

	if (m0->m_nextpkt != NULL) {
		/* Coalesced batch: always one descriptor; mlen is not used. */
		busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
		ndesc = 1;
		mlen = 0;
	} else {
		if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
		    &m0, segs, &nsegs))) {
			if (cxgb_debug)
				printf("failed ... err=%d\n", err);
			return (err);
		}
		mlen = m0->m_pkthdr.len;
		ndesc = calc_tx_descs(m0, nsegs);
	}
	/* Reserve the descriptors; txqs captures the pre-advance pidx/gen. */
	txq_prod(txq, ndesc, &txqs);

	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
	txsd->m = m0;

	if (m0->m_nextpkt != NULL) {
		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
		int i, fidx;

		if (nsegs > 7)
			panic("trying to coalesce %d packets in to one WR", nsegs);
		txq->txq_coalesced += nsegs;
		wrp = (struct work_request_hdr *)txd;
		/* One header flit plus two flits per batch entry. */
		flits = nsegs*2 + 1;

		/*
		 * NOTE(review): the checksum flags and VLAN tag of every entry
		 * are taken from m0, the head of the batch; m0 is not advanced
		 * along m_nextpkt inside this loop.
		 */
		for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
			struct cpl_tx_pkt_batch_entry *cbe;
			uint64_t flit;
			uint32_t *hflit = (uint32_t *)&flit;
			int cflags = m0->m_pkthdr.csum_flags;

			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
			GET_VTAG(cntrl, m0);
			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
			if (__predict_false(!(cflags & CSUM_IP)))
				cntrl |= F_TXPKT_IPCSUM_DIS;
			if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP |
			    CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
				cntrl |= F_TXPKT_L4CSUM_DIS;

			/* flit is assembled through its two 32-bit halves. */
			hflit[0] = htonl(cntrl);
			hflit[1] = htonl(segs[i].ds_len | 0x80000000);
			flit |= htobe64(1 << 24);
			cbe = &cpl_batch->pkt_entry[i];
			cbe->cntrl = hflit[0];
			cbe->len = hflit[1];
			cbe->addr = htobe64(segs[i].ds_addr);
		}

		wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
		    V_WR_SGLSFLT(flits)) |
		    htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
		wr_lo = htonl(V_WR_LEN(flits) |
		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
		set_wr_hdr(wrp, wr_hi, wr_lo);
		wmb();
		ETHER_BPF_MTAP(pi->ifp, m0);
		wr_gen2(txd, txqs.gen);
		check_ring_tx_db(sc, txq, 0);
		return (0);
	} else if (tso_info) {
		uint16_t eth_type;
		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
		struct ether_header *eh;
		void *l3hdr;
		struct tcphdr *tcp;

		txd->flit[2] = 0;
		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
		hdr->cntrl = htonl(cntrl);
		hdr->len = htonl(mlen | 0x80000000);

		if (__predict_false(mlen < TCPPKTHDRSIZE)) {
			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x",
			    m0, mlen, m0->m_pkthdr.tso_segsz,
			    (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags);
			panic("tx tso packet too small");
		}

		/* Make sure that ether, ip, tcp headers are all in m0 */
		if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
			m0 = m_pullup(m0, TCPPKTHDRSIZE);
			if (__predict_false(m0 == NULL)) {
				/* XXX panic probably an overreaction */
				panic("couldn't fit header into mbuf");
			}
		}

		/* Find the L3 header, stepping over a VLAN header if present. */
		eh = mtod(m0, struct ether_header *);
		eth_type = eh->ether_type;
		if (eth_type == htons(ETHERTYPE_VLAN)) {
			struct ether_vlan_header *evh = (void *)eh;

			tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN);
			l3hdr = evh + 1;
			eth_type = evh->evl_proto;
		} else {
			tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II);
			l3hdr = eh + 1;
		}

		if (eth_type == htons(ETHERTYPE_IP)) {
			struct ip *ip = l3hdr;

			tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl);
			/*
			 * NOTE(review): the TCP header is taken to start right
			 * after the fixed-size struct ip, independent of ip_hl.
			 */
			tcp = (struct tcphdr *)(ip + 1);
		} else if (eth_type == htons(ETHERTYPE_IPV6)) {
			struct ip6_hdr *ip6 = l3hdr;

			KASSERT(ip6->ip6_nxt == IPPROTO_TCP,
			    ("%s: CSUM_TSO with ip6_nxt %d",
			    __func__, ip6->ip6_nxt));

			tso_info |= F_LSO_IPV6;
			tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2);
			tcp = (struct tcphdr *)(ip6 + 1);
		} else
			panic("%s: CSUM_TSO but neither ip nor ip6", __func__);

		tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off);
		hdr->lso_info = htonl(tso_info);

		if (__predict_false(mlen <= PIO_LEN)) {
			/*
			 * pkt not undersized but fits in PIO_LEN
			 * Indicates a TSO bug at the higher levels.
			 */
			/* Immediate data: copy after the 3 header flits, free m0. */
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
			flits = (mlen + 7) / 8 + 3;
			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
					  F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
			wmb();
			ETHER_BPF_MTAP(pi->ifp, m0);
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq, 0);
			m_freem(m0);
			return (0);
		}
		/* The SGL starts after the 3 flits of the LSO header. */
		flits = 3;
	} else {
		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;

		GET_VTAG(cntrl, m0);
		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
		/* Disable checksum insertion the stack did not request. */
		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
			cntrl |= F_TXPKT_IPCSUM_DIS;
		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP |
		    CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
			cntrl |= F_TXPKT_L4CSUM_DIS;
		cpl->cntrl = htonl(cntrl);
		cpl->len = htonl(mlen | 0x80000000);

		if (mlen <= PIO_LEN) {
			/* Immediate data: copy after the 2 header flits, free m0. */
			txsd->m = NULL;
			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
			flits = (mlen + 7) / 8 + 2;

			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
					  F_WR_SOP | F_WR_EOP | txqs.compl);
			wr_lo = htonl(V_WR_LEN(flits) |
			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
			set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
			wmb();
			ETHER_BPF_MTAP(pi->ifp, m0);
			wr_gen2(txd, txqs.gen);
			check_ring_tx_db(sc, txq, 0);
			m_freem(m0);
			return (0);
		}
		/* The SGL starts after the 2 flits of the packet header. */
		flits = 2;
	}
	wrp = (struct work_request_hdr *)txd;
	/*
	 * A single-descriptor SGL is built in place behind the header; a
	 * larger one is staged in txq->txq_sgl and copied out by
	 * write_wr_hdr_sgl().
	 */
	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
	make_sgl(sgp, segs, nsegs);

	sgl_flits = sgl_len(nsegs);

	ETHER_BPF_MTAP(pi->ifp, m0);

	KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
	wr_lo = htonl(V_WR_TID(txq->token));
	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
	    sgl_flits, wr_hi, wr_lo);
	check_ring_tx_db(sc, txq, 0);

	return (0);
}
1587 
#ifdef DEBUGNET
/*
 * Debugnet transmit helper: encapsulate *m on the queue set's Ethernet Tx
 * queue.  On success the doorbell is rung unconditionally; on failure any
 * mbuf still referenced by *m is freed and *m is cleared.  Returns the
 * t3_encap() result.
 */
int
cxgb_debugnet_encap(struct sge_qset *qs, struct mbuf **m)
{
	const int rc = t3_encap(qs, m);

	if (rc != 0) {
		if (*m != NULL) {
			m_freem(*m);
			*m = NULL;
		}
		return (rc);
	}

	check_ring_tx_db(qs->port->adapter, &qs->txq[TXQ_ETH], 1);
	return (rc);
}
#endif
1604 
1605 void
1606 cxgb_tx_watchdog(void *arg)
1607 {
1608 	struct sge_qset *qs = arg;
1609 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1610 
1611         if (qs->coalescing != 0 &&
1612 	    (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
1613 	    TXQ_RING_EMPTY(qs))
1614                 qs->coalescing = 0;
1615         else if (qs->coalescing == 0 &&
1616 	    (txq->in_use >= cxgb_tx_coalesce_enable_start))
1617                 qs->coalescing = 1;
1618 	if (TXQ_TRYLOCK(qs)) {
1619 		qs->qs_flags |= QS_FLUSHING;
1620 		cxgb_start_locked(qs);
1621 		qs->qs_flags &= ~QS_FLUSHING;
1622 		TXQ_UNLOCK(qs);
1623 	}
1624 	if (if_getdrvflags(qs->port->ifp) & IFF_DRV_RUNNING)
1625 		callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
1626 		    qs, txq->txq_watchdog.c_cpu);
1627 }
1628 
1629 static void
1630 cxgb_tx_timeout(void *arg)
1631 {
1632 	struct sge_qset *qs = arg;
1633 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1634 
1635 	if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
1636                 qs->coalescing = 1;
1637 	if (TXQ_TRYLOCK(qs)) {
1638 		qs->qs_flags |= QS_TIMEOUT;
1639 		cxgb_start_locked(qs);
1640 		qs->qs_flags &= ~QS_TIMEOUT;
1641 		TXQ_UNLOCK(qs);
1642 	}
1643 }
1644 
1645 static void
1646 cxgb_start_locked(struct sge_qset *qs)
1647 {
1648 	struct mbuf *m_head = NULL;
1649 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1650 	struct port_info *pi = qs->port;
1651 	if_t ifp = pi->ifp;
1652 
1653 	if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
1654 		reclaim_completed_tx(qs, 0, TXQ_ETH);
1655 
1656 	if (!pi->link_config.link_ok) {
1657 		TXQ_RING_FLUSH(qs);
1658 		return;
1659 	}
1660 	TXQ_LOCK_ASSERT(qs);
1661 	while (!TXQ_RING_EMPTY(qs) && (if_getdrvflags(ifp) & IFF_DRV_RUNNING) &&
1662 	    pi->link_config.link_ok) {
1663 		reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1664 
1665 		if (txq->size - txq->in_use <= TX_MAX_DESC)
1666 			break;
1667 
1668 		if ((m_head = cxgb_dequeue(qs)) == NULL)
1669 			break;
1670 		/*
1671 		 *  Encapsulation can modify our pointer, and or make it
1672 		 *  NULL on failure.  In that event, we can't requeue.
1673 		 */
1674 		if (t3_encap(qs, &m_head) || m_head == NULL)
1675 			break;
1676 
1677 		m_head = NULL;
1678 	}
1679 
1680 	if (txq->db_pending)
1681 		check_ring_tx_db(pi->adapter, txq, 1);
1682 
1683 	if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
1684 	    pi->link_config.link_ok)
1685 		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1686 		    qs, txq->txq_timer.c_cpu);
1687 	if (m_head != NULL)
1688 		m_freem(m_head);
1689 }
1690 
1691 static int
1692 cxgb_transmit_locked(if_t ifp, struct sge_qset *qs, struct mbuf *m)
1693 {
1694 	struct port_info *pi = qs->port;
1695 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1696 	struct buf_ring *br = txq->txq_mr;
1697 	int error, avail;
1698 
1699 	avail = txq->size - txq->in_use;
1700 	TXQ_LOCK_ASSERT(qs);
1701 
1702 	/*
1703 	 * We can only do a direct transmit if the following are true:
1704 	 * - we aren't coalescing (ring < 3/4 full)
1705 	 * - the link is up -- checked in caller
1706 	 * - there are no packets enqueued already
1707 	 * - there is space in hardware transmit queue
1708 	 */
1709 	if (check_pkt_coalesce(qs) == 0 &&
1710 	    !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
1711 		if (t3_encap(qs, &m)) {
1712 			if (m != NULL &&
1713 			    (error = drbr_enqueue(ifp, br, m)) != 0)
1714 				return (error);
1715 		} else {
1716 			if (txq->db_pending)
1717 				check_ring_tx_db(pi->adapter, txq, 1);
1718 
1719 			/*
1720 			 * We've bypassed the buf ring so we need to update
1721 			 * the stats directly
1722 			 */
1723 			txq->txq_direct_packets++;
1724 			txq->txq_direct_bytes += m->m_pkthdr.len;
1725 		}
1726 	} else if ((error = drbr_enqueue(ifp, br, m)) != 0)
1727 		return (error);
1728 
1729 	reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1730 	if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
1731 	    (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
1732 		cxgb_start_locked(qs);
1733 	else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
1734 		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1735 		    qs, txq->txq_timer.c_cpu);
1736 	return (0);
1737 }
1738 
1739 int
1740 cxgb_transmit(if_t ifp, struct mbuf *m)
1741 {
1742 	struct sge_qset *qs;
1743 	struct port_info *pi = if_getsoftc(ifp);
1744 	int error, qidx = pi->first_qset;
1745 
1746 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0
1747 	    ||(!pi->link_config.link_ok)) {
1748 		m_freem(m);
1749 		return (0);
1750 	}
1751 
1752 	/* check if flowid is set */
1753 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1754 		qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
1755 
1756 	qs = &pi->adapter->sge.qs[qidx];
1757 
1758 	if (TXQ_TRYLOCK(qs)) {
1759 		/* XXX running */
1760 		error = cxgb_transmit_locked(ifp, qs, m);
1761 		TXQ_UNLOCK(qs);
1762 	} else
1763 		error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
1764 	return (error);
1765 }
1766 
1767 void
1768 cxgb_qflush(if_t ifp)
1769 {
1770 	/*
1771 	 * flush any enqueued mbufs in the buf_rings
1772 	 * and in the transmit queues
1773 	 * no-op for now
1774 	 */
1775 	return;
1776 }
1777 
1778 /**
1779  *	write_imm - write a packet into a Tx descriptor as immediate data
1780  *	@d: the Tx descriptor to write
1781  *	@m: the packet
1782  *	@len: the length of packet data to write as immediate data
1783  *	@gen: the generation bit value to write
1784  *
1785  *	Writes a packet as immediate data into a Tx descriptor.  The packet
1786  *	contains a work request at its beginning.  We must write the packet
1787  *	carefully so the SGE doesn't read accidentally before it's written in
1788  *	its entirety.
1789  */
1790 static __inline void
1791 write_imm(struct tx_desc *d, caddr_t src,
1792 	  unsigned int len, unsigned int gen)
1793 {
1794 	struct work_request_hdr *from = (struct work_request_hdr *)src;
1795 	struct work_request_hdr *to = (struct work_request_hdr *)d;
1796 	uint32_t wr_hi, wr_lo;
1797 
1798 	KASSERT(len <= WR_LEN && len >= sizeof(*from),
1799 	    ("%s: invalid len %d", __func__, len));
1800 
1801 	memcpy(&to[1], &from[1], len - sizeof(*from));
1802 	wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
1803 	    V_WR_BCNTLFLT(len & 7));
1804 	wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
1805 	set_wr_hdr(to, wr_hi, wr_lo);
1806 	wmb();
1807 	wr_gen2(d, gen);
1808 }
1809 
1810 /**
1811  *	check_desc_avail - check descriptor availability on a send queue
1812  *	@adap: the adapter
1813  *	@q: the TX queue
1814  *	@m: the packet needing the descriptors
1815  *	@ndesc: the number of Tx descriptors needed
1816  *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1817  *
1818  *	Checks if the requested number of Tx descriptors is available on an
1819  *	SGE send queue.  If the queue is already suspended or not enough
1820  *	descriptors are available the packet is queued for later transmission.
1821  *	Must be called with the Tx queue locked.
1822  *
1823  *	Returns 0 if enough descriptors are available, 1 if there aren't
1824  *	enough descriptors and the packet has been queued, and 2 if the caller
1825  *	needs to retry because there weren't enough descriptors at the
1826  *	beginning of the call but some freed up in the mean time.
1827  */
1828 static __inline int
1829 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1830 		 struct mbuf *m, unsigned int ndesc,
1831 		 unsigned int qid)
1832 {
1833 	/*
1834 	 * XXX We currently only use this for checking the control queue
1835 	 * the control queue is only used for binding qsets which happens
1836 	 * at init time so we are guaranteed enough descriptors
1837 	 */
1838 	if (__predict_false(!mbufq_empty(&q->sendq))) {
1839 addq_exit:	(void )mbufq_enqueue(&q->sendq, m);
1840 		return 1;
1841 	}
1842 	if (__predict_false(q->size - q->in_use < ndesc)) {
1843 
1844 		struct sge_qset *qs = txq_to_qset(q, qid);
1845 
1846 		setbit(&qs->txq_stopped, qid);
1847 		if (should_restart_tx(q) &&
1848 		    test_and_clear_bit(qid, &qs->txq_stopped))
1849 			return 2;
1850 
1851 		q->stops++;
1852 		goto addq_exit;
1853 	}
1854 	return 0;
1855 }
1856 
1857 
1858 /**
1859  *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1860  *	@q: the SGE control Tx queue
1861  *
1862  *	This is a variant of reclaim_completed_tx() that is used for Tx queues
1863  *	that send only immediate data (presently just the control queues) and
1864  *	thus do not have any mbufs
1865  */
1866 static __inline void
1867 reclaim_completed_tx_imm(struct sge_txq *q)
1868 {
1869 	unsigned int reclaim = q->processed - q->cleaned;
1870 
1871 	q->in_use -= reclaim;
1872 	q->cleaned += reclaim;
1873 }
1874 
1875 /**
1876  *	ctrl_xmit - send a packet through an SGE control Tx queue
1877  *	@adap: the adapter
1878  *	@q: the control queue
1879  *	@m: the packet
1880  *
1881  *	Send a packet through an SGE control Tx queue.  Packets sent through
1882  *	a control queue must fit entirely as immediate data in a single Tx
1883  *	descriptor and have no page fragments.
1884  */
1885 static int
1886 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
1887 {
1888 	int ret;
1889 	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1890 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1891 
1892 	KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));
1893 
1894 	wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
1895 	wrp->wrh_lo = htonl(V_WR_TID(q->token));
1896 
1897 	TXQ_LOCK(qs);
1898 again:	reclaim_completed_tx_imm(q);
1899 
1900 	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1901 	if (__predict_false(ret)) {
1902 		if (ret == 1) {
1903 			TXQ_UNLOCK(qs);
1904 			return (ENOSPC);
1905 		}
1906 		goto again;
1907 	}
1908 	write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
1909 
1910 	q->in_use++;
1911 	if (++q->pidx >= q->size) {
1912 		q->pidx = 0;
1913 		q->gen ^= 1;
1914 	}
1915 	TXQ_UNLOCK(qs);
1916 	wmb();
1917 	t3_write_reg(adap, A_SG_KDOORBELL,
1918 	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1919 
1920 	m_free(m);
1921 	return (0);
1922 }
1923 
1924 
1925 /**
1926  *	restart_ctrlq - restart a suspended control queue
1927  *	@qs: the queue set cotaining the control queue
1928  *
1929  *	Resumes transmission on a suspended Tx control queue.
1930  */
1931 static void
1932 restart_ctrlq(void *data, int npending)
1933 {
1934 	struct mbuf *m;
1935 	struct sge_qset *qs = (struct sge_qset *)data;
1936 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1937 	adapter_t *adap = qs->port->adapter;
1938 
1939 	TXQ_LOCK(qs);
1940 again:	reclaim_completed_tx_imm(q);
1941 
1942 	while (q->in_use < q->size &&
1943 	       (m = mbufq_dequeue(&q->sendq)) != NULL) {
1944 
1945 		write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
1946 		m_free(m);
1947 
1948 		if (++q->pidx >= q->size) {
1949 			q->pidx = 0;
1950 			q->gen ^= 1;
1951 		}
1952 		q->in_use++;
1953 	}
1954 	if (!mbufq_empty(&q->sendq)) {
1955 		setbit(&qs->txq_stopped, TXQ_CTRL);
1956 
1957 		if (should_restart_tx(q) &&
1958 		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1959 			goto again;
1960 		q->stops++;
1961 	}
1962 	TXQ_UNLOCK(qs);
1963 	t3_write_reg(adap, A_SG_KDOORBELL,
1964 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1965 }
1966 
1967 
1968 /*
1969  * Send a management message through control queue 0
1970  */
1971 int
1972 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1973 {
1974 	return ctrl_xmit(adap, &adap->sge.qs[0], m);
1975 }
1976 
1977 /**
1978  *	free_qset - free the resources of an SGE queue set
1979  *	@sc: the controller owning the queue set
1980  *	@q: the queue set
1981  *
1982  *	Release the HW and SW resources associated with an SGE queue set, such
1983  *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
1984  *	queue set must be quiesced prior to calling this.
1985  */
1986 static void
1987 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1988 {
1989 	int i;
1990 
1991 	reclaim_completed_tx(q, 0, TXQ_ETH);
1992 	if (q->txq[TXQ_ETH].txq_mr != NULL)
1993 		buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
1994 	if (q->txq[TXQ_ETH].txq_ifq != NULL) {
1995 		ifq_delete(q->txq[TXQ_ETH].txq_ifq);
1996 		free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
1997 	}
1998 
1999 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2000 		if (q->fl[i].desc) {
2001 			mtx_lock_spin(&sc->sge.reg_lock);
2002 			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
2003 			mtx_unlock_spin(&sc->sge.reg_lock);
2004 			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
2005 			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
2006 					q->fl[i].desc_map);
2007 			bus_dma_tag_destroy(q->fl[i].desc_tag);
2008 			bus_dma_tag_destroy(q->fl[i].entry_tag);
2009 		}
2010 		if (q->fl[i].sdesc) {
2011 			free_rx_bufs(sc, &q->fl[i]);
2012 			free(q->fl[i].sdesc, M_DEVBUF);
2013 		}
2014 	}
2015 
2016 	mtx_unlock(&q->lock);
2017 	MTX_DESTROY(&q->lock);
2018 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2019 		if (q->txq[i].desc) {
2020 			mtx_lock_spin(&sc->sge.reg_lock);
2021 			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
2022 			mtx_unlock_spin(&sc->sge.reg_lock);
2023 			bus_dmamap_unload(q->txq[i].desc_tag,
2024 					q->txq[i].desc_map);
2025 			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
2026 					q->txq[i].desc_map);
2027 			bus_dma_tag_destroy(q->txq[i].desc_tag);
2028 			bus_dma_tag_destroy(q->txq[i].entry_tag);
2029 		}
2030 		if (q->txq[i].sdesc) {
2031 			free(q->txq[i].sdesc, M_DEVBUF);
2032 		}
2033 	}
2034 
2035 	if (q->rspq.desc) {
2036 		mtx_lock_spin(&sc->sge.reg_lock);
2037 		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
2038 		mtx_unlock_spin(&sc->sge.reg_lock);
2039 
2040 		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
2041 		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
2042 			        q->rspq.desc_map);
2043 		bus_dma_tag_destroy(q->rspq.desc_tag);
2044 		MTX_DESTROY(&q->rspq.lock);
2045 	}
2046 
2047 #if defined(INET6) || defined(INET)
2048 	tcp_lro_free(&q->lro.ctrl);
2049 #endif
2050 
2051 	bzero(q, sizeof(*q));
2052 }
2053 
2054 /**
2055  *	t3_free_sge_resources - free SGE resources
2056  *	@sc: the adapter softc
2057  *
2058  *	Frees resources used by the SGE queue sets.
2059  */
2060 void
2061 t3_free_sge_resources(adapter_t *sc, int nqsets)
2062 {
2063 	int i;
2064 
2065 	for (i = 0; i < nqsets; ++i) {
2066 		TXQ_LOCK(&sc->sge.qs[i]);
2067 		t3_free_qset(sc, &sc->sge.qs[i]);
2068 	}
2069 }
2070 
/**
 *	t3_sge_start - enable SGE
 *	@sc: the controller softc
 *
 *	Enables the SGE for DMAs by setting F_GLOBALENABLE in A_SG_CONTROL.
 *	This is the last step in starting packet transfers.
 */
void
t3_sge_start(adapter_t *sc)
{
	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
}
2083 
/**
 *	t3_sge_stop - disable SGE operation
 *	@sc: the adapter
 *
 *	Disables the DMA engine by clearing F_GLOBALENABLE in A_SG_CONTROL.
 *	This can be called in emergencies (e.g., from error interrupts) or
 *	from normal process context.
 *
 *	NOTE(review): this comment used to describe stopping pending queue
 *	restart tasklets when called from process context.  The function body
 *	only updates the register; any such teardown has to happen elsewhere.
 */
void
t3_sge_stop(adapter_t *sc)
{

	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
}
2103 
2104 /**
2105  *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
2106  *	@adapter: the adapter
2107  *	@q: the Tx queue to reclaim descriptors from
2108  *	@reclaimable: the number of descriptors to reclaim
2109  *      @m_vec_size: maximum number of buffers to reclaim
2110  *      @desc_reclaimed: returns the number of descriptors reclaimed
2111  *
2112  *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2113  *	Tx buffers.  Called with the Tx queue lock held.
2114  *
2115  *      Returns number of buffers of reclaimed
2116  */
2117 void
2118 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
2119 {
2120 	struct tx_sw_desc *txsd;
2121 	unsigned int cidx, mask;
2122 	struct sge_txq *q = &qs->txq[queue];
2123 
2124 #ifdef T3_TRACE
2125 	T3_TRACE2(sc->tb[q->cntxt_id & 7],
2126 		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
2127 #endif
2128 	cidx = q->cidx;
2129 	mask = q->size - 1;
2130 	txsd = &q->sdesc[cidx];
2131 
2132 	mtx_assert(&qs->lock, MA_OWNED);
2133 	while (reclaimable--) {
2134 		prefetch(q->sdesc[(cidx + 1) & mask].m);
2135 		prefetch(q->sdesc[(cidx + 2) & mask].m);
2136 
2137 		if (txsd->m != NULL) {
2138 			if (txsd->flags & TX_SW_DESC_MAPPED) {
2139 				bus_dmamap_unload(q->entry_tag, txsd->map);
2140 				txsd->flags &= ~TX_SW_DESC_MAPPED;
2141 			}
2142 			m_freem_list(txsd->m);
2143 			txsd->m = NULL;
2144 		} else
2145 			q->txq_skipped++;
2146 
2147 		++txsd;
2148 		if (++cidx == q->size) {
2149 			cidx = 0;
2150 			txsd = q->sdesc;
2151 		}
2152 	}
2153 	q->cidx = cidx;
2154 
2155 }
2156 
2157 /**
2158  *	is_new_response - check if a response is newly written
2159  *	@r: the response descriptor
2160  *	@q: the response queue
2161  *
2162  *	Returns true if a response descriptor contains a yet unprocessed
2163  *	response.
2164  */
2165 static __inline int
2166 is_new_response(const struct rsp_desc *r,
2167     const struct sge_rspq *q)
2168 {
2169 	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2170 }
2171 
/* Response flag bits carrying a GTS indication for Tx queue 0 or Tx queue 1. */
#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
/*
 * All Tx-queue control information in a response's flags word: the GTS bits
 * above plus the credit-return fields of Tx queues 0, 1 and 2.
 * process_responses() only calls handle_rsp_cntrl_info() when one of these
 * bits is set.
 */
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/*
 * How long to delay the next interrupt in case of memory shortage, in 0.1us
 * units (2500 => 250us).
 */
#define NOMEM_INTR_DELAY 2500
2180 
2181 #ifdef TCP_OFFLOAD
/**
 *	write_ofld_wr - write an offload work request
 *	@adap: the adapter (not referenced in this function body)
 *	@m: the packet to send; its data starts with a struct ofld_hdr
 *	@q: the Tx queue
 *	@pidx: index of the first Tx descriptor to write
 *	@gen: the generation value to use
 *	@ndesc: number of descriptors the packet will occupy
 *
 *	Write an offload work request to send the supplied packet.  The packet
 *	data already carry the work request with most fields populated.
 *
 *	Without F_HDR_SGL in the ofld_hdr flags the work request is written as
 *	immediate data.  Otherwise the work request body is copied into the
 *	descriptor and a scatter/gather list is built from the sglist attached
 *	to the ofld_hdr.
 */
static void
write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
    unsigned int pidx, unsigned int gen, unsigned int ndesc)
{
	unsigned int sgl_flits, flits;
	int i, idx, nsegs, wrlen;
	struct work_request_hdr *from;
	struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
	struct tx_desc *d = &q->desc[pidx];
	struct txq_state txqs;
	struct sglist_seg *segs;
	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
	struct sglist *sgl;

	from = (void *)(oh + 1);	/* Start of WR within mbuf */
	wrlen = m->m_len - sizeof(*oh);	/* WR length: mbuf data minus ofld_hdr */

	if (!(oh->flags & F_HDR_SGL)) {
		write_imm(d, (caddr_t)from, wrlen, gen);

		/*
		 * mbuf with "real" immediate tx data will be enqueue_wr'd by
		 * t3_push_frames and freed in wr_ack.  Others, like those sent
		 * down by close_conn, t3_send_reset, etc. should be freed here.
		 */
		if (!(oh->flags & F_HDR_DF))
			m_free(m);
		return;
	}

	/*
	 * Copy everything after the work request header into the descriptor,
	 * starting at flit 1.  The header itself (wrh_hi/wrh_lo) is passed to
	 * write_wr_hdr_sgl() at the end.
	 */
	memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));

	sgl = oh->sgl;
	flits = wrlen / 8;	/* WR length in 8-byte flits */
	/*
	 * With a single descriptor the SGL is built in place right behind the
	 * WR flits; otherwise it is staged in the on-stack t3sgl array.
	 */
	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;

	nsegs = sgl->sg_nseg;
	segs = sgl->sg_segs;
	/*
	 * Each sg_ent holds two (len, addr) pairs.  idx toggles between slot 0
	 * and slot 1; sgp moves to the next entry whenever idx is back at 0.
	 */
	for (idx = 0, i = 0; i < nsegs; i++) {
		KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
		if (i && idx == 0)
			++sgp;
		sgp->len[idx] = htobe32(segs[i].ss_len);
		sgp->addr[idx] = htobe64(segs[i].ss_paddr);
		idx ^= 1;
	}
	/* Odd number of segments: zero the unused second slot. */
	if (idx) {
		sgp->len[idx] = 0;
		sgp->addr[idx] = 0;
	}

	sgl_flits = sgl_len(nsegs);
	txqs.gen = gen;
	txqs.pidx = pidx;
	txqs.compl = 0;

	write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
	    from->wrh_hi, from->wrh_lo);
}
2253 
2254 /**
2255  *	ofld_xmit - send a packet through an offload queue
2256  *	@adap: the adapter
2257  *	@q: the Tx offload queue
2258  *	@m: the packet
2259  *
2260  *	Send an offload packet through an SGE offload queue.
2261  */
2262 static int
2263 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
2264 {
2265 	int ret;
2266 	unsigned int ndesc;
2267 	unsigned int pidx, gen;
2268 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
2269 	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2270 
2271 	ndesc = G_HDR_NDESC(oh->flags);
2272 
2273 	TXQ_LOCK(qs);
2274 again:	reclaim_completed_tx(qs, 16, TXQ_OFLD);
2275 	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2276 	if (__predict_false(ret)) {
2277 		if (ret == 1) {
2278 			TXQ_UNLOCK(qs);
2279 			return (EINTR);
2280 		}
2281 		goto again;
2282 	}
2283 
2284 	gen = q->gen;
2285 	q->in_use += ndesc;
2286 	pidx = q->pidx;
2287 	q->pidx += ndesc;
2288 	if (q->pidx >= q->size) {
2289 		q->pidx -= q->size;
2290 		q->gen ^= 1;
2291 	}
2292 
2293 	write_ofld_wr(adap, m, q, pidx, gen, ndesc);
2294 	check_ring_tx_db(adap, q, 1);
2295 	TXQ_UNLOCK(qs);
2296 
2297 	return (0);
2298 }
2299 
/**
 *	restart_offloadq - restart a suspended offload queue
 *	@data: the queue set containing the offload queue (struct sge_qset *)
 *	@npending: not referenced by this function
 *
 *	Resumes transmission on a suspended Tx offload queue.  This is the
 *	handler installed for the offload queue's qresume_task.  It drains the
 *	queue's sendq while descriptors are available and then rings the
 *	doorbell for the queue's egress context.
 */
static void
restart_offloadq(void *data, int npending)
{
	struct mbuf *m;
	struct sge_qset *qs = data;
	struct sge_txq *q = &qs->txq[TXQ_OFLD];
	adapter_t *adap = qs->port->adapter;

	TXQ_LOCK(qs);
again:
	/* Peek first; the mbuf is only dequeued once space has been claimed. */
	while ((m = mbufq_first(&q->sendq)) != NULL) {
		unsigned int gen, pidx;
		struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
		unsigned int ndesc = G_HDR_NDESC(oh->flags);

		if (__predict_false(q->size - q->in_use < ndesc)) {
			/*
			 * Not enough room: mark the queue stopped, then
			 * re-check in case space was freed in the meantime.
			 * If so, un-stop the queue and retry from the top.
			 */
			setbit(&qs->txq_stopped, TXQ_OFLD);
			if (should_restart_tx(q) &&
			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
				goto again;
			q->stops++;
			break;
		}

		/* Claim ndesc descriptors; flip gen when the ring wraps. */
		gen = q->gen;
		q->in_use += ndesc;
		pidx = q->pidx;
		q->pidx += ndesc;
		if (q->pidx >= q->size) {
			q->pidx -= q->size;
			q->gen ^= 1;
		}

		(void)mbufq_dequeue(&q->sendq);
		/* The queue set lock is dropped while the WR is written. */
		TXQ_UNLOCK(qs);
		write_ofld_wr(adap, m, q, pidx, gen, ndesc);
		TXQ_LOCK(qs);
	}
#if USE_GTS
	set_bit(TXQ_RUNNING, &q->flags);
	set_bit(TXQ_LAST_PKT_DB, &q->flags);
#endif
	TXQ_UNLOCK(qs);
	/* Write barrier before the doorbell register write. */
	wmb();
	t3_write_reg(adap, A_SG_KDOORBELL,
		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
}
2353 
2354 /**
2355  *	t3_offload_tx - send an offload packet
2356  *	@m: the packet
2357  *
2358  *	Sends an offload packet.  We use the packet priority to select the
2359  *	appropriate Tx queue as follows: bit 0 indicates whether the packet
2360  *	should be sent as regular or control, bits 1-3 select the queue set.
2361  */
2362 int
2363 t3_offload_tx(struct adapter *sc, struct mbuf *m)
2364 {
2365 	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2366 	struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
2367 
2368 	if (oh->flags & F_HDR_CTRL) {
2369 		m_adj(m, sizeof (*oh));	/* trim ofld_hdr off */
2370 		return (ctrl_xmit(sc, qs, m));
2371 	} else
2372 		return (ofld_xmit(sc, qs, m));
2373 }
2374 #endif
2375 
2376 static void
2377 restart_tx(struct sge_qset *qs)
2378 {
2379 	struct adapter *sc = qs->port->adapter;
2380 
2381 	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2382 	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2383 	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2384 		qs->txq[TXQ_OFLD].restarts++;
2385 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2386 	}
2387 
2388 	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2389 	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2390 	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2391 		qs->txq[TXQ_CTRL].restarts++;
2392 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2393 	}
2394 }
2395 
2396 /**
2397  *	t3_sge_alloc_qset - initialize an SGE queue set
2398  *	@sc: the controller softc
2399  *	@id: the queue set id
2400  *	@nports: how many Ethernet ports will be using this queue set
2401  *	@irq_vec_idx: the IRQ vector index for response queue interrupts
2402  *	@p: configuration parameters for this queue set
2403  *	@ntxq: number of Tx queues for the queue set
2404  *	@pi: port info for queue set
2405  *
2406  *	Allocate resources and initialize an SGE queue set.  A queue set
2407  *	comprises a response queue, two Rx free-buffer queues, and up to 3
2408  *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
2409  *	queue, offload queue, and control queue.
2410  */
2411 int
2412 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2413 		  const struct qset_params *p, int ntxq, struct port_info *pi)
2414 {
2415 	struct sge_qset *q = &sc->sge.qs[id];
2416 	int i, ret = 0;
2417 
2418 	MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
2419 	q->port = pi;
2420 	q->adap = sc;
2421 
2422 	if ((q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
2423 	    M_DEVBUF, M_WAITOK, &q->lock)) == NULL) {
2424 		device_printf(sc->dev, "failed to allocate mbuf ring\n");
2425 		goto err;
2426 	}
2427 	if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
2428 	    M_NOWAIT | M_ZERO)) == NULL) {
2429 		device_printf(sc->dev, "failed to allocate ifq\n");
2430 		goto err;
2431 	}
2432 	ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);
2433 	callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
2434 	callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
2435 	q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
2436 	q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
2437 
2438 	init_qset_cntxt(q, id);
2439 	q->idx = id;
2440 	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2441 		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2442 		    &q->fl[0].desc, &q->fl[0].sdesc,
2443 		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
2444 		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2445 		printf("error %d from alloc ring fl0\n", ret);
2446 		goto err;
2447 	}
2448 
2449 	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2450 		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2451 		    &q->fl[1].desc, &q->fl[1].sdesc,
2452 		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
2453 		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2454 		printf("error %d from alloc ring fl1\n", ret);
2455 		goto err;
2456 	}
2457 
2458 	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2459 		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
2460 		    &q->rspq.desc_tag, &q->rspq.desc_map,
2461 		    NULL, NULL)) != 0) {
2462 		printf("error %d from alloc ring rspq\n", ret);
2463 		goto err;
2464 	}
2465 
2466 	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2467 	    device_get_unit(sc->dev), irq_vec_idx);
2468 	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2469 
2470 	for (i = 0; i < ntxq; ++i) {
2471 		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2472 
2473 		if ((ret = alloc_ring(sc, p->txq_size[i],
2474 			    sizeof(struct tx_desc), sz,
2475 			    &q->txq[i].phys_addr, &q->txq[i].desc,
2476 			    &q->txq[i].sdesc, &q->txq[i].desc_tag,
2477 			    &q->txq[i].desc_map,
2478 			    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2479 			printf("error %d from alloc ring tx %i\n", ret, i);
2480 			goto err;
2481 		}
2482 		mbufq_init(&q->txq[i].sendq, INT_MAX);
2483 		q->txq[i].gen = 1;
2484 		q->txq[i].size = p->txq_size[i];
2485 	}
2486 
2487 #ifdef TCP_OFFLOAD
2488 	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2489 #endif
2490 	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2491 	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2492 	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2493 
2494 	q->fl[0].gen = q->fl[1].gen = 1;
2495 	q->fl[0].size = p->fl_size;
2496 	q->fl[1].size = p->jumbo_size;
2497 
2498 	q->rspq.gen = 1;
2499 	q->rspq.cidx = 0;
2500 	q->rspq.size = p->rspq_size;
2501 
2502 	q->txq[TXQ_ETH].stop_thres = nports *
2503 	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2504 
2505 	q->fl[0].buf_size = MCLBYTES;
2506 	q->fl[0].zone = zone_pack;
2507 	q->fl[0].type = EXT_PACKET;
2508 
2509 	if (p->jumbo_buf_size ==  MJUM16BYTES) {
2510 		q->fl[1].zone = zone_jumbo16;
2511 		q->fl[1].type = EXT_JUMBO16;
2512 	} else if (p->jumbo_buf_size ==  MJUM9BYTES) {
2513 		q->fl[1].zone = zone_jumbo9;
2514 		q->fl[1].type = EXT_JUMBO9;
2515 	} else if (p->jumbo_buf_size ==  MJUMPAGESIZE) {
2516 		q->fl[1].zone = zone_jumbop;
2517 		q->fl[1].type = EXT_JUMBOP;
2518 	} else {
2519 		KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
2520 		ret = EDOOFUS;
2521 		goto err;
2522 	}
2523 	q->fl[1].buf_size = p->jumbo_buf_size;
2524 
2525 	/* Allocate and setup the lro_ctrl structure */
2526 	q->lro.enabled = !!(if_getcapenable(pi->ifp) & IFCAP_LRO);
2527 #if defined(INET6) || defined(INET)
2528 	ret = tcp_lro_init(&q->lro.ctrl);
2529 	if (ret) {
2530 		printf("error %d from tcp_lro_init\n", ret);
2531 		goto err;
2532 	}
2533 #endif
2534 	q->lro.ctrl.ifp = pi->ifp;
2535 
2536 	mtx_lock_spin(&sc->sge.reg_lock);
2537 	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2538 				   q->rspq.phys_addr, q->rspq.size,
2539 				   q->fl[0].buf_size, 1, 0);
2540 	if (ret) {
2541 		printf("error %d from t3_sge_init_rspcntxt\n", ret);
2542 		goto err_unlock;
2543 	}
2544 
2545 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2546 		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2547 					  q->fl[i].phys_addr, q->fl[i].size,
2548 					  q->fl[i].buf_size, p->cong_thres, 1,
2549 					  0);
2550 		if (ret) {
2551 			printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2552 			goto err_unlock;
2553 		}
2554 	}
2555 
2556 	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2557 				 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2558 				 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2559 				 1, 0);
2560 	if (ret) {
2561 		printf("error %d from t3_sge_init_ecntxt\n", ret);
2562 		goto err_unlock;
2563 	}
2564 
2565 	if (ntxq > 1) {
2566 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2567 					 USE_GTS, SGE_CNTXT_OFLD, id,
2568 					 q->txq[TXQ_OFLD].phys_addr,
2569 					 q->txq[TXQ_OFLD].size, 0, 1, 0);
2570 		if (ret) {
2571 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2572 			goto err_unlock;
2573 		}
2574 	}
2575 
2576 	if (ntxq > 2) {
2577 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2578 					 SGE_CNTXT_CTRL, id,
2579 					 q->txq[TXQ_CTRL].phys_addr,
2580 					 q->txq[TXQ_CTRL].size,
2581 					 q->txq[TXQ_CTRL].token, 1, 0);
2582 		if (ret) {
2583 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2584 			goto err_unlock;
2585 		}
2586 	}
2587 
2588 	mtx_unlock_spin(&sc->sge.reg_lock);
2589 	t3_update_qset_coalesce(q, p);
2590 
2591 	refill_fl(sc, &q->fl[0], q->fl[0].size);
2592 	refill_fl(sc, &q->fl[1], q->fl[1].size);
2593 	refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2594 
2595 	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2596 		     V_NEWTIMER(q->rspq.holdoff_tmr));
2597 
2598 	return (0);
2599 
2600 err_unlock:
2601 	mtx_unlock_spin(&sc->sge.reg_lock);
2602 err:
2603 	TXQ_LOCK(q);
2604 	t3_free_qset(sc, q);
2605 
2606 	return (ret);
2607 }
2608 
2609 /*
2610  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2611  * ethernet data.  Hardware assistance with various checksums and any vlan tag
2612  * will also be taken into account here.
2613  */
2614 void
2615 t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad)
2616 {
2617 	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2618 	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2619 	if_t ifp = pi->ifp;
2620 
2621 	if (cpl->vlan_valid) {
2622 		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2623 		m->m_flags |= M_VLANTAG;
2624 	}
2625 
2626 	m->m_pkthdr.rcvif = ifp;
2627 	/*
2628 	 * adjust after conversion to mbuf chain
2629 	 */
2630 	m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2631 	m->m_len -= (sizeof(*cpl) + ethpad);
2632 	m->m_data += (sizeof(*cpl) + ethpad);
2633 
2634 	if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) {
2635 		struct ether_header *eh = mtod(m, void *);
2636 		uint16_t eh_type;
2637 
2638 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2639 			struct ether_vlan_header *evh = mtod(m, void *);
2640 
2641 			eh_type = evh->evl_proto;
2642 		} else
2643 			eh_type = eh->ether_type;
2644 
2645 		if (if_getcapenable(ifp) & IFCAP_RXCSUM &&
2646 		    eh_type == htons(ETHERTYPE_IP)) {
2647 			m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
2648 			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2649 			m->m_pkthdr.csum_data = 0xffff;
2650 		} else if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6 &&
2651 		    eh_type == htons(ETHERTYPE_IPV6)) {
2652 			m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
2653 			    CSUM_PSEUDO_HDR);
2654 			m->m_pkthdr.csum_data = 0xffff;
2655 		}
2656 	}
2657 }
2658 
/**
 *	get_packet - return the next ingress packet buffer from a free list
 *	@adap: the adapter that received the packet
 *	@drop_thres: # of remaining buffers before we start dropping packets
 *	    (not referenced in this function body)
 *	@qs: the qset that the SGE free list holding the packet belongs to
 *	@mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
 *	@r: response descriptor
 *
 *	Get the next buffer from the free list selected by the response
 *	descriptor (F_RSPD_FLQ picks free list 1, otherwise free list 0) and
 *	attach it to the mbuf chain tracked in @mh.  If recycling is enabled
 *	and the packet is small and complete (SOP and EOP in one buffer) we
 *	make a copy and recycle the original buffer, otherwise we use the
 *	original buffer itself.  If no mbuf is available for the copy, the
 *	original buffer is used as well.
 *
 *	Returns 1 when this buffer completes a packet (EOP), 0 otherwise.
 */
static int
get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
    struct t3_mbuf_hdr *mh, struct rsp_desc *r)
{

	unsigned int len_cq =  ntohl(r->len_cq);
	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
	int mask, cidx = fl->cidx;
	struct rx_sw_desc *sd = &fl->sdesc[cidx];
	uint32_t len = G_RSPD_LEN(len_cq);
	uint32_t flags = M_EXT;
	uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
	caddr_t cl;
	struct mbuf *m;
	int ret = 0;

	/* Prefetch the next two software descriptors' mbufs and clusters. */
	mask = fl->size - 1;
	prefetch(fl->sdesc[(cidx + 1) & mask].m);
	prefetch(fl->sdesc[(cidx + 2) & mask].m);
	prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
	prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);

	fl->credits--;
	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);

	if (recycle_enable && len <= SGE_RX_COPY_THRES &&
	    sopeop == RSPQ_SOP_EOP) {
		/* Small single-buffer packet: copy it and recycle the buffer. */
		if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
			goto skip_recycle;
		cl = mtod(m, void *);
		memcpy(cl, sd->rxsd_cl, len);
		recycle_rx_buf(adap, fl, fl->cidx);
		m->m_pkthdr.len = m->m_len = len;
		/*
		 * NOTE(review): this clears every flag on an mbuf that was
		 * just obtained from m_gethdr() -- confirm this is intended.
		 */
		m->m_flags = 0;
		mh->mh_head = mh->mh_tail = m;
		ret = 1;
		goto done;
	} else {
	skip_recycle:
		/* Use the free-list buffer itself (also the copy fallback). */
		bus_dmamap_unload(fl->entry_tag, sd->map);
		cl = sd->rxsd_cl;
		m = sd->m;

		/* Only the first buffer of a packet carries a packet header. */
		if ((sopeop == RSPQ_SOP_EOP) ||
		    (sopeop == RSPQ_SOP))
			flags |= M_PKTHDR;
		m_init(m, M_NOWAIT, MT_DATA, flags);
		if (fl->zone == zone_pack) {
			/*
			 * restore clobbered data pointer
			 */
			m->m_data = m->m_ext.ext_buf;
		} else {
			m_cljset(m, cl, fl->type);
		}
		m->m_len = len;
	}
	switch(sopeop) {
	case RSPQ_SOP_EOP:
		ret = 1;
		/* FALLTHROUGH */
	case RSPQ_SOP:
		/* Start a new chain. */
		mh->mh_head = mh->mh_tail = m;
		m->m_pkthdr.len = len;
		break;
	case RSPQ_EOP:
		ret = 1;
		/* FALLTHROUGH */
	case RSPQ_NSOP_NEOP:
		/* Continuation buffer: needs an existing chain to append to. */
		if (mh->mh_tail == NULL) {
			log(LOG_ERR, "discarding intermediate descriptor entry\n");
			m_freem(m);
			m = NULL;
			break;
		}
		mh->mh_tail->m_next = m;
		mh->mh_tail = m;
		mh->mh_head->m_pkthdr.len += len;
		break;
	}
	if (cxgb_debug && m != NULL)
		printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
done:
	/* Advance the free list consumer index, wrapping at the ring size. */
	if (++fl->cidx == fl->size)
		fl->cidx = 0;

	return (ret);
}
2763 
2764 /**
2765  *	handle_rsp_cntrl_info - handles control information in a response
2766  *	@qs: the queue set corresponding to the response
2767  *	@flags: the response control flags
2768  *
2769  *	Handles the control information of an SGE response, such as GTS
2770  *	indications and completion credits for the queue set's Tx queues.
2771  *	HW coalesces credits, we don't do any extra SW coalescing.
2772  */
2773 static __inline void
2774 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2775 {
2776 	unsigned int credits;
2777 
2778 #if USE_GTS
2779 	if (flags & F_RSPD_TXQ0_GTS)
2780 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2781 #endif
2782 	credits = G_RSPD_TXQ0_CR(flags);
2783 	if (credits)
2784 		qs->txq[TXQ_ETH].processed += credits;
2785 
2786 	credits = G_RSPD_TXQ2_CR(flags);
2787 	if (credits)
2788 		qs->txq[TXQ_CTRL].processed += credits;
2789 
2790 # if USE_GTS
2791 	if (flags & F_RSPD_TXQ1_GTS)
2792 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2793 # endif
2794 	credits = G_RSPD_TXQ1_CR(flags);
2795 	if (credits)
2796 		qs->txq[TXQ_OFLD].processed += credits;
2797 
2798 }
2799 
/*
 * Placeholder called by process_responses() when a response carried a GTS
 * indication.  The body is intentionally empty: no doorbell is rung here.
 */
static void
check_ring_db(adapter_t *adap, struct sge_qset *qs,
    unsigned int sleeping)
{
	;
}
2806 
/**
 *	process_responses - process responses from an SGE response queue
 *	@adap: the adapter
 *	@qs: the queue set to which the response queue belongs
 *	@budget: how many responses can be processed in this round
 *
 *	Process responses from an SGE response queue up to the supplied budget.
 *	Responses include received packets as well as credits and other events
 *	for the queues that belong to the response queue's queue set.
 *	A negative budget is effectively unlimited.
 *
 *	Additionally choose the interrupt holdoff time for the next interrupt
 *	on this queue.  If the system is under memory shortage use a fairly
 *	long delay to help recovery.
 *
 *	Returns the number of responses consumed from the budget.
 */
static int
process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
{
	struct sge_rspq *rspq = &qs->rspq;
	struct rsp_desc *r = &rspq->desc[rspq->cidx];
	int budget_left = budget;
	unsigned int sleeping = 0;
#if defined(INET6) || defined(INET)
	int lro_enabled = qs->lro.enabled;
	int skip_lro;
	struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
#endif
	/* Partially assembled packet, carried across calls in the rspq. */
	struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
#ifdef DEBUG
	static int last_holdoff = 0;
	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
		last_holdoff = rspq->holdoff_tmr;
	}
#endif
	rspq->next_holdoff = rspq->holdoff_tmr;

	while (__predict_true(budget_left && is_new_response(r, rspq))) {
		int eth, eop = 0, ethpad = 0;
		uint32_t flags = ntohl(r->flags);
		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
		uint8_t opcode = r->rss_hdr.opcode;

		eth = (opcode == CPL_RX_PKT);

		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
			/*
			 * Async notification: copy the descriptor into a fresh
			 * mbuf and tag it as CPL_ASYNC_NOTIF.
			 */
			struct mbuf *m;

			if (cxgb_debug)
				printf("async notification\n");

			/*
			 * NOTE(review): when mh_head is already set, the new
			 * mbuf is not linked into the chain, yet mh_head is
			 * what gets delivered below -- confirm m is not leaked.
			 */
			if (mh->mh_head == NULL) {
				mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA);
				m = mh->mh_head;
			} else {
				m = m_gethdr(M_NOWAIT, MT_DATA);
			}
			if (m == NULL)
				goto no_mem;

                        memcpy(mtod(m, char *), r, AN_PKT_SIZE);
			m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
                        *mtod(m, uint8_t *) = CPL_ASYNC_NOTIF;
			opcode = CPL_ASYNC_NOTIF;
			eop = 1;
                        rspq->async_notif++;
			goto skip;
		} else if  (flags & F_RSPD_IMM_DATA_VALID) {
			/* Payload is carried in the response descriptor itself. */
			struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);

			if (m == NULL) {
		no_mem:
				/*
				 * Out of mbufs: stop here, leave this response
				 * unconsumed and ask for a long holdoff.
				 */
				rspq->next_holdoff = NOMEM_INTR_DELAY;
				budget_left--;
				break;
			}
			if (mh->mh_head == NULL)
				mh->mh_head = m;
                        else
				mh->mh_tail->m_next = m;
			mh->mh_tail = m;

			get_imm_packet(adap, r, m);
			mh->mh_head->m_pkthdr.len += m->m_len;
			eop = 1;
			rspq->imm_data++;
		} else if (r->len_cq) {
			/* Payload sits in a free-list buffer. */
			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;

			eop = get_packet(adap, drop_thresh, qs, mh, r);
			if (eop) {
				if (r->rss_hdr.hash_type && !adap->timestamp) {
					M_HASHTYPE_SET(mh->mh_head,
					    M_HASHTYPE_OPAQUE_HASH);
					mh->mh_head->m_pkthdr.flowid = rss_hash;
				}
			}

			ethpad = 2;
		} else {
			/* No data at all: the response only carries control info. */
			rspq->pure_rsps++;
		}
	skip:
		if (flags & RSPD_CTRL_MASK) {
			sleeping |= flags & RSPD_GTS_MASK;
			handle_rsp_cntrl_info(qs, flags);
		}

		if (!eth && eop) {
			/* Complete non-Ethernet (offload) message. */
			rspq->offload_pkts++;
#ifdef TCP_OFFLOAD
			adap->cpl_handler[opcode](qs, r, mh->mh_head);
#else
			m_freem(mh->mh_head);
#endif
			mh->mh_head = NULL;
		} else if (eth && eop) {
			/* Complete Ethernet packet. */
			struct mbuf *m = mh->mh_head;

			t3_rx_eth(adap, m, ethpad);

			/*
			 * The T304 sends incoming packets on any qset.  If LRO
			 * is also enabled, we could end up sending packet up
			 * lro_ctrl->ifp's input.  That is incorrect.
			 *
			 * The mbuf's rcvif was derived from the cpl header and
			 * is accurate.  Skip LRO and just use that.
			 */
#if defined(INET6) || defined(INET)
			skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);

			if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
			    && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
			    ) {
				/* successfully queue'd for LRO */
			} else
#endif
			{
				/*
				 * LRO not enabled, packet unsuitable for LRO,
				 * or unable to queue.  Pass it up right now in
				 * either case.
				 */
				if_t ifp = m->m_pkthdr.rcvif;
				if_input(ifp, m);
			}
			mh->mh_head = NULL;

		}

		/* Advance to the next response; flip gen when the ring wraps. */
		r++;
		if (__predict_false(++rspq->cidx == rspq->size)) {
			rspq->cidx = 0;
			rspq->gen ^= 1;
			r = rspq->desc;
		}

		/* Return response queue credits in batches of 64. */
		if (++rspq->credits >= 64) {
			refill_rspq(adap, rspq, rspq->credits);
			rspq->credits = 0;
		}
		__refill_fl_lt(adap, &qs->fl[0], 32);
		__refill_fl_lt(adap, &qs->fl[1], 32);
		--budget_left;
	}

#if defined(INET6) || defined(INET)
	/* Flush LRO */
	tcp_lro_flush_all(lro_ctrl);
#endif

	if (sleeping)
		check_ring_db(adap, qs, sleeping);

	mb();  /* commit Tx queue processed updates */
	/*
	 * NOTE(review): "> 1" skips the restart when only bit 0 of txq_stopped
	 * is set -- presumably that bit is the Ethernet queue, which
	 * restart_tx() does not handle; confirm.
	 */
	if (__predict_false(qs->txq_stopped > 1))
		restart_tx(qs);

	__refill_fl_lt(adap, &qs->fl[0], 512);
	__refill_fl_lt(adap, &qs->fl[1], 512);
	budget -= budget_left;
	return (budget);
}
2991 
2992 /*
2993  * A helper function that processes responses and issues GTS.
2994  */
2995 static __inline int
2996 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2997 {
2998 	int work;
2999 	static int last_holdoff = 0;
3000 
3001 	work = process_responses(adap, rspq_to_qset(rq), -1);
3002 
3003 	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3004 		printf("next_holdoff=%d\n", rq->next_holdoff);
3005 		last_holdoff = rq->next_holdoff;
3006 	}
3007 	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3008 	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3009 
3010 	return (work);
3011 }
3012 
#ifdef DEBUGNET
/*
 * Rx polling entry point, only built with DEBUGNET.  Processes the pending
 * responses of the given queue set's response queue via
 * process_responses_gts() and returns its result.
 */
int
cxgb_debugnet_poll_rx(adapter_t *adap, struct sge_qset *qs)
{

	return (process_responses_gts(adap, &qs->rspq));
}
#endif
3021 
3022 /*
3023  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3024  * Handles data events from SGE response queues as well as error and other
3025  * async events as they all use the same interrupt pin.  We use one SGE
3026  * response queue per port in this mode and protect all response queues with
3027  * queue 0's lock.
3028  */
3029 void
3030 t3b_intr(void *data)
3031 {
3032 	uint32_t i, map;
3033 	adapter_t *adap = data;
3034 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3035 
3036 	t3_write_reg(adap, A_PL_CLI, 0);
3037 	map = t3_read_reg(adap, A_SG_DATA_INTR);
3038 
3039 	if (!map)
3040 		return;
3041 
3042 	if (__predict_false(map & F_ERRINTR)) {
3043 		t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3044 		(void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3045 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3046 	}
3047 
3048 	mtx_lock(&q0->lock);
3049 	for_each_port(adap, i)
3050 	    if (map & (1 << i))
3051 			process_responses_gts(adap, &adap->sge.qs[i].rspq);
3052 	mtx_unlock(&q0->lock);
3053 }
3054 
3055 /*
3056  * The MSI interrupt handler.  This needs to handle data events from SGE
3057  * response queues as well as error and other async events as they all use
3058  * the same MSI vector.  We use one SGE response queue per port in this mode
3059  * and protect all response queues with queue 0's lock.
3060  */
3061 void
3062 t3_intr_msi(void *data)
3063 {
3064 	adapter_t *adap = data;
3065 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3066 	int i, new_packets = 0;
3067 
3068 	mtx_lock(&q0->lock);
3069 
3070 	for_each_port(adap, i)
3071 	    if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3072 		    new_packets = 1;
3073 	mtx_unlock(&q0->lock);
3074 	if (new_packets == 0) {
3075 		t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3076 		(void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3077 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3078 	}
3079 }
3080 
3081 void
3082 t3_intr_msix(void *data)
3083 {
3084 	struct sge_qset *qs = data;
3085 	adapter_t *adap = qs->port->adapter;
3086 	struct sge_rspq *rspq = &qs->rspq;
3087 
3088 	if (process_responses_gts(adap, rspq) == 0)
3089 		rspq->unhandled_irqs++;
3090 }
3091 
3092 #define QDUMP_SBUF_SIZE		32 * 400
3093 static int
3094 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3095 {
3096 	struct sge_rspq *rspq;
3097 	struct sge_qset *qs;
3098 	int i, err, dump_end, idx;
3099 	struct sbuf *sb;
3100 	struct rsp_desc *rspd;
3101 	uint32_t data[4];
3102 
3103 	rspq = arg1;
3104 	qs = rspq_to_qset(rspq);
3105 	if (rspq->rspq_dump_count == 0)
3106 		return (0);
3107 	if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3108 		log(LOG_WARNING,
3109 		    "dump count is too large %d\n", rspq->rspq_dump_count);
3110 		rspq->rspq_dump_count = 0;
3111 		return (EINVAL);
3112 	}
3113 	if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3114 		log(LOG_WARNING,
3115 		    "dump start of %d is greater than queue size\n",
3116 		    rspq->rspq_dump_start);
3117 		rspq->rspq_dump_start = 0;
3118 		return (EINVAL);
3119 	}
3120 	err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3121 	if (err)
3122 		return (err);
3123 	err = sysctl_wire_old_buffer(req, 0);
3124 	if (err)
3125 		return (err);
3126 	sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3127 
3128 	sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3129 	    (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3130 	    ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3131 	sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3132 	    ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3133 
3134 	sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3135 	    (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3136 
3137 	dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3138 	for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3139 		idx = i & (RSPQ_Q_SIZE-1);
3140 
3141 		rspd = &rspq->desc[idx];
3142 		sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3143 		    idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3144 		    rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3145 		sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3146 		    rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3147 		    be32toh(rspd->len_cq), rspd->intr_gen);
3148 	}
3149 
3150 	err = sbuf_finish(sb);
3151 	sbuf_delete(sb);
3152 	return (err);
3153 }
3154 
3155 static int
3156 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3157 {
3158 	struct sge_txq *txq;
3159 	struct sge_qset *qs;
3160 	int i, j, err, dump_end;
3161 	struct sbuf *sb;
3162 	struct tx_desc *txd;
3163 	uint32_t *WR, wr_hi, wr_lo, gen;
3164 	uint32_t data[4];
3165 
3166 	txq = arg1;
3167 	qs = txq_to_qset(txq, TXQ_ETH);
3168 	if (txq->txq_dump_count == 0) {
3169 		return (0);
3170 	}
3171 	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3172 		log(LOG_WARNING,
3173 		    "dump count is too large %d\n", txq->txq_dump_count);
3174 		txq->txq_dump_count = 1;
3175 		return (EINVAL);
3176 	}
3177 	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3178 		log(LOG_WARNING,
3179 		    "dump start of %d is greater than queue size\n",
3180 		    txq->txq_dump_start);
3181 		txq->txq_dump_start = 0;
3182 		return (EINVAL);
3183 	}
3184 	err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3185 	if (err)
3186 		return (err);
3187 	err = sysctl_wire_old_buffer(req, 0);
3188 	if (err)
3189 		return (err);
3190 	sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3191 
3192 	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3193 	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3194 	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3195 	sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
3196 	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3197 	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3198 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3199 	    txq->txq_dump_start,
3200 	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3201 
3202 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3203 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3204 		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3205 		WR = (uint32_t *)txd->flit;
3206 		wr_hi = ntohl(WR[0]);
3207 		wr_lo = ntohl(WR[1]);
3208 		gen = G_WR_GEN(wr_lo);
3209 
3210 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3211 		    wr_hi, wr_lo, gen);
3212 		for (j = 2; j < 30; j += 4)
3213 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3214 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3215 
3216 	}
3217 	err = sbuf_finish(sb);
3218 	sbuf_delete(sb);
3219 	return (err);
3220 }
3221 
3222 static int
3223 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3224 {
3225 	struct sge_txq *txq;
3226 	struct sge_qset *qs;
3227 	int i, j, err, dump_end;
3228 	struct sbuf *sb;
3229 	struct tx_desc *txd;
3230 	uint32_t *WR, wr_hi, wr_lo, gen;
3231 
3232 	txq = arg1;
3233 	qs = txq_to_qset(txq, TXQ_CTRL);
3234 	if (txq->txq_dump_count == 0) {
3235 		return (0);
3236 	}
3237 	if (txq->txq_dump_count > 256) {
3238 		log(LOG_WARNING,
3239 		    "dump count is too large %d\n", txq->txq_dump_count);
3240 		txq->txq_dump_count = 1;
3241 		return (EINVAL);
3242 	}
3243 	if (txq->txq_dump_start > 255) {
3244 		log(LOG_WARNING,
3245 		    "dump start of %d is greater than queue size\n",
3246 		    txq->txq_dump_start);
3247 		txq->txq_dump_start = 0;
3248 		return (EINVAL);
3249 	}
3250 
3251 	err = sysctl_wire_old_buffer(req, 0);
3252 	if (err != 0)
3253 		return (err);
3254 	sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3255 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3256 	    txq->txq_dump_start,
3257 	    (txq->txq_dump_start + txq->txq_dump_count) & 255);
3258 
3259 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3260 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3261 		txd = &txq->desc[i & (255)];
3262 		WR = (uint32_t *)txd->flit;
3263 		wr_hi = ntohl(WR[0]);
3264 		wr_lo = ntohl(WR[1]);
3265 		gen = G_WR_GEN(wr_lo);
3266 
3267 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3268 		    wr_hi, wr_lo, gen);
3269 		for (j = 2; j < 30; j += 4)
3270 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3271 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3272 
3273 	}
3274 	err = sbuf_finish(sb);
3275 	sbuf_delete(sb);
3276 	return (err);
3277 }
3278 
3279 static int
3280 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3281 {
3282 	adapter_t *sc = arg1;
3283 	struct qset_params *qsp = &sc->params.sge.qset[0];
3284 	int coalesce_usecs;
3285 	struct sge_qset *qs;
3286 	int i, j, err, nqsets = 0;
3287 	struct mtx *lock;
3288 
3289 	if ((sc->flags & FULL_INIT_DONE) == 0)
3290 		return (ENXIO);
3291 
3292 	coalesce_usecs = qsp->coalesce_usecs;
3293         err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3294 
3295 	if (err != 0) {
3296 		return (err);
3297 	}
3298 	if (coalesce_usecs == qsp->coalesce_usecs)
3299 		return (0);
3300 
3301 	for (i = 0; i < sc->params.nports; i++)
3302 		for (j = 0; j < sc->port[i].nqsets; j++)
3303 			nqsets++;
3304 
3305 	coalesce_usecs = max(1, coalesce_usecs);
3306 
3307 	for (i = 0; i < nqsets; i++) {
3308 		qs = &sc->sge.qs[i];
3309 		qsp = &sc->params.sge.qset[i];
3310 		qsp->coalesce_usecs = coalesce_usecs;
3311 
3312 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3313 			    &sc->sge.qs[0].rspq.lock;
3314 
3315 		mtx_lock(lock);
3316 		t3_update_qset_coalesce(qs, qsp);
3317 		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3318 		    V_NEWTIMER(qs->rspq.holdoff_tmr));
3319 		mtx_unlock(lock);
3320 	}
3321 
3322 	return (0);
3323 }
3324 
3325 static int
3326 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
3327 {
3328 	adapter_t *sc = arg1;
3329 	int rc, timestamp;
3330 
3331 	if ((sc->flags & FULL_INIT_DONE) == 0)
3332 		return (ENXIO);
3333 
3334 	timestamp = sc->timestamp;
3335 	rc = sysctl_handle_int(oidp, &timestamp, arg2, req);
3336 
3337 	if (rc != 0)
3338 		return (rc);
3339 
3340 	if (timestamp != sc->timestamp) {
3341 		t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
3342 		    timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
3343 		sc->timestamp = timestamp;
3344 	}
3345 
3346 	return (0);
3347 }
3348 
3349 void
3350 t3_add_attach_sysctls(adapter_t *sc)
3351 {
3352 	struct sysctl_ctx_list *ctx;
3353 	struct sysctl_oid_list *children;
3354 
3355 	ctx = device_get_sysctl_ctx(sc->dev);
3356 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3357 
3358 	/* random information */
3359 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3360 	    "firmware_version",
3361 	    CTLFLAG_RD, sc->fw_version,
3362 	    0, "firmware version");
3363 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
3364 	    "hw_revision",
3365 	    CTLFLAG_RD, &sc->params.rev,
3366 	    0, "chip model");
3367 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3368 	    "port_types",
3369 	    CTLFLAG_RD, sc->port_types,
3370 	    0, "type of ports");
3371 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3372 	    "enable_debug",
3373 	    CTLFLAG_RW, &cxgb_debug,
3374 	    0, "enable verbose debugging output");
3375 	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce",
3376 	    CTLFLAG_RD, &sc->tunq_coalesce,
3377 	    "#tunneled packets freed");
3378 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3379 	    "txq_overrun",
3380 	    CTLFLAG_RD, &txq_fills,
3381 	    0, "#times txq overrun");
3382 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
3383 	    "core_clock",
3384 	    CTLFLAG_RD, &sc->params.vpd.cclk,
3385 	    0, "core clock frequency (in KHz)");
3386 }
3387 
3388 
/* Sysctl node name used for a queue set's response queue subtree. */
static const char *rspq_name = "rspq";

/* Sysctl node names for a queue set's Tx queues, indexed by queue number. */
static const char *txq_names[] = {
	[0] = "txq_eth",
	[1] = "txq_ofld",
	[2] = "txq_ctrl",
};
3396 
3397 static int
3398 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3399 {
3400 	struct port_info *p = arg1;
3401 	uint64_t *parg;
3402 
3403 	if (!p)
3404 		return (EINVAL);
3405 
3406 	cxgb_refresh_stats(p);
3407 	parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3408 
3409 	return (sysctl_handle_64(oidp, parg, 0, req));
3410 }
3411 
3412 void
3413 t3_add_configured_sysctls(adapter_t *sc)
3414 {
3415 	struct sysctl_ctx_list *ctx;
3416 	struct sysctl_oid_list *children;
3417 	int i, j;
3418 
3419 	ctx = device_get_sysctl_ctx(sc->dev);
3420 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3421 
3422 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3423 	    "intr_coal",
3424 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
3425 	    0, t3_set_coalesce_usecs,
3426 	    "I", "interrupt coalescing timer (us)");
3427 
3428 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3429 	    "pkt_timestamp",
3430 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
3431 	    0, t3_pkt_timestamp,
3432 	    "I", "provide packet timestamp instead of connection hash");
3433 
3434 	for (i = 0; i < sc->params.nports; i++) {
3435 		struct port_info *pi = &sc->port[i];
3436 		struct sysctl_oid *poid;
3437 		struct sysctl_oid_list *poidlist;
3438 		struct mac_stats *mstats = &pi->mac.stats;
3439 
3440 		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3441 		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3442 		    pi->namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3443 		    "port statistics");
3444 		poidlist = SYSCTL_CHILDREN(poid);
3445 		SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO,
3446 		    "nqsets", CTLFLAG_RD, &pi->nqsets,
3447 		    0, "#queue sets");
3448 
3449 		for (j = 0; j < pi->nqsets; j++) {
3450 			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3451 			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
3452 					  *ctrlqpoid, *lropoid;
3453 			struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
3454 					       *txqpoidlist, *ctrlqpoidlist,
3455 					       *lropoidlist;
3456 			struct sge_txq *txq = &qs->txq[TXQ_ETH];
3457 
3458 			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3459 
3460 			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3461 			    qs->namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3462 			    "qset statistics");
3463 			qspoidlist = SYSCTL_CHILDREN(qspoid);
3464 
3465 			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
3466 					CTLFLAG_RD, &qs->fl[0].empty, 0,
3467 					"freelist #0 empty");
3468 			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
3469 					CTLFLAG_RD, &qs->fl[1].empty, 0,
3470 					"freelist #1 empty");
3471 
3472 			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3473 			    rspq_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3474 			    "rspq statistics");
3475 			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3476 
3477 			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3478 			    txq_names[0], CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3479 			    "txq statistics");
3480 			txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3481 
3482 			ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3483 			    txq_names[2], CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3484 			    "ctrlq statistics");
3485 			ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3486 
3487 			lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3488 			    "lro_stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3489 			    "LRO statistics");
3490 			lropoidlist = SYSCTL_CHILDREN(lropoid);
3491 
3492 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3493 			    CTLFLAG_RD, &qs->rspq.size,
3494 			    0, "#entries in response queue");
3495 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3496 			    CTLFLAG_RD, &qs->rspq.cidx,
3497 			    0, "consumer index");
3498 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3499 			    CTLFLAG_RD, &qs->rspq.credits,
3500 			    0, "#credits");
3501 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
3502 			    CTLFLAG_RD, &qs->rspq.starved,
3503 			    0, "#times starved");
3504 			SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3505 			    CTLFLAG_RD, &qs->rspq.phys_addr,
3506 			    "physical_address_of the queue");
3507 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3508 			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3509 			    0, "start rspq dump entry");
3510 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3511 			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3512 			    0, "#rspq entries to dump");
3513 			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3514 			    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
3515 			    &qs->rspq, 0, t3_dump_rspq, "A",
3516 			    "dump of the response queue");
3517 
3518 			SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
3519 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
3520 			    "#tunneled packets dropped");
3521 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3522 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.mq_len,
3523 			    0, "#tunneled packets waiting to be sent");
3524 #if 0
3525 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3526 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3527 			    0, "#tunneled packets queue producer index");
3528 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3529 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3530 			    0, "#tunneled packets queue consumer index");
3531 #endif
3532 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed",
3533 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3534 			    0, "#tunneled packets processed by the card");
3535 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3536 			    CTLFLAG_RD, &txq->cleaned,
3537 			    0, "#tunneled packets cleaned");
3538 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3539 			    CTLFLAG_RD, &txq->in_use,
3540 			    0, "#tunneled packet slots in use");
3541 			SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees",
3542 			    CTLFLAG_RD, &txq->txq_frees,
3543 			    "#tunneled packets freed");
3544 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3545 			    CTLFLAG_RD, &txq->txq_skipped,
3546 			    0, "#tunneled packet descriptors skipped");
3547 			SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
3548 			    CTLFLAG_RD, &txq->txq_coalesced,
3549 			    "#tunneled packets coalesced");
3550 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3551 			    CTLFLAG_RD, &txq->txq_enqueued,
3552 			    0, "#tunneled packets enqueued to hardware");
3553 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3554 			    CTLFLAG_RD, &qs->txq_stopped,
3555 			    0, "tx queues stopped");
3556 			SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3557 			    CTLFLAG_RD, &txq->phys_addr,
3558 			    "physical_address_of the queue");
3559 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3560 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3561 			    0, "txq generation");
3562 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3563 			    CTLFLAG_RD, &txq->cidx,
3564 			    0, "hardware queue cidx");
3565 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3566 			    CTLFLAG_RD, &txq->pidx,
3567 			    0, "hardware queue pidx");
3568 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3569 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3570 			    0, "txq start idx for dump");
3571 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3572 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3573 			    0, "txq #entries to dump");
3574 			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3575 			    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
3576 			    &qs->txq[TXQ_ETH], 0, t3_dump_txq_eth, "A",
3577 			    "dump of the transmit queue");
3578 
3579 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3580 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3581 			    0, "ctrlq start idx for dump");
3582 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3583 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3584 			    0, "ctrl #entries to dump");
3585 			SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3586 			    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
3587 			    &qs->txq[TXQ_CTRL], 0, t3_dump_txq_ctrl, "A",
3588 			    "dump of the transmit queue");
3589 
3590 			SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued",
3591 			    CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3592 			SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3593 			    CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3594 			SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3595 			    CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3596 			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3597 			    CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3598 		}
3599 
3600 		/* Now add a node for mac stats. */
3601 		poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3602 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "MAC statistics");
3603 		poidlist = SYSCTL_CHILDREN(poid);
3604 
3605 		/*
3606 		 * We (ab)use the length argument (arg2) to pass on the offset
3607 		 * of the data that we are interested in.  This is only required
3608 		 * for the quad counters that are updated from the hardware (we
3609 		 * make sure that we return the latest value).
3610 		 * sysctl_handle_macstat first updates *all* the counters from
3611 		 * the hardware, and then returns the latest value of the
3612 		 * requested counter.  Best would be to update only the
3613 		 * requested counter from hardware, but t3_mac_update_stats()
3614 		 * hides all the register details and we don't want to dive into
3615 		 * all that here.
3616 		 */
3617 #define CXGB_SYSCTL_ADD_QUAD(a)	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3618     CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pi, \
3619     offsetof(struct mac_stats, a), sysctl_handle_macstat, "QU", 0)
3620 		CXGB_SYSCTL_ADD_QUAD(tx_octets);
3621 		CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3622 		CXGB_SYSCTL_ADD_QUAD(tx_frames);
3623 		CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3624 		CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3625 		CXGB_SYSCTL_ADD_QUAD(tx_pause);
3626 		CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3627 		CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3628 		CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3629 		CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3630 		CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3631 		CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3632 		CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3633 		CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3634 		CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3635 		CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3636 		CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3637 		CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3638 		CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3639 		CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3640 		CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3641 		CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3642 		CXGB_SYSCTL_ADD_QUAD(rx_octets);
3643 		CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3644 		CXGB_SYSCTL_ADD_QUAD(rx_frames);
3645 		CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3646 		CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3647 		CXGB_SYSCTL_ADD_QUAD(rx_pause);
3648 		CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
3649 		CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3650 		CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3651 		CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3652 		CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3653 		CXGB_SYSCTL_ADD_QUAD(rx_runt);
3654 		CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3655 		CXGB_SYSCTL_ADD_QUAD(rx_short);
3656 		CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3657 		CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3658 		CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3659 		CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3660 		CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3661 		CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3662 		CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3663 		CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3664 		CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3665 		CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3666 #undef CXGB_SYSCTL_ADD_QUAD
3667 
3668 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3669     CTLFLAG_RD, &mstats->a, 0)
3670 		CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3671 		CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3672 		CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3673 		CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3674 		CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3675 		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3676 		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3677 		CXGB_SYSCTL_ADD_ULONG(num_toggled);
3678 		CXGB_SYSCTL_ADD_ULONG(num_resets);
3679 		CXGB_SYSCTL_ADD_ULONG(link_faults);
3680 #undef CXGB_SYSCTL_ADD_ULONG
3681 	}
3682 }
3683 
3684 /**
3685  *	t3_get_desc - dump an SGE descriptor for debugging purposes
3686  *	@qs: the queue set
3687  *	@qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3688  *	@idx: the descriptor index in the queue
3689  *	@data: where to dump the descriptor contents
3690  *
3691  *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3692  *	size of the descriptor.
3693  */
3694 int
3695 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3696 		unsigned char *data)
3697 {
3698 	if (qnum >= 6)
3699 		return (EINVAL);
3700 
3701 	if (qnum < 3) {
3702 		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3703 			return -EINVAL;
3704 		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3705 		return sizeof(struct tx_desc);
3706 	}
3707 
3708 	if (qnum == 3) {
3709 		if (!qs->rspq.desc || idx >= qs->rspq.size)
3710 			return (EINVAL);
3711 		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3712 		return sizeof(struct rsp_desc);
3713 	}
3714 
3715 	qnum -= 4;
3716 	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3717 		return (EINVAL);
3718 	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3719 	return sizeof(struct rx_desc);
3720 }
3721