xref: /freebsd/sys/dev/cxgb/cxgb_sge.c (revision b64c5a0ace59af62eff52bfe110a521dc73c937b)
1 /**************************************************************************
2 SPDX-License-Identifier: BSD-2-Clause
3 
4 Copyright (c) 2007-2009, Chelsio Inc.
5 All rights reserved.
6 
7 Redistribution and use in source and binary forms, with or without
8 modification, are permitted provided that the following conditions are met:
9 
10  1. Redistributions of source code must retain the above copyright notice,
11     this list of conditions and the following disclaimer.
12 
13  2. Neither the name of the Chelsio Corporation nor the names of its
14     contributors may be used to endorse or promote products derived from
15     this software without specific prior written permission.
16 
17 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
18 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
19 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
20 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
21 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
22 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
23 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
24 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
25 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
26 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
27 POSSIBILITY OF SUCH DAMAGE.
28 
29 ***************************************************************************/
30 
31 #include <sys/cdefs.h>
32 #include "opt_inet6.h"
33 #include "opt_inet.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/bus.h>
40 #include <sys/conf.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/rman.h>
44 #include <sys/queue.h>
45 #include <sys/sysctl.h>
46 #include <sys/taskqueue.h>
47 
48 #include <sys/proc.h>
49 #include <sys/sbuf.h>
50 #include <sys/sched.h>
51 #include <sys/smp.h>
52 #include <sys/systm.h>
53 #include <sys/syslog.h>
54 #include <sys/socket.h>
55 #include <sys/sglist.h>
56 
57 #include <net/if.h>
58 #include <net/if_var.h>
59 #include <net/bpf.h>
60 #include <net/ethernet.h>
61 #include <net/if_vlan_var.h>
62 
63 #include <netinet/in_systm.h>
64 #include <netinet/in.h>
65 #include <netinet/ip.h>
66 #include <netinet/ip6.h>
67 #include <netinet/tcp.h>
68 
69 #include <dev/pci/pcireg.h>
70 #include <dev/pci/pcivar.h>
71 
72 #include <vm/vm.h>
73 #include <vm/pmap.h>
74 
75 #include <cxgb_include.h>
76 #include <sys/mvec.h>
77 
/* Global knobs exported by this file; neither is read in this part of it. */
int	txq_fills = 0;
int	multiq_tx_enable = 1;

#ifdef TCP_OFFLOAD
/* Build fails unless NUM_CPL_HANDLERS is at least NUM_CPL_CMDS. */
CTASSERT(NUM_CPL_HANDLERS >= NUM_CPL_CMDS);
#endif

extern struct sysctl_oid_list sysctl__hw_cxgb_children;
/* hw.cxgb.txq_mr_size (CTLFLAG_RDTUN); defaults to TX_ETH_Q_SIZE. */
int cxgb_txq_buf_ring_size = TX_ETH_Q_SIZE;
SYSCTL_INT(_hw_cxgb, OID_AUTO, txq_mr_size, CTLFLAG_RDTUN, &cxgb_txq_buf_ring_size, 0,
    "size of per-queue mbuf ring");

/*
 * hw.cxgb.tx_coalesce_force (CTLFLAG_RWTUN).  When non-zero,
 * check_pkt_coalesce() returns 1 without looking at the queue state.
 */
static int cxgb_tx_coalesce_force = 0;
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_force, CTLFLAG_RWTUN,
    &cxgb_tx_coalesce_force, 0,
    "coalesce small packets into a single work request regardless of ring state");
94 
/*
 * Defaults and limits for the coalescing and reclaim tunables, all expressed
 * as fractions of TX_ETH_Q_SIZE.  Every expansion is fully parenthesized so
 * the macros can be used inside larger expressions without the shift
 * re-associating with neighbouring operators.
 */
#define	COALESCE_START_DEFAULT		((TX_ETH_Q_SIZE) >> 1)
#define	COALESCE_START_MAX		((TX_ETH_Q_SIZE) - ((TX_ETH_Q_SIZE) >> 3))
#define	COALESCE_STOP_DEFAULT		((TX_ETH_Q_SIZE) >> 2)
#define	COALESCE_STOP_MIN		((TX_ETH_Q_SIZE) >> 5)
#define	TX_RECLAIM_DEFAULT		((TX_ETH_Q_SIZE) >> 5)
#define	TX_RECLAIM_MAX			((TX_ETH_Q_SIZE) >> 2)
#define	TX_RECLAIM_MIN			((TX_ETH_Q_SIZE) >> 6)
102 
103 
/*
 * Run-time tunables (CTLFLAG_RWTUN).  check_pkt_coalesce() compares the
 * Ethernet Tx queue's in_use count against the start/stop values and clamps
 * them to COALESCE_START_MAX / COALESCE_STOP_MIN on every call.
 */
static int cxgb_tx_coalesce_enable_start = COALESCE_START_DEFAULT;
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_start, CTLFLAG_RWTUN,
    &cxgb_tx_coalesce_enable_start, 0,
    "coalesce enable threshold");
static int cxgb_tx_coalesce_enable_stop = COALESCE_STOP_DEFAULT;
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_coalesce_enable_stop, CTLFLAG_RWTUN,
    &cxgb_tx_coalesce_enable_stop, 0,
    "coalesce disable threshold");
/*
 * reclaim_completed_tx() resets this to TX_RECLAIM_DEFAULT whenever it is
 * found outside [TX_RECLAIM_MIN, TX_RECLAIM_MAX].
 */
static int cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
SYSCTL_INT(_hw_cxgb, OID_AUTO, tx_reclaim_threshold, CTLFLAG_RWTUN,
    &cxgb_tx_reclaim_threshold, 0,
    "tx cleaning minimum threshold");
116 
/*
 * XXX don't re-enable this until TOE stops assuming
 * we have an m_ext
 */
static int recycle_enable = 0;

/* Defined outside this file. */
extern int cxgb_use_16k_clusters;	/* -1 lets t3_sge_prep() decide via is_offload() */
extern int nmbjumbop;
extern int nmbjumbo9;
extern int nmbjumbo16;

#define USE_GTS 0

/* NOTE(review): names suggest Rx buffer size / drop / copy thresholds; not used in this part of the file. */
#define SGE_RX_SM_BUF_SIZE	1536
#define SGE_RX_DROP_THRES	16
#define SGE_RX_COPY_THRES	128

/*
 * Period of the Tx buffer reclaim timer.  This timer does not need to run
 * frequently as Tx buffers are usually reclaimed by new Tx packets.
 * The value is hz >> 1 ticks and is the interval used to arm sge_timer_ch.
 */
#define TX_RECLAIM_PERIOD       (hz >> 1)
139 
/*
 * Values for sge_txq.flags
 */
enum {
	TXQ_RUNNING	= 1 << 0,  /* fetch engine is running */
	TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
};

/* Tx descriptor: an array of TX_DESC_FLITS 64-bit flits. */
struct tx_desc {
	uint64_t	flit[TX_DESC_FLITS];
} __packed;

/*
 * Free-list (Rx buffer) descriptor.  refill_fl() writes every field with
 * htobe32(), i.e. the stored values are big-endian.
 */
struct rx_desc {
	uint32_t	addr_lo;	/* low 32 bits of the buffer bus address */
	uint32_t	len_gen;	/* written as V_FLD_GEN1(gen) */
	uint32_t	gen2;		/* written as V_FLD_GEN2(gen) */
	uint32_t	addr_hi;	/* high 32 bits of the buffer bus address */
} __packed;

struct rsp_desc {               /* response queue descriptor */
	struct rss_header	rss_hdr;
	uint32_t		flags;
	uint32_t		len_cq;
	uint8_t			imm_data[47];	/* inline payload copied out by get_imm_packet() */
	uint8_t			intr_gen;
} __packed;
166 
/*
 * Bits kept in the flags member of the software descriptors.  In this part
 * of the file refill_fl() sets RX_SW_DESC_MAP_CREATED once a DMA map exists
 * for a slot and RX_SW_DESC_INUSE once a buffer has been posted.
 */
#define RX_SW_DESC_MAP_CREATED	(1 << 0)
#define TX_SW_DESC_MAP_CREATED	(1 << 1)
#define RX_SW_DESC_INUSE        (1 << 3)
#define TX_SW_DESC_MAPPED       (1 << 4)

/* The four SOP/EOP combinations, each passed through G_RSPD_SOP_EOP(). */
#define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
#define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
#define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
#define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
176 
struct tx_sw_desc {                /* SW state per Tx descriptor */
	struct mbuf	*m;
	bus_dmamap_t	map;
	int		flags;		/* TX_SW_DESC_* bits (presumably; not set in this part of the file) */
};

struct rx_sw_desc {                /* SW state per Rx descriptor */
	caddr_t		rxsd_cl;	/* cluster posted to the free list */
	struct mbuf	*m;		/* mbuf header allocated together with the cluster */
	bus_dmamap_t	map;		/* DMA map created lazily by refill_fl() */
	int		flags;		/* RX_SW_DESC_* bits */
};

/* NOTE(review): not used in this part of the file; field meanings unconfirmed. */
struct txq_state {
	unsigned int	compl;
	unsigned int	gen;
	unsigned int	pidx;
};

/* Result slot filled in by refill_fl_cb() for refill_fl(). */
struct refill_fl_cb_arg {
	int               error;	/* error reported to the callback */
	bus_dma_segment_t seg;		/* first DMA segment of the buffer */
	int               nseg;		/* segment count reported to the callback */
};
201 
202 
/*
 * Maps a number of flits to the number of Tx descriptors that can hold them.
 * The formula is
 *
 * desc = 1 + (flits - 2) / (WR_FLITS - 1).
 *
 * HW allows up to 4 descriptors to be combined into a WR.
 *
 * The table is indexed directly by the flit count (see flits_to_desc());
 * index 0 maps to 0 descriptors.  Its length depends on SGE_NUM_GENBITS.
 */
static uint8_t flit_desc_map[] = {
	0,
#if SGE_NUM_GENBITS == 1
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
#elif SGE_NUM_GENBITS == 2
	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
#else
# error "SGE_NUM_GENBITS must be 1 or 2"
#endif
};
227 
/*
 * Queue-set locking helpers; all of them operate on the single mutex
 * embedded in the qset (qs->lock).
 */
#define	TXQ_LOCK_ASSERT(qs)	mtx_assert(&(qs)->lock, MA_OWNED)
#define	TXQ_TRYLOCK(qs)		mtx_trylock(&(qs)->lock)
#define	TXQ_LOCK(qs)		mtx_lock(&(qs)->lock)
#define	TXQ_UNLOCK(qs)		mtx_unlock(&(qs)->lock)
/*
 * Thin wrappers around the drbr_*() calls for the buf_ring (txq_mr) that
 * belongs to the qset's Ethernet Tx queue.
 */
#define	TXQ_RING_EMPTY(qs)	drbr_empty((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_NEEDS_ENQUEUE(qs)					\
	drbr_needs_enqueue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_FLUSH(qs)	drbr_flush((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
#define	TXQ_RING_DEQUEUE_COND(qs, func, arg)				\
	drbr_dequeue_cond((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr, func, arg)
#define	TXQ_RING_DEQUEUE(qs) \
	drbr_dequeue((qs)->port->ifp, (qs)->txq[TXQ_ETH].txq_mr)
240 
/* Global debug switch; not read in this part of the file. */
int cxgb_debug = 0;

/* Forward declarations for callbacks and helpers defined later in this file. */
static void sge_timer_cb(void *arg);
static void sge_timer_reclaim(void *arg, int ncount);
static void sge_txq_reclaim_handler(void *arg, int ncount);
static void cxgb_start_locked(struct sge_qset *qs);
247 
248 /*
249  * XXX need to cope with bursty scheduling by looking at a wider
250  * window than we are now for determining the need for coalescing
251  *
252  */
253 static __inline uint64_t
254 check_pkt_coalesce(struct sge_qset *qs)
255 {
256         struct adapter *sc;
257         struct sge_txq *txq;
258 	uint8_t *fill;
259 
260 	if (__predict_false(cxgb_tx_coalesce_force))
261 		return (1);
262 	txq = &qs->txq[TXQ_ETH];
263         sc = qs->port->adapter;
264 	fill = &sc->tunq_fill[qs->idx];
265 
266 	if (cxgb_tx_coalesce_enable_start > COALESCE_START_MAX)
267 		cxgb_tx_coalesce_enable_start = COALESCE_START_MAX;
268 	if (cxgb_tx_coalesce_enable_stop < COALESCE_STOP_MIN)
269 		cxgb_tx_coalesce_enable_stop = COALESCE_STOP_MIN;
270 	/*
271 	 * if the hardware transmit queue is more than 1/8 full
272 	 * we mark it as coalescing - we drop back from coalescing
273 	 * when we go below 1/32 full and there are no packets enqueued,
274 	 * this provides us with some degree of hysteresis
275 	 */
276         if (*fill != 0 && (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
277 	    TXQ_RING_EMPTY(qs) && (qs->coalescing == 0))
278                 *fill = 0;
279         else if (*fill == 0 && (txq->in_use >= cxgb_tx_coalesce_enable_start))
280                 *fill = 1;
281 
282 	return (sc->tunq_coalesce);
283 }
284 
285 #ifdef __LP64__
286 static void
287 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
288 {
289 	uint64_t wr_hilo;
290 #if _BYTE_ORDER == _LITTLE_ENDIAN
291 	wr_hilo = wr_hi;
292 	wr_hilo |= (((uint64_t)wr_lo)<<32);
293 #else
294 	wr_hilo = wr_lo;
295 	wr_hilo |= (((uint64_t)wr_hi)<<32);
296 #endif
297 	wrp->wrh_hilo = wr_hilo;
298 }
299 #else
300 static void
301 set_wr_hdr(struct work_request_hdr *wrp, uint32_t wr_hi, uint32_t wr_lo)
302 {
303 
304 	wrp->wrh_hi = wr_hi;
305 	wmb();
306 	wrp->wrh_lo = wr_lo;
307 }
308 #endif
309 
/* Running state shared between cxgb_dequeue() and coalesce_check(). */
struct coalesce_info {
	int count;	/* packets accepted so far */
	int nbytes;	/* sum of m_len of the accepted packets */
	int noncoal;	/* set once a packet that cannot be coalesced was seen */
};
315 
316 static int
317 coalesce_check(struct mbuf *m, void *arg)
318 {
319 	struct coalesce_info *ci = arg;
320 
321 	if ((m->m_next != NULL) ||
322 	    ((mtod(m, vm_offset_t) & PAGE_MASK) + m->m_len > PAGE_SIZE))
323 		ci->noncoal = 1;
324 
325 	if ((ci->count == 0) || (ci->noncoal == 0 && (ci->count < 7) &&
326 	    (ci->nbytes + m->m_len <= 10500))) {
327 		ci->count++;
328 		ci->nbytes += m->m_len;
329 		return (1);
330 	}
331 	return (0);
332 }
333 
334 static struct mbuf *
335 cxgb_dequeue(struct sge_qset *qs)
336 {
337 	struct mbuf *m, *m_head, *m_tail;
338 	struct coalesce_info ci;
339 
340 
341 	if (check_pkt_coalesce(qs) == 0)
342 		return TXQ_RING_DEQUEUE(qs);
343 
344 	m_head = m_tail = NULL;
345 	ci.count = ci.nbytes = ci.noncoal = 0;
346 	do {
347 		m = TXQ_RING_DEQUEUE_COND(qs, coalesce_check, &ci);
348 		if (m_head == NULL) {
349 			m_tail = m_head = m;
350 		} else if (m != NULL) {
351 			m_tail->m_nextpkt = m;
352 			m_tail = m;
353 		}
354 	} while (m != NULL);
355 	if (ci.count > 7)
356 		panic("trying to coalesce %d packets in to one WR", ci.count);
357 	return (m_head);
358 }
359 
360 /**
361  *	reclaim_completed_tx - reclaims completed Tx descriptors
362  *	@adapter: the adapter
363  *	@q: the Tx queue to reclaim completed descriptors from
364  *
365  *	Reclaims Tx descriptors that the SGE has indicated it has processed,
366  *	and frees the associated buffers if possible.  Called with the Tx
367  *	queue's lock held.
368  */
369 static __inline int
370 reclaim_completed_tx(struct sge_qset *qs, int reclaim_min, int queue)
371 {
372 	struct sge_txq *q = &qs->txq[queue];
373 	int reclaim = desc_reclaimable(q);
374 
375 	if ((cxgb_tx_reclaim_threshold > TX_RECLAIM_MAX) ||
376 	    (cxgb_tx_reclaim_threshold < TX_RECLAIM_MIN))
377 		cxgb_tx_reclaim_threshold = TX_RECLAIM_DEFAULT;
378 
379 	if (reclaim < reclaim_min)
380 		return (0);
381 
382 	mtx_assert(&qs->lock, MA_OWNED);
383 	if (reclaim > 0) {
384 		t3_free_tx_desc(qs, reclaim, queue);
385 		q->cleaned += reclaim;
386 		q->in_use -= reclaim;
387 	}
388 	if (isset(&qs->txq_stopped, TXQ_ETH))
389                 clrbit(&qs->txq_stopped, TXQ_ETH);
390 
391 	return (reclaim);
392 }
393 
#ifdef DEBUGNET
/*
 * Debugnet polling hook: reclaim completed descriptors on the Ethernet Tx
 * queue, but only if at least TX_RECLAIM_MAX of them are reclaimable.
 * Returns what reclaim_completed_tx() returns.
 */
int
cxgb_debugnet_poll_tx(struct sge_qset *qs)
{
	int reclaimed;

	reclaimed = reclaim_completed_tx(qs, TX_RECLAIM_MAX, TXQ_ETH);
	return (reclaimed);
}
#endif
402 
403 /**
404  *	should_restart_tx - are there enough resources to restart a Tx queue?
405  *	@q: the Tx queue
406  *
407  *	Checks if there are enough descriptors to restart a suspended Tx queue.
408  */
409 static __inline int
410 should_restart_tx(const struct sge_txq *q)
411 {
412 	unsigned int r = q->processed - q->cleaned;
413 
414 	return q->in_use - r < (q->size >> 1);
415 }
416 
417 /**
418  *	t3_sge_init - initialize SGE
419  *	@adap: the adapter
420  *	@p: the SGE parameters
421  *
422  *	Performs SGE initialization needed every time after a chip reset.
423  *	We do not initialize any of the queue sets here, instead the driver
424  *	top-level must request those individually.  We also do not enable DMA
425  *	here, that should be done after the queues have been set up.
426  */
427 void
428 t3_sge_init(adapter_t *adap, struct sge_params *p)
429 {
430 	u_int ctrl, ups;
431 
432 	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
433 
434 	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
435 	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
436 	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
437 	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
438 #if SGE_NUM_GENBITS == 1
439 	ctrl |= F_EGRGENCTRL;
440 #endif
441 	if (adap->params.rev > 0) {
442 		if (!(adap->flags & (USING_MSIX | USING_MSI)))
443 			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
444 	}
445 	t3_write_reg(adap, A_SG_CONTROL, ctrl);
446 	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
447 		     V_LORCQDRBTHRSH(512));
448 	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
449 	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
450 		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
451 	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
452 		     adap->params.rev < T3_REV_C ? 1000 : 500);
453 	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
454 	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
455 	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
456 	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
457 	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
458 }
459 
460 
/**
 *	sgl_len - calculates the size of an SGL of the given capacity
 *	@n: the number of SGL entries
 *
 *	Returns the number of flits occupied by a scatter/gather list with
 *	@n entries: three flits per pair of entries, plus two flits for a
 *	trailing odd entry.
 */
static __inline unsigned int
sgl_len(unsigned int n)
{
	unsigned int flits = (3 * n) / 2;

	if (n & 1)
		flits++;
	return (flits);
}
473 
474 /**
475  *	get_imm_packet - return the next ingress packet buffer from a response
476  *	@resp: the response descriptor containing the packet data
477  *
478  *	Return a packet containing the immediate data of the given response.
479  */
480 static int
481 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
482 {
483 
484 	if (resp->rss_hdr.opcode == CPL_RX_DATA) {
485 		const struct cpl_rx_data *cpl = (const void *)&resp->imm_data[0];
486 		m->m_len = sizeof(*cpl) + ntohs(cpl->len);
487 	} else if (resp->rss_hdr.opcode == CPL_RX_PKT) {
488 		const struct cpl_rx_pkt *cpl = (const void *)&resp->imm_data[0];
489 		m->m_len = sizeof(*cpl) + ntohs(cpl->len);
490 	} else
491 		m->m_len = IMMED_PKT_SIZE;
492 	m->m_ext.ext_buf = NULL;
493 	m->m_ext.ext_type = 0;
494 	memcpy(mtod(m, uint8_t *), resp->imm_data, m->m_len);
495 	return (0);
496 }
497 
498 static __inline u_int
499 flits_to_desc(u_int n)
500 {
501 	return (flit_desc_map[n]);
502 }
503 
/*
 * Groups of A_SG_INT_CAUSE bits examined by t3_sge_err_intr_handler():
 * parity errors, framing errors, and the union of everything that is
 * treated as fatal (those two groups plus response-queue credit overflow
 * and delivery to a disabled response queue).
 */
#define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
		    F_HIRCQPARITYERROR)
#define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
#define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
		      F_RSPQDISABLED)
512 
513 /**
514  *	t3_sge_err_intr_handler - SGE async event interrupt handler
515  *	@adapter: the adapter
516  *
517  *	Interrupt handler for SGE asynchronous (non-data) events.
518  */
519 void
520 t3_sge_err_intr_handler(adapter_t *adapter)
521 {
522 	unsigned int v, status;
523 
524 	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
525 	if (status & SGE_PARERR)
526 		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
527 			 status & SGE_PARERR);
528 	if (status & SGE_FRAMINGERR)
529 		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
530 			 status & SGE_FRAMINGERR);
531 	if (status & F_RSPQCREDITOVERFOW)
532 		CH_ALERT(adapter, "SGE response queue credit overflow\n");
533 
534 	if (status & F_RSPQDISABLED) {
535 		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
536 
537 		CH_ALERT(adapter,
538 			 "packet delivered to disabled response queue (0x%x)\n",
539 			 (v >> S_RSPQ0DISABLED) & 0xff);
540 	}
541 
542 	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
543 	if (status & SGE_FATALERR)
544 		t3_fatal_err(adapter);
545 }
546 
547 void
548 t3_sge_prep(adapter_t *adap, struct sge_params *p)
549 {
550 	int i, nqsets, fl_q_size, jumbo_q_size, use_16k, jumbo_buf_size;
551 
552 	nqsets = min(SGE_QSETS / adap->params.nports, mp_ncpus);
553 	nqsets *= adap->params.nports;
554 
555 	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
556 	fl_q_size = rounddown_pow_of_two(fl_q_size);
557 
558 	use_16k = cxgb_use_16k_clusters != -1 ? cxgb_use_16k_clusters :
559 	    is_offload(adap);
560 
561 	if (use_16k) {
562 		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
563 		jumbo_buf_size = MJUM16BYTES;
564 	} else {
565 		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
566 		jumbo_buf_size = MJUM9BYTES;
567 	}
568 	jumbo_q_size = rounddown_pow_of_two(jumbo_q_size);
569 
570 	if (fl_q_size < (FL_Q_SIZE / 4) || jumbo_q_size < (JUMBO_Q_SIZE / 2))
571 		device_printf(adap->dev,
572 		    "Insufficient clusters and/or jumbo buffers.\n");
573 
574 	p->max_pkt_size = jumbo_buf_size - sizeof(struct cpl_rx_data);
575 
576 	for (i = 0; i < SGE_QSETS; ++i) {
577 		struct qset_params *q = p->qset + i;
578 
579 		if (adap->params.nports > 2) {
580 			q->coalesce_usecs = 50;
581 		} else {
582 #ifdef INVARIANTS
583 			q->coalesce_usecs = 10;
584 #else
585 			q->coalesce_usecs = 5;
586 #endif
587 		}
588 		q->polling = 0;
589 		q->rspq_size = RSPQ_Q_SIZE;
590 		q->fl_size = fl_q_size;
591 		q->jumbo_size = jumbo_q_size;
592 		q->jumbo_buf_size = jumbo_buf_size;
593 		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
594 		q->txq_size[TXQ_OFLD] = is_offload(adap) ? TX_OFLD_Q_SIZE : 16;
595 		q->txq_size[TXQ_CTRL] = TX_CTRL_Q_SIZE;
596 		q->cong_thres = 0;
597 	}
598 }
599 
600 int
601 t3_sge_alloc(adapter_t *sc)
602 {
603 
604 	/* The parent tag. */
605 	if (bus_dma_tag_create( bus_get_dma_tag(sc->dev),/* PCI parent */
606 				1, 0,			/* algnmnt, boundary */
607 				BUS_SPACE_MAXADDR,	/* lowaddr */
608 				BUS_SPACE_MAXADDR,	/* highaddr */
609 				NULL, NULL,		/* filter, filterarg */
610 				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
611 				BUS_SPACE_UNRESTRICTED, /* nsegments */
612 				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
613 				0,			/* flags */
614 				NULL, NULL,		/* lock, lockarg */
615 				&sc->parent_dmat)) {
616 		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
617 		return (ENOMEM);
618 	}
619 
620 	/*
621 	 * DMA tag for normal sized RX frames
622 	 */
623 	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
624 		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
625 		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
626 		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
627 		return (ENOMEM);
628 	}
629 
630 	/*
631 	 * DMA tag for jumbo sized RX frames.
632 	 */
633 	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
634 		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
635 		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
636 		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
637 		return (ENOMEM);
638 	}
639 
640 	/*
641 	 * DMA tag for TX frames.
642 	 */
643 	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
644 		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
645 		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
646 		NULL, NULL, &sc->tx_dmat)) {
647 		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
648 		return (ENOMEM);
649 	}
650 
651 	return (0);
652 }
653 
654 int
655 t3_sge_free(struct adapter * sc)
656 {
657 
658 	if (sc->tx_dmat != NULL)
659 		bus_dma_tag_destroy(sc->tx_dmat);
660 
661 	if (sc->rx_jumbo_dmat != NULL)
662 		bus_dma_tag_destroy(sc->rx_jumbo_dmat);
663 
664 	if (sc->rx_dmat != NULL)
665 		bus_dma_tag_destroy(sc->rx_dmat);
666 
667 	if (sc->parent_dmat != NULL)
668 		bus_dma_tag_destroy(sc->parent_dmat);
669 
670 	return (0);
671 }
672 
673 void
674 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
675 {
676 
677 	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
678 	qs->rspq.polling = 0 /* p->polling */;
679 }
680 
#if !defined(__i386__) && !defined(__amd64__)
/*
 * bus_dmamap_load() callback used by refill_fl().  Records the load status
 * and segment count in the caller's struct refill_fl_cb_arg.  The first
 * segment is only copied when the load succeeded, because the segment array
 * is not guaranteed to be valid when @error is non-zero.
 */
static void
refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
{
	struct refill_fl_cb_arg *cb_arg = arg;

	cb_arg->error = error;
	cb_arg->nseg = nseg;
	if (error == 0 && segs != NULL)
		cb_arg->seg = segs[0];
}
#endif
693 /**
694  *	refill_fl - refill an SGE free-buffer list
695  *	@sc: the controller softc
696  *	@q: the free-list to refill
697  *	@n: the number of new buffers to allocate
698  *
699  *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
700  *	The caller must assure that @n does not exceed the queue's capacity.
701  */
702 static void
703 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
704 {
705 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
706 	struct rx_desc *d = &q->desc[q->pidx];
707 	struct refill_fl_cb_arg cb_arg;
708 	struct mbuf *m;
709 	caddr_t cl;
710 	int err;
711 
712 	cb_arg.error = 0;
713 	while (n--) {
714 		/*
715 		 * We allocate an uninitialized mbuf + cluster, mbuf is
716 		 * initialized after rx.
717 		 */
718 		if (q->zone == zone_pack) {
719 			if ((m = m_getcl(M_NOWAIT, MT_NOINIT, M_PKTHDR)) == NULL)
720 				break;
721 			cl = m->m_ext.ext_buf;
722 		} else {
723 			if ((cl = m_cljget(NULL, M_NOWAIT, q->buf_size)) == NULL)
724 				break;
725 			if ((m = m_gethdr_raw(M_NOWAIT, 0)) == NULL) {
726 				uma_zfree(q->zone, cl);
727 				break;
728 			}
729 		}
730 		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
731 			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
732 				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
733 				uma_zfree(q->zone, cl);
734 				goto done;
735 			}
736 			sd->flags |= RX_SW_DESC_MAP_CREATED;
737 		}
738 #if !defined(__i386__) && !defined(__amd64__)
739 		err = bus_dmamap_load(q->entry_tag, sd->map,
740 		    cl, q->buf_size, refill_fl_cb, &cb_arg, 0);
741 
742 		if (err != 0 || cb_arg.error) {
743 			if (q->zone != zone_pack)
744 				uma_zfree(q->zone, cl);
745 			m_free(m);
746 			goto done;
747 		}
748 #else
749 		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)cl);
750 #endif
751 		sd->flags |= RX_SW_DESC_INUSE;
752 		sd->rxsd_cl = cl;
753 		sd->m = m;
754 		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
755 		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
756 		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
757 		d->gen2 = htobe32(V_FLD_GEN2(q->gen));
758 
759 		d++;
760 		sd++;
761 
762 		if (++q->pidx == q->size) {
763 			q->pidx = 0;
764 			q->gen ^= 1;
765 			sd = q->sdesc;
766 			d = q->desc;
767 		}
768 		q->credits++;
769 		q->db_pending++;
770 	}
771 
772 done:
773 	if (q->db_pending >= 32) {
774 		q->db_pending = 0;
775 		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
776 	}
777 }
778 
779 
780 /**
781  *	free_rx_bufs - free the Rx buffers on an SGE free list
782  *	@sc: the controle softc
783  *	@q: the SGE free list to clean up
784  *
785  *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
786  *	this queue should be stopped before calling this function.
787  */
788 static void
789 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
790 {
791 	u_int cidx = q->cidx;
792 
793 	while (q->credits--) {
794 		struct rx_sw_desc *d = &q->sdesc[cidx];
795 
796 		if (d->flags & RX_SW_DESC_INUSE) {
797 			bus_dmamap_unload(q->entry_tag, d->map);
798 			bus_dmamap_destroy(q->entry_tag, d->map);
799 			if (q->zone == zone_pack) {
800 				m_init(d->m, M_NOWAIT, MT_DATA, M_EXT);
801 				uma_zfree(zone_pack, d->m);
802 			} else {
803 				m_init(d->m, M_NOWAIT, MT_DATA, 0);
804 				m_free_raw(d->m);
805 				uma_zfree(q->zone, d->rxsd_cl);
806 			}
807 		}
808 
809 		d->rxsd_cl = NULL;
810 		d->m = NULL;
811 		if (++cidx == q->size)
812 			cidx = 0;
813 	}
814 }
815 
816 static __inline void
817 __refill_fl(adapter_t *adap, struct sge_fl *fl)
818 {
819 	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
820 }
821 
822 static __inline void
823 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
824 {
825 	uint32_t reclaimable = fl->size - fl->credits;
826 
827 	if (reclaimable > 0)
828 		refill_fl(adap, fl, min(max, reclaimable));
829 }
830 
831 /**
832  *	recycle_rx_buf - recycle a receive buffer
833  *	@adapter: the adapter
834  *	@q: the SGE free list
835  *	@idx: index of buffer to recycle
836  *
837  *	Recycles the specified buffer on the given free list by adding it at
838  *	the next available slot on the list.
839  */
840 static void
841 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
842 {
843 	struct rx_desc *from = &q->desc[idx];
844 	struct rx_desc *to   = &q->desc[q->pidx];
845 
846 	q->sdesc[q->pidx] = q->sdesc[idx];
847 	to->addr_lo = from->addr_lo;        // already big endian
848 	to->addr_hi = from->addr_hi;        // likewise
849 	wmb();	/* necessary ? */
850 	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
851 	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
852 	q->credits++;
853 
854 	if (++q->pidx == q->size) {
855 		q->pidx = 0;
856 		q->gen ^= 1;
857 	}
858 	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
859 }
860 
861 static void
862 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
863 {
864 	uint32_t *addr;
865 
866 	addr = arg;
867 	*addr = segs[0].ds_addr;
868 }
869 
870 static int
871 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
872     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
873     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
874 {
875 	size_t len = nelem * elem_size;
876 	void *s = NULL;
877 	void *p = NULL;
878 	int err;
879 
880 	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
881 				      BUS_SPACE_MAXADDR_32BIT,
882 				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
883 				      len, 0, NULL, NULL, tag)) != 0) {
884 		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
885 		return (ENOMEM);
886 	}
887 
888 	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
889 				    map)) != 0) {
890 		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
891 		return (ENOMEM);
892 	}
893 
894 	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
895 	bzero(p, len);
896 	*(void **)desc = p;
897 
898 	if (sw_size) {
899 		len = nelem * sw_size;
900 		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
901 		*(void **)sdesc = s;
902 	}
903 	if (parent_entry_tag == NULL)
904 		return (0);
905 
906 	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
907 				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
908 		                      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
909 				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
910 		                      NULL, NULL, entry_tag)) != 0) {
911 		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
912 		return (ENOMEM);
913 	}
914 	return (0);
915 }
916 
917 static void
918 sge_slow_intr_handler(void *arg, int ncount)
919 {
920 	adapter_t *sc = arg;
921 
922 	t3_slow_intr_handler(sc);
923 	t3_write_reg(sc, A_PL_INT_ENABLE0, sc->slow_intr_mask);
924 	(void) t3_read_reg(sc, A_PL_INT_ENABLE0);
925 }
926 
927 /**
928  *	sge_timer_cb - perform periodic maintenance of an SGE qset
929  *	@data: the SGE queue set to maintain
930  *
931  *	Runs periodically from a timer to perform maintenance of an SGE queue
932  *	set.  It performs two tasks:
933  *
934  *	a) Cleans up any completed Tx descriptors that may still be pending.
935  *	Normal descriptor cleanup happens when new packets are added to a Tx
936  *	queue so this timer is relatively infrequent and does any cleanup only
937  *	if the Tx queue has not seen any new packets in a while.  We make a
938  *	best effort attempt to reclaim descriptors, in that we don't wait
939  *	around if we cannot get a queue's lock (which most likely is because
940  *	someone else is queueing new packets and so will also handle the clean
941  *	up).  Since control queues use immediate data exclusively we don't
942  *	bother cleaning them up here.
943  *
944  *	b) Replenishes Rx queues that have run out due to memory shortage.
945  *	Normally new Rx buffers are added when existing ones are consumed but
946  *	when out of memory a queue can become empty.  We try to add only a few
947  *	buffers here, the queue will be replenished fully as these new buffers
948  *	are used up if memory shortage has subsided.
949  *
950  *	c) Return coalesced response queue credits in case a response queue is
951  *	starved.
952  *
953  *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
954  *	fifo overflows and the FW doesn't implement any recovery scheme yet.
955  */
956 static void
957 sge_timer_cb(void *arg)
958 {
959 	adapter_t *sc = arg;
960 	if ((sc->flags & USING_MSIX) == 0) {
961 
962 		struct port_info *pi;
963 		struct sge_qset *qs;
964 		struct sge_txq  *txq;
965 		int i, j;
966 		int reclaim_ofl, refill_rx;
967 
968 		if (sc->open_device_map == 0)
969 			return;
970 
971 		for (i = 0; i < sc->params.nports; i++) {
972 			pi = &sc->port[i];
973 			for (j = 0; j < pi->nqsets; j++) {
974 				qs = &sc->sge.qs[pi->first_qset + j];
975 				txq = &qs->txq[0];
976 				reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
977 				refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
978 				    (qs->fl[1].credits < qs->fl[1].size));
979 				if (reclaim_ofl || refill_rx) {
980 					taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
981 					break;
982 				}
983 			}
984 		}
985 	}
986 
987 	if (sc->params.nports > 2) {
988 		int i;
989 
990 		for_each_port(sc, i) {
991 			struct port_info *pi = &sc->port[i];
992 
993 			t3_write_reg(sc, A_SG_KDOORBELL,
994 				     F_SELEGRCNTX |
995 				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
996 		}
997 	}
998 	if (((sc->flags & USING_MSIX) == 0 || sc->params.nports > 2) &&
999 	    sc->open_device_map != 0)
1000 		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1001 }
1002 
1003 /*
1004  * This is meant to be a catch-all function to keep sge state private
1005  * to sge.c
1006  *
1007  */
1008 int
1009 t3_sge_init_adapter(adapter_t *sc)
1010 {
1011 	callout_init(&sc->sge_timer_ch, 1);
1012 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1013 	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
1014 	return (0);
1015 }
1016 
1017 int
1018 t3_sge_reset_adapter(adapter_t *sc)
1019 {
1020 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
1021 	return (0);
1022 }
1023 
1024 int
1025 t3_sge_init_port(struct port_info *pi)
1026 {
1027 	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
1028 	return (0);
1029 }
1030 
1031 /**
1032  *	refill_rspq - replenish an SGE response queue
1033  *	@adapter: the adapter
1034  *	@q: the response queue to replenish
1035  *	@credits: how many new responses to make available
1036  *
1037  *	Replenishes a response queue by making the supplied number of responses
1038  *	available to HW.
1039  */
1040 static __inline void
1041 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
1042 {
1043 
1044 	/* mbufs are allocated on demand when a rspq entry is processed. */
1045 	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
1046 		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
1047 }
1048 
/*
 * Task handler: reclaim completed descriptors on Tx queues 0, 1 and 2 of
 * the queue set passed as @arg, using a reclaim threshold of 16.
 * @ncount is unused.
 */
static void
sge_txq_reclaim_handler(void *arg, int ncount)
{
	struct sge_qset *qs = arg;
	int qidx = 0;

	while (qidx < 3) {
		reclaim_completed_tx(qs, 16, qidx);
		qidx++;
	}
}
1058 
1059 static void
1060 sge_timer_reclaim(void *arg, int ncount)
1061 {
1062 	struct port_info *pi = arg;
1063 	int i, nqsets = pi->nqsets;
1064 	adapter_t *sc = pi->adapter;
1065 	struct sge_qset *qs;
1066 	struct mtx *lock;
1067 
1068 	KASSERT((sc->flags & USING_MSIX) == 0,
1069 	    ("can't call timer reclaim for msi-x"));
1070 
1071 	for (i = 0; i < nqsets; i++) {
1072 		qs = &sc->sge.qs[pi->first_qset + i];
1073 
1074 		reclaim_completed_tx(qs, 16, TXQ_OFLD);
1075 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
1076 			    &sc->sge.qs[0].rspq.lock;
1077 
1078 		if (mtx_trylock(lock)) {
1079 			/* XXX currently assume that we are *NOT* polling */
1080 			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
1081 
1082 			if (qs->fl[0].credits < qs->fl[0].size - 16)
1083 				__refill_fl(sc, &qs->fl[0]);
1084 			if (qs->fl[1].credits < qs->fl[1].size - 16)
1085 				__refill_fl(sc, &qs->fl[1]);
1086 
1087 			if (status & (1 << qs->rspq.cntxt_id)) {
1088 				if (qs->rspq.credits) {
1089 					refill_rspq(sc, &qs->rspq, 1);
1090 					qs->rspq.credits--;
1091 					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
1092 					    1 << qs->rspq.cntxt_id);
1093 				}
1094 			}
1095 			mtx_unlock(lock);
1096 		}
1097 	}
1098 }
1099 
1100 /**
1101  *	init_qset_cntxt - initialize an SGE queue set context info
1102  *	@qs: the queue set
1103  *	@id: the queue set id
1104  *
1105  *	Initializes the TIDs and context ids for the queues of a queue set.
1106  */
1107 static void
1108 init_qset_cntxt(struct sge_qset *qs, u_int id)
1109 {
1110 
1111 	qs->rspq.cntxt_id = id;
1112 	qs->fl[0].cntxt_id = 2 * id;
1113 	qs->fl[1].cntxt_id = 2 * id + 1;
1114 	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
1115 	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
1116 	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
1117 	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
1118 	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
1119 
1120 	/* XXX: a sane limit is needed instead of INT_MAX */
1121 	mbufq_init(&qs->txq[TXQ_ETH].sendq, INT_MAX);
1122 	mbufq_init(&qs->txq[TXQ_OFLD].sendq, INT_MAX);
1123 	mbufq_init(&qs->txq[TXQ_CTRL].sendq, INT_MAX);
1124 }
1125 
1126 
1127 static void
1128 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
1129 {
1130 	txq->in_use += ndesc;
1131 	/*
1132 	 * XXX we don't handle stopping of queue
1133 	 * presumably start handles this when we bump against the end
1134 	 */
1135 	txqs->gen = txq->gen;
1136 	txq->unacked += ndesc;
1137 	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
1138 	txq->unacked &= 31;
1139 	txqs->pidx = txq->pidx;
1140 	txq->pidx += ndesc;
1141 #ifdef INVARIANTS
1142 	if (((txqs->pidx > txq->cidx) &&
1143 		(txq->pidx < txqs->pidx) &&
1144 		(txq->pidx >= txq->cidx)) ||
1145 	    ((txqs->pidx < txq->cidx) &&
1146 		(txq->pidx >= txq-> cidx)) ||
1147 	    ((txqs->pidx < txq->cidx) &&
1148 		(txq->cidx < txqs->pidx)))
1149 		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
1150 		    txqs->pidx, txq->pidx, txq->cidx);
1151 #endif
1152 	if (txq->pidx >= txq->size) {
1153 		txq->pidx -= txq->size;
1154 		txq->gen ^= 1;
1155 	}
1156 
1157 }
1158 
1159 /**
1160  *	calc_tx_descs - calculate the number of Tx descriptors for a packet
1161  *	@m: the packet mbufs
1162  *      @nsegs: the number of segments
1163  *
1164  * 	Returns the number of Tx descriptors needed for the given Ethernet
1165  * 	packet.  Ethernet packets require addition of WR and CPL headers.
1166  */
1167 static __inline unsigned int
1168 calc_tx_descs(const struct mbuf *m, int nsegs)
1169 {
1170 	unsigned int flits;
1171 
1172 	if (m->m_pkthdr.len <= PIO_LEN)
1173 		return 1;
1174 
1175 	flits = sgl_len(nsegs) + 2;
1176 	if (m->m_pkthdr.csum_flags & CSUM_TSO)
1177 		flits++;
1178 
1179 	return flits_to_desc(flits);
1180 }
1181 
1182 /**
1183  *	make_sgl - populate a scatter/gather list for a packet
1184  *	@sgp: the SGL to populate
1185  *	@segs: the packet dma segments
1186  *	@nsegs: the number of segments
1187  *
1188  *	Generates a scatter/gather list for the buffers that make up a packet
1189  *	and returns the SGL size in 8-byte words.  The caller must size the SGL
1190  *	appropriately.
1191  */
1192 static __inline void
1193 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1194 {
1195 	int i, idx;
1196 
1197 	for (idx = 0, i = 0; i < nsegs; i++) {
1198 		/*
1199 		 * firmware doesn't like empty segments
1200 		 */
1201 		if (segs[i].ds_len == 0)
1202 			continue;
1203 		if (i && idx == 0)
1204 			++sgp;
1205 
1206 		sgp->len[idx] = htobe32(segs[i].ds_len);
1207 		sgp->addr[idx] = htobe64(segs[i].ds_addr);
1208 		idx ^= 1;
1209 	}
1210 
1211 	if (idx) {
1212 		sgp->len[idx] = 0;
1213 		sgp->addr[idx] = 0;
1214 	}
1215 }
1216 
1217 /**
1218  *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1219  *	@adap: the adapter
1220  *	@q: the Tx queue
1221  *
1222  *	Ring the doorbell if a Tx queue is asleep.  There is a natural race,
1223  *	where the HW is going to sleep just after we checked, however,
1224  *	then the interrupt handler will detect the outstanding TX packet
1225  *	and ring the doorbell for us.
1226  *
1227  *	When GTS is disabled we unconditionally ring the doorbell.
1228  */
1229 static __inline void
1230 check_ring_tx_db(adapter_t *adap, struct sge_txq *q, int mustring)
1231 {
1232 #if USE_GTS
1233 	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1234 	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1235 		set_bit(TXQ_LAST_PKT_DB, &q->flags);
1236 #ifdef T3_TRACE
1237 		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1238 			  q->cntxt_id);
1239 #endif
1240 		t3_write_reg(adap, A_SG_KDOORBELL,
1241 			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1242 	}
1243 #else
1244 	if (mustring || ++q->db_pending >= 32) {
1245 		wmb();            /* write descriptors before telling HW */
1246 		t3_write_reg(adap, A_SG_KDOORBELL,
1247 		    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1248 		q->db_pending = 0;
1249 	}
1250 #endif
1251 }
1252 
/*
 * When the SGE is built with two generation bits, store @gen (big-endian)
 * in the last flit of descriptor @d.  Otherwise this is a no-op.
 */
static __inline void
wr_gen2(struct tx_desc *d, unsigned int gen)
{
#if SGE_NUM_GENBITS == 2
	const unsigned int last_flit = TX_DESC_FLITS - 1;

	d->flit[last_flit] = htobe64(gen);
#endif
}
1260 
1261 /**
1262  *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
1263  *	@ndesc: number of Tx descriptors spanned by the SGL
1264  *	@txd: first Tx descriptor to be written
1265  *	@txqs: txq state (generation and producer index)
1266  *	@txq: the SGE Tx queue
1267  *	@sgl: the SGL
1268  *	@flits: number of flits to the start of the SGL in the first descriptor
1269  *	@sgl_flits: the SGL size in flits
1270  *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
1271  *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
1272  *
1273  *	Write a work request header and an associated SGL.  If the SGL is
1274  *	small enough to fit into one Tx descriptor it has already been written
1275  *	and we just need to write the WR header.  Otherwise we distribute the
1276  *	SGL across the number of descriptors it spans.
1277  */
1278 static void
1279 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1280     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1281     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1282 {
1283 
1284 	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1285 
1286 	if (__predict_true(ndesc == 1)) {
1287 		set_wr_hdr(wrp, htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1288 		    V_WR_SGLSFLT(flits)) | wr_hi,
1289 		    htonl(V_WR_LEN(flits + sgl_flits) | V_WR_GEN(txqs->gen)) |
1290 		    wr_lo);
1291 
1292 		wr_gen2(txd, txqs->gen);
1293 
1294 	} else {
1295 		unsigned int ogen = txqs->gen;
1296 		const uint64_t *fp = (const uint64_t *)sgl;
1297 		struct work_request_hdr *wp = wrp;
1298 
1299 		wrp->wrh_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1300 		    V_WR_SGLSFLT(flits)) | wr_hi;
1301 
1302 		while (sgl_flits) {
1303 			unsigned int avail = WR_FLITS - flits;
1304 
1305 			if (avail > sgl_flits)
1306 				avail = sgl_flits;
1307 			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1308 			sgl_flits -= avail;
1309 			ndesc--;
1310 			if (!sgl_flits)
1311 				break;
1312 
1313 			fp += avail;
1314 			txd++;
1315 			if (++txqs->pidx == txq->size) {
1316 				txqs->pidx = 0;
1317 				txqs->gen ^= 1;
1318 				txd = txq->desc;
1319 			}
1320 
1321 			/*
1322 			 * when the head of the mbuf chain
1323 			 * is freed all clusters will be freed
1324 			 * with it
1325 			 */
1326 			wrp = (struct work_request_hdr *)txd;
1327 			wrp->wrh_hi = htonl(V_WR_DATATYPE(1) |
1328 			    V_WR_SGLSFLT(1)) | wr_hi;
1329 			wrp->wrh_lo = htonl(V_WR_LEN(min(WR_FLITS,
1330 				    sgl_flits + 1)) |
1331 			    V_WR_GEN(txqs->gen)) | wr_lo;
1332 			wr_gen2(txd, txqs->gen);
1333 			flits = 1;
1334 		}
1335 		wrp->wrh_hi |= htonl(F_WR_EOP);
1336 		wmb();
1337 		wp->wrh_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1338 		wr_gen2((struct tx_desc *)wp, ogen);
1339 	}
1340 }
1341 
/*
 * Minimum length of a TCP packet's headers:
 * sizeof(*eh) + sizeof(*ip) + sizeof(*tcp), with 20 bytes assumed for each
 * of the IP and TCP headers.
 */
#define TCPPKTHDRSIZE (ETHER_HDR_LEN + 20 + 20)

/*
 * If mbuf (m) carries a VLAN tag (M_VLANTAG), OR the "VLAN valid" flag and
 * the tag value into the control word cntrl, which must be an lvalue.
 */
#define GET_VTAG(cntrl, m)						\
do {									\
	if ((m)->m_flags & M_VLANTAG)					\
		(cntrl) |= F_TXPKT_VLAN_VLD |				\
		    V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag);		\
} while (0)
1350 
1351 static int
1352 t3_encap(struct sge_qset *qs, struct mbuf **m)
1353 {
1354 	adapter_t *sc;
1355 	struct mbuf *m0;
1356 	struct sge_txq *txq;
1357 	struct txq_state txqs;
1358 	struct port_info *pi;
1359 	unsigned int ndesc, flits, cntrl, mlen;
1360 	int err, nsegs, tso_info = 0;
1361 
1362 	struct work_request_hdr *wrp;
1363 	struct tx_sw_desc *txsd;
1364 	struct sg_ent *sgp, *sgl;
1365 	uint32_t wr_hi, wr_lo, sgl_flits;
1366 	bus_dma_segment_t segs[TX_MAX_SEGS];
1367 
1368 	struct tx_desc *txd;
1369 
1370 	pi = qs->port;
1371 	sc = pi->adapter;
1372 	txq = &qs->txq[TXQ_ETH];
1373 	txd = &txq->desc[txq->pidx];
1374 	txsd = &txq->sdesc[txq->pidx];
1375 	sgl = txq->txq_sgl;
1376 
1377 	prefetch(txd);
1378 	m0 = *m;
1379 
1380 	mtx_assert(&qs->lock, MA_OWNED);
1381 	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1382 	KASSERT(m0->m_flags & M_PKTHDR, ("not packet header\n"));
1383 
1384 	if  (m0->m_nextpkt == NULL && m0->m_next != NULL &&
1385 	    m0->m_pkthdr.csum_flags & (CSUM_TSO))
1386 		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1387 
1388 	if (m0->m_nextpkt != NULL) {
1389 		busdma_map_sg_vec(txq->entry_tag, txsd->map, m0, segs, &nsegs);
1390 		ndesc = 1;
1391 		mlen = 0;
1392 	} else {
1393 		if ((err = busdma_map_sg_collapse(txq->entry_tag, txsd->map,
1394 		    &m0, segs, &nsegs))) {
1395 			if (cxgb_debug)
1396 				printf("failed ... err=%d\n", err);
1397 			return (err);
1398 		}
1399 		mlen = m0->m_pkthdr.len;
1400 		ndesc = calc_tx_descs(m0, nsegs);
1401 	}
1402 	txq_prod(txq, ndesc, &txqs);
1403 
1404 	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d", nsegs));
1405 	txsd->m = m0;
1406 
1407 	if (m0->m_nextpkt != NULL) {
1408 		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1409 		int i, fidx;
1410 
1411 		if (nsegs > 7)
1412 			panic("trying to coalesce %d packets in to one WR", nsegs);
1413 		txq->txq_coalesced += nsegs;
1414 		wrp = (struct work_request_hdr *)txd;
1415 		flits = nsegs*2 + 1;
1416 
1417 		for (fidx = 1, i = 0; i < nsegs; i++, fidx += 2) {
1418 			struct cpl_tx_pkt_batch_entry *cbe;
1419 			uint64_t flit;
1420 			uint32_t *hflit = (uint32_t *)&flit;
1421 			int cflags = m0->m_pkthdr.csum_flags;
1422 
1423 			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1424 			GET_VTAG(cntrl, m0);
1425 			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1426 			if (__predict_false(!(cflags & CSUM_IP)))
1427 				cntrl |= F_TXPKT_IPCSUM_DIS;
1428 			if (__predict_false(!(cflags & (CSUM_TCP | CSUM_UDP |
1429 			    CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
1430 				cntrl |= F_TXPKT_L4CSUM_DIS;
1431 
1432 			hflit[0] = htonl(cntrl);
1433 			hflit[1] = htonl(segs[i].ds_len | 0x80000000);
1434 			flit |= htobe64(1 << 24);
1435 			cbe = &cpl_batch->pkt_entry[i];
1436 			cbe->cntrl = hflit[0];
1437 			cbe->len = hflit[1];
1438 			cbe->addr = htobe64(segs[i].ds_addr);
1439 		}
1440 
1441 		wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1442 		    V_WR_SGLSFLT(flits)) |
1443 		    htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1444 		wr_lo = htonl(V_WR_LEN(flits) |
1445 		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1446 		set_wr_hdr(wrp, wr_hi, wr_lo);
1447 		wmb();
1448 		ETHER_BPF_MTAP(pi->ifp, m0);
1449 		wr_gen2(txd, txqs.gen);
1450 		check_ring_tx_db(sc, txq, 0);
1451 		return (0);
1452 	} else if (tso_info) {
1453 		uint16_t eth_type;
1454 		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1455 		struct ether_header *eh;
1456 		void *l3hdr;
1457 		struct tcphdr *tcp;
1458 
1459 		txd->flit[2] = 0;
1460 		GET_VTAG(cntrl, m0);
1461 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1462 		hdr->cntrl = htonl(cntrl);
1463 		hdr->len = htonl(mlen | 0x80000000);
1464 
1465 		if (__predict_false(mlen < TCPPKTHDRSIZE)) {
1466 			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%b,flags=%#x",
1467 			    m0, mlen, m0->m_pkthdr.tso_segsz,
1468 			    (int)m0->m_pkthdr.csum_flags, CSUM_BITS, m0->m_flags);
1469 			panic("tx tso packet too small");
1470 		}
1471 
1472 		/* Make sure that ether, ip, tcp headers are all in m0 */
1473 		if (__predict_false(m0->m_len < TCPPKTHDRSIZE)) {
1474 			m0 = m_pullup(m0, TCPPKTHDRSIZE);
1475 			if (__predict_false(m0 == NULL)) {
1476 				/* XXX panic probably an overreaction */
1477 				panic("couldn't fit header into mbuf");
1478 			}
1479 		}
1480 
1481 		eh = mtod(m0, struct ether_header *);
1482 		eth_type = eh->ether_type;
1483 		if (eth_type == htons(ETHERTYPE_VLAN)) {
1484 			struct ether_vlan_header *evh = (void *)eh;
1485 
1486 			tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II_VLAN);
1487 			l3hdr = evh + 1;
1488 			eth_type = evh->evl_proto;
1489 		} else {
1490 			tso_info |= V_LSO_ETH_TYPE(CPL_ETH_II);
1491 			l3hdr = eh + 1;
1492 		}
1493 
1494 		if (eth_type == htons(ETHERTYPE_IP)) {
1495 			struct ip *ip = l3hdr;
1496 
1497 			tso_info |= V_LSO_IPHDR_WORDS(ip->ip_hl);
1498 			tcp = (struct tcphdr *)(ip + 1);
1499 		} else if (eth_type == htons(ETHERTYPE_IPV6)) {
1500 			struct ip6_hdr *ip6 = l3hdr;
1501 
1502 			KASSERT(ip6->ip6_nxt == IPPROTO_TCP,
1503 			    ("%s: CSUM_TSO with ip6_nxt %d",
1504 			    __func__, ip6->ip6_nxt));
1505 
1506 			tso_info |= F_LSO_IPV6;
1507 			tso_info |= V_LSO_IPHDR_WORDS(sizeof(*ip6) >> 2);
1508 			tcp = (struct tcphdr *)(ip6 + 1);
1509 		} else
1510 			panic("%s: CSUM_TSO but neither ip nor ip6", __func__);
1511 
1512 		tso_info |= V_LSO_TCPHDR_WORDS(tcp->th_off);
1513 		hdr->lso_info = htonl(tso_info);
1514 
1515 		if (__predict_false(mlen <= PIO_LEN)) {
1516 			/*
1517 			 * pkt not undersized but fits in PIO_LEN
1518 			 * Indicates a TSO bug at the higher levels.
1519 			 */
1520 			txsd->m = NULL;
1521 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1522 			flits = (mlen + 7) / 8 + 3;
1523 			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1524 					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1525 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1526 			wr_lo = htonl(V_WR_LEN(flits) |
1527 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1528 			set_wr_hdr(&hdr->wr, wr_hi, wr_lo);
1529 			wmb();
1530 			ETHER_BPF_MTAP(pi->ifp, m0);
1531 			wr_gen2(txd, txqs.gen);
1532 			check_ring_tx_db(sc, txq, 0);
1533 			m_freem(m0);
1534 			return (0);
1535 		}
1536 		flits = 3;
1537 	} else {
1538 		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1539 
1540 		GET_VTAG(cntrl, m0);
1541 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1542 		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1543 			cntrl |= F_TXPKT_IPCSUM_DIS;
1544 		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP |
1545 		    CSUM_UDP | CSUM_UDP_IPV6 | CSUM_TCP_IPV6))))
1546 			cntrl |= F_TXPKT_L4CSUM_DIS;
1547 		cpl->cntrl = htonl(cntrl);
1548 		cpl->len = htonl(mlen | 0x80000000);
1549 
1550 		if (mlen <= PIO_LEN) {
1551 			txsd->m = NULL;
1552 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1553 			flits = (mlen + 7) / 8 + 2;
1554 
1555 			wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1556 			    V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1557 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1558 			wr_lo = htonl(V_WR_LEN(flits) |
1559 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1560 			set_wr_hdr(&cpl->wr, wr_hi, wr_lo);
1561 			wmb();
1562 			ETHER_BPF_MTAP(pi->ifp, m0);
1563 			wr_gen2(txd, txqs.gen);
1564 			check_ring_tx_db(sc, txq, 0);
1565 			m_freem(m0);
1566 			return (0);
1567 		}
1568 		flits = 2;
1569 	}
1570 	wrp = (struct work_request_hdr *)txd;
1571 	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1572 	make_sgl(sgp, segs, nsegs);
1573 
1574 	sgl_flits = sgl_len(nsegs);
1575 
1576 	ETHER_BPF_MTAP(pi->ifp, m0);
1577 
1578 	KASSERT(ndesc <= 4, ("ndesc too large %d", ndesc));
1579 	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1580 	wr_lo = htonl(V_WR_TID(txq->token));
1581 	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits,
1582 	    sgl_flits, wr_hi, wr_lo);
1583 	check_ring_tx_db(sc, txq, 0);
1584 
1585 	return (0);
1586 }
1587 
#ifdef DEBUGNET
/*
 * debugnet transmit hook: encapsulate *m on the queue set's Ethernet Tx
 * queue and ring the doorbell unconditionally.  On failure any mbuf still
 * referenced by *m is freed and *m is cleared.  Returns t3_encap()'s
 * result.
 */
int
cxgb_debugnet_encap(struct sge_qset *qs, struct mbuf **m)
{
	int error = t3_encap(qs, m);

	if (error != 0) {
		if (*m != NULL) {
			m_freem(*m);
			*m = NULL;
		}
		return (error);
	}

	check_ring_tx_db(qs->port->adapter, &qs->txq[TXQ_ETH], 1);
	return (error);
}
#endif
1604 
1605 void
1606 cxgb_tx_watchdog(void *arg)
1607 {
1608 	struct sge_qset *qs = arg;
1609 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1610 
1611         if (qs->coalescing != 0 &&
1612 	    (txq->in_use <= cxgb_tx_coalesce_enable_stop) &&
1613 	    TXQ_RING_EMPTY(qs))
1614                 qs->coalescing = 0;
1615         else if (qs->coalescing == 0 &&
1616 	    (txq->in_use >= cxgb_tx_coalesce_enable_start))
1617                 qs->coalescing = 1;
1618 	if (TXQ_TRYLOCK(qs)) {
1619 		qs->qs_flags |= QS_FLUSHING;
1620 		cxgb_start_locked(qs);
1621 		qs->qs_flags &= ~QS_FLUSHING;
1622 		TXQ_UNLOCK(qs);
1623 	}
1624 	if (if_getdrvflags(qs->port->ifp) & IFF_DRV_RUNNING)
1625 		callout_reset_on(&txq->txq_watchdog, hz/4, cxgb_tx_watchdog,
1626 		    qs, txq->txq_watchdog.c_cpu);
1627 }
1628 
1629 static void
1630 cxgb_tx_timeout(void *arg)
1631 {
1632 	struct sge_qset *qs = arg;
1633 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1634 
1635 	if (qs->coalescing == 0 && (txq->in_use >= (txq->size>>3)))
1636                 qs->coalescing = 1;
1637 	if (TXQ_TRYLOCK(qs)) {
1638 		qs->qs_flags |= QS_TIMEOUT;
1639 		cxgb_start_locked(qs);
1640 		qs->qs_flags &= ~QS_TIMEOUT;
1641 		TXQ_UNLOCK(qs);
1642 	}
1643 }
1644 
1645 static void
1646 cxgb_start_locked(struct sge_qset *qs)
1647 {
1648 	struct mbuf *m_head = NULL;
1649 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1650 	struct port_info *pi = qs->port;
1651 	if_t ifp = pi->ifp;
1652 
1653 	if (qs->qs_flags & (QS_FLUSHING|QS_TIMEOUT))
1654 		reclaim_completed_tx(qs, 0, TXQ_ETH);
1655 
1656 	if (!pi->link_config.link_ok) {
1657 		TXQ_RING_FLUSH(qs);
1658 		return;
1659 	}
1660 	TXQ_LOCK_ASSERT(qs);
1661 	while (!TXQ_RING_EMPTY(qs) && (if_getdrvflags(ifp) & IFF_DRV_RUNNING) &&
1662 	    pi->link_config.link_ok) {
1663 		reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1664 
1665 		if (txq->size - txq->in_use <= TX_MAX_DESC)
1666 			break;
1667 
1668 		if ((m_head = cxgb_dequeue(qs)) == NULL)
1669 			break;
1670 		/*
1671 		 *  Encapsulation can modify our pointer, and or make it
1672 		 *  NULL on failure.  In that event, we can't requeue.
1673 		 */
1674 		if (t3_encap(qs, &m_head) || m_head == NULL)
1675 			break;
1676 
1677 		m_head = NULL;
1678 	}
1679 
1680 	if (txq->db_pending)
1681 		check_ring_tx_db(pi->adapter, txq, 1);
1682 
1683 	if (!TXQ_RING_EMPTY(qs) && callout_pending(&txq->txq_timer) == 0 &&
1684 	    pi->link_config.link_ok)
1685 		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1686 		    qs, txq->txq_timer.c_cpu);
1687 	if (m_head != NULL)
1688 		m_freem(m_head);
1689 }
1690 
1691 static int
1692 cxgb_transmit_locked(if_t ifp, struct sge_qset *qs, struct mbuf *m)
1693 {
1694 	struct port_info *pi = qs->port;
1695 	struct sge_txq *txq = &qs->txq[TXQ_ETH];
1696 	struct buf_ring *br = txq->txq_mr;
1697 	int error, avail;
1698 
1699 	avail = txq->size - txq->in_use;
1700 	TXQ_LOCK_ASSERT(qs);
1701 
1702 	/*
1703 	 * We can only do a direct transmit if the following are true:
1704 	 * - we aren't coalescing (ring < 3/4 full)
1705 	 * - the link is up -- checked in caller
1706 	 * - there are no packets enqueued already
1707 	 * - there is space in hardware transmit queue
1708 	 */
1709 	if (check_pkt_coalesce(qs) == 0 &&
1710 	    !TXQ_RING_NEEDS_ENQUEUE(qs) && avail > TX_MAX_DESC) {
1711 		if (t3_encap(qs, &m)) {
1712 			if (m != NULL &&
1713 			    (error = drbr_enqueue(ifp, br, m)) != 0)
1714 				return (error);
1715 		} else {
1716 			if (txq->db_pending)
1717 				check_ring_tx_db(pi->adapter, txq, 1);
1718 
1719 			/*
1720 			 * We've bypassed the buf ring so we need to update
1721 			 * the stats directly
1722 			 */
1723 			txq->txq_direct_packets++;
1724 			txq->txq_direct_bytes += m->m_pkthdr.len;
1725 		}
1726 	} else if ((error = drbr_enqueue(ifp, br, m)) != 0)
1727 		return (error);
1728 
1729 	reclaim_completed_tx(qs, cxgb_tx_reclaim_threshold, TXQ_ETH);
1730 	if (!TXQ_RING_EMPTY(qs) && pi->link_config.link_ok &&
1731 	    (!check_pkt_coalesce(qs) || (drbr_inuse(ifp, br) >= 7)))
1732 		cxgb_start_locked(qs);
1733 	else if (!TXQ_RING_EMPTY(qs) && !callout_pending(&txq->txq_timer))
1734 		callout_reset_on(&txq->txq_timer, 1, cxgb_tx_timeout,
1735 		    qs, txq->txq_timer.c_cpu);
1736 	return (0);
1737 }
1738 
1739 int
1740 cxgb_transmit(if_t ifp, struct mbuf *m)
1741 {
1742 	struct sge_qset *qs;
1743 	struct port_info *pi = if_getsoftc(ifp);
1744 	int error, qidx = pi->first_qset;
1745 
1746 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0
1747 	    ||(!pi->link_config.link_ok)) {
1748 		m_freem(m);
1749 		return (0);
1750 	}
1751 
1752 	/* check if flowid is set */
1753 	if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
1754 		qidx = (m->m_pkthdr.flowid % pi->nqsets) + pi->first_qset;
1755 
1756 	qs = &pi->adapter->sge.qs[qidx];
1757 
1758 	if (TXQ_TRYLOCK(qs)) {
1759 		/* XXX running */
1760 		error = cxgb_transmit_locked(ifp, qs, m);
1761 		TXQ_UNLOCK(qs);
1762 	} else
1763 		error = drbr_enqueue(ifp, qs->txq[TXQ_ETH].txq_mr, m);
1764 	return (error);
1765 }
1766 
1767 void
1768 cxgb_qflush(if_t ifp)
1769 {
1770 	/*
1771 	 * flush any enqueued mbufs in the buf_rings
1772 	 * and in the transmit queues
1773 	 * no-op for now
1774 	 */
1775 	return;
1776 }
1777 
1778 /**
1779  *	write_imm - write a packet into a Tx descriptor as immediate data
1780  *	@d: the Tx descriptor to write
1781  *	@m: the packet
1782  *	@len: the length of packet data to write as immediate data
1783  *	@gen: the generation bit value to write
1784  *
1785  *	Writes a packet as immediate data into a Tx descriptor.  The packet
1786  *	contains a work request at its beginning.  We must write the packet
1787  *	carefully so the SGE doesn't read accidentally before it's written in
1788  *	its entirety.
1789  */
1790 static __inline void
1791 write_imm(struct tx_desc *d, caddr_t src,
1792 	  unsigned int len, unsigned int gen)
1793 {
1794 	struct work_request_hdr *from = (struct work_request_hdr *)src;
1795 	struct work_request_hdr *to = (struct work_request_hdr *)d;
1796 	uint32_t wr_hi, wr_lo;
1797 
1798 	KASSERT(len <= WR_LEN && len >= sizeof(*from),
1799 	    ("%s: invalid len %d", __func__, len));
1800 
1801 	memcpy(&to[1], &from[1], len - sizeof(*from));
1802 	wr_hi = from->wrh_hi | htonl(F_WR_SOP | F_WR_EOP |
1803 	    V_WR_BCNTLFLT(len & 7));
1804 	wr_lo = from->wrh_lo | htonl(V_WR_GEN(gen) | V_WR_LEN((len + 7) / 8));
1805 	set_wr_hdr(to, wr_hi, wr_lo);
1806 	wmb();
1807 	wr_gen2(d, gen);
1808 }
1809 
1810 /**
1811  *	check_desc_avail - check descriptor availability on a send queue
1812  *	@adap: the adapter
1813  *	@q: the TX queue
1814  *	@m: the packet needing the descriptors
1815  *	@ndesc: the number of Tx descriptors needed
1816  *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1817  *
1818  *	Checks if the requested number of Tx descriptors is available on an
1819  *	SGE send queue.  If the queue is already suspended or not enough
1820  *	descriptors are available the packet is queued for later transmission.
1821  *	Must be called with the Tx queue locked.
1822  *
1823  *	Returns 0 if enough descriptors are available, 1 if there aren't
1824  *	enough descriptors and the packet has been queued, and 2 if the caller
1825  *	needs to retry because there weren't enough descriptors at the
1826  *	beginning of the call but some freed up in the mean time.
1827  */
1828 static __inline int
1829 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1830 		 struct mbuf *m, unsigned int ndesc,
1831 		 unsigned int qid)
1832 {
1833 	/*
1834 	 * XXX We currently only use this for checking the control queue
1835 	 * the control queue is only used for binding qsets which happens
1836 	 * at init time so we are guaranteed enough descriptors
1837 	 */
1838 	if (__predict_false(!mbufq_empty(&q->sendq))) {
1839 addq_exit:	(void )mbufq_enqueue(&q->sendq, m);
1840 		return 1;
1841 	}
1842 	if (__predict_false(q->size - q->in_use < ndesc)) {
1843 
1844 		struct sge_qset *qs = txq_to_qset(q, qid);
1845 
1846 		setbit(&qs->txq_stopped, qid);
1847 		if (should_restart_tx(q) &&
1848 		    test_and_clear_bit(qid, &qs->txq_stopped))
1849 			return 2;
1850 
1851 		q->stops++;
1852 		goto addq_exit;
1853 	}
1854 	return 0;
1855 }
1856 
1857 
1858 /**
1859  *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1860  *	@q: the SGE control Tx queue
1861  *
1862  *	This is a variant of reclaim_completed_tx() that is used for Tx queues
1863  *	that send only immediate data (presently just the control queues) and
1864  *	thus do not have any mbufs
1865  */
1866 static __inline void
1867 reclaim_completed_tx_imm(struct sge_txq *q)
1868 {
1869 	unsigned int reclaim = q->processed - q->cleaned;
1870 
1871 	q->in_use -= reclaim;
1872 	q->cleaned += reclaim;
1873 }
1874 
1875 /**
1876  *	ctrl_xmit - send a packet through an SGE control Tx queue
1877  *	@adap: the adapter
1878  *	@q: the control queue
1879  *	@m: the packet
1880  *
1881  *	Send a packet through an SGE control Tx queue.  Packets sent through
1882  *	a control queue must fit entirely as immediate data in a single Tx
1883  *	descriptor and have no page fragments.
1884  */
1885 static int
1886 ctrl_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
1887 {
1888 	int ret;
1889 	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1890 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1891 
1892 	KASSERT(m->m_len <= WR_LEN, ("%s: bad tx data", __func__));
1893 
1894 	wrp->wrh_hi |= htonl(F_WR_SOP | F_WR_EOP);
1895 	wrp->wrh_lo = htonl(V_WR_TID(q->token));
1896 
1897 	TXQ_LOCK(qs);
1898 again:	reclaim_completed_tx_imm(q);
1899 
1900 	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1901 	if (__predict_false(ret)) {
1902 		if (ret == 1) {
1903 			TXQ_UNLOCK(qs);
1904 			return (ENOSPC);
1905 		}
1906 		goto again;
1907 	}
1908 	write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
1909 
1910 	q->in_use++;
1911 	if (++q->pidx >= q->size) {
1912 		q->pidx = 0;
1913 		q->gen ^= 1;
1914 	}
1915 	TXQ_UNLOCK(qs);
1916 	wmb();
1917 	t3_write_reg(adap, A_SG_KDOORBELL,
1918 	    F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1919 
1920 	m_free(m);
1921 	return (0);
1922 }
1923 
1924 
1925 /**
1926  *	restart_ctrlq - restart a suspended control queue
1927  *	@qs: the queue set cotaining the control queue
1928  *
1929  *	Resumes transmission on a suspended Tx control queue.
1930  */
1931 static void
1932 restart_ctrlq(void *data, int npending)
1933 {
1934 	struct mbuf *m;
1935 	struct sge_qset *qs = (struct sge_qset *)data;
1936 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1937 	adapter_t *adap = qs->port->adapter;
1938 
1939 	TXQ_LOCK(qs);
1940 again:	reclaim_completed_tx_imm(q);
1941 
1942 	while (q->in_use < q->size &&
1943 	       (m = mbufq_dequeue(&q->sendq)) != NULL) {
1944 
1945 		write_imm(&q->desc[q->pidx], m->m_data, m->m_len, q->gen);
1946 		m_free(m);
1947 
1948 		if (++q->pidx >= q->size) {
1949 			q->pidx = 0;
1950 			q->gen ^= 1;
1951 		}
1952 		q->in_use++;
1953 	}
1954 	if (!mbufq_empty(&q->sendq)) {
1955 		setbit(&qs->txq_stopped, TXQ_CTRL);
1956 
1957 		if (should_restart_tx(q) &&
1958 		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1959 			goto again;
1960 		q->stops++;
1961 	}
1962 	TXQ_UNLOCK(qs);
1963 	t3_write_reg(adap, A_SG_KDOORBELL,
1964 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1965 }
1966 
1967 
1968 /*
1969  * Send a management message through control queue 0
1970  */
1971 int
1972 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1973 {
1974 	return ctrl_xmit(adap, &adap->sge.qs[0], m);
1975 }
1976 
1977 /**
1978  *	free_qset - free the resources of an SGE queue set
1979  *	@sc: the controller owning the queue set
1980  *	@q: the queue set
1981  *
1982  *	Release the HW and SW resources associated with an SGE queue set, such
1983  *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
1984  *	queue set must be quiesced prior to calling this.
1985  */
1986 static void
1987 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1988 {
1989 	int i;
1990 
1991 	reclaim_completed_tx(q, 0, TXQ_ETH);
1992 	if (q->txq[TXQ_ETH].txq_mr != NULL)
1993 		buf_ring_free(q->txq[TXQ_ETH].txq_mr, M_DEVBUF);
1994 	if (q->txq[TXQ_ETH].txq_ifq != NULL) {
1995 		ifq_delete(q->txq[TXQ_ETH].txq_ifq);
1996 		free(q->txq[TXQ_ETH].txq_ifq, M_DEVBUF);
1997 	}
1998 
1999 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2000 		if (q->fl[i].desc) {
2001 			mtx_lock_spin(&sc->sge.reg_lock);
2002 			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
2003 			mtx_unlock_spin(&sc->sge.reg_lock);
2004 			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
2005 			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
2006 					q->fl[i].desc_map);
2007 			bus_dma_tag_destroy(q->fl[i].desc_tag);
2008 			bus_dma_tag_destroy(q->fl[i].entry_tag);
2009 		}
2010 		if (q->fl[i].sdesc) {
2011 			free_rx_bufs(sc, &q->fl[i]);
2012 			free(q->fl[i].sdesc, M_DEVBUF);
2013 		}
2014 	}
2015 
2016 	mtx_unlock(&q->lock);
2017 	MTX_DESTROY(&q->lock);
2018 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2019 		if (q->txq[i].desc) {
2020 			mtx_lock_spin(&sc->sge.reg_lock);
2021 			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
2022 			mtx_unlock_spin(&sc->sge.reg_lock);
2023 			bus_dmamap_unload(q->txq[i].desc_tag,
2024 					q->txq[i].desc_map);
2025 			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
2026 					q->txq[i].desc_map);
2027 			bus_dma_tag_destroy(q->txq[i].desc_tag);
2028 			bus_dma_tag_destroy(q->txq[i].entry_tag);
2029 		}
2030 		if (q->txq[i].sdesc) {
2031 			free(q->txq[i].sdesc, M_DEVBUF);
2032 		}
2033 	}
2034 
2035 	if (q->rspq.desc) {
2036 		mtx_lock_spin(&sc->sge.reg_lock);
2037 		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
2038 		mtx_unlock_spin(&sc->sge.reg_lock);
2039 
2040 		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
2041 		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
2042 			        q->rspq.desc_map);
2043 		bus_dma_tag_destroy(q->rspq.desc_tag);
2044 		MTX_DESTROY(&q->rspq.lock);
2045 	}
2046 
2047 #if defined(INET6) || defined(INET)
2048 	tcp_lro_free(&q->lro.ctrl);
2049 #endif
2050 
2051 	bzero(q, sizeof(*q));
2052 }
2053 
2054 /**
2055  *	t3_free_sge_resources - free SGE resources
2056  *	@sc: the adapter softc
2057  *
2058  *	Frees resources used by the SGE queue sets.
2059  */
2060 void
2061 t3_free_sge_resources(adapter_t *sc, int nqsets)
2062 {
2063 	int i;
2064 
2065 	for (i = 0; i < nqsets; ++i) {
2066 		TXQ_LOCK(&sc->sge.qs[i]);
2067 		t3_free_qset(sc, &sc->sge.qs[i]);
2068 	}
2069 }
2070 
2071 /**
2072  *	t3_sge_start - enable SGE
2073  *	@sc: the controller softc
2074  *
2075  *	Enables the SGE for DMAs.  This is the last step in starting packet
2076  *	transfers.
2077  */
2078 void
2079 t3_sge_start(adapter_t *sc)
2080 {
2081 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
2082 }
2083 
2084 /**
2085  *	t3_sge_stop - disable SGE operation
2086  *	@sc: the adapter
2087  *
2088  *	Disables the DMA engine.  This can be called in emeregencies (e.g.,
2089  *	from error interrupts) or from normal process context.  In the latter
2090  *	case it also disables any pending queue restart tasklets.  Note that
2091  *	if it is called in interrupt context it cannot disable the restart
2092  *	tasklets as it cannot wait, however the tasklets will have no effect
2093  *	since the doorbells are disabled and the driver will call this again
2094  *	later from process context, at which time the tasklets will be stopped
2095  *	if they are still running.
2096  */
2097 void
2098 t3_sge_stop(adapter_t *sc)
2099 {
2100 
2101 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
2102 }
2103 
2104 /**
2105  *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
2106  *	@adapter: the adapter
2107  *	@q: the Tx queue to reclaim descriptors from
2108  *	@reclaimable: the number of descriptors to reclaim
2109  *      @m_vec_size: maximum number of buffers to reclaim
2110  *      @desc_reclaimed: returns the number of descriptors reclaimed
2111  *
2112  *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
2113  *	Tx buffers.  Called with the Tx queue lock held.
2114  *
2115  *      Returns number of buffers of reclaimed
2116  */
2117 void
2118 t3_free_tx_desc(struct sge_qset *qs, int reclaimable, int queue)
2119 {
2120 	struct tx_sw_desc *txsd;
2121 	unsigned int cidx, mask;
2122 	struct sge_txq *q = &qs->txq[queue];
2123 
2124 #ifdef T3_TRACE
2125 	T3_TRACE2(sc->tb[q->cntxt_id & 7],
2126 		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
2127 #endif
2128 	cidx = q->cidx;
2129 	mask = q->size - 1;
2130 	txsd = &q->sdesc[cidx];
2131 
2132 	mtx_assert(&qs->lock, MA_OWNED);
2133 	while (reclaimable--) {
2134 		prefetch(q->sdesc[(cidx + 1) & mask].m);
2135 		prefetch(q->sdesc[(cidx + 2) & mask].m);
2136 
2137 		if (txsd->m != NULL) {
2138 			if (txsd->flags & TX_SW_DESC_MAPPED) {
2139 				bus_dmamap_unload(q->entry_tag, txsd->map);
2140 				txsd->flags &= ~TX_SW_DESC_MAPPED;
2141 			}
2142 			m_freem_list(txsd->m);
2143 			txsd->m = NULL;
2144 		} else
2145 			q->txq_skipped++;
2146 
2147 		++txsd;
2148 		if (++cidx == q->size) {
2149 			cidx = 0;
2150 			txsd = q->sdesc;
2151 		}
2152 	}
2153 	q->cidx = cidx;
2154 
2155 }
2156 
2157 /**
2158  *	is_new_response - check if a response is newly written
2159  *	@r: the response descriptor
2160  *	@q: the response queue
2161  *
2162  *	Returns true if a response descriptor contains a yet unprocessed
2163  *	response.
2164  */
2165 static __inline int
2166 is_new_response(const struct rsp_desc *r,
2167     const struct sge_rspq *q)
2168 {
2169 	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
2170 }
2171 
/* GTS indication bits for Tx queues 0 and 1 in a response's flags word. */
#define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
/* All Tx-related control bits: GTS indications plus the credit fields. */
#define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))

/*
 * Interrupt holdoff applied after an mbuf allocation failure, in units of
 * 0.1us.
 */
#define NOMEM_INTR_DELAY 2500
2180 
2181 #ifdef TCP_OFFLOAD
2182 /**
2183  *	write_ofld_wr - write an offload work request
2184  *	@adap: the adapter
2185  *	@m: the packet to send
2186  *	@q: the Tx queue
2187  *	@pidx: index of the first Tx descriptor to write
2188  *	@gen: the generation value to use
2189  *	@ndesc: number of descriptors the packet will occupy
2190  *
2191  *	Write an offload work request to send the supplied packet.  The packet
2192  *	data already carry the work request with most fields populated.
2193  */
2194 static void
2195 write_ofld_wr(adapter_t *adap, struct mbuf *m, struct sge_txq *q,
2196     unsigned int pidx, unsigned int gen, unsigned int ndesc)
2197 {
2198 	unsigned int sgl_flits, flits;
2199 	int i, idx, nsegs, wrlen;
2200 	struct work_request_hdr *from;
2201 	struct sg_ent *sgp, t3sgl[TX_MAX_SEGS / 2 + 1];
2202 	struct tx_desc *d = &q->desc[pidx];
2203 	struct txq_state txqs;
2204 	struct sglist_seg *segs;
2205 	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2206 	struct sglist *sgl;
2207 
2208 	from = (void *)(oh + 1);	/* Start of WR within mbuf */
2209 	wrlen = m->m_len - sizeof(*oh);
2210 
2211 	if (!(oh->flags & F_HDR_SGL)) {
2212 		write_imm(d, (caddr_t)from, wrlen, gen);
2213 
2214 		/*
2215 		 * mbuf with "real" immediate tx data will be enqueue_wr'd by
2216 		 * t3_push_frames and freed in wr_ack.  Others, like those sent
2217 		 * down by close_conn, t3_send_reset, etc. should be freed here.
2218 		 */
2219 		if (!(oh->flags & F_HDR_DF))
2220 			m_free(m);
2221 		return;
2222 	}
2223 
2224 	memcpy(&d->flit[1], &from[1], wrlen - sizeof(*from));
2225 
2226 	sgl = oh->sgl;
2227 	flits = wrlen / 8;
2228 	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : t3sgl;
2229 
2230 	nsegs = sgl->sg_nseg;
2231 	segs = sgl->sg_segs;
2232 	for (idx = 0, i = 0; i < nsegs; i++) {
2233 		KASSERT(segs[i].ss_len, ("%s: 0 len in sgl", __func__));
2234 		if (i && idx == 0)
2235 			++sgp;
2236 		sgp->len[idx] = htobe32(segs[i].ss_len);
2237 		sgp->addr[idx] = htobe64(segs[i].ss_paddr);
2238 		idx ^= 1;
2239 	}
2240 	if (idx) {
2241 		sgp->len[idx] = 0;
2242 		sgp->addr[idx] = 0;
2243 	}
2244 
2245 	sgl_flits = sgl_len(nsegs);
2246 	txqs.gen = gen;
2247 	txqs.pidx = pidx;
2248 	txqs.compl = 0;
2249 
2250 	write_wr_hdr_sgl(ndesc, d, &txqs, q, t3sgl, flits, sgl_flits,
2251 	    from->wrh_hi, from->wrh_lo);
2252 }
2253 
2254 /**
2255  *	ofld_xmit - send a packet through an offload queue
2256  *	@adap: the adapter
2257  *	@q: the Tx offload queue
2258  *	@m: the packet
2259  *
2260  *	Send an offload packet through an SGE offload queue.
2261  */
2262 static int
2263 ofld_xmit(adapter_t *adap, struct sge_qset *qs, struct mbuf *m)
2264 {
2265 	int ret;
2266 	unsigned int ndesc;
2267 	unsigned int pidx, gen;
2268 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
2269 	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2270 
2271 	ndesc = G_HDR_NDESC(oh->flags);
2272 
2273 	TXQ_LOCK(qs);
2274 again:	reclaim_completed_tx(qs, 16, TXQ_OFLD);
2275 	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2276 	if (__predict_false(ret)) {
2277 		if (ret == 1) {
2278 			TXQ_UNLOCK(qs);
2279 			return (EINTR);
2280 		}
2281 		goto again;
2282 	}
2283 
2284 	gen = q->gen;
2285 	q->in_use += ndesc;
2286 	pidx = q->pidx;
2287 	q->pidx += ndesc;
2288 	if (q->pidx >= q->size) {
2289 		q->pidx -= q->size;
2290 		q->gen ^= 1;
2291 	}
2292 
2293 	write_ofld_wr(adap, m, q, pidx, gen, ndesc);
2294 	check_ring_tx_db(adap, q, 1);
2295 	TXQ_UNLOCK(qs);
2296 
2297 	return (0);
2298 }
2299 
2300 /**
2301  *	restart_offloadq - restart a suspended offload queue
2302  *	@qs: the queue set cotaining the offload queue
2303  *
2304  *	Resumes transmission on a suspended Tx offload queue.
2305  */
2306 static void
2307 restart_offloadq(void *data, int npending)
2308 {
2309 	struct mbuf *m;
2310 	struct sge_qset *qs = data;
2311 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
2312 	adapter_t *adap = qs->port->adapter;
2313 
2314 	TXQ_LOCK(qs);
2315 again:
2316 	while ((m = mbufq_first(&q->sendq)) != NULL) {
2317 		unsigned int gen, pidx;
2318 		struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2319 		unsigned int ndesc = G_HDR_NDESC(oh->flags);
2320 
2321 		if (__predict_false(q->size - q->in_use < ndesc)) {
2322 			setbit(&qs->txq_stopped, TXQ_OFLD);
2323 			if (should_restart_tx(q) &&
2324 			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2325 				goto again;
2326 			q->stops++;
2327 			break;
2328 		}
2329 
2330 		gen = q->gen;
2331 		q->in_use += ndesc;
2332 		pidx = q->pidx;
2333 		q->pidx += ndesc;
2334 		if (q->pidx >= q->size) {
2335 			q->pidx -= q->size;
2336 			q->gen ^= 1;
2337 		}
2338 
2339 		(void)mbufq_dequeue(&q->sendq);
2340 		TXQ_UNLOCK(qs);
2341 		write_ofld_wr(adap, m, q, pidx, gen, ndesc);
2342 		TXQ_LOCK(qs);
2343 	}
2344 #if USE_GTS
2345 	set_bit(TXQ_RUNNING, &q->flags);
2346 	set_bit(TXQ_LAST_PKT_DB, &q->flags);
2347 #endif
2348 	TXQ_UNLOCK(qs);
2349 	wmb();
2350 	t3_write_reg(adap, A_SG_KDOORBELL,
2351 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2352 }
2353 
2354 /**
2355  *	t3_offload_tx - send an offload packet
2356  *	@m: the packet
2357  *
2358  *	Sends an offload packet.  We use the packet priority to select the
2359  *	appropriate Tx queue as follows: bit 0 indicates whether the packet
2360  *	should be sent as regular or control, bits 1-3 select the queue set.
2361  */
2362 int
2363 t3_offload_tx(struct adapter *sc, struct mbuf *m)
2364 {
2365 	struct ofld_hdr *oh = mtod(m, struct ofld_hdr *);
2366 	struct sge_qset *qs = &sc->sge.qs[G_HDR_QSET(oh->flags)];
2367 
2368 	if (oh->flags & F_HDR_CTRL) {
2369 		m_adj(m, sizeof (*oh));	/* trim ofld_hdr off */
2370 		return (ctrl_xmit(sc, qs, m));
2371 	} else
2372 		return (ofld_xmit(sc, qs, m));
2373 }
2374 #endif
2375 
2376 static void
2377 restart_tx(struct sge_qset *qs)
2378 {
2379 	struct adapter *sc = qs->port->adapter;
2380 
2381 	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2382 	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2383 	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2384 		qs->txq[TXQ_OFLD].restarts++;
2385 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2386 	}
2387 
2388 	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2389 	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2390 	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2391 		qs->txq[TXQ_CTRL].restarts++;
2392 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2393 	}
2394 }
2395 
2396 /**
2397  *	t3_sge_alloc_qset - initialize an SGE queue set
2398  *	@sc: the controller softc
2399  *	@id: the queue set id
2400  *	@nports: how many Ethernet ports will be using this queue set
2401  *	@irq_vec_idx: the IRQ vector index for response queue interrupts
2402  *	@p: configuration parameters for this queue set
2403  *	@ntxq: number of Tx queues for the queue set
2404  *	@pi: port info for queue set
2405  *
2406  *	Allocate resources and initialize an SGE queue set.  A queue set
2407  *	comprises a response queue, two Rx free-buffer queues, and up to 3
2408  *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
2409  *	queue, offload queue, and control queue.
2410  */
2411 int
2412 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2413 		  const struct qset_params *p, int ntxq, struct port_info *pi)
2414 {
2415 	struct sge_qset *q = &sc->sge.qs[id];
2416 	int i, ret = 0;
2417 
2418 	MTX_INIT(&q->lock, q->namebuf, NULL, MTX_DEF);
2419 	q->port = pi;
2420 	q->adap = sc;
2421 
2422 	q->txq[TXQ_ETH].txq_mr = buf_ring_alloc(cxgb_txq_buf_ring_size,
2423 	    M_DEVBUF, M_WAITOK, &q->lock);
2424 	if ((q->txq[TXQ_ETH].txq_ifq = malloc(sizeof(struct ifaltq), M_DEVBUF,
2425 	    M_NOWAIT | M_ZERO)) == NULL) {
2426 		device_printf(sc->dev, "failed to allocate ifq\n");
2427 		goto err;
2428 	}
2429 	ifq_init(q->txq[TXQ_ETH].txq_ifq, pi->ifp);
2430 	callout_init(&q->txq[TXQ_ETH].txq_timer, 1);
2431 	callout_init(&q->txq[TXQ_ETH].txq_watchdog, 1);
2432 	q->txq[TXQ_ETH].txq_timer.c_cpu = id % mp_ncpus;
2433 	q->txq[TXQ_ETH].txq_watchdog.c_cpu = id % mp_ncpus;
2434 
2435 	init_qset_cntxt(q, id);
2436 	q->idx = id;
2437 	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2438 		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2439 		    &q->fl[0].desc, &q->fl[0].sdesc,
2440 		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
2441 		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2442 		printf("error %d from alloc ring fl0\n", ret);
2443 		goto err;
2444 	}
2445 
2446 	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2447 		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2448 		    &q->fl[1].desc, &q->fl[1].sdesc,
2449 		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
2450 		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2451 		printf("error %d from alloc ring fl1\n", ret);
2452 		goto err;
2453 	}
2454 
2455 	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2456 		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
2457 		    &q->rspq.desc_tag, &q->rspq.desc_map,
2458 		    NULL, NULL)) != 0) {
2459 		printf("error %d from alloc ring rspq\n", ret);
2460 		goto err;
2461 	}
2462 
2463 	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2464 	    device_get_unit(sc->dev), irq_vec_idx);
2465 	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2466 
2467 	for (i = 0; i < ntxq; ++i) {
2468 		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2469 
2470 		if ((ret = alloc_ring(sc, p->txq_size[i],
2471 			    sizeof(struct tx_desc), sz,
2472 			    &q->txq[i].phys_addr, &q->txq[i].desc,
2473 			    &q->txq[i].sdesc, &q->txq[i].desc_tag,
2474 			    &q->txq[i].desc_map,
2475 			    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2476 			printf("error %d from alloc ring tx %i\n", ret, i);
2477 			goto err;
2478 		}
2479 		mbufq_init(&q->txq[i].sendq, INT_MAX);
2480 		q->txq[i].gen = 1;
2481 		q->txq[i].size = p->txq_size[i];
2482 	}
2483 
2484 #ifdef TCP_OFFLOAD
2485 	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2486 #endif
2487 	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2488 	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2489 	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, q);
2490 
2491 	q->fl[0].gen = q->fl[1].gen = 1;
2492 	q->fl[0].size = p->fl_size;
2493 	q->fl[1].size = p->jumbo_size;
2494 
2495 	q->rspq.gen = 1;
2496 	q->rspq.cidx = 0;
2497 	q->rspq.size = p->rspq_size;
2498 
2499 	q->txq[TXQ_ETH].stop_thres = nports *
2500 	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2501 
2502 	q->fl[0].buf_size = MCLBYTES;
2503 	q->fl[0].zone = zone_pack;
2504 	q->fl[0].type = EXT_PACKET;
2505 
2506 	if (p->jumbo_buf_size ==  MJUM16BYTES) {
2507 		q->fl[1].zone = zone_jumbo16;
2508 		q->fl[1].type = EXT_JUMBO16;
2509 	} else if (p->jumbo_buf_size ==  MJUM9BYTES) {
2510 		q->fl[1].zone = zone_jumbo9;
2511 		q->fl[1].type = EXT_JUMBO9;
2512 	} else if (p->jumbo_buf_size ==  MJUMPAGESIZE) {
2513 		q->fl[1].zone = zone_jumbop;
2514 		q->fl[1].type = EXT_JUMBOP;
2515 	} else {
2516 		KASSERT(0, ("can't deal with jumbo_buf_size %d.", p->jumbo_buf_size));
2517 		ret = EDOOFUS;
2518 		goto err;
2519 	}
2520 	q->fl[1].buf_size = p->jumbo_buf_size;
2521 
2522 	/* Allocate and setup the lro_ctrl structure */
2523 	q->lro.enabled = !!(if_getcapenable(pi->ifp) & IFCAP_LRO);
2524 #if defined(INET6) || defined(INET)
2525 	ret = tcp_lro_init(&q->lro.ctrl);
2526 	if (ret) {
2527 		printf("error %d from tcp_lro_init\n", ret);
2528 		goto err;
2529 	}
2530 #endif
2531 	q->lro.ctrl.ifp = pi->ifp;
2532 
2533 	mtx_lock_spin(&sc->sge.reg_lock);
2534 	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2535 				   q->rspq.phys_addr, q->rspq.size,
2536 				   q->fl[0].buf_size, 1, 0);
2537 	if (ret) {
2538 		printf("error %d from t3_sge_init_rspcntxt\n", ret);
2539 		goto err_unlock;
2540 	}
2541 
2542 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2543 		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2544 					  q->fl[i].phys_addr, q->fl[i].size,
2545 					  q->fl[i].buf_size, p->cong_thres, 1,
2546 					  0);
2547 		if (ret) {
2548 			printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2549 			goto err_unlock;
2550 		}
2551 	}
2552 
2553 	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2554 				 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2555 				 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2556 				 1, 0);
2557 	if (ret) {
2558 		printf("error %d from t3_sge_init_ecntxt\n", ret);
2559 		goto err_unlock;
2560 	}
2561 
2562 	if (ntxq > 1) {
2563 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2564 					 USE_GTS, SGE_CNTXT_OFLD, id,
2565 					 q->txq[TXQ_OFLD].phys_addr,
2566 					 q->txq[TXQ_OFLD].size, 0, 1, 0);
2567 		if (ret) {
2568 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2569 			goto err_unlock;
2570 		}
2571 	}
2572 
2573 	if (ntxq > 2) {
2574 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2575 					 SGE_CNTXT_CTRL, id,
2576 					 q->txq[TXQ_CTRL].phys_addr,
2577 					 q->txq[TXQ_CTRL].size,
2578 					 q->txq[TXQ_CTRL].token, 1, 0);
2579 		if (ret) {
2580 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2581 			goto err_unlock;
2582 		}
2583 	}
2584 
2585 	mtx_unlock_spin(&sc->sge.reg_lock);
2586 	t3_update_qset_coalesce(q, p);
2587 
2588 	refill_fl(sc, &q->fl[0], q->fl[0].size);
2589 	refill_fl(sc, &q->fl[1], q->fl[1].size);
2590 	refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2591 
2592 	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2593 		     V_NEWTIMER(q->rspq.holdoff_tmr));
2594 
2595 	return (0);
2596 
2597 err_unlock:
2598 	mtx_unlock_spin(&sc->sge.reg_lock);
2599 err:
2600 	TXQ_LOCK(q);
2601 	t3_free_qset(sc, q);
2602 
2603 	return (ret);
2604 }
2605 
2606 /*
2607  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2608  * ethernet data.  Hardware assistance with various checksums and any vlan tag
2609  * will also be taken into account here.
2610  */
2611 void
2612 t3_rx_eth(struct adapter *adap, struct mbuf *m, int ethpad)
2613 {
2614 	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2615 	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2616 	if_t ifp = pi->ifp;
2617 
2618 	if (cpl->vlan_valid) {
2619 		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2620 		m->m_flags |= M_VLANTAG;
2621 	}
2622 
2623 	m->m_pkthdr.rcvif = ifp;
2624 	/*
2625 	 * adjust after conversion to mbuf chain
2626 	 */
2627 	m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2628 	m->m_len -= (sizeof(*cpl) + ethpad);
2629 	m->m_data += (sizeof(*cpl) + ethpad);
2630 
2631 	if (!cpl->fragment && cpl->csum_valid && cpl->csum == 0xffff) {
2632 		struct ether_header *eh = mtod(m, void *);
2633 		uint16_t eh_type;
2634 
2635 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2636 			struct ether_vlan_header *evh = mtod(m, void *);
2637 
2638 			eh_type = evh->evl_proto;
2639 		} else
2640 			eh_type = eh->ether_type;
2641 
2642 		if (if_getcapenable(ifp) & IFCAP_RXCSUM &&
2643 		    eh_type == htons(ETHERTYPE_IP)) {
2644 			m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED |
2645 			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2646 			m->m_pkthdr.csum_data = 0xffff;
2647 		} else if (if_getcapenable(ifp) & IFCAP_RXCSUM_IPV6 &&
2648 		    eh_type == htons(ETHERTYPE_IPV6)) {
2649 			m->m_pkthdr.csum_flags = (CSUM_DATA_VALID_IPV6 |
2650 			    CSUM_PSEUDO_HDR);
2651 			m->m_pkthdr.csum_data = 0xffff;
2652 		}
2653 	}
2654 }
2655 
2656 /**
2657  *	get_packet - return the next ingress packet buffer from a free list
2658  *	@adap: the adapter that received the packet
2659  *	@drop_thres: # of remaining buffers before we start dropping packets
2660  *	@qs: the qset that the SGE free list holding the packet belongs to
2661  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2662  *      @r: response descriptor
2663  *
2664  *	Get the next packet from a free list and complete setup of the
2665  *	sk_buff.  If the packet is small we make a copy and recycle the
2666  *	original buffer, otherwise we use the original buffer itself.  If a
2667  *	positive drop threshold is supplied packets are dropped and their
2668  *	buffers recycled if (a) the number of remaining buffers is under the
2669  *	threshold and the packet is too big to copy, or (b) the packet should
2670  *	be copied but there is no memory for the copy.
2671  */
2672 static int
2673 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2674     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2675 {
2676 
2677 	unsigned int len_cq =  ntohl(r->len_cq);
2678 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2679 	int mask, cidx = fl->cidx;
2680 	struct rx_sw_desc *sd = &fl->sdesc[cidx];
2681 	uint32_t len = G_RSPD_LEN(len_cq);
2682 	uint32_t flags = M_EXT;
2683 	uint8_t sopeop = G_RSPD_SOP_EOP(ntohl(r->flags));
2684 	caddr_t cl;
2685 	struct mbuf *m;
2686 	int ret = 0;
2687 
2688 	mask = fl->size - 1;
2689 	prefetch(fl->sdesc[(cidx + 1) & mask].m);
2690 	prefetch(fl->sdesc[(cidx + 2) & mask].m);
2691 	prefetch(fl->sdesc[(cidx + 1) & mask].rxsd_cl);
2692 	prefetch(fl->sdesc[(cidx + 2) & mask].rxsd_cl);
2693 
2694 	fl->credits--;
2695 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2696 
2697 	if (recycle_enable && len <= SGE_RX_COPY_THRES &&
2698 	    sopeop == RSPQ_SOP_EOP) {
2699 		if ((m = m_gethdr(M_NOWAIT, MT_DATA)) == NULL)
2700 			goto skip_recycle;
2701 		cl = mtod(m, void *);
2702 		memcpy(cl, sd->rxsd_cl, len);
2703 		recycle_rx_buf(adap, fl, fl->cidx);
2704 		m->m_pkthdr.len = m->m_len = len;
2705 		m->m_flags = 0;
2706 		mh->mh_head = mh->mh_tail = m;
2707 		ret = 1;
2708 		goto done;
2709 	} else {
2710 	skip_recycle:
2711 		bus_dmamap_unload(fl->entry_tag, sd->map);
2712 		cl = sd->rxsd_cl;
2713 		m = sd->m;
2714 
2715 		if ((sopeop == RSPQ_SOP_EOP) ||
2716 		    (sopeop == RSPQ_SOP))
2717 			flags |= M_PKTHDR;
2718 		m_init(m, M_NOWAIT, MT_DATA, flags);
2719 		if (fl->zone == zone_pack) {
2720 			/*
2721 			 * restore clobbered data pointer
2722 			 */
2723 			m->m_data = m->m_ext.ext_buf;
2724 		} else {
2725 			m_cljset(m, cl, fl->type);
2726 		}
2727 		m->m_len = len;
2728 	}
2729 	switch(sopeop) {
2730 	case RSPQ_SOP_EOP:
2731 		ret = 1;
2732 		/* FALLTHROUGH */
2733 	case RSPQ_SOP:
2734 		mh->mh_head = mh->mh_tail = m;
2735 		m->m_pkthdr.len = len;
2736 		break;
2737 	case RSPQ_EOP:
2738 		ret = 1;
2739 		/* FALLTHROUGH */
2740 	case RSPQ_NSOP_NEOP:
2741 		if (mh->mh_tail == NULL) {
2742 			log(LOG_ERR, "discarding intermediate descriptor entry\n");
2743 			m_freem(m);
2744 			m = NULL;
2745 			break;
2746 		}
2747 		mh->mh_tail->m_next = m;
2748 		mh->mh_tail = m;
2749 		mh->mh_head->m_pkthdr.len += len;
2750 		break;
2751 	}
2752 	if (cxgb_debug && m != NULL)
2753 		printf("len=%d pktlen=%d\n", m->m_len, m->m_pkthdr.len);
2754 done:
2755 	if (++fl->cidx == fl->size)
2756 		fl->cidx = 0;
2757 
2758 	return (ret);
2759 }
2760 
2761 /**
2762  *	handle_rsp_cntrl_info - handles control information in a response
2763  *	@qs: the queue set corresponding to the response
2764  *	@flags: the response control flags
2765  *
2766  *	Handles the control information of an SGE response, such as GTS
2767  *	indications and completion credits for the queue set's Tx queues.
2768  *	HW coalesces credits, we don't do any extra SW coalescing.
2769  */
2770 static __inline void
2771 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2772 {
2773 	unsigned int credits;
2774 
2775 #if USE_GTS
2776 	if (flags & F_RSPD_TXQ0_GTS)
2777 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2778 #endif
2779 	credits = G_RSPD_TXQ0_CR(flags);
2780 	if (credits)
2781 		qs->txq[TXQ_ETH].processed += credits;
2782 
2783 	credits = G_RSPD_TXQ2_CR(flags);
2784 	if (credits)
2785 		qs->txq[TXQ_CTRL].processed += credits;
2786 
2787 # if USE_GTS
2788 	if (flags & F_RSPD_TXQ1_GTS)
2789 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2790 # endif
2791 	credits = G_RSPD_TXQ1_CR(flags);
2792 	if (credits)
2793 		qs->txq[TXQ_OFLD].processed += credits;
2794 
2795 }
2796 
2797 static void
2798 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2799     unsigned int sleeping)
2800 {
2801 	;
2802 }
2803 
2804 /**
2805  *	process_responses - process responses from an SGE response queue
2806  *	@adap: the adapter
2807  *	@qs: the queue set to which the response queue belongs
2808  *	@budget: how many responses can be processed in this round
2809  *
2810  *	Process responses from an SGE response queue up to the supplied budget.
2811  *	Responses include received packets as well as credits and other events
2812  *	for the queues that belong to the response queue's queue set.
2813  *	A negative budget is effectively unlimited.
2814  *
2815  *	Additionally choose the interrupt holdoff time for the next interrupt
2816  *	on this queue.  If the system is under memory shortage use a fairly
2817  *	long delay to help recovery.
2818  */
2819 static int
2820 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2821 {
2822 	struct sge_rspq *rspq = &qs->rspq;
2823 	struct rsp_desc *r = &rspq->desc[rspq->cidx];
2824 	int budget_left = budget;
2825 	unsigned int sleeping = 0;
2826 #if defined(INET6) || defined(INET)
2827 	int lro_enabled = qs->lro.enabled;
2828 	int skip_lro;
2829 	struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2830 #endif
2831 	struct t3_mbuf_hdr *mh = &rspq->rspq_mh;
2832 #ifdef DEBUG
2833 	static int last_holdoff = 0;
2834 	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2835 		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2836 		last_holdoff = rspq->holdoff_tmr;
2837 	}
2838 #endif
2839 	rspq->next_holdoff = rspq->holdoff_tmr;
2840 
2841 	while (__predict_true(budget_left && is_new_response(r, rspq))) {
2842 		int eth, eop = 0, ethpad = 0;
2843 		uint32_t flags = ntohl(r->flags);
2844 		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2845 		uint8_t opcode = r->rss_hdr.opcode;
2846 
2847 		eth = (opcode == CPL_RX_PKT);
2848 
2849 		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2850 			struct mbuf *m;
2851 
2852 			if (cxgb_debug)
2853 				printf("async notification\n");
2854 
2855 			if (mh->mh_head == NULL) {
2856 				mh->mh_head = m_gethdr(M_NOWAIT, MT_DATA);
2857 				m = mh->mh_head;
2858 			} else {
2859 				m = m_gethdr(M_NOWAIT, MT_DATA);
2860 			}
2861 			if (m == NULL)
2862 				goto no_mem;
2863 
2864                         memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2865 			m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2866                         *mtod(m, uint8_t *) = CPL_ASYNC_NOTIF;
2867 			opcode = CPL_ASYNC_NOTIF;
2868 			eop = 1;
2869                         rspq->async_notif++;
2870 			goto skip;
2871 		} else if  (flags & F_RSPD_IMM_DATA_VALID) {
2872 			struct mbuf *m = m_gethdr(M_NOWAIT, MT_DATA);
2873 
2874 			if (m == NULL) {
2875 		no_mem:
2876 				rspq->next_holdoff = NOMEM_INTR_DELAY;
2877 				budget_left--;
2878 				break;
2879 			}
2880 			if (mh->mh_head == NULL)
2881 				mh->mh_head = m;
2882                         else
2883 				mh->mh_tail->m_next = m;
2884 			mh->mh_tail = m;
2885 
2886 			get_imm_packet(adap, r, m);
2887 			mh->mh_head->m_pkthdr.len += m->m_len;
2888 			eop = 1;
2889 			rspq->imm_data++;
2890 		} else if (r->len_cq) {
2891 			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2892 
2893 			eop = get_packet(adap, drop_thresh, qs, mh, r);
2894 			if (eop) {
2895 				if (r->rss_hdr.hash_type && !adap->timestamp) {
2896 					M_HASHTYPE_SET(mh->mh_head,
2897 					    M_HASHTYPE_OPAQUE_HASH);
2898 					mh->mh_head->m_pkthdr.flowid = rss_hash;
2899 				}
2900 			}
2901 
2902 			ethpad = 2;
2903 		} else {
2904 			rspq->pure_rsps++;
2905 		}
2906 	skip:
2907 		if (flags & RSPD_CTRL_MASK) {
2908 			sleeping |= flags & RSPD_GTS_MASK;
2909 			handle_rsp_cntrl_info(qs, flags);
2910 		}
2911 
2912 		if (!eth && eop) {
2913 			rspq->offload_pkts++;
2914 #ifdef TCP_OFFLOAD
2915 			adap->cpl_handler[opcode](qs, r, mh->mh_head);
2916 #else
2917 			m_freem(mh->mh_head);
2918 #endif
2919 			mh->mh_head = NULL;
2920 		} else if (eth && eop) {
2921 			struct mbuf *m = mh->mh_head;
2922 
2923 			t3_rx_eth(adap, m, ethpad);
2924 
2925 			/*
2926 			 * The T304 sends incoming packets on any qset.  If LRO
2927 			 * is also enabled, we could end up sending packet up
2928 			 * lro_ctrl->ifp's input.  That is incorrect.
2929 			 *
2930 			 * The mbuf's rcvif was derived from the cpl header and
2931 			 * is accurate.  Skip LRO and just use that.
2932 			 */
2933 #if defined(INET6) || defined(INET)
2934 			skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
2935 
2936 			if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro
2937 			    && (tcp_lro_rx(lro_ctrl, m, 0) == 0)
2938 			    ) {
2939 				/* successfully queue'd for LRO */
2940 			} else
2941 #endif
2942 			{
2943 				/*
2944 				 * LRO not enabled, packet unsuitable for LRO,
2945 				 * or unable to queue.  Pass it up right now in
2946 				 * either case.
2947 				 */
2948 				if_t ifp = m->m_pkthdr.rcvif;
2949 				if_input(ifp, m);
2950 			}
2951 			mh->mh_head = NULL;
2952 
2953 		}
2954 
2955 		r++;
2956 		if (__predict_false(++rspq->cidx == rspq->size)) {
2957 			rspq->cidx = 0;
2958 			rspq->gen ^= 1;
2959 			r = rspq->desc;
2960 		}
2961 
2962 		if (++rspq->credits >= 64) {
2963 			refill_rspq(adap, rspq, rspq->credits);
2964 			rspq->credits = 0;
2965 		}
2966 		__refill_fl_lt(adap, &qs->fl[0], 32);
2967 		__refill_fl_lt(adap, &qs->fl[1], 32);
2968 		--budget_left;
2969 	}
2970 
2971 #if defined(INET6) || defined(INET)
2972 	/* Flush LRO */
2973 	tcp_lro_flush_all(lro_ctrl);
2974 #endif
2975 
2976 	if (sleeping)
2977 		check_ring_db(adap, qs, sleeping);
2978 
2979 	mb();  /* commit Tx queue processed updates */
2980 	if (__predict_false(qs->txq_stopped > 1))
2981 		restart_tx(qs);
2982 
2983 	__refill_fl_lt(adap, &qs->fl[0], 512);
2984 	__refill_fl_lt(adap, &qs->fl[1], 512);
2985 	budget -= budget_left;
2986 	return (budget);
2987 }
2988 
2989 /*
2990  * A helper function that processes responses and issues GTS.
2991  */
2992 static __inline int
2993 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2994 {
2995 	int work;
2996 	static int last_holdoff = 0;
2997 
2998 	work = process_responses(adap, rspq_to_qset(rq), -1);
2999 
3000 	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3001 		printf("next_holdoff=%d\n", rq->next_holdoff);
3002 		last_holdoff = rq->next_holdoff;
3003 	}
3004 	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3005 	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3006 
3007 	return (work);
3008 }
3009 
3010 #ifdef DEBUGNET
3011 int
3012 cxgb_debugnet_poll_rx(adapter_t *adap, struct sge_qset *qs)
3013 {
3014 
3015 	return (process_responses_gts(adap, &qs->rspq));
3016 }
3017 #endif
3018 
3019 /*
3020  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3021  * Handles data events from SGE response queues as well as error and other
3022  * async events as they all use the same interrupt pin.  We use one SGE
3023  * response queue per port in this mode and protect all response queues with
3024  * queue 0's lock.
3025  */
3026 void
3027 t3b_intr(void *data)
3028 {
3029 	uint32_t i, map;
3030 	adapter_t *adap = data;
3031 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3032 
3033 	t3_write_reg(adap, A_PL_CLI, 0);
3034 	map = t3_read_reg(adap, A_SG_DATA_INTR);
3035 
3036 	if (!map)
3037 		return;
3038 
3039 	if (__predict_false(map & F_ERRINTR)) {
3040 		t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3041 		(void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3042 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3043 	}
3044 
3045 	mtx_lock(&q0->lock);
3046 	for_each_port(adap, i)
3047 	    if (map & (1 << i))
3048 			process_responses_gts(adap, &adap->sge.qs[i].rspq);
3049 	mtx_unlock(&q0->lock);
3050 }
3051 
3052 /*
3053  * The MSI interrupt handler.  This needs to handle data events from SGE
3054  * response queues as well as error and other async events as they all use
3055  * the same MSI vector.  We use one SGE response queue per port in this mode
3056  * and protect all response queues with queue 0's lock.
3057  */
3058 void
3059 t3_intr_msi(void *data)
3060 {
3061 	adapter_t *adap = data;
3062 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3063 	int i, new_packets = 0;
3064 
3065 	mtx_lock(&q0->lock);
3066 
3067 	for_each_port(adap, i)
3068 	    if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3069 		    new_packets = 1;
3070 	mtx_unlock(&q0->lock);
3071 	if (new_packets == 0) {
3072 		t3_write_reg(adap, A_PL_INT_ENABLE0, 0);
3073 		(void) t3_read_reg(adap, A_PL_INT_ENABLE0);
3074 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3075 	}
3076 }
3077 
3078 void
3079 t3_intr_msix(void *data)
3080 {
3081 	struct sge_qset *qs = data;
3082 	adapter_t *adap = qs->port->adapter;
3083 	struct sge_rspq *rspq = &qs->rspq;
3084 
3085 	if (process_responses_gts(adap, rspq) == 0)
3086 		rspq->unhandled_irqs++;
3087 }
3088 
3089 #define QDUMP_SBUF_SIZE		32 * 400
3090 static int
3091 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3092 {
3093 	struct sge_rspq *rspq;
3094 	struct sge_qset *qs;
3095 	int i, err, dump_end, idx;
3096 	struct sbuf *sb;
3097 	struct rsp_desc *rspd;
3098 	uint32_t data[4];
3099 
3100 	rspq = arg1;
3101 	qs = rspq_to_qset(rspq);
3102 	if (rspq->rspq_dump_count == 0)
3103 		return (0);
3104 	if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3105 		log(LOG_WARNING,
3106 		    "dump count is too large %d\n", rspq->rspq_dump_count);
3107 		rspq->rspq_dump_count = 0;
3108 		return (EINVAL);
3109 	}
3110 	if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3111 		log(LOG_WARNING,
3112 		    "dump start of %d is greater than queue size\n",
3113 		    rspq->rspq_dump_start);
3114 		rspq->rspq_dump_start = 0;
3115 		return (EINVAL);
3116 	}
3117 	err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3118 	if (err)
3119 		return (err);
3120 	err = sysctl_wire_old_buffer(req, 0);
3121 	if (err)
3122 		return (err);
3123 	sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3124 
3125 	sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3126 	    (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3127 	    ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3128 	sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3129 	    ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3130 
3131 	sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3132 	    (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3133 
3134 	dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3135 	for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3136 		idx = i & (RSPQ_Q_SIZE-1);
3137 
3138 		rspd = &rspq->desc[idx];
3139 		sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3140 		    idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3141 		    rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3142 		sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3143 		    rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3144 		    be32toh(rspd->len_cq), rspd->intr_gen);
3145 	}
3146 
3147 	err = sbuf_finish(sb);
3148 	sbuf_delete(sb);
3149 	return (err);
3150 }
3151 
3152 static int
3153 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3154 {
3155 	struct sge_txq *txq;
3156 	struct sge_qset *qs;
3157 	int i, j, err, dump_end;
3158 	struct sbuf *sb;
3159 	struct tx_desc *txd;
3160 	uint32_t *WR, wr_hi, wr_lo, gen;
3161 	uint32_t data[4];
3162 
3163 	txq = arg1;
3164 	qs = txq_to_qset(txq, TXQ_ETH);
3165 	if (txq->txq_dump_count == 0) {
3166 		return (0);
3167 	}
3168 	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3169 		log(LOG_WARNING,
3170 		    "dump count is too large %d\n", txq->txq_dump_count);
3171 		txq->txq_dump_count = 1;
3172 		return (EINVAL);
3173 	}
3174 	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3175 		log(LOG_WARNING,
3176 		    "dump start of %d is greater than queue size\n",
3177 		    txq->txq_dump_start);
3178 		txq->txq_dump_start = 0;
3179 		return (EINVAL);
3180 	}
3181 	err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3182 	if (err)
3183 		return (err);
3184 	err = sysctl_wire_old_buffer(req, 0);
3185 	if (err)
3186 		return (err);
3187 	sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3188 
3189 	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3190 	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3191 	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3192 	sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
3193 	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3194 	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3195 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3196 	    txq->txq_dump_start,
3197 	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3198 
3199 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3200 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3201 		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3202 		WR = (uint32_t *)txd->flit;
3203 		wr_hi = ntohl(WR[0]);
3204 		wr_lo = ntohl(WR[1]);
3205 		gen = G_WR_GEN(wr_lo);
3206 
3207 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3208 		    wr_hi, wr_lo, gen);
3209 		for (j = 2; j < 30; j += 4)
3210 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3211 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3212 
3213 	}
3214 	err = sbuf_finish(sb);
3215 	sbuf_delete(sb);
3216 	return (err);
3217 }
3218 
3219 static int
3220 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3221 {
3222 	struct sge_txq *txq;
3223 	struct sge_qset *qs;
3224 	int i, j, err, dump_end;
3225 	struct sbuf *sb;
3226 	struct tx_desc *txd;
3227 	uint32_t *WR, wr_hi, wr_lo, gen;
3228 
3229 	txq = arg1;
3230 	qs = txq_to_qset(txq, TXQ_CTRL);
3231 	if (txq->txq_dump_count == 0) {
3232 		return (0);
3233 	}
3234 	if (txq->txq_dump_count > 256) {
3235 		log(LOG_WARNING,
3236 		    "dump count is too large %d\n", txq->txq_dump_count);
3237 		txq->txq_dump_count = 1;
3238 		return (EINVAL);
3239 	}
3240 	if (txq->txq_dump_start > 255) {
3241 		log(LOG_WARNING,
3242 		    "dump start of %d is greater than queue size\n",
3243 		    txq->txq_dump_start);
3244 		txq->txq_dump_start = 0;
3245 		return (EINVAL);
3246 	}
3247 
3248 	err = sysctl_wire_old_buffer(req, 0);
3249 	if (err != 0)
3250 		return (err);
3251 	sb = sbuf_new_for_sysctl(NULL, NULL, QDUMP_SBUF_SIZE, req);
3252 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3253 	    txq->txq_dump_start,
3254 	    (txq->txq_dump_start + txq->txq_dump_count) & 255);
3255 
3256 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3257 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3258 		txd = &txq->desc[i & (255)];
3259 		WR = (uint32_t *)txd->flit;
3260 		wr_hi = ntohl(WR[0]);
3261 		wr_lo = ntohl(WR[1]);
3262 		gen = G_WR_GEN(wr_lo);
3263 
3264 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3265 		    wr_hi, wr_lo, gen);
3266 		for (j = 2; j < 30; j += 4)
3267 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3268 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3269 
3270 	}
3271 	err = sbuf_finish(sb);
3272 	sbuf_delete(sb);
3273 	return (err);
3274 }
3275 
3276 static int
3277 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3278 {
3279 	adapter_t *sc = arg1;
3280 	struct qset_params *qsp = &sc->params.sge.qset[0];
3281 	int coalesce_usecs;
3282 	struct sge_qset *qs;
3283 	int i, j, err, nqsets = 0;
3284 	struct mtx *lock;
3285 
3286 	if ((sc->flags & FULL_INIT_DONE) == 0)
3287 		return (ENXIO);
3288 
3289 	coalesce_usecs = qsp->coalesce_usecs;
3290         err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3291 
3292 	if (err != 0) {
3293 		return (err);
3294 	}
3295 	if (coalesce_usecs == qsp->coalesce_usecs)
3296 		return (0);
3297 
3298 	for (i = 0; i < sc->params.nports; i++)
3299 		for (j = 0; j < sc->port[i].nqsets; j++)
3300 			nqsets++;
3301 
3302 	coalesce_usecs = max(1, coalesce_usecs);
3303 
3304 	for (i = 0; i < nqsets; i++) {
3305 		qs = &sc->sge.qs[i];
3306 		qsp = &sc->params.sge.qset[i];
3307 		qsp->coalesce_usecs = coalesce_usecs;
3308 
3309 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3310 			    &sc->sge.qs[0].rspq.lock;
3311 
3312 		mtx_lock(lock);
3313 		t3_update_qset_coalesce(qs, qsp);
3314 		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3315 		    V_NEWTIMER(qs->rspq.holdoff_tmr));
3316 		mtx_unlock(lock);
3317 	}
3318 
3319 	return (0);
3320 }
3321 
3322 static int
3323 t3_pkt_timestamp(SYSCTL_HANDLER_ARGS)
3324 {
3325 	adapter_t *sc = arg1;
3326 	int rc, timestamp;
3327 
3328 	if ((sc->flags & FULL_INIT_DONE) == 0)
3329 		return (ENXIO);
3330 
3331 	timestamp = sc->timestamp;
3332 	rc = sysctl_handle_int(oidp, &timestamp, arg2, req);
3333 
3334 	if (rc != 0)
3335 		return (rc);
3336 
3337 	if (timestamp != sc->timestamp) {
3338 		t3_set_reg_field(sc, A_TP_PC_CONFIG2, F_ENABLERXPKTTMSTPRSS,
3339 		    timestamp ? F_ENABLERXPKTTMSTPRSS : 0);
3340 		sc->timestamp = timestamp;
3341 	}
3342 
3343 	return (0);
3344 }
3345 
3346 void
3347 t3_add_attach_sysctls(adapter_t *sc)
3348 {
3349 	struct sysctl_ctx_list *ctx;
3350 	struct sysctl_oid_list *children;
3351 
3352 	ctx = device_get_sysctl_ctx(sc->dev);
3353 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3354 
3355 	/* random information */
3356 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3357 	    "firmware_version",
3358 	    CTLFLAG_RD, sc->fw_version,
3359 	    0, "firmware version");
3360 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
3361 	    "hw_revision",
3362 	    CTLFLAG_RD, &sc->params.rev,
3363 	    0, "chip model");
3364 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3365 	    "port_types",
3366 	    CTLFLAG_RD, sc->port_types,
3367 	    0, "type of ports");
3368 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3369 	    "enable_debug",
3370 	    CTLFLAG_RW, &cxgb_debug,
3371 	    0, "enable verbose debugging output");
3372 	SYSCTL_ADD_UQUAD(ctx, children, OID_AUTO, "tunq_coalesce",
3373 	    CTLFLAG_RD, &sc->tunq_coalesce,
3374 	    "#tunneled packets freed");
3375 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3376 	    "txq_overrun",
3377 	    CTLFLAG_RD, &txq_fills,
3378 	    0, "#times txq overrun");
3379 	SYSCTL_ADD_UINT(ctx, children, OID_AUTO,
3380 	    "core_clock",
3381 	    CTLFLAG_RD, &sc->params.vpd.cclk,
3382 	    0, "core clock frequency (in KHz)");
3383 }
3384 
3385 
/* Name of the sysctl node that holds a queue set's response queue entries. */
static const char *rspq_name = "rspq";

/* Sysctl node names for a queue set's transmit queues. */
static const char *txq_names[] = { "txq_eth", "txq_ofld", "txq_ctrl" };
3393 
3394 static int
3395 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3396 {
3397 	struct port_info *p = arg1;
3398 	uint64_t *parg;
3399 
3400 	if (!p)
3401 		return (EINVAL);
3402 
3403 	cxgb_refresh_stats(p);
3404 	parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3405 
3406 	return (sysctl_handle_64(oidp, parg, 0, req));
3407 }
3408 
3409 void
3410 t3_add_configured_sysctls(adapter_t *sc)
3411 {
3412 	struct sysctl_ctx_list *ctx;
3413 	struct sysctl_oid_list *children;
3414 	int i, j;
3415 
3416 	ctx = device_get_sysctl_ctx(sc->dev);
3417 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3418 
3419 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3420 	    "intr_coal",
3421 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
3422 	    0, t3_set_coalesce_usecs,
3423 	    "I", "interrupt coalescing timer (us)");
3424 
3425 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3426 	    "pkt_timestamp",
3427 	    CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_NEEDGIANT, sc,
3428 	    0, t3_pkt_timestamp,
3429 	    "I", "provide packet timestamp instead of connection hash");
3430 
3431 	for (i = 0; i < sc->params.nports; i++) {
3432 		struct port_info *pi = &sc->port[i];
3433 		struct sysctl_oid *poid;
3434 		struct sysctl_oid_list *poidlist;
3435 		struct mac_stats *mstats = &pi->mac.stats;
3436 
3437 		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3438 		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3439 		    pi->namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3440 		    "port statistics");
3441 		poidlist = SYSCTL_CHILDREN(poid);
3442 		SYSCTL_ADD_UINT(ctx, poidlist, OID_AUTO,
3443 		    "nqsets", CTLFLAG_RD, &pi->nqsets,
3444 		    0, "#queue sets");
3445 
3446 		for (j = 0; j < pi->nqsets; j++) {
3447 			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3448 			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid,
3449 					  *ctrlqpoid, *lropoid;
3450 			struct sysctl_oid_list *qspoidlist, *rspqpoidlist,
3451 					       *txqpoidlist, *ctrlqpoidlist,
3452 					       *lropoidlist;
3453 			struct sge_txq *txq = &qs->txq[TXQ_ETH];
3454 
3455 			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3456 
3457 			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3458 			    qs->namebuf, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3459 			    "qset statistics");
3460 			qspoidlist = SYSCTL_CHILDREN(qspoid);
3461 
3462 			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl0_empty",
3463 					CTLFLAG_RD, &qs->fl[0].empty, 0,
3464 					"freelist #0 empty");
3465 			SYSCTL_ADD_UINT(ctx, qspoidlist, OID_AUTO, "fl1_empty",
3466 					CTLFLAG_RD, &qs->fl[1].empty, 0,
3467 					"freelist #1 empty");
3468 
3469 			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3470 			    rspq_name, CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3471 			    "rspq statistics");
3472 			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3473 
3474 			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3475 			    txq_names[0], CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3476 			    "txq statistics");
3477 			txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3478 
3479 			ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3480 			    txq_names[2], CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3481 			    "ctrlq statistics");
3482 			ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3483 
3484 			lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3485 			    "lro_stats", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL,
3486 			    "LRO statistics");
3487 			lropoidlist = SYSCTL_CHILDREN(lropoid);
3488 
3489 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3490 			    CTLFLAG_RD, &qs->rspq.size,
3491 			    0, "#entries in response queue");
3492 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3493 			    CTLFLAG_RD, &qs->rspq.cidx,
3494 			    0, "consumer index");
3495 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3496 			    CTLFLAG_RD, &qs->rspq.credits,
3497 			    0, "#credits");
3498 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "starved",
3499 			    CTLFLAG_RD, &qs->rspq.starved,
3500 			    0, "#times starved");
3501 			SYSCTL_ADD_UAUTO(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3502 			    CTLFLAG_RD, &qs->rspq.phys_addr,
3503 			    "physical_address_of the queue");
3504 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3505 			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3506 			    0, "start rspq dump entry");
3507 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3508 			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3509 			    0, "#rspq entries to dump");
3510 			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3511 			    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
3512 			    &qs->rspq, 0, t3_dump_rspq, "A",
3513 			    "dump of the response queue");
3514 
3515 			SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "dropped",
3516 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_mr->br_drops,
3517 			    "#tunneled packets dropped");
3518 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3519 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.mq_len,
3520 			    0, "#tunneled packets waiting to be sent");
3521 #if 0
3522 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3523 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3524 			    0, "#tunneled packets queue producer index");
3525 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3526 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3527 			    0, "#tunneled packets queue consumer index");
3528 #endif
3529 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "processed",
3530 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3531 			    0, "#tunneled packets processed by the card");
3532 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3533 			    CTLFLAG_RD, &txq->cleaned,
3534 			    0, "#tunneled packets cleaned");
3535 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3536 			    CTLFLAG_RD, &txq->in_use,
3537 			    0, "#tunneled packet slots in use");
3538 			SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "frees",
3539 			    CTLFLAG_RD, &txq->txq_frees,
3540 			    "#tunneled packets freed");
3541 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3542 			    CTLFLAG_RD, &txq->txq_skipped,
3543 			    0, "#tunneled packet descriptors skipped");
3544 			SYSCTL_ADD_UQUAD(ctx, txqpoidlist, OID_AUTO, "coalesced",
3545 			    CTLFLAG_RD, &txq->txq_coalesced,
3546 			    "#tunneled packets coalesced");
3547 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3548 			    CTLFLAG_RD, &txq->txq_enqueued,
3549 			    0, "#tunneled packets enqueued to hardware");
3550 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3551 			    CTLFLAG_RD, &qs->txq_stopped,
3552 			    0, "tx queues stopped");
3553 			SYSCTL_ADD_UAUTO(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3554 			    CTLFLAG_RD, &txq->phys_addr,
3555 			    "physical_address_of the queue");
3556 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3557 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3558 			    0, "txq generation");
3559 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3560 			    CTLFLAG_RD, &txq->cidx,
3561 			    0, "hardware queue cidx");
3562 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3563 			    CTLFLAG_RD, &txq->pidx,
3564 			    0, "hardware queue pidx");
3565 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3566 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3567 			    0, "txq start idx for dump");
3568 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3569 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3570 			    0, "txq #entries to dump");
3571 			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3572 			    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
3573 			    &qs->txq[TXQ_ETH], 0, t3_dump_txq_eth, "A",
3574 			    "dump of the transmit queue");
3575 
3576 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3577 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3578 			    0, "ctrlq start idx for dump");
3579 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3580 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3581 			    0, "ctrl #entries to dump");
3582 			SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3583 			    CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_NEEDGIANT,
3584 			    &qs->txq[TXQ_CTRL], 0, t3_dump_txq_ctrl, "A",
3585 			    "dump of the transmit queue");
3586 
3587 			SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_queued",
3588 			    CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3589 			SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3590 			    CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3591 			SYSCTL_ADD_U64(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3592 			    CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3593 			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3594 			    CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3595 		}
3596 
3597 		/* Now add a node for mac stats. */
3598 		poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3599 		    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "MAC statistics");
3600 		poidlist = SYSCTL_CHILDREN(poid);
3601 
3602 		/*
3603 		 * We (ab)use the length argument (arg2) to pass on the offset
3604 		 * of the data that we are interested in.  This is only required
3605 		 * for the quad counters that are updated from the hardware (we
3606 		 * make sure that we return the latest value).
3607 		 * sysctl_handle_macstat first updates *all* the counters from
3608 		 * the hardware, and then returns the latest value of the
3609 		 * requested counter.  Best would be to update only the
3610 		 * requested counter from hardware, but t3_mac_update_stats()
3611 		 * hides all the register details and we don't want to dive into
3612 		 * all that here.
3613 		 */
3614 #define CXGB_SYSCTL_ADD_QUAD(a)	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3615     CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_NEEDGIANT, pi, \
3616     offsetof(struct mac_stats, a), sysctl_handle_macstat, "QU", 0)
3617 		CXGB_SYSCTL_ADD_QUAD(tx_octets);
3618 		CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3619 		CXGB_SYSCTL_ADD_QUAD(tx_frames);
3620 		CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3621 		CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3622 		CXGB_SYSCTL_ADD_QUAD(tx_pause);
3623 		CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3624 		CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3625 		CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3626 		CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3627 		CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3628 		CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3629 		CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3630 		CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3631 		CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3632 		CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3633 		CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3634 		CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3635 		CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3636 		CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3637 		CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3638 		CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3639 		CXGB_SYSCTL_ADD_QUAD(rx_octets);
3640 		CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3641 		CXGB_SYSCTL_ADD_QUAD(rx_frames);
3642 		CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3643 		CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3644 		CXGB_SYSCTL_ADD_QUAD(rx_pause);
3645 		CXGB_SYSCTL_ADD_QUAD(rx_fcs_errs);
3646 		CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3647 		CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3648 		CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3649 		CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3650 		CXGB_SYSCTL_ADD_QUAD(rx_runt);
3651 		CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3652 		CXGB_SYSCTL_ADD_QUAD(rx_short);
3653 		CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3654 		CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3655 		CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3656 		CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3657 		CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3658 		CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3659 		CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3660 		CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3661 		CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3662 		CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3663 #undef CXGB_SYSCTL_ADD_QUAD
3664 
3665 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3666     CTLFLAG_RD, &mstats->a, 0)
3667 		CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3668 		CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3669 		CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3670 		CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3671 		CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3672 		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3673 		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3674 		CXGB_SYSCTL_ADD_ULONG(num_toggled);
3675 		CXGB_SYSCTL_ADD_ULONG(num_resets);
3676 		CXGB_SYSCTL_ADD_ULONG(link_faults);
3677 #undef CXGB_SYSCTL_ADD_ULONG
3678 	}
3679 }
3680 
3681 /**
3682  *	t3_get_desc - dump an SGE descriptor for debugging purposes
3683  *	@qs: the queue set
3684  *	@qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3685  *	@idx: the descriptor index in the queue
3686  *	@data: where to dump the descriptor contents
3687  *
3688  *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3689  *	size of the descriptor.
3690  */
3691 int
3692 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3693 		unsigned char *data)
3694 {
3695 	if (qnum >= 6)
3696 		return (EINVAL);
3697 
3698 	if (qnum < 3) {
3699 		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3700 			return -EINVAL;
3701 		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3702 		return sizeof(struct tx_desc);
3703 	}
3704 
3705 	if (qnum == 3) {
3706 		if (!qs->rspq.desc || idx >= qs->rspq.size)
3707 			return (EINVAL);
3708 		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3709 		return sizeof(struct rsp_desc);
3710 	}
3711 
3712 	qnum -= 4;
3713 	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3714 		return (EINVAL);
3715 	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3716 	return sizeof(struct rx_desc);
3717 }
3718