xref: /freebsd/sys/dev/cxgb/cxgb_sge.c (revision 25292deb42c1c99213f6d603cf461b950691cc79)
1 /**************************************************************************
2 
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 #define DEBUG_BUFRING
30 
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/bus.h>
40 #include <sys/conf.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/bus_dma.h>
44 #include <sys/rman.h>
45 #include <sys/queue.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48 
49 #include <sys/proc.h>
50 #include <sys/sbuf.h>
51 #include <sys/sched.h>
52 #include <sys/smp.h>
53 #include <sys/systm.h>
54 #include <sys/syslog.h>
55 
56 #include <netinet/in_systm.h>
57 #include <netinet/in.h>
58 #include <netinet/ip.h>
59 #include <netinet/tcp.h>
60 
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 
67 #ifdef CONFIG_DEFINED
68 #include <cxgb_include.h>
69 #include <sys/mvec.h>
70 #else
71 #include <dev/cxgb/cxgb_include.h>
72 #include <dev/cxgb/sys/mvec.h>
73 #endif
74 
75 int      txq_fills = 0;
76 /*
77  * XXX don't re-enable this until TOE stops assuming
78  * we have an m_ext
79  */
80 static int recycle_enable = 0;
81 extern int cxgb_txq_buf_ring_size;
82 int cxgb_cached_allocations;
83 int cxgb_cached;
84 int cxgb_ext_freed = 0;
85 int cxgb_ext_inited = 0;
86 int fl_q_size = 0;
87 int jumbo_q_size = 0;
88 
89 extern int cxgb_use_16k_clusters;
90 extern int cxgb_pcpu_cache_enable;
91 extern int nmbjumbo4;
92 extern int nmbjumbo9;
93 extern int nmbjumbo16;
94 
95 
96 
97 
98 #define USE_GTS 0
99 
100 #define SGE_RX_SM_BUF_SIZE	1536
101 #define SGE_RX_DROP_THRES	16
102 #define SGE_RX_COPY_THRES	128
103 
104 /*
105  * Period of the Tx buffer reclaim timer.  This timer does not need to run
106  * frequently as Tx buffers are usually reclaimed by new Tx packets.
107  */
108 #define TX_RECLAIM_PERIOD       (hz >> 1)
109 
110 /*
111  * Values for sge_txq.flags
112  */
113 enum {
114 	TXQ_RUNNING	= 1 << 0,  /* fetch engine is running */
115 	TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
116 };
117 
118 struct tx_desc {
119 	uint64_t	flit[TX_DESC_FLITS];
120 } __packed;
121 
122 struct rx_desc {
123 	uint32_t	addr_lo;
124 	uint32_t	len_gen;
125 	uint32_t	gen2;
126 	uint32_t	addr_hi;
127 } __packed;
128 
129 struct rsp_desc {               /* response queue descriptor */
130 	struct rss_header	rss_hdr;
131 	uint32_t		flags;
132 	uint32_t		len_cq;
133 	uint8_t			imm_data[47];
134 	uint8_t			intr_gen;
135 } __packed;
136 
137 #define RX_SW_DESC_MAP_CREATED	(1 << 0)
138 #define TX_SW_DESC_MAP_CREATED	(1 << 1)
139 #define RX_SW_DESC_INUSE        (1 << 3)
140 #define TX_SW_DESC_MAPPED       (1 << 4)
141 
142 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
143 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
144 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
145 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
146 
147 struct tx_sw_desc {                /* SW state per Tx descriptor */
148 	struct mbuf_iovec mi;
149 	bus_dmamap_t	map;
150 	int		flags;
151 };
152 
153 struct rx_sw_desc {                /* SW state per Rx descriptor */
154 	caddr_t	         rxsd_cl;
155 	caddr_t	         data;
156 	bus_dmamap_t	  map;
157 	int		  flags;
158 };
159 
160 struct txq_state {
161 	unsigned int compl;
162 	unsigned int gen;
163 	unsigned int pidx;
164 };
165 
166 struct refill_fl_cb_arg {
167 	int               error;
168 	bus_dma_segment_t seg;
169 	int               nseg;
170 };
171 
172 /*
173  * Maps a number of flits to the number of Tx descriptors that can hold them.
174  * The formula is
175  *
176  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
177  *
178  * HW allows up to 4 descriptors to be combined into a WR.
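 *
 * For example, assuming WR_FLITS == TX_DESC_FLITS + 1 - SGE_NUM_GENBITS
 * (i.e. 15 with two generation bits), a 20-flit request needs
 * 1 + (20 - 2) / 14 = 2 descriptors, which matches flit_desc_map[20] == 2 below.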
179  */
180 static uint8_t flit_desc_map[] = {
181 	0,
182 #if SGE_NUM_GENBITS == 1
183 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
184 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
185 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
186 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
187 #elif SGE_NUM_GENBITS == 2
188 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
189 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
190 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
191 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
192 #else
193 # error "SGE_NUM_GENBITS must be 1 or 2"
194 #endif
195 };
196 
197 
198 int cxgb_debug = 0;
199 
200 static void sge_timer_cb(void *arg);
201 static void sge_timer_reclaim(void *arg, int ncount);
202 static void sge_txq_reclaim_handler(void *arg, int ncount);
203 
204 /**
205  *	reclaim_completed_tx_ - reclaims completed Tx descriptors
206  *	@q: the Tx queue to reclaim completed descriptors from
207  *	@reclaim_min: don't reclaim unless at least this many descriptors can be reclaimed
208  *
209  *	Reclaims Tx descriptors that the SGE has indicated it has processed,
210  *	and frees the associated buffers if possible.  Called with the Tx
211  *	queue's lock held.
212  */
213 static __inline int
214 reclaim_completed_tx_(struct sge_txq *q, int reclaim_min)
215 {
216 	int reclaim = desc_reclaimable(q);
217 
218 	if (reclaim < reclaim_min)
219 		return (0);
220 
221 	mtx_assert(&q->lock, MA_OWNED);
222 	if (reclaim > 0) {
223 		t3_free_tx_desc(q, reclaim);
224 		q->cleaned += reclaim;
225 		q->in_use -= reclaim;
226 	}
227 	return (reclaim);
228 }
229 
230 /**
231  *	should_restart_tx - are there enough resources to restart a Tx queue?
232  *	@q: the Tx queue
233  *
234  *	Checks if there are enough descriptors to restart a suspended Tx queue.
235  */
236 static __inline int
237 should_restart_tx(const struct sge_txq *q)
238 {
239 	unsigned int r = q->processed - q->cleaned;
240 
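	/*
	 * r descriptors have been processed by the SGE but not yet reclaimed;
	 * restart only once the number of descriptors still genuinely in use
	 * drops below half the queue size.
	 */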
241 	return q->in_use - r < (q->size >> 1);
242 }
243 
244 /**
245  *	t3_sge_init - initialize SGE
246  *	@adap: the adapter
247  *	@p: the SGE parameters
248  *
249  *	Performs SGE initialization needed every time after a chip reset.
250  *	We do not initialize any of the queue sets here, instead the driver
251  *	top-level must request those individually.  We also do not enable DMA
252  *	here, that should be done after the queues have been set up.
253  */
254 void
255 t3_sge_init(adapter_t *adap, struct sge_params *p)
256 {
257 	u_int ctrl, ups;
258 
259 	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
260 
261 	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
262 	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
263 	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
264 	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
265 #if SGE_NUM_GENBITS == 1
266 	ctrl |= F_EGRGENCTRL;
267 #endif
268 	if (adap->params.rev > 0) {
269 		if (!(adap->flags & (USING_MSIX | USING_MSI)))
270 			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
271 	}
272 	t3_write_reg(adap, A_SG_CONTROL, ctrl);
273 	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
274 		     V_LORCQDRBTHRSH(512));
275 	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
276 	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
277 		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
278 	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
279 		     adap->params.rev < T3_REV_C ? 1000 : 500);
280 	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
281 	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
282 	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
283 	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
284 	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
285 }
286 
287 
288 /**
289  *	sgl_len - calculates the size of an SGL of the given capacity
290  *	@n: the number of SGL entries
291  *
292  *	Calculates the number of flits needed for a scatter/gather list that
293  *	can hold the given number of entries.
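 *	Each sg_ent packs two address/length pairs into three flits (two 8-byte
 *	addresses plus two 4-byte lengths), so n entries take 3n/2 flits,
 *	rounded up; e.g. sgl_len(3) == 5.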
294  */
295 static __inline unsigned int
296 sgl_len(unsigned int n)
297 {
298 	return ((3 * n) / 2 + (n & 1));
299 }
300 
301 /**
302  *	get_imm_packet - return the next ingress packet buffer from a response
303  *	@resp: the response descriptor containing the packet data
304  *
305  *	Copy the immediate data of the given response into the supplied mbuf.
306  */
307 static int
308 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
309 {
310 
311 	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
312 	m->m_ext.ext_buf = NULL;
313 	m->m_ext.ext_type = 0;
314 	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
315 	return (0);
316 }
317 
318 static __inline u_int
319 flits_to_desc(u_int n)
320 {
321 	return (flit_desc_map[n]);
322 }
323 
324 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
325 		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
326 		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
327 		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
328 		    F_HIRCQPARITYERROR)
329 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
330 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
331 		      F_RSPQDISABLED)
332 
333 /**
334  *	t3_sge_err_intr_handler - SGE async event interrupt handler
335  *	@adapter: the adapter
336  *
337  *	Interrupt handler for SGE asynchronous (non-data) events.
338  */
339 void
340 t3_sge_err_intr_handler(adapter_t *adapter)
341 {
342 	unsigned int v, status;
343 
344 	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
345 	if (status & SGE_PARERR)
346 		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
347 			 status & SGE_PARERR);
348 	if (status & SGE_FRAMINGERR)
349 		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
350 			 status & SGE_FRAMINGERR);
351 	if (status & F_RSPQCREDITOVERFOW)
352 		CH_ALERT(adapter, "SGE response queue credit overflow\n");
353 
354 	if (status & F_RSPQDISABLED) {
355 		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
356 
357 		CH_ALERT(adapter,
358 			 "packet delivered to disabled response queue (0x%x)\n",
359 			 (v >> S_RSPQ0DISABLED) & 0xff);
360 	}
361 
362 	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
363 	if (status & SGE_FATALERR)
364 		t3_fatal_err(adapter);
365 }
366 
367 void
368 t3_sge_prep(adapter_t *adap, struct sge_params *p)
369 {
370 	int i, nqsets;
371 
372 	nqsets = min(SGE_QSETS, mp_ncpus*4);
373 
374 	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
375 
376 	while (!powerof2(fl_q_size))
377 		fl_q_size--;
378 #if __FreeBSD_version > 800000
379 	if (cxgb_use_16k_clusters)
380 		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
381 	else
382 		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
383 #else
384 	jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE);
385 #endif
386 	while (!powerof2(jumbo_q_size))
387 		jumbo_q_size--;
388 
389 	/* XXX Does ETHER_ALIGN need to be accounted for here? */
390 	p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);
391 
392 	for (i = 0; i < SGE_QSETS; ++i) {
393 		struct qset_params *q = p->qset + i;
394 
395 		if (adap->params.nports > 2) {
396 			q->coalesce_usecs = 50;
397 		} else {
398 #ifdef INVARIANTS
399 			q->coalesce_usecs = 10;
400 #else
401 			q->coalesce_usecs = 5;
402 #endif
403 		}
404 		q->polling = adap->params.rev > 0;
405 		q->rspq_size = RSPQ_Q_SIZE;
406 		q->fl_size = fl_q_size;
407 		q->jumbo_size = jumbo_q_size;
408 		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
409 		q->txq_size[TXQ_OFLD] = 1024;
410 		q->txq_size[TXQ_CTRL] = 256;
411 		q->cong_thres = 0;
412 	}
413 }
414 
415 int
416 t3_sge_alloc(adapter_t *sc)
417 {
418 
419 	/* The parent tag. */
420 	if (bus_dma_tag_create( NULL,			/* parent */
421 				1, 0,			/* algnmnt, boundary */
422 				BUS_SPACE_MAXADDR,	/* lowaddr */
423 				BUS_SPACE_MAXADDR,	/* highaddr */
424 				NULL, NULL,		/* filter, filterarg */
425 				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
426 				BUS_SPACE_UNRESTRICTED, /* nsegments */
427 				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
428 				0,			/* flags */
429 				NULL, NULL,		/* lock, lockarg */
430 				&sc->parent_dmat)) {
431 		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
432 		return (ENOMEM);
433 	}
434 
435 	/*
436 	 * DMA tag for normal sized RX frames
437 	 */
438 	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
439 		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
440 		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
441 		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
442 		return (ENOMEM);
443 	}
444 
445 	/*
446 	 * DMA tag for jumbo sized RX frames.
447 	 */
448 	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
449 		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
450 		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
451 		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
452 		return (ENOMEM);
453 	}
454 
455 	/*
456 	 * DMA tag for TX frames.
457 	 */
458 	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
459 		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
460 		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
461 		NULL, NULL, &sc->tx_dmat)) {
462 		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
463 		return (ENOMEM);
464 	}
465 
466 	return (0);
467 }
468 
469 int
470 t3_sge_free(struct adapter * sc)
471 {
472 
473 	if (sc->tx_dmat != NULL)
474 		bus_dma_tag_destroy(sc->tx_dmat);
475 
476 	if (sc->rx_jumbo_dmat != NULL)
477 		bus_dma_tag_destroy(sc->rx_jumbo_dmat);
478 
479 	if (sc->rx_dmat != NULL)
480 		bus_dma_tag_destroy(sc->rx_dmat);
481 
482 	if (sc->parent_dmat != NULL)
483 		bus_dma_tag_destroy(sc->parent_dmat);
484 
485 	return (0);
486 }
487 
488 void
489 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
490 {
491 
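	/*
	 * The SGE timer tick is programmed to 0.1us in t3_sge_init()
	 * (A_SG_TIMER_TICK), so multiplying by 10 converts microseconds to
	 * holdoff timer ticks.
	 */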
492 	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
493 	qs->rspq.polling = 0 /* p->polling */;
494 }
495 
496 #if !defined(__i386__) && !defined(__amd64__)
497 static void
498 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
499 {
500 	struct refill_fl_cb_arg *cb_arg = arg;
501 
502 	cb_arg->error = error;
503 	cb_arg->seg = segs[0];
504 	cb_arg->nseg = nseg;
505 
506 }
507 #endif
508 /**
509  *	refill_fl - refill an SGE free-buffer list
510  *	@sc: the controller softc
511  *	@q: the free-list to refill
512  *	@n: the number of new buffers to allocate
513  *
514  *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
515  *	The caller must ensure that @n does not exceed the queue's capacity.
516  */
517 static void
518 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
519 {
520 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
521 	struct rx_desc *d = &q->desc[q->pidx];
522 	struct refill_fl_cb_arg cb_arg;
523 	caddr_t cl;
524 	int err, count = 0;
525 	int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
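	/*
	 * Each cluster reserves space at its front for an mbuf header (plus
	 * what appears to be a reference count word); the address handed to
	 * the HW starts just past this reserved region (see sd->data below).
	 */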
526 
527 	cb_arg.error = 0;
528 	while (n--) {
529 		/*
530 		 * We only allocate a cluster; mbuf allocation happens after rx
531 		 */
532 		if ((cl = cxgb_cache_get(q->zone)) == NULL) {
533 			log(LOG_WARNING, "Failed to allocate cluster\n");
534 			goto done;
535 		}
536 
537 		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
538 			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
539 				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
540 				uma_zfree(q->zone, cl);
541 				goto done;
542 			}
543 			sd->flags |= RX_SW_DESC_MAP_CREATED;
544 		}
545 #if !defined(__i386__) && !defined(__amd64__)
546 		err = bus_dmamap_load(q->entry_tag, sd->map,
547 		    cl + header_size, q->buf_size,
548 		    refill_fl_cb, &cb_arg, 0);
549 
550 		if (err != 0 || cb_arg.error) {
551 			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
552 			/*
553 			 * XXX free cluster
554 			 */
555 			return;
556 		}
557 #else
558 		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size));
559 #endif
560 		sd->flags |= RX_SW_DESC_INUSE;
561 		sd->rxsd_cl = cl;
562 		sd->data = cl + header_size;
563 		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
564 		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
565 		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
566 		d->gen2 = htobe32(V_FLD_GEN2(q->gen));
567 
568 		d++;
569 		sd++;
570 
571 		if (++q->pidx == q->size) {
572 			q->pidx = 0;
573 			q->gen ^= 1;
574 			sd = q->sdesc;
575 			d = q->desc;
576 		}
577 		q->credits++;
578 		count++;
579 	}
580 
581 done:
582 	if (count)
583 		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
584 }
585 
586 
587 /**
588  *	free_rx_bufs - free the Rx buffers on an SGE free list
589  *	@sc: the controller softc
590  *	@q: the SGE free list to clean up
591  *
592  *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
593  *	this queue should be stopped before calling this function.
594  */
595 static void
596 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
597 {
598 	u_int cidx = q->cidx;
599 
600 	while (q->credits--) {
601 		struct rx_sw_desc *d = &q->sdesc[cidx];
602 
603 		if (d->flags & RX_SW_DESC_INUSE) {
604 			bus_dmamap_unload(q->entry_tag, d->map);
605 			bus_dmamap_destroy(q->entry_tag, d->map);
606 			uma_zfree(q->zone, d->rxsd_cl);
607 		}
608 		d->rxsd_cl = NULL;
609 		if (++cidx == q->size)
610 			cidx = 0;
611 	}
612 }
613 
614 static __inline void
615 __refill_fl(adapter_t *adap, struct sge_fl *fl)
616 {
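	/* Top the free list up in small batches of at most 16 buffers. */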
617 	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
618 }
619 
620 static __inline void
621 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
622 {
623 	if ((fl->size - fl->credits) < max)
624 		refill_fl(adap, fl, min(max, fl->size - fl->credits));
625 }
626 
627 void
628 refill_fl_service(adapter_t *adap, struct sge_fl *fl)
629 {
630 	__refill_fl_lt(adap, fl, 512);
631 }
632 
633 /**
634  *	recycle_rx_buf - recycle a receive buffer
635  *	@adapter: the adapter
636  *	@q: the SGE free list
637  *	@idx: index of buffer to recycle
638  *
639  *	Recycles the specified buffer on the given free list by adding it at
640  *	the next available slot on the list.
641  */
642 static void
643 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
644 {
645 	struct rx_desc *from = &q->desc[idx];
646 	struct rx_desc *to   = &q->desc[q->pidx];
647 
648 	q->sdesc[q->pidx] = q->sdesc[idx];
649 	to->addr_lo = from->addr_lo;        // already big endian
650 	to->addr_hi = from->addr_hi;        // likewise
651 	wmb();
652 	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
653 	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
654 	q->credits++;
655 
656 	if (++q->pidx == q->size) {
657 		q->pidx = 0;
658 		q->gen ^= 1;
659 	}
660 	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
661 }
662 
663 static void
664 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
665 {
666 	uint32_t *addr;
667 
668 	addr = arg;
669 	*addr = segs[0].ds_addr;
670 }
671 
672 static int
673 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
674     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
675     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
676 {
677 	size_t len = nelem * elem_size;
678 	void *s = NULL;
679 	void *p = NULL;
680 	int err;
681 
682 	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
683 				      BUS_SPACE_MAXADDR_32BIT,
684 				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
685 				      len, 0, NULL, NULL, tag)) != 0) {
686 		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
687 		return (ENOMEM);
688 	}
689 
690 	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
691 				    map)) != 0) {
692 		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
693 		return (ENOMEM);
694 	}
695 
696 	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
697 	bzero(p, len);
698 	*(void **)desc = p;
699 
700 	if (sw_size) {
701 		len = nelem * sw_size;
702 		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
703 		*(void **)sdesc = s;
704 	}
705 	if (parent_entry_tag == NULL)
706 		return (0);
707 
708 	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
709 				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
710 		                      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
711 				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
712 		                      NULL, NULL, entry_tag)) != 0) {
713 		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
714 		return (ENOMEM);
715 	}
716 	return (0);
717 }
718 
719 static void
720 sge_slow_intr_handler(void *arg, int ncount)
721 {
722 	adapter_t *sc = arg;
723 
724 	t3_slow_intr_handler(sc);
725 }
726 
727 /**
728  *	sge_timer_cb - perform periodic maintenance of the SGE
729  *	@arg: the adapter
730  *
731  *	Runs periodically from a timer to perform maintenance of the SGE queue
732  *	sets.  It performs the following tasks:
733  *
734  *	a) Cleans up any completed Tx descriptors that may still be pending.
735  *	Normal descriptor cleanup happens when new packets are added to a Tx
736  *	queue so this timer is relatively infrequent and does any cleanup only
737  *	if the Tx queue has not seen any new packets in a while.  We make a
738  *	best effort attempt to reclaim descriptors, in that we don't wait
739  *	around if we cannot get a queue's lock (which most likely is because
740  *	someone else is queueing new packets and so will also handle the clean
741  *	up).  Since control queues use immediate data exclusively we don't
742  *	bother cleaning them up here.
743  *
744  *	b) Replenishes Rx queues that have run out due to memory shortage.
745  *	Normally new Rx buffers are added when existing ones are consumed but
746  *	when out of memory a queue can become empty.  We try to add only a few
747  *	buffers here, the queue will be replenished fully as these new buffers
748  *	are used up if memory shortage has subsided.
749  *
750  *	c) Returns coalesced response queue credits in case a response queue is
751  *	starved.
752  *
753  *	d) Rings doorbells for T304 tunnel queues since we have seen doorbell
754  *	fifo overflows and the FW doesn't implement any recovery scheme yet.
755  */
756 static void
757 sge_timer_cb(void *arg)
758 {
759 	adapter_t *sc = arg;
760 #ifndef IFNET_MULTIQUEUE
761 	struct port_info *pi;
762 	struct sge_qset *qs;
763 	struct sge_txq  *txq;
764 	int i, j;
765 	int reclaim_ofl, refill_rx;
766 
767 	for (i = 0; i < sc->params.nports; i++)
768 		for (j = 0; j < sc->port[i].nqsets; j++) {
769 			qs = &sc->sge.qs[i + j];
770 			txq = &qs->txq[0];
771 			reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
772 			refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
773 			    (qs->fl[1].credits < qs->fl[1].size));
774 			if (reclaim_ofl || refill_rx) {
775 				pi = &sc->port[i];
776 				taskqueue_enqueue(pi->tq, &pi->timer_reclaim_task);
777 				break;
778 			}
779 		}
780 #endif
781 	if (sc->params.nports > 2) {
782 		int i;
783 
784 		for_each_port(sc, i) {
785 			struct port_info *pi = &sc->port[i];
786 
787 			t3_write_reg(sc, A_SG_KDOORBELL,
788 				     F_SELEGRCNTX |
789 				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
790 		}
791 	}
792 	if (sc->open_device_map != 0)
793 		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
794 }
795 
796 /*
797  * This is meant to be a catch-all function to keep sge state private
798  * to sge.c
799  *
800  */
801 int
802 t3_sge_init_adapter(adapter_t *sc)
803 {
804 	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
805 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
806 	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
807 	mi_init();
808 	cxgb_cache_init();
809 	return (0);
810 }
811 
812 int
813 t3_sge_reset_adapter(adapter_t *sc)
814 {
815 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
816 	return (0);
817 }
818 
819 int
820 t3_sge_init_port(struct port_info *pi)
821 {
822 	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
823 	return (0);
824 }
825 
826 void
827 t3_sge_deinit_sw(adapter_t *sc)
828 {
829 
830 	mi_deinit();
831 }
832 
833 /**
834  *	refill_rspq - replenish an SGE response queue
835  *	@adapter: the adapter
836  *	@q: the response queue to replenish
837  *	@credits: how many new responses to make available
838  *
839  *	Replenishes a response queue by making the supplied number of responses
840  *	available to HW.
841  */
842 static __inline void
843 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
844 {
845 
846 	/* mbufs are allocated on demand when a rspq entry is processed. */
847 	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
848 		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
849 }
850 
851 static __inline void
852 sge_txq_reclaim_(struct sge_txq *txq, int force)
853 {
854 
855 	if (desc_reclaimable(txq) < 16)
856 		return;
857 	if (mtx_trylock(&txq->lock) == 0)
858 		return;
859 	reclaim_completed_tx_(txq, 16);
860 	mtx_unlock(&txq->lock);
861 
862 }
863 
864 static void
865 sge_txq_reclaim_handler(void *arg, int ncount)
866 {
867 	struct sge_txq *q = arg;
868 
869 	sge_txq_reclaim_(q, TRUE);
870 }
871 
872 
873 
874 static void
875 sge_timer_reclaim(void *arg, int ncount)
876 {
877 	struct port_info *pi = arg;
878 	int i, nqsets = pi->nqsets;
879 	adapter_t *sc = pi->adapter;
880 	struct sge_qset *qs;
881 	struct sge_txq *txq;
882 	struct mtx *lock;
883 
884 #ifdef IFNET_MULTIQUEUE
885 	panic("%s should not be called with multiqueue support\n", __FUNCTION__);
886 #endif
887 	for (i = 0; i < nqsets; i++) {
888 		qs = &sc->sge.qs[i];
889 
890 		txq = &qs->txq[TXQ_OFLD];
891 		sge_txq_reclaim_(txq, FALSE);
892 
893 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
894 			    &sc->sge.qs[0].rspq.lock;
895 
896 		if (mtx_trylock(lock)) {
897 			/* XXX currently assume that we are *NOT* polling */
898 			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
899 
900 			if (qs->fl[0].credits < qs->fl[0].size - 16)
901 				__refill_fl(sc, &qs->fl[0]);
902 			if (qs->fl[1].credits < qs->fl[1].size - 16)
903 				__refill_fl(sc, &qs->fl[1]);
904 
905 			if (status & (1 << qs->rspq.cntxt_id)) {
906 				if (qs->rspq.credits) {
907 					refill_rspq(sc, &qs->rspq, 1);
908 					qs->rspq.credits--;
909 					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
910 					    1 << qs->rspq.cntxt_id);
911 				}
912 			}
913 			mtx_unlock(lock);
914 		}
915 	}
916 }
917 
918 /**
919  *	init_qset_cntxt - initialize an SGE queue set context info
920  *	@qs: the queue set
921  *	@id: the queue set id
922  *
923  *	Initializes the TIDs and context ids for the queues of a queue set.
924  */
925 static void
926 init_qset_cntxt(struct sge_qset *qs, u_int id)
927 {
928 
929 	qs->rspq.cntxt_id = id;
930 	qs->fl[0].cntxt_id = 2 * id;
931 	qs->fl[1].cntxt_id = 2 * id + 1;
932 	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
933 	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
934 	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
935 	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
936 	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
937 
938 	mbufq_init(&qs->txq[TXQ_ETH].sendq);
939 	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
940 	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
941 }
942 
943 
944 static void
945 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
946 {
947 	txq->in_use += ndesc;
948 	/*
949 	 * XXX we don't handle stopping of queue
950 	 * presumably start handles this when we bump against the end
951 	 */
952 	txqs->gen = txq->gen;
953 	txq->unacked += ndesc;
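	/*
	 * Request a work-request completion roughly every 32 descriptors:
	 * bit 5 of the running unacked count supplies the WR_COMPL flag,
	 * after which the count is kept modulo 32.
	 */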
954 	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
955 	txq->unacked &= 31;
956 	txqs->pidx = txq->pidx;
957 	txq->pidx += ndesc;
958 #ifdef INVARIANTS
959 	if (((txqs->pidx > txq->cidx) &&
960 		(txq->pidx < txqs->pidx) &&
961 		(txq->pidx >= txq->cidx)) ||
962 	    ((txqs->pidx < txq->cidx) &&
963 		(txq->pidx >= txq-> cidx)) ||
964 	    ((txqs->pidx < txq->cidx) &&
965 		(txq->cidx < txqs->pidx)))
966 		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
967 		    txqs->pidx, txq->pidx, txq->cidx);
968 #endif
969 	if (txq->pidx >= txq->size) {
970 		txq->pidx -= txq->size;
971 		txq->gen ^= 1;
972 	}
973 
974 }
975 
976 /**
977  *	calc_tx_descs - calculate the number of Tx descriptors for a packet
978  *	@m: the packet mbufs
979  *      @nsegs: the number of segments
980  *
981  * 	Returns the number of Tx descriptors needed for the given Ethernet
982  * 	packet.  Ethernet packets require the addition of WR and CPL headers.
983  */
984 static __inline unsigned int
985 calc_tx_descs(const struct mbuf *m, int nsegs)
986 {
987 	unsigned int flits;
988 
989 	if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
990 		return 1;
991 
992 	flits = sgl_len(nsegs) + 2;
993 #ifdef TSO_SUPPORTED
994 	if (m->m_pkthdr.csum_flags & CSUM_TSO)
995 		flits++;
996 #endif
997 	return flits_to_desc(flits);
998 }
999 
1000 static unsigned int
1001 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
1002     struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
1003 {
1004 	struct mbuf *m0;
1005 	int err, pktlen, pass = 0;
1006 
1007 retry:
1008 	err = 0;
1009 	m0 = *m;
1010 	pktlen = m0->m_pkthdr.len;
1011 #if defined(__i386__) || defined(__amd64__)
1012 	if (busdma_map_sg_collapse(m, segs, nsegs) == 0) {
1013 		goto done;
1014 	} else
1015 #endif
1016 		err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);
1017 
1018 	if (err == 0) {
1019 		goto done;
1020 	}
1021 	if (err == EFBIG && pass == 0) {
1022 		pass = 1;
1023 		/* Too many segments, try to defrag */
1024 		m0 = m_defrag(m0, M_DONTWAIT);
1025 		if (m0 == NULL) {
1026 			m_freem(*m);
1027 			*m = NULL;
1028 			return (ENOBUFS);
1029 		}
1030 		*m = m0;
1031 		goto retry;
1032 	} else if (err == ENOMEM) {
1033 		return (err);
1034 	} else if (err) {
1035 		if (cxgb_debug)
1036 			printf("map failure err=%d pktlen=%d\n", err, pktlen);
1037 		m_freem(m0);
1038 		*m = NULL;
1039 		return (err);
1040 	}
1041 done:
1042 #if !defined(__i386__) && !defined(__amd64__)
1043 	bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE);
1044 #endif
1045 	txsd->flags |= TX_SW_DESC_MAPPED;
1046 
1047 	return (0);
1048 }
1049 
1050 /**
1051  *	make_sgl - populate a scatter/gather list for a packet
1052  *	@sgp: the SGL to populate
1053  *	@segs: the packet dma segments
1054  *	@nsegs: the number of segments
1055  *
1056  *	Generates a scatter/gather list for the buffers that make up a packet
1057  *	and returns the SGL size in 8-byte words.  The caller must size the SGL
1058  *	appropriately.
1059  */
1060 static __inline void
1061 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1062 {
1063 	int i, idx;
1064 
1065 	for (idx = 0, i = 0; i < nsegs; i++) {
1066 		/*
1067 		 * firmware doesn't like empty segments
1068 		 */
1069 		if (segs[i].ds_len == 0)
1070 			continue;
1071 		if (i && idx == 0)
1072 			++sgp;
1073 
1074 		sgp->len[idx] = htobe32(segs[i].ds_len);
1075 		sgp->addr[idx] = htobe64(segs[i].ds_addr);
1076 		idx ^= 1;
1077 	}
1078 
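	/* Terminate an odd-length list by zeroing the unused half of the last sg_ent. */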
1079 	if (idx) {
1080 		sgp->len[idx] = 0;
1081 		sgp->addr[idx] = 0;
1082 	}
1083 }
1084 
1085 /**
1086  *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1087  *	@adap: the adapter
1088  *	@q: the Tx queue
1089  *
1090  *	Ring the doorbell if a Tx queue is asleep.  There is a natural race
1091  *	where the HW goes to sleep just after we check; in that case the
1092  *	interrupt handler will detect the outstanding TX packet and ring the
1093  *	doorbell for us.
1094  *
1095  *	When GTS is disabled we unconditionally ring the doorbell.
1096  */
1097 static __inline void
1098 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1099 {
1100 #if USE_GTS
1101 	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1102 	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1103 		set_bit(TXQ_LAST_PKT_DB, &q->flags);
1104 #ifdef T3_TRACE
1105 		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1106 			  q->cntxt_id);
1107 #endif
1108 		t3_write_reg(adap, A_SG_KDOORBELL,
1109 			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1110 	}
1111 #else
1112 	wmb();            /* write descriptors before telling HW */
1113 	t3_write_reg(adap, A_SG_KDOORBELL,
1114 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1115 #endif
1116 }
1117 
1118 static __inline void
1119 wr_gen2(struct tx_desc *d, unsigned int gen)
1120 {
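	/*
	 * With two generation bits the last flit of every descriptor holds a
	 * second copy of the generation bit, letting the HW detect descriptors
	 * that have not been completely written yet.
	 */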
1121 #if SGE_NUM_GENBITS == 2
1122 	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1123 #endif
1124 }
1125 
1126 /**
1127  *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
1128  *	@ndesc: number of Tx descriptors spanned by the SGL
1129  *	@txd: first Tx descriptor to be written
1130  *	@txqs: txq state (generation and producer index)
1131  *	@txq: the SGE Tx queue
1132  *	@sgl: the SGL
1133  *	@flits: number of flits to the start of the SGL in the first descriptor
1134  *	@sgl_flits: the SGL size in flits
1135  *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
1136  *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
1137  *
1138  *	Write a work request header and an associated SGL.  If the SGL is
1139  *	small enough to fit into one Tx descriptor it has already been written
1140  *	and we just need to write the WR header.  Otherwise we distribute the
1141  *	SGL across the number of descriptors it spans.
1142  */
1143 static void
1144 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1145     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1146     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1147 {
1148 
1149 	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1150 	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1151 
1152 	if (__predict_true(ndesc == 1)) {
1153 		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1154 		    V_WR_SGLSFLT(flits)) | wr_hi;
1155 		wmb();
1156 		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1157 		    V_WR_GEN(txqs->gen)) | wr_lo;
1158 		/* XXX gen? */
1159 		wr_gen2(txd, txqs->gen);
1160 
1161 	} else {
1162 		unsigned int ogen = txqs->gen;
1163 		const uint64_t *fp = (const uint64_t *)sgl;
1164 		struct work_request_hdr *wp = wrp;
1165 
1166 		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1167 		    V_WR_SGLSFLT(flits)) | wr_hi;
1168 
1169 		while (sgl_flits) {
1170 			unsigned int avail = WR_FLITS - flits;
1171 
1172 			if (avail > sgl_flits)
1173 				avail = sgl_flits;
1174 			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1175 			sgl_flits -= avail;
1176 			ndesc--;
1177 			if (!sgl_flits)
1178 				break;
1179 
1180 			fp += avail;
1181 			txd++;
1182 			txsd++;
1183 			if (++txqs->pidx == txq->size) {
1184 				txqs->pidx = 0;
1185 				txqs->gen ^= 1;
1186 				txd = txq->desc;
1187 				txsd = txq->sdesc;
1188 			}
1189 
1190 			/*
1191 			 * when the head of the mbuf chain
1192 			 * is freed all clusters will be freed
1193 			 * with it
1194 			 */
1195 			KASSERT(txsd->mi.mi_base == NULL,
1196 			    ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1197 			wrp = (struct work_request_hdr *)txd;
1198 			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1199 			    V_WR_SGLSFLT(1)) | wr_hi;
1200 			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1201 				    sgl_flits + 1)) |
1202 			    V_WR_GEN(txqs->gen)) | wr_lo;
1203 			wr_gen2(txd, txqs->gen);
1204 			flits = 1;
1205 		}
1206 		wrp->wr_hi |= htonl(F_WR_EOP);
1207 		wmb();
1208 		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1209 		wr_gen2((struct tx_desc *)wp, ogen);
1210 	}
1211 }
1212 
1213 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1214 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
1215 
1216 #ifdef VLAN_SUPPORTED
1217 #define GET_VTAG(cntrl, m) \
1218 do { \
1219 	if ((m)->m_flags & M_VLANTAG)					            \
1220 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1221 } while (0)
1222 
1223 #define GET_VTAG_MI(cntrl, mi) \
1224 do { \
1225 	if ((mi)->mi_flags & M_VLANTAG)					\
1226 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \
1227 } while (0)
1228 #else
1229 #define GET_VTAG(cntrl, m)
1230 #define GET_VTAG_MI(cntrl, m)
1231 #endif
1232 
1233 int
1234 t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
1235 {
1236 	adapter_t *sc;
1237 	struct mbuf *m0;
1238 	struct sge_txq *txq;
1239 	struct txq_state txqs;
1240 	struct port_info *pi;
1241 	unsigned int ndesc, flits, cntrl, mlen;
1242 	int err, nsegs, tso_info = 0;
1243 
1244 	struct work_request_hdr *wrp;
1245 	struct tx_sw_desc *txsd;
1246 	struct sg_ent *sgp, *sgl;
1247 	uint32_t wr_hi, wr_lo, sgl_flits;
1248 	bus_dma_segment_t segs[TX_MAX_SEGS];
1249 
1250 	struct tx_desc *txd;
1251 	struct mbuf_vec *mv;
1252 	struct mbuf_iovec *mi;
1253 
1254 	DPRINTF("t3_encap cpu=%d ", curcpu);
1255 
1256 	mi = NULL;
1257 	pi = qs->port;
1258 	sc = pi->adapter;
1259 	txq = &qs->txq[TXQ_ETH];
1260 	txd = &txq->desc[txq->pidx];
1261 	txsd = &txq->sdesc[txq->pidx];
1262 	sgl = txq->txq_sgl;
1263 	m0 = *m;
1264 
1265 	DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
1266 	DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);
1267 	if (cxgb_debug)
1268 		printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx);
1269 
1270 	mtx_assert(&txq->lock, MA_OWNED);
1271 	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1272 /*
1273  * XXX need to add VLAN support for 6.x
1274  */
1275 #ifdef VLAN_SUPPORTED
1276 	if  (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1277 		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1278 #endif
1279 	KASSERT(txsd->mi.mi_base == NULL,
1280 	    ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1281 	if (count > 1) {
1282 		panic("count > 1 not supported in CVS\n");
1283 		if ((err = busdma_map_sg_vec(m, &m0, segs, count)))
1284 			return (err);
1285 		nsegs = count;
1286 	} else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) {
1287 		if (cxgb_debug)
1288 			printf("failed ... err=%d\n", err);
1289 		return (err);
1290 	}
1291 	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count));
1292 
1293 	if (!(m0->m_pkthdr.len <= PIO_LEN)) {
1294 		mi_collapse_mbuf(&txsd->mi, m0);
1295 		mi = &txsd->mi;
1296 	}
1297 	if (count > 1) {
1298 		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1299 		int i, fidx;
1300 		struct mbuf_iovec *batchmi;
1301 
1302 		mv = mtomv(m0);
1303 		batchmi = mv->mv_vec;
1304 
1305 		wrp = (struct work_request_hdr *)txd;
1306 
1307 		flits = count*2 + 1;
1308 		txq_prod(txq, 1, &txqs);
1309 
1310 		for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) {
1311 			struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i];
1312 
1313 			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1314 			GET_VTAG_MI(cntrl, batchmi);
1315 			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1316 			if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1317 				cntrl |= F_TXPKT_IPCSUM_DIS;
1318 			if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1319 				cntrl |= F_TXPKT_L4CSUM_DIS;
1320 			cbe->cntrl = htonl(cntrl);
1321 			cbe->len = htonl(batchmi->mi_len | 0x80000000);
1322 			cbe->addr = htobe64(segs[i].ds_addr);
1323 			txd->flit[fidx] |= htobe64(1 << 24);
1324 		}
1325 
1326 		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1327 		    V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1328 		wmb();
1329 		wrp->wr_lo = htonl(V_WR_LEN(flits) |
1330 		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1331 		/* XXX gen? */
1332 		wr_gen2(txd, txqs.gen);
1333 		check_ring_tx_db(sc, txq);
1334 
1335 		return (0);
1336 	} else if (tso_info) {
1337 		int undersized, eth_type;
1338 		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1339 		struct ip *ip;
1340 		struct tcphdr *tcp;
1341 		char *pkthdr, tmp[TCPPKTHDRSIZE];
1342 		struct mbuf_vec *mv;
1343 		struct mbuf_iovec *tmpmi;
1344 
1345 		mv = mtomv(m0);
1346 		tmpmi = mv->mv_vec;
1347 
1348 		txd->flit[2] = 0;
1349 		GET_VTAG(cntrl, m0);
1350 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1351 		hdr->cntrl = htonl(cntrl);
1352 		mlen = m0->m_pkthdr.len;
1353 		hdr->len = htonl(mlen | 0x80000000);
1354 
1355 		DPRINTF("tso buf len=%d\n", mlen);
1356 		undersized = (((tmpmi->mi_len < TCPPKTHDRSIZE) &&
1357 			(m0->m_flags & M_VLANTAG)) ||
1358 		    (tmpmi->mi_len < TCPPKTHDRSIZE - ETHER_VLAN_ENCAP_LEN));
1359 
1360 		if (__predict_false(undersized)) {
1361 			pkthdr = tmp;
1362 			if (mi)
1363 				dump_mi(mi);
1364 			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1365 			    m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
1366 			panic("discontig packet - fixxorz");
1367 		} else
1368 			pkthdr = m0->m_data;
1369 
1370 		if (__predict_false(m0->m_flags & M_VLANTAG)) {
1371 			eth_type = CPL_ETH_II_VLAN;
1372 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1373 			    ETHER_VLAN_ENCAP_LEN);
1374 		} else {
1375 			eth_type = CPL_ETH_II;
1376 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1377 		}
1378 		tcp = (struct tcphdr *)((uint8_t *)ip +
1379 		    sizeof(*ip));
1380 
1381 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
1382 			    V_LSO_IPHDR_WORDS(ip->ip_hl) |
1383 			    V_LSO_TCPHDR_WORDS(tcp->th_off);
1384 		hdr->lso_info = htonl(tso_info);
1385 
1386 		if (__predict_false(mlen <= PIO_LEN)) {
1387 			/* pkt not undersized but fits in PIO_LEN */
1388 			printf("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1389 			    m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
1390 			txq_prod(txq, 1, &txqs);
1391 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1392 			m_freem(m0);
1393 			m0 = NULL;
1394 			flits = (mlen + 7) / 8 + 3;
1395 			hdr->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1396 					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1397 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1398 			wmb();
1399 			hdr->wr.wr_lo = htonl(V_WR_LEN(flits) |
1400 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1401 
1402 			wr_gen2(txd, txqs.gen);
1403 			check_ring_tx_db(sc, txq);
1404 			return (0);
1405 		}
1406 		flits = 3;
1407 	} else {
1408 		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1409 
1410 		GET_VTAG(cntrl, m0);
1411 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1412 		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1413 			cntrl |= F_TXPKT_IPCSUM_DIS;
1414 		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1415 			cntrl |= F_TXPKT_L4CSUM_DIS;
1416 		cpl->cntrl = htonl(cntrl);
1417 		mlen = m0->m_pkthdr.len;
1418 		cpl->len = htonl(mlen | 0x80000000);
1419 
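		/*
		 * Packets of PIO_LEN bytes or less are copied directly into
		 * the descriptor instead of being DMA-mapped, and the mbuf
		 * can be freed immediately.
		 */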
1420 		if (mlen <= PIO_LEN) {
1421 			txq_prod(txq, 1, &txqs);
1422 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1423 			m_freem(m0);
1424 			m0 = NULL;
1425 			flits = (mlen + 7) / 8 + 2;
1426 			cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1427 					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1428 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1429 			wmb();
1430 			cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1431 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1432 
1433 			wr_gen2(txd, txqs.gen);
1434 			check_ring_tx_db(sc, txq);
1435 			DPRINTF("pio buf\n");
1436 			return (0);
1437 		}
1438 		DPRINTF("regular buf\n");
1439 		flits = 2;
1440 	}
1441 	wrp = (struct work_request_hdr *)txd;
1442 
1443 #ifdef	nomore
1444 	/*
1445 	 * XXX need to move into one of the helper routines above
1446 	 *
1447 	 */
1448 	if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0)
1449 		return (err);
1450 	m0 = *m;
1451 #endif
1452 	ndesc = calc_tx_descs(m0, nsegs);
1453 
1454 	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1455 	make_sgl(sgp, segs, nsegs);
1456 
1457 	sgl_flits = sgl_len(nsegs);
1458 
1459 	DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1460 	txq_prod(txq, ndesc, &txqs);
1461 	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1462 	wr_lo = htonl(V_WR_TID(txq->token));
1463 	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1464 	check_ring_tx_db(pi->adapter, txq);
1465 
1466 	if ((m0->m_type == MT_DATA) &&
1467 	    ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) &&
1468 	    (m0->m_ext.ext_type != EXT_PACKET)) {
1469 		m0->m_flags &= ~M_EXT ;
1470 		cxgb_mbufs_outstanding--;
1471 		m_free(m0);
1472 	}
1473 
1474 	return (0);
1475 }
1476 
1477 
1478 /**
1479  *	write_imm - write a packet into a Tx descriptor as immediate data
1480  *	@d: the Tx descriptor to write
1481  *	@m: the packet
1482  *	@len: the length of packet data to write as immediate data
1483  *	@gen: the generation bit value to write
1484  *
1485  *	Writes a packet as immediate data into a Tx descriptor.  The packet
1486  *	contains a work request at its beginning.  We must write the packet
1487  *	carefully so the SGE doesn't read accidentally before it's written in
1488  *	its entirety.
1489  */
1490 static __inline void
1491 write_imm(struct tx_desc *d, struct mbuf *m,
1492 	  unsigned int len, unsigned int gen)
1493 {
1494 	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1495 	struct work_request_hdr *to = (struct work_request_hdr *)d;
1496 
1497 	if (len > WR_LEN)
1498 		panic("len too big %d\n", len);
1499 	if (len < sizeof(*from))
1500 		panic("len too small %d", len);
1501 
1502 	memcpy(&to[1], &from[1], len - sizeof(*from));
1503 	to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1504 					V_WR_BCNTLFLT(len & 7));
1505 	wmb();
1506 	to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1507 					V_WR_LEN((len + 7) / 8));
1508 	wr_gen2(d, gen);
1509 
1510 	/*
1511 	 * This check is a hack; we should really fix the logic so
1512 	 * that this can't happen.
1513 	 */
1514 	if (m->m_type != MT_DONTFREE)
1515 		m_freem(m);
1516 
1517 }
1518 
1519 /**
1520  *	check_desc_avail - check descriptor availability on a send queue
1521  *	@adap: the adapter
1522  *	@q: the TX queue
1523  *	@m: the packet needing the descriptors
1524  *	@ndesc: the number of Tx descriptors needed
1525  *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1526  *
1527  *	Checks if the requested number of Tx descriptors is available on an
1528  *	SGE send queue.  If the queue is already suspended or not enough
1529  *	descriptors are available the packet is queued for later transmission.
1530  *	Must be called with the Tx queue locked.
1531  *
1532  *	Returns 0 if enough descriptors are available, 1 if there aren't
1533  *	enough descriptors and the packet has been queued, and 2 if the caller
1534  *	needs to retry because there weren't enough descriptors at the
1535  *	beginning of the call but some freed up in the mean time.
1536  */
1537 static __inline int
1538 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1539 		 struct mbuf *m, unsigned int ndesc,
1540 		 unsigned int qid)
1541 {
1542 	/*
1543 	 * XXX We currently only use this for checking the control queue;
1544 	 * the control queue is only used for binding qsets, which happens
1545 	 * at init time, so we are guaranteed enough descriptors.
1546 	 */
1547 	if (__predict_false(!mbufq_empty(&q->sendq))) {
1548 addq_exit:	mbufq_tail(&q->sendq, m);
1549 		return 1;
1550 	}
1551 	if (__predict_false(q->size - q->in_use < ndesc)) {
1552 
1553 		struct sge_qset *qs = txq_to_qset(q, qid);
1554 
1555 		printf("stopping q\n");
1556 
1557 		setbit(&qs->txq_stopped, qid);
1558 		smp_mb();
1559 
1560 		if (should_restart_tx(q) &&
1561 		    test_and_clear_bit(qid, &qs->txq_stopped))
1562 			return 2;
1563 
1564 		q->stops++;
1565 		goto addq_exit;
1566 	}
1567 	return 0;
1568 }
1569 
1570 
1571 /**
1572  *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1573  *	@q: the SGE control Tx queue
1574  *
1575  *	This is a variant of reclaim_completed_tx() that is used for Tx queues
1576  *	that send only immediate data (presently just the control queues) and
1577  *	thus do not have any mbufs
1578  */
1579 static __inline void
1580 reclaim_completed_tx_imm(struct sge_txq *q)
1581 {
1582 	unsigned int reclaim = q->processed - q->cleaned;
1583 
1584 	mtx_assert(&q->lock, MA_OWNED);
1585 
1586 	q->in_use -= reclaim;
1587 	q->cleaned += reclaim;
1588 }
1589 
1590 static __inline int
1591 immediate(const struct mbuf *m)
1592 {
1593 	return (m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN);
1594 }
1595 
1596 /**
1597  *	ctrl_xmit - send a packet through an SGE control Tx queue
1598  *	@adap: the adapter
1599  *	@q: the control queue
1600  *	@m: the packet
1601  *
1602  *	Send a packet through an SGE control Tx queue.  Packets sent through
1603  *	a control queue must fit entirely as immediate data in a single Tx
1604  *	descriptor and have no page fragments.
1605  */
1606 static int
1607 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1608 {
1609 	int ret;
1610 	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1611 
1612 	if (__predict_false(!immediate(m))) {
1613 		m_freem(m);
1614 		return 0;
1615 	}
1616 
1617 	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1618 	wrp->wr_lo = htonl(V_WR_TID(q->token));
1619 
1620 	mtx_lock(&q->lock);
1621 again:	reclaim_completed_tx_imm(q);
1622 
1623 	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1624 	if (__predict_false(ret)) {
1625 		if (ret == 1) {
1626 			mtx_unlock(&q->lock);
1627 			log(LOG_ERR, "no desc available\n");
1628 			return (ENOSPC);
1629 		}
1630 		goto again;
1631 	}
1632 	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1633 
1634 	q->in_use++;
1635 	if (++q->pidx >= q->size) {
1636 		q->pidx = 0;
1637 		q->gen ^= 1;
1638 	}
1639 	mtx_unlock(&q->lock);
1640 	wmb();
1641 	t3_write_reg(adap, A_SG_KDOORBELL,
1642 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1643 	return (0);
1644 }
1645 
1646 
1647 /**
1648  *	restart_ctrlq - restart a suspended control queue
1649  *	@qs: the queue set containing the control queue
1650  *
1651  *	Resumes transmission on a suspended Tx control queue.
1652  */
1653 static void
1654 restart_ctrlq(void *data, int npending)
1655 {
1656 	struct mbuf *m;
1657 	struct sge_qset *qs = (struct sge_qset *)data;
1658 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1659 	adapter_t *adap = qs->port->adapter;
1660 
1661 	log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use);
1662 
1663 	mtx_lock(&q->lock);
1664 again:	reclaim_completed_tx_imm(q);
1665 
1666 	while (q->in_use < q->size &&
1667 	       (m = mbufq_dequeue(&q->sendq)) != NULL) {
1668 
1669 		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1670 
1671 		if (++q->pidx >= q->size) {
1672 			q->pidx = 0;
1673 			q->gen ^= 1;
1674 		}
1675 		q->in_use++;
1676 	}
1677 	if (!mbufq_empty(&q->sendq)) {
1678 		setbit(&qs->txq_stopped, TXQ_CTRL);
1679 		smp_mb();
1680 
1681 		if (should_restart_tx(q) &&
1682 		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1683 			goto again;
1684 		q->stops++;
1685 	}
1686 	mtx_unlock(&q->lock);
1687 	wmb();
1688 	t3_write_reg(adap, A_SG_KDOORBELL,
1689 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1690 }
1691 
1692 
1693 /*
1694  * Send a management message through control queue 0
1695  */
1696 int
1697 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1698 {
1699 	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1700 }
1701 
1702 
1703 /**
1704  *	free_qset - free the resources of an SGE queue set
1705  *	@sc: the controller owning the queue set
1706  *	@q: the queue set
1707  *
1708  *	Release the HW and SW resources associated with an SGE queue set, such
1709  *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
1710  *	queue set must be quiesced prior to calling this.
1711  */
1712 void
1713 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1714 {
1715 	int i;
1716 
1717 	t3_free_tx_desc_all(&q->txq[TXQ_ETH]);
1718 
1719 	for (i = 0; i < SGE_TXQ_PER_SET; i++)
1720 		if (q->txq[i].txq_mr.br_ring != NULL) {
1721 			free(q->txq[i].txq_mr.br_ring, M_DEVBUF);
1722 			mtx_destroy(&q->txq[i].txq_mr.br_lock);
1723 		}
1724 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1725 		if (q->fl[i].desc) {
1726 			mtx_lock_spin(&sc->sge.reg_lock);
1727 			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1728 			mtx_unlock_spin(&sc->sge.reg_lock);
1729 			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1730 			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1731 					q->fl[i].desc_map);
1732 			bus_dma_tag_destroy(q->fl[i].desc_tag);
1733 			bus_dma_tag_destroy(q->fl[i].entry_tag);
1734 		}
1735 		if (q->fl[i].sdesc) {
1736 			free_rx_bufs(sc, &q->fl[i]);
1737 			free(q->fl[i].sdesc, M_DEVBUF);
1738 		}
1739 	}
1740 
1741 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1742 		if (q->txq[i].desc) {
1743 			mtx_lock_spin(&sc->sge.reg_lock);
1744 			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1745 			mtx_unlock_spin(&sc->sge.reg_lock);
1746 			bus_dmamap_unload(q->txq[i].desc_tag,
1747 					q->txq[i].desc_map);
1748 			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1749 					q->txq[i].desc_map);
1750 			bus_dma_tag_destroy(q->txq[i].desc_tag);
1751 			bus_dma_tag_destroy(q->txq[i].entry_tag);
1752 			MTX_DESTROY(&q->txq[i].lock);
1753 		}
1754 		if (q->txq[i].sdesc) {
1755 			free(q->txq[i].sdesc, M_DEVBUF);
1756 		}
1757 	}
1758 
1759 	if (q->rspq.desc) {
1760 		mtx_lock_spin(&sc->sge.reg_lock);
1761 		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1762 		mtx_unlock_spin(&sc->sge.reg_lock);
1763 
1764 		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1765 		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1766 			        q->rspq.desc_map);
1767 		bus_dma_tag_destroy(q->rspq.desc_tag);
1768 		MTX_DESTROY(&q->rspq.lock);
1769 	}
1770 
1771 	tcp_lro_free(&q->lro.ctrl);
1772 
1773 	bzero(q, sizeof(*q));
1774 }
1775 
1776 /**
1777  *	t3_free_sge_resources - free SGE resources
1778  *	@sc: the adapter softc
1779  *
1780  *	Frees resources used by the SGE queue sets.
1781  */
1782 void
1783 t3_free_sge_resources(adapter_t *sc)
1784 {
1785 	int i, nqsets;
1786 
1787 #ifdef IFNET_MULTIQUEUE
1788 	panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__);
1789 #endif
1790 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1791 		nqsets += sc->port[i].nqsets;
1792 
1793 	for (i = 0; i < nqsets; ++i)
1794 		t3_free_qset(sc, &sc->sge.qs[i]);
1795 }
1796 
1797 /**
1798  *	t3_sge_start - enable SGE
1799  *	@sc: the controller softc
1800  *
1801  *	Enables the SGE for DMAs.  This is the last step in starting packet
1802  *	transfers.
1803  */
1804 void
1805 t3_sge_start(adapter_t *sc)
1806 {
1807 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1808 }
1809 
1810 /**
1811  *	t3_sge_stop - disable SGE operation
1812  *	@sc: the adapter
1813  *
1814  *	Disables the DMA engine.  This can be called in emergencies (e.g.,
1815  *	from error interrupts) or from normal process context.  In the latter
1816  *	case it also disables any pending queue restart tasklets.  Note that
1817  *	if it is called in interrupt context it cannot disable the restart
1818  *	tasklets as it cannot wait; however, the tasklets will have no effect
1819  *	since the doorbells are disabled and the driver will call this again
1820  *	later from process context, at which time the tasklets will be stopped
1821  *	if they are still running.
1822  */
1823 void
1824 t3_sge_stop(adapter_t *sc)
1825 {
1826 	int i, nqsets;
1827 
1828 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1829 
1830 	if (sc->tq == NULL)
1831 		return;
1832 
1833 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1834 		nqsets += sc->port[i].nqsets;
1835 #ifdef notyet
1836 	/*
1837 	 *
1838 	 * XXX
1839 	 */
1840 	for (i = 0; i < nqsets; ++i) {
1841 		struct sge_qset *qs = &sc->sge.qs[i];
1842 
1843 		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
1844 		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
1845 	}
1846 #endif
1847 }
1848 
1849 /**
1850  *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
1851  *	@q: the Tx queue to reclaim descriptors from
1852  *	@reclaimable: the number of descriptors to reclaim
1853  *
1854  *	Reclaims Tx descriptors from an SGE Tx queue and frees the
1855  *	associated Tx buffers.  Called with the Tx queue lock held.
1861  */
1862 void
1863 t3_free_tx_desc(struct sge_txq *q, int reclaimable)
1864 {
1865 	struct tx_sw_desc *txsd;
1866 	unsigned int cidx;
1867 
1868 	cidx = q->cidx;
1869 	txsd = &q->sdesc[cidx];
1870 #ifdef T3_TRACE
1871 	T3_TRACE2(sc->tb[q->cntxt_id & 7],
1872 		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
1873 #endif
1874 	DPRINTF("reclaiming %d WR\n", reclaimable);
1875 	mtx_assert(&q->lock, MA_OWNED);
1876 	while (reclaimable--) {
1877 		DPRINTF("cidx=%d d=%p\n", cidx, txsd);
1878 		if (txsd->mi.mi_base != NULL) {
1879 			if (txsd->flags & TX_SW_DESC_MAPPED) {
1880 				bus_dmamap_unload(q->entry_tag, txsd->map);
1881 				txsd->flags &= ~TX_SW_DESC_MAPPED;
1882 			}
1883 			m_freem_iovec(&txsd->mi);
1884 			buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
1885 			txsd->mi.mi_base = NULL;
1886 
1887 #if defined(DIAGNOSTIC) && 0
1888 			if (m_get_priority(txsd->m[0]) != cidx)
1889 				printf("pri=%d cidx=%d\n",
1890 				    (int)m_get_priority(txsd->m[0]), cidx);
1891 #endif
1892 
1893 		} else
1894 			q->txq_skipped++;
1895 
1896 		++txsd;
1897 		if (++cidx == q->size) {
1898 			cidx = 0;
1899 			txsd = q->sdesc;
1900 		}
1901 	}
1902 	q->cidx = cidx;
1903 
1904 }
1905 
1906 void
1907 t3_free_tx_desc_all(struct sge_txq *q)
1908 {
1909 	int i;
1910 	struct tx_sw_desc *txsd;
1911 
1912 	for (i = 0; i < q->size; i++) {
1913 		txsd = &q->sdesc[i];
1914 		if (txsd->mi.mi_base != NULL) {
1915 			if (txsd->flags & TX_SW_DESC_MAPPED) {
1916 				bus_dmamap_unload(q->entry_tag, txsd->map);
1917 				txsd->flags &= ~TX_SW_DESC_MAPPED;
1918 			}
1919 			m_freem_iovec(&txsd->mi);
1920 			bzero(&txsd->mi, sizeof(txsd->mi));
1921 		}
1922 	}
1923 }
1924 
1925 /**
1926  *	is_new_response - check if a response is newly written
1927  *	@r: the response descriptor
1928  *	@q: the response queue
1929  *
1930  *	Returns true if a response descriptor contains a yet unprocessed
1931  *	response.
1932  */
1933 static __inline int
1934 is_new_response(const struct rsp_desc *r,
1935     const struct sge_rspq *q)
1936 {
1937 	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1938 }
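
/*
 * Added annotation: the generation-bit check above works because the driver
 * flips q->gen every time the response queue consumer index wraps (see
 * "rspq->gen ^= 1" in process_responses() below), while the hardware writes
 * the current generation into each descriptor it produces.  A descriptor
 * whose F_RSPD_GEN2 bit matches q->gen was therefore written during the
 * current pass over the ring; one carrying the previous generation is stale
 * and is ignored.
 */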
1939 
1940 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1941 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1942 			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1943 			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1944 			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1945 
1946 /* Next-interrupt delay used on memory shortage, in units of 0.1 us (2500 = 250 us). */
1947 #define NOMEM_INTR_DELAY 2500
1948 
1949 /**
1950  *	write_ofld_wr - write an offload work request
1951  *	@adap: the adapter
1952  *	@m: the packet to send
1953  *	@q: the Tx queue
1954  *	@pidx: index of the first Tx descriptor to write
1955  *	@gen: the generation value to use
1956  *	@ndesc: number of descriptors the packet will occupy
1957  *
1958  *	Write an offload work request to send the supplied packet.  The packet
1959  *	data already carry the work request with most fields populated.
1960  */
1961 static void
1962 write_ofld_wr(adapter_t *adap, struct mbuf *m,
1963     struct sge_txq *q, unsigned int pidx,
1964     unsigned int gen, unsigned int ndesc,
1965     bus_dma_segment_t *segs, unsigned int nsegs)
1966 {
1967 	unsigned int sgl_flits, flits;
1968 	struct work_request_hdr *from;
1969 	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1970 	struct tx_desc *d = &q->desc[pidx];
1971 	struct txq_state txqs;
1972 
1973 	if (immediate(m) && nsegs == 0) {
1974 		write_imm(d, m, m->m_len, gen);
1975 		return;
1976 	}
1977 
1978 	/* Only TX_DATA builds SGLs */
1979 	from = mtod(m, struct work_request_hdr *);
1980 	memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
1981 
1982 	flits = m->m_len / 8;
1983 	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1984 
1985 	make_sgl(sgp, segs, nsegs);
1986 	sgl_flits = sgl_len(nsegs);
1987 
1988 	txqs.gen = gen;
1989 	txqs.pidx = pidx;
1990 	txqs.compl = 0;
1991 
1992 	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1993 	    from->wr_hi, from->wr_lo);
1994 }
1995 
1996 /**
1997  *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1998  *	@m: the packet
1999  *
2000  * 	Returns the number of Tx descriptors needed for the given offload
2001  * 	packet.  These packets are already fully constructed.
2002  */
2003 static __inline unsigned int
2004 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
2005 {
2006 	unsigned int flits, cnt = 0;
2007 	int ndescs;
2008 
2009 	if (m->m_len <= WR_LEN && nsegs == 0)
2010 		return (1);                 /* packet fits as immediate data */
2011 
2012 	if (m->m_flags & M_IOVEC)
2013 		cnt = mtomv(m)->mv_count;
2014 	else
2015 		cnt = nsegs;
2016 
2017 	/* headers */
2018 	flits = m->m_len / 8;
2019 
2020 	ndescs = flits_to_desc(flits + sgl_len(cnt));
2021 
2022 	CTR4(KTR_CXGB, "flits=%d sgl_len=%d nsegs=%d ndescs=%d",
2023 	    flits, sgl_len(cnt), nsegs, ndescs);
2024 
2025 	return (ndescs);
2026 }
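
/*
 * Added annotation with a hypothetical worked example: a header-only offload
 * mbuf with m_len <= WR_LEN and nsegs == 0 fits as immediate data and needs
 * exactly one descriptor.  Otherwise, assuming the mbuf is not M_IOVEC (so
 * cnt == nsegs), a packet with, say, a 64-byte work-request header and 4 DMA
 * segments needs flits_to_desc(64 / 8 + sgl_len(4)) descriptors: the header
 * flits plus the flits consumed by the scatter/gather list.
 */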
2027 
2028 /**
2029  *	ofld_xmit - send a packet through an offload queue
2030  *	@adap: the adapter
2031  *	@q: the Tx offload queue
2032  *	@m: the packet
2033  *
2034  *	Send an offload packet through an SGE offload queue.
2035  */
2036 static int
2037 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
2038 {
2039 	int ret, nsegs;
2040 	unsigned int ndesc;
2041 	unsigned int pidx, gen;
2042 	bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
2043 	struct tx_sw_desc *stx;
2044 
2045 	nsegs = m_get_sgllen(m);
2046 	vsegs = m_get_sgl(m);
2047 	ndesc = calc_tx_descs_ofld(m, nsegs);
2048 	busdma_map_sgl(vsegs, segs, nsegs);
2049 
2050 	stx = &q->sdesc[q->pidx];
2051 	KASSERT(stx->mi.mi_base == NULL, ("mi_base set"));
2052 
2053 	mtx_lock(&q->lock);
2054 again:	reclaim_completed_tx_(q, 16);
2055 	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2056 	if (__predict_false(ret)) {
2057 		if (ret == 1) {
2058 			printf("no ofld desc avail\n");
2059 
2060 			m_set_priority(m, ndesc);     /* save for restart */
2061 			mtx_unlock(&q->lock);
2062 			return (EINTR);
2063 		}
2064 		goto again;
2065 	}
2066 
2067 	gen = q->gen;
2068 	q->in_use += ndesc;
2069 	pidx = q->pidx;
2070 	q->pidx += ndesc;
2071 	if (q->pidx >= q->size) {
2072 		q->pidx -= q->size;
2073 		q->gen ^= 1;
2074 	}
2075 #ifdef T3_TRACE
2076 	T3_TRACE5(adap->tb[q->cntxt_id & 7],
2077 		  "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
2078 		  ndesc, pidx, skb->len, skb->len - skb->data_len,
2079 		  skb_shinfo(skb)->nr_frags);
2080 #endif
2081 	mtx_unlock(&q->lock);
2082 
2083 	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2084 	check_ring_tx_db(adap, q);
2085 	return (0);
2086 }
2087 
2088 /**
2089  *	restart_offloadq - restart a suspended offload queue
2090  *	@qs: the queue set containing the offload queue
2091  *
2092  *	Resumes transmission on a suspended Tx offload queue.
2093  */
2094 static void
2095 restart_offloadq(void *data, int npending)
2096 {
2097 	struct mbuf *m;
2098 	struct sge_qset *qs = data;
2099 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
2100 	adapter_t *adap = qs->port->adapter;
2101 	bus_dma_segment_t segs[TX_MAX_SEGS];
2102 	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2103 	int nsegs, cleaned;
2104 
2105 	mtx_lock(&q->lock);
2106 again:	cleaned = reclaim_completed_tx_(q, 16);
2107 
2108 	while ((m = mbufq_peek(&q->sendq)) != NULL) {
2109 		unsigned int gen, pidx;
2110 		unsigned int ndesc = m_get_priority(m);
2111 
2112 		if (__predict_false(q->size - q->in_use < ndesc)) {
2113 			setbit(&qs->txq_stopped, TXQ_OFLD);
2114 			smp_mb();
2115 
2116 			if (should_restart_tx(q) &&
2117 			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2118 				goto again;
2119 			q->stops++;
2120 			break;
2121 		}
2122 
2123 		gen = q->gen;
2124 		q->in_use += ndesc;
2125 		pidx = q->pidx;
2126 		q->pidx += ndesc;
2127 		if (q->pidx >= q->size) {
2128 			q->pidx -= q->size;
2129 			q->gen ^= 1;
2130 		}
2131 
2132 		(void)mbufq_dequeue(&q->sendq);
2133 		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2134 		mtx_unlock(&q->lock);
2135 		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2136 		mtx_lock(&q->lock);
2137 	}
2138 	mtx_unlock(&q->lock);
2139 
2140 #if USE_GTS
2141 	set_bit(TXQ_RUNNING, &q->flags);
2142 	set_bit(TXQ_LAST_PKT_DB, &q->flags);
2143 #endif
2144 	wmb();
2145 	t3_write_reg(adap, A_SG_KDOORBELL,
2146 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2147 }
2148 
2149 /**
2150  *	queue_set - return the queue set a packet should use
2151  *	@m: the packet
2152  *
2153  *	Maps a packet to the SGE queue set it should use.  The desired queue
2154  *	set is carried in bits 1-3 in the packet's priority.
2155  */
2156 static __inline int
2157 queue_set(const struct mbuf *m)
2158 {
2159 	return m_get_priority(m) >> 1;
2160 }
2161 
2162 /**
2163  *	is_ctrl_pkt - return whether an offload packet is a control packet
2164  *	@m: the packet
2165  *
2166  *	Determines whether an offload packet should use an OFLD or a CTRL
2167  *	Tx queue.  This is indicated by bit 0 in the packet's priority.
2168  */
2169 static __inline int
2170 is_ctrl_pkt(const struct mbuf *m)
2171 {
2172 	return m_get_priority(m) & 1;
2173 }
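
/*
 * Added annotation: queue_set() and is_ctrl_pkt() together decode a priority
 * laid out as (queue set index << 1) | control bit.  A hypothetical sketch of
 * the encoder an offload sender would use (names illustrative only):
 *
 *	m_set_priority(m, (qset_idx << 1) | (is_ctrl ? 1 : 0));
 *
 * t3_offload_tx() below then routes the mbuf to the CTRL or OFLD Tx queue of
 * the selected queue set.
 */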
2174 
2175 /**
2176  *	t3_offload_tx - send an offload packet
2177  *	@tdev: the offload device to send to
2178  *	@m: the packet
2179  *
2180  *	Sends an offload packet.  We use the packet priority to select the
2181  *	appropriate Tx queue as follows: bit 0 indicates whether the packet
2182  *	should be sent as regular or control, bits 1-3 select the queue set.
2183  */
2184 int
2185 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2186 {
2187 	adapter_t *adap = tdev2adap(tdev);
2188 	struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2189 
2190 	if (__predict_false(is_ctrl_pkt(m)))
2191 		return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
2192 
2193 	return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
2194 }
2195 
2196 /**
2197  *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2198  *	@tdev: the offload device that will be receiving the packets
2199  *	@q: the SGE response queue that assembled the bundle
2200  *	@m: the partial bundle
2201  *	@n: the number of packets in the bundle
2202  *
2203  *	Delivers a (partial) bundle of Rx offload packets to an offload device.
2204  */
2205 static __inline void
2206 deliver_partial_bundle(struct t3cdev *tdev,
2207 			struct sge_rspq *q,
2208 			struct mbuf *mbufs[], int n)
2209 {
2210 	if (n) {
2211 		q->offload_bundles++;
2212 		cxgb_ofld_recv(tdev, mbufs, n);
2213 	}
2214 }
2215 
2216 static __inline int
2217 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2218     struct mbuf *m, struct mbuf *rx_gather[],
2219     unsigned int gather_idx)
2220 {
2221 
2222 	rq->offload_pkts++;
2223 	m->m_pkthdr.header = mtod(m, void *);
2224 	rx_gather[gather_idx++] = m;
2225 	if (gather_idx == RX_BUNDLE_SIZE) {
2226 		cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2227 		gather_idx = 0;
2228 		rq->offload_bundles++;
2229 	}
2230 	return (gather_idx);
2231 }
2232 
2233 static void
2234 restart_tx(struct sge_qset *qs)
2235 {
2236 	struct adapter *sc = qs->port->adapter;
2237 
2238 
2239 	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2240 	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2241 	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2242 		qs->txq[TXQ_OFLD].restarts++;
2243 		DPRINTF("restarting TXQ_OFLD\n");
2244 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2245 	}
2246 	DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2247 	    qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2248 	    qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2249 	    qs->txq[TXQ_CTRL].in_use);
2250 
2251 	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2252 	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2253 	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2254 		qs->txq[TXQ_CTRL].restarts++;
2255 		DPRINTF("restarting TXQ_CTRL\n");
2256 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2257 	}
2258 }
2259 
2260 /**
2261  *	t3_sge_alloc_qset - initialize an SGE queue set
2262  *	@sc: the controller softc
2263  *	@id: the queue set id
2264  *	@nports: how many Ethernet ports will be using this queue set
2265  *	@irq_vec_idx: the IRQ vector index for response queue interrupts
2266  *	@p: configuration parameters for this queue set
2267  *	@ntxq: number of Tx queues for the queue set
2268  *	@pi: port info for queue set
2269  *
2270  *	Allocate resources and initialize an SGE queue set.  A queue set
2271  *	comprises a response queue, two Rx free-buffer queues, and up to 3
2272  *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
2273  *	queue, offload queue, and control queue.
2274  */
2275 int
2276 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2277 		  const struct qset_params *p, int ntxq, struct port_info *pi)
2278 {
2279 	struct sge_qset *q = &sc->sge.qs[id];
2280 	int i, header_size, ret = 0;
2281 
2282 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2283 		if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *),
2284 			    M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
2285 			device_printf(sc->dev, "failed to allocate mbuf ring\n");
2286 			goto err;
2287 		}
2288 		q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0;
2289 		q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size;
2290 		mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF);
2291 	}
2292 
2293 	init_qset_cntxt(q, id);
2294 	q->idx = id;
2295 
2296 	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2297 		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2298 		    &q->fl[0].desc, &q->fl[0].sdesc,
2299 		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
2300 		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2301 		printf("error %d from alloc ring fl0\n", ret);
2302 		goto err;
2303 	}
2304 
2305 	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2306 		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2307 		    &q->fl[1].desc, &q->fl[1].sdesc,
2308 		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
2309 		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2310 		printf("error %d from alloc ring fl1\n", ret);
2311 		goto err;
2312 	}
2313 
2314 	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2315 		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
2316 		    &q->rspq.desc_tag, &q->rspq.desc_map,
2317 		    NULL, NULL)) != 0) {
2318 		printf("error %d from alloc ring rspq\n", ret);
2319 		goto err;
2320 	}
2321 
2322 	for (i = 0; i < ntxq; ++i) {
2323 		/*
2324 		 * The control queue always uses immediate data so does not
2325 		 * need to keep track of any mbufs.
2326 		 * XXX Placeholder for future TOE support.
2327 		 */
2328 		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2329 
2330 		if ((ret = alloc_ring(sc, p->txq_size[i],
2331 			    sizeof(struct tx_desc), sz,
2332 			    &q->txq[i].phys_addr, &q->txq[i].desc,
2333 			    &q->txq[i].sdesc, &q->txq[i].desc_tag,
2334 			    &q->txq[i].desc_map,
2335 			    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2336 			printf("error %d from alloc ring tx %i\n", ret, i);
2337 			goto err;
2338 		}
2339 		mbufq_init(&q->txq[i].sendq);
2340 		q->txq[i].gen = 1;
2341 		q->txq[i].size = p->txq_size[i];
2342 		snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2343 		    device_get_unit(sc->dev), irq_vec_idx, i);
2344 		MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2345 	}
2346 
2347 	q->txq[TXQ_ETH].port = pi;
2348 
2349 	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2350 	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2351 	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
2352 	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
2353 
2354 	q->fl[0].gen = q->fl[1].gen = 1;
2355 	q->fl[0].size = p->fl_size;
2356 	q->fl[1].size = p->jumbo_size;
2357 
2358 	q->rspq.gen = 1;
2359 	q->rspq.cidx = 0;
2360 	q->rspq.size = p->rspq_size;
2361 
2362 
2363 	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
2364 	q->txq[TXQ_ETH].stop_thres = nports *
2365 	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2366 
2367 	q->fl[0].buf_size = (MCLBYTES - header_size);
2368 	q->fl[0].zone = zone_clust;
2369 	q->fl[0].type = EXT_CLUSTER;
2370 #if __FreeBSD_version > 800000
2371 	if (cxgb_use_16k_clusters) {
2372 		q->fl[1].buf_size = MJUM16BYTES - header_size;
2373 		q->fl[1].zone = zone_jumbo16;
2374 		q->fl[1].type = EXT_JUMBO16;
2375 	} else {
2376 		q->fl[1].buf_size = MJUM9BYTES - header_size;
2377 		q->fl[1].zone = zone_jumbo9;
2378 		q->fl[1].type = EXT_JUMBO9;
2379 	}
2380 #else
2381 	q->fl[1].buf_size = MJUMPAGESIZE - header_size;
2382 	q->fl[1].zone = zone_jumbop;
2383 	q->fl[1].type = EXT_JUMBOP;
2384 #endif
2385 
2386 	/*
2387 	 * We allocate and set up the lro_ctrl structure irrespective of whether
2388 	 * LRO is available and/or enabled.
2389 	 */
2390 	q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2391 	ret = tcp_lro_init(&q->lro.ctrl);
2392 	if (ret) {
2393 		printf("error %d from tcp_lro_init\n", ret);
2394 		goto err;
2395 	}
2396 	q->lro.ctrl.ifp = pi->ifp;
2397 
2398 	mtx_lock_spin(&sc->sge.reg_lock);
2399 	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2400 				   q->rspq.phys_addr, q->rspq.size,
2401 				   q->fl[0].buf_size, 1, 0);
2402 	if (ret) {
2403 		printf("error %d from t3_sge_init_rspcntxt\n", ret);
2404 		goto err_unlock;
2405 	}
2406 
2407 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2408 		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2409 					  q->fl[i].phys_addr, q->fl[i].size,
2410 					  q->fl[i].buf_size, p->cong_thres, 1,
2411 					  0);
2412 		if (ret) {
2413 			printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2414 			goto err_unlock;
2415 		}
2416 	}
2417 
2418 	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2419 				 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2420 				 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2421 				 1, 0);
2422 	if (ret) {
2423 		printf("error %d from t3_sge_init_ecntxt\n", ret);
2424 		goto err_unlock;
2425 	}
2426 
2427 	if (ntxq > 1) {
2428 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2429 					 USE_GTS, SGE_CNTXT_OFLD, id,
2430 					 q->txq[TXQ_OFLD].phys_addr,
2431 					 q->txq[TXQ_OFLD].size, 0, 1, 0);
2432 		if (ret) {
2433 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2434 			goto err_unlock;
2435 		}
2436 	}
2437 
2438 	if (ntxq > 2) {
2439 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2440 					 SGE_CNTXT_CTRL, id,
2441 					 q->txq[TXQ_CTRL].phys_addr,
2442 					 q->txq[TXQ_CTRL].size,
2443 					 q->txq[TXQ_CTRL].token, 1, 0);
2444 		if (ret) {
2445 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2446 			goto err_unlock;
2447 		}
2448 	}
2449 
2450 	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2451 	    device_get_unit(sc->dev), irq_vec_idx);
2452 	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2453 
2454 	mtx_unlock_spin(&sc->sge.reg_lock);
2455 	t3_update_qset_coalesce(q, p);
2456 	q->port = pi;
2457 
2458 	refill_fl(sc, &q->fl[0], q->fl[0].size);
2459 	refill_fl(sc, &q->fl[1], q->fl[1].size);
2460 	refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2461 
2462 	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2463 		     V_NEWTIMER(q->rspq.holdoff_tmr));
2464 
2465 	return (0);
2466 
2467 err_unlock:
2468 	mtx_unlock_spin(&sc->sge.reg_lock);
2469 err:
2470 	t3_free_qset(sc, q);
2471 
2472 	return (ret);
2473 }
2474 
2475 /*
2476  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2477  * ethernet data.  Hardware assistance with various checksums and any vlan tag
2478  * will also be taken into account here.
2479  */
2480 void
2481 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2482 {
2483 	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2484 	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2485 	struct ifnet *ifp = pi->ifp;
2486 
2487 	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2488 
2489 	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2490 	    cpl->csum_valid && cpl->csum == 0xffff) {
2491 		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2492 		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED | CSUM_IP_VALID |
2493 		    CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
2494 		m->m_pkthdr.csum_data = 0xffff;
2495 	}
2496 	/*
2497 	 * XXX need to add VLAN support for 6.x
2498 	 */
2499 #ifdef VLAN_SUPPORTED
2500 	if (__predict_false(cpl->vlan_valid)) {
2501 		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2502 		m->m_flags |= M_VLANTAG;
2503 	}
2504 #endif
2505 
2506 	m->m_pkthdr.rcvif = ifp;
2507 	m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2508 #ifndef DISABLE_MBUF_IOVEC
2509 	m_explode(m);
2510 #endif
2511 	/*
2512 	 * adjust after conversion to mbuf chain
2513 	 */
2514 	m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2515 	m->m_len -= (sizeof(*cpl) + ethpad);
2516 	m->m_data += (sizeof(*cpl) + ethpad);
2517 }
2518 
2519 static void
2520 ext_free_handler(void *arg1, void * arg2)
2521 {
2522 	uintptr_t type = (uintptr_t)arg2;
2523 	uma_zone_t zone;
2524 	struct mbuf *m;
2525 
2526 	m = arg1;
2527 	zone = m_getzonefromtype(type);
2528 	m->m_ext.ext_type = (int)type;
2529 	cxgb_ext_freed++;
2530 	cxgb_cache_put(zone, m);
2531 }
2532 
2533 static void
2534 init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone)
2535 {
2536 	struct mbuf *m;
2537 	int header_size;
2538 
2539 	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) +
2540 	    sizeof(struct m_ext_) + sizeof(uint32_t);
2541 
2542 	bzero(cl, header_size);
2543 	m = (struct mbuf *)cl;
2544 
2545 	cxgb_ext_inited++;
2546 	SLIST_INIT(&m->m_pkthdr.tags);
2547 	m->m_type = MT_DATA;
2548 	m->m_flags = flags | M_NOFREE | M_EXT;
2549 	m->m_data = cl + header_size;
2550 	m->m_ext.ext_buf = cl;
2551 	m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t));
2552 	m->m_ext.ext_size = m_getsizefromtype(type);
2553 	m->m_ext.ext_free = ext_free_handler;
2554 	m->m_ext.ext_arg1 = cl;
2555 	m->m_ext.ext_arg2 = (void *)(uintptr_t)type;
2556 	m->m_ext.ext_type = EXT_EXTREF;
2557 	*(m->m_ext.ref_cnt) = 1;
2558 	DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt);
2559 }
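
/*
 * Added annotation: init_cluster_mbuf() builds an "embedded" mbuf at the head
 * of the receive cluster itself.  With header_size computed as above, the
 * cluster layout is roughly:
 *
 *	cl + 0                    mbuf header, packet header and m_ext_
 *	cl + header_size - 4      external reference count (uint32_t)
 *	cl + header_size          start of packet data (m_data)
 *
 * The M_NOFREE | M_EXT flags and the EXT_EXTREF type route the final free
 * through ext_free_handler(), which hands the whole cluster back via
 * cxgb_cache_put().
 */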
2560 
2561 
2562 /**
2563  *	get_packet - return the next ingress packet buffer from a free list
2564  *	@adap: the adapter that received the packet
2565  *	@drop_thres: # of remaining buffers before we start dropping packets
2566  *	@qs: the qset that the SGE free list holding the packet belongs to
2567  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2568  *      @r: response descriptor
2569  *
2570  *	Get the next packet from a free list and complete setup of the
2571  *	mbuf.  If the packet is small we make a copy and recycle the
2572  *	original buffer, otherwise we use the original buffer itself.  If a
2573  *	positive drop threshold is supplied packets are dropped and their
2574  *	buffers recycled if (a) the number of remaining buffers is under the
2575  *	threshold and the packet is too big to copy, or (b) the packet should
2576  *	be copied but there is no memory for the copy.
2577  */
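
/*
 * Added annotation: the copy/recycle decision described above reduces to a
 * simple check in both get_packet() variants, sketched here for clarity:
 *
 *	if (recycle_enable && len <= SGE_RX_COPY_THRES &&
 *	    the descriptor is a complete packet (RSPQ_SOP_EOP))
 *		copy the data into a fresh mbuf and recycle the cluster;
 *	else
 *		unload the DMA map and hand the cluster itself up the stack.
 */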
2578 #ifdef DISABLE_MBUF_IOVEC
2579 
2580 static int
2581 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2582     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2583 {
2584 
2585 	unsigned int len_cq =  ntohl(r->len_cq);
2586 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2587 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2588 	uint32_t len = G_RSPD_LEN(len_cq);
2589 	uint32_t flags = ntohl(r->flags);
2590 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2591 	caddr_t cl;
2592 	struct mbuf *m, *m0;
2593 	int ret = 0;
2594 
2595 	prefetch(sd->rxsd_cl);
2596 
2597 	fl->credits--;
2598 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2599 
2600 	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2601 		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2602 			goto skip_recycle;
2603 		cl = mtod(m0, void *);
2604 		memcpy(cl, sd->data, len);
2605 		recycle_rx_buf(adap, fl, fl->cidx);
2606 		m = m0;
2607 		m0->m_len = len;
2608 	} else {
2609 	skip_recycle:
2610 
2611 		bus_dmamap_unload(fl->entry_tag, sd->map);
2612 		cl = sd->rxsd_cl;
2613 		m = m0 = (struct mbuf *)cl;
2614 
2615 		if ((sopeop == RSPQ_SOP_EOP) ||
2616 		    (sopeop == RSPQ_SOP))
2617 			flags = M_PKTHDR;
2618 		init_cluster_mbuf(cl, flags, fl->type, fl->zone);
2619 		m0->m_len = len;
2620 	}
2621 	switch(sopeop) {
2622 	case RSPQ_SOP_EOP:
2623 		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2624 		mh->mh_head = mh->mh_tail = m;
2625 		m->m_pkthdr.len = len;
2626 		ret = 1;
2627 		break;
2628 	case RSPQ_NSOP_NEOP:
2629 		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2630 		if (mh->mh_tail == NULL) {
2631 			log(LOG_ERR, "discarding intermediate descriptor entry\n");
2632 			m_freem(m);
2633 			break;
2634 		}
2635 		mh->mh_tail->m_next = m;
2636 		mh->mh_tail = m;
2637 		mh->mh_head->m_pkthdr.len += len;
2638 		ret = 0;
2639 		break;
2640 	case RSPQ_SOP:
2641 		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2642 		m->m_pkthdr.len = len;
2643 		mh->mh_head = mh->mh_tail = m;
2644 		ret = 0;
2645 		break;
2646 	case RSPQ_EOP:
2647 		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2648 		mh->mh_head->m_pkthdr.len += len;
2649 		mh->mh_tail->m_next = m;
2650 		mh->mh_tail = m;
2651 		ret = 1;
2652 		break;
2653 	}
2654 	if (++fl->cidx == fl->size)
2655 		fl->cidx = 0;
2656 
2657 	return (ret);
2658 }
2659 
2660 #else
2661 
2662 static int
2663 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2664     struct mbuf **m, struct rsp_desc *r)
2665 {
2666 
2667 	unsigned int len_cq =  ntohl(r->len_cq);
2668 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2669 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2670 	uint32_t len = G_RSPD_LEN(len_cq);
2671 	uint32_t flags = ntohl(r->flags);
2672 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2673 	void *cl;
2674 	int ret = 0;
2675 	struct mbuf *m0;
2676 #if 0
2677 	if ((sd + 1 )->rxsd_cl)
2678 		prefetch((sd + 1)->rxsd_cl);
2679 	if ((sd + 2)->rxsd_cl)
2680 		prefetch((sd + 2)->rxsd_cl);
2681 #endif
2682 	DPRINTF("rx cpu=%d\n", curcpu);
2683 	fl->credits--;
2684 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2685 
2686 	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2687 		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2688 			goto skip_recycle;
2689 		cl = mtod(m0, void *);
2690 		memcpy(cl, sd->data, len);
2691 		recycle_rx_buf(adap, fl, fl->cidx);
2692 		*m = m0;
2693 	} else {
2694 	skip_recycle:
2695 		bus_dmamap_unload(fl->entry_tag, sd->map);
2696 		cl = sd->rxsd_cl;
2697 		*m = m0 = (struct mbuf *)cl;
2698 	}
2699 
2700 	switch(sopeop) {
2701 	case RSPQ_SOP_EOP:
2702 		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2703 		if (cl == sd->rxsd_cl)
2704 			init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone);
2705 		m0->m_len = m0->m_pkthdr.len = len;
2706 		ret = 1;
2707 		goto done;
2708 		break;
2709 	case RSPQ_NSOP_NEOP:
2710 		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2711 		panic("chaining unsupported");
2712 		ret = 0;
2713 		break;
2714 	case RSPQ_SOP:
2715 		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2716 		panic("chaining unsupported");
2717 		m_iovinit(m0);
2718 		ret = 0;
2719 		break;
2720 	case RSPQ_EOP:
2721 		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2722 		panic("chaining unsupported");
2723 		ret = 1;
2724 		break;
2725 	}
2726 	panic("append not supported");
2727 #if 0
2728 	m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref);
2729 #endif
2730 done:
2731 	if (++fl->cidx == fl->size)
2732 		fl->cidx = 0;
2733 
2734 	return (ret);
2735 }
2736 #endif
2737 /**
2738  *	handle_rsp_cntrl_info - handles control information in a response
2739  *	@qs: the queue set corresponding to the response
2740  *	@flags: the response control flags
2741  *
2742  *	Handles the control information of an SGE response, such as GTS
2743  *	indications and completion credits for the queue set's Tx queues.
2744  *	HW coalesces credits, we don't do any extra SW coalescing.
2745  */
2746 static __inline void
2747 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2748 {
2749 	unsigned int credits;
2750 
2751 #if USE_GTS
2752 	if (flags & F_RSPD_TXQ0_GTS)
2753 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2754 #endif
2755 	credits = G_RSPD_TXQ0_CR(flags);
2756 	if (credits)
2757 		qs->txq[TXQ_ETH].processed += credits;
2758 
2759 	credits = G_RSPD_TXQ2_CR(flags);
2760 	if (credits)
2761 		qs->txq[TXQ_CTRL].processed += credits;
2762 
2763 # if USE_GTS
2764 	if (flags & F_RSPD_TXQ1_GTS)
2765 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2766 # endif
2767 	credits = G_RSPD_TXQ1_CR(flags);
2768 	if (credits)
2769 		qs->txq[TXQ_OFLD].processed += credits;
2770 
2771 }
2772 
2773 static void
2774 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2775     unsigned int sleeping)
2776 {
2777 	;
2778 }
2779 
2780 /**
2781  *	process_responses - process responses from an SGE response queue
2782  *	@adap: the adapter
2783  *	@qs: the queue set to which the response queue belongs
2784  *	@budget: how many responses can be processed in this round
2785  *
2786  *	Process responses from an SGE response queue up to the supplied budget.
2787  *	Responses include received packets as well as credits and other events
2788  *	for the queues that belong to the response queue's queue set.
2789  *	A negative budget is effectively unlimited.
2790  *
2791  *	Additionally choose the interrupt holdoff time for the next interrupt
2792  *	on this queue.  If the system is under memory shortage use a fairly
2793  *	long delay to help recovery.
2794  */
2795 int
2796 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2797 {
2798 	struct sge_rspq *rspq = &qs->rspq;
2799 	struct rsp_desc *r = &rspq->desc[rspq->cidx];
2800 	int budget_left = budget;
2801 	unsigned int sleeping = 0;
2802 	int lro_enabled = qs->lro.enabled;
2803 	struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2804 	struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2805 	int ngathered = 0;
2806 #ifdef DEBUG
2807 	static int last_holdoff = 0;
2808 	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2809 		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2810 		last_holdoff = rspq->holdoff_tmr;
2811 	}
2812 #endif
2813 	rspq->next_holdoff = rspq->holdoff_tmr;
2814 
2815 	while (__predict_true(budget_left && is_new_response(r, rspq))) {
2816 		int eth, eop = 0, ethpad = 0;
2817 		uint32_t flags = ntohl(r->flags);
2818 		uint32_t rss_csum = *(const uint32_t *)r;
2819 		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2820 
2821 		eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2822 
2823 		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2824 			struct mbuf *m;
2825 
2826 			if (cxgb_debug)
2827 				printf("async notification\n");
2828 
2829 			if (rspq->rspq_mh.mh_head == NULL) {
2830 				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2831 				m = rspq->rspq_mh.mh_head;
2832 			} else {
2833 				m = m_gethdr(M_DONTWAIT, MT_DATA);
2834 			}
2835 
2836 			/* XXX m is lost here if rspq->rspq_mbuf is not NULL */
2837 
2838 			if (m == NULL)
2839 				goto no_mem;
2840 
2841 			memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2842 			m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2843 			*mtod(m, char *) = CPL_ASYNC_NOTIF;
2844 			rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
2845 			eop = 1;
2846 			rspq->async_notif++;
2847 			goto skip;
2848 		} else if  (flags & F_RSPD_IMM_DATA_VALID) {
2849 			struct mbuf *m = NULL;
2850 
2851 			DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
2852 			    r->rss_hdr.opcode, rspq->cidx);
2853 			if (rspq->rspq_mh.mh_head == NULL)
2854 				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2855 			else
2856 				m = m_gethdr(M_DONTWAIT, MT_DATA);
2857 
2858 			if (rspq->rspq_mh.mh_head == NULL &&  m == NULL) {
2859 		no_mem:
2860 				rspq->next_holdoff = NOMEM_INTR_DELAY;
2861 				budget_left--;
2862 				break;
2863 			}
2864 			get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
2865 			eop = 1;
2866 			rspq->imm_data++;
2867 		} else if (r->len_cq) {
2868 			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2869 
2870 #ifdef DISABLE_MBUF_IOVEC
2871 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
2872 #else
2873 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r);
2874 #endif
2875 #ifdef IFNET_MULTIQUEUE
2876 			rspq->rspq_mh.mh_head->m_pkthdr.rss_hash = rss_hash;
2877 #endif
2878 			ethpad = 2;
2879 		} else {
2880 			DPRINTF("pure response\n");
2881 			rspq->pure_rsps++;
2882 		}
2883 	skip:
2884 		if (flags & RSPD_CTRL_MASK) {
2885 			sleeping |= flags & RSPD_GTS_MASK;
2886 			handle_rsp_cntrl_info(qs, flags);
2887 		}
2888 
2889 		r++;
2890 		if (__predict_false(++rspq->cidx == rspq->size)) {
2891 			rspq->cidx = 0;
2892 			rspq->gen ^= 1;
2893 			r = rspq->desc;
2894 		}
2895 		prefetch(r);
2896 		if (++rspq->credits >= (rspq->size / 4)) {
2897 			refill_rspq(adap, rspq, rspq->credits);
2898 			rspq->credits = 0;
2899 		}
2900 		DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags);
2901 
2902 		if (!eth && eop) {
2903 			rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
2904 			/*
2905 			 * XXX size mismatch
2906 			 */
2907 			m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
2908 
2909 
2910 			ngathered = rx_offload(&adap->tdev, rspq,
2911 			    rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
2912 			rspq->rspq_mh.mh_head = NULL;
2913 			DPRINTF("received offload packet\n");
2914 
2915 		} else if (eth && eop) {
2916 			struct mbuf *m = rspq->rspq_mh.mh_head;
2917 			prefetch(mtod(m, uint8_t *));
2918 			prefetch(mtod(m, uint8_t *) + L1_CACHE_BYTES);
2919 
2920 			t3_rx_eth(adap, rspq, m, ethpad);
2921 			if (lro_enabled && lro_ctrl->lro_cnt &&
2922 			    (tcp_lro_rx(lro_ctrl, m, 0) == 0)) {
2923 				/* successfully queued for LRO */
2924 			} else {
2925 				/*
2926 				 * LRO not enabled, packet unsuitable for LRO,
2927 				 * or unable to queue.  Pass it up right now in
2928 				 * either case.
2929 				 */
2930 				struct ifnet *ifp = m->m_pkthdr.rcvif;
2931 				(*ifp->if_input)(ifp, m);
2932 			}
2933 			DPRINTF("received tunnel packet\n");
2934 			rspq->rspq_mh.mh_head = NULL;
2935 
2936 		}
2937 		__refill_fl_lt(adap, &qs->fl[0], 32);
2938 		__refill_fl_lt(adap, &qs->fl[1], 32);
2939 		--budget_left;
2940 	}
2941 
2942 	deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
2943 
2944 	/* Flush LRO */
2945 	while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
2946 		struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
2947 		SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
2948 		tcp_lro_flush(lro_ctrl, queued);
2949 	}
2950 
2951 	if (sleeping)
2952 		check_ring_db(adap, qs, sleeping);
2953 
2954 	smp_mb();  /* commit Tx queue processed updates */
2955 	if (__predict_false(qs->txq_stopped > 1)) {
2956 		printf("restarting tx on %p\n", qs);
2957 
2958 		restart_tx(qs);
2959 	}
2960 
2961 	__refill_fl_lt(adap, &qs->fl[0], 512);
2962 	__refill_fl_lt(adap, &qs->fl[1], 512);
2963 	budget -= budget_left;
2964 	return (budget);
2965 }
2966 
2967 /*
2968  * A helper function that processes responses and issues GTS.
2969  */
2970 static __inline int
2971 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2972 {
2973 	int work;
2974 	static int last_holdoff = 0;
2975 
2976 	work = process_responses(adap, rspq_to_qset(rq), -1);
2977 
2978 	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
2979 		printf("next_holdoff=%d\n", rq->next_holdoff);
2980 		last_holdoff = rq->next_holdoff;
2981 	}
2982 	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2983 	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2984 
2985 	return (work);
2986 }
2987 
2988 
2989 /*
2990  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2991  * Handles data events from SGE response queues as well as error and other
2992  * async events as they all use the same interrupt pin.  We use one SGE
2993  * response queue per port in this mode and protect all response queues with
2994  * queue 0's lock.
2995  */
2996 void
2997 t3b_intr(void *data)
2998 {
2999 	uint32_t i, map;
3000 	adapter_t *adap = data;
3001 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3002 
3003 	t3_write_reg(adap, A_PL_CLI, 0);
3004 	map = t3_read_reg(adap, A_SG_DATA_INTR);
3005 
3006 	if (!map)
3007 		return;
3008 
3009 	if (__predict_false(map & F_ERRINTR))
3010 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3011 
3012 	mtx_lock(&q0->lock);
3013 	for_each_port(adap, i)
3014 	    if (map & (1 << i))
3015 			process_responses_gts(adap, &adap->sge.qs[i].rspq);
3016 	mtx_unlock(&q0->lock);
3017 }
3018 
3019 /*
3020  * The MSI interrupt handler.  This needs to handle data events from SGE
3021  * response queues as well as error and other async events as they all use
3022  * the same MSI vector.  We use one SGE response queue per port in this mode
3023  * and protect all response queues with queue 0's lock.
3024  */
3025 void
3026 t3_intr_msi(void *data)
3027 {
3028 	adapter_t *adap = data;
3029 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3030 	int i, new_packets = 0;
3031 
3032 	mtx_lock(&q0->lock);
3033 
3034 	for_each_port(adap, i)
3035 	    if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3036 		    new_packets = 1;
3037 	mtx_unlock(&q0->lock);
3038 	if (new_packets == 0)
3039 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3040 }
3041 
3042 void
3043 t3_intr_msix(void *data)
3044 {
3045 	struct sge_qset *qs = data;
3046 	adapter_t *adap = qs->port->adapter;
3047 	struct sge_rspq *rspq = &qs->rspq;
3048 #ifndef IFNET_MULTIQUEUE
3049 	mtx_lock(&rspq->lock);
3050 #else
3051 	if (mtx_trylock(&rspq->lock))
3052 #endif
3053 	{
3054 
3055 		if (process_responses_gts(adap, rspq) == 0)
3056 			rspq->unhandled_irqs++;
3057 		mtx_unlock(&rspq->lock);
3058 	}
3059 }
3060 
3061 #define QDUMP_SBUF_SIZE		(32 * 400)
3062 static int
3063 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3064 {
3065 	struct sge_rspq *rspq;
3066 	struct sge_qset *qs;
3067 	int i, err, dump_end, idx;
3068 	static int multiplier = 1;
3069 	struct sbuf *sb;
3070 	struct rsp_desc *rspd;
3071 	uint32_t data[4];
3072 
3073 	rspq = arg1;
3074 	qs = rspq_to_qset(rspq);
3075 	if (rspq->rspq_dump_count == 0)
3076 		return (0);
3077 	if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3078 		log(LOG_WARNING,
3079 		    "dump count is too large %d\n", rspq->rspq_dump_count);
3080 		rspq->rspq_dump_count = 0;
3081 		return (EINVAL);
3082 	}
3083 	if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3084 		log(LOG_WARNING,
3085 		    "dump start of %d is greater than queue size\n",
3086 		    rspq->rspq_dump_start);
3087 		rspq->rspq_dump_start = 0;
3088 		return (EINVAL);
3089 	}
3090 	err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3091 	if (err)
3092 		return (err);
3093 retry_sbufops:
3094 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3095 
3096 	sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3097 	    (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3098 	    ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3099 	sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3100 	    ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3101 
3102 	sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3103 	    (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3104 
3105 	dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3106 	for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3107 		idx = i & (RSPQ_Q_SIZE-1);
3108 
3109 		rspd = &rspq->desc[idx];
3110 		sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3111 		    idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3112 		    rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3113 		sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3114 		    rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3115 		    be32toh(rspd->len_cq), rspd->intr_gen);
3116 	}
3117 	if (sbuf_overflowed(sb)) {
3118 		sbuf_delete(sb);
3119 		multiplier++;
3120 		goto retry_sbufops;
3121 	}
3122 	sbuf_finish(sb);
3123 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3124 	sbuf_delete(sb);
3125 	return (err);
3126 }
3127 
3128 static int
3129 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3130 {
3131 	struct sge_txq *txq;
3132 	struct sge_qset *qs;
3133 	int i, j, err, dump_end;
3134 	static int multiplier = 1;
3135 	struct sbuf *sb;
3136 	struct tx_desc *txd;
3137 	uint32_t *WR, wr_hi, wr_lo, gen;
3138 	uint32_t data[4];
3139 
3140 	txq = arg1;
3141 	qs = txq_to_qset(txq, TXQ_ETH);
3142 	if (txq->txq_dump_count == 0) {
3143 		return (0);
3144 	}
3145 	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3146 		log(LOG_WARNING,
3147 		    "dump count is too large %d\n", txq->txq_dump_count);
3148 		txq->txq_dump_count = 1;
3149 		return (EINVAL);
3150 	}
3151 	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3152 		log(LOG_WARNING,
3153 		    "dump start of %d is greater than queue size\n",
3154 		    txq->txq_dump_start);
3155 		txq->txq_dump_start = 0;
3156 		return (EINVAL);
3157 	}
3158 	err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3159 	if (err)
3160 		return (err);
3161 
3162 
3163 retry_sbufops:
3164 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3165 
3166 	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3167 	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3168 	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3169 	sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
3170 	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3171 	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3172 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3173 	    txq->txq_dump_start,
3174 	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3175 
3176 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3177 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3178 		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3179 		WR = (uint32_t *)txd->flit;
3180 		wr_hi = ntohl(WR[0]);
3181 		wr_lo = ntohl(WR[1]);
3182 		gen = G_WR_GEN(wr_lo);
3183 
3184 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3185 		    wr_hi, wr_lo, gen);
3186 		for (j = 2; j < 30; j += 4)
3187 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3188 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3189 
3190 	}
3191 	if (sbuf_overflowed(sb)) {
3192 		sbuf_delete(sb);
3193 		multiplier++;
3194 		goto retry_sbufops;
3195 	}
3196 	sbuf_finish(sb);
3197 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3198 	sbuf_delete(sb);
3199 	return (err);
3200 }
3201 
3202 static int
3203 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3204 {
3205 	struct sge_txq *txq;
3206 	struct sge_qset *qs;
3207 	int i, j, err, dump_end;
3208 	static int multiplier = 1;
3209 	struct sbuf *sb;
3210 	struct tx_desc *txd;
3211 	uint32_t *WR, wr_hi, wr_lo, gen;
3212 
3213 	txq = arg1;
3214 	qs = txq_to_qset(txq, TXQ_CTRL);
3215 	if (txq->txq_dump_count == 0) {
3216 		return (0);
3217 	}
3218 	if (txq->txq_dump_count > 256) {
3219 		log(LOG_WARNING,
3220 		    "dump count is too large %d\n", txq->txq_dump_count);
3221 		txq->txq_dump_count = 1;
3222 		return (EINVAL);
3223 	}
3224 	if (txq->txq_dump_start > 255) {
3225 		log(LOG_WARNING,
3226 		    "dump start of %d is greater than queue size\n",
3227 		    txq->txq_dump_start);
3228 		txq->txq_dump_start = 0;
3229 		return (EINVAL);
3230 	}
3231 
3232 retry_sbufops:
3233 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3234 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3235 	    txq->txq_dump_start,
3236 	    (txq->txq_dump_start + txq->txq_dump_count) & 255);
3237 
3238 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3239 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3240 		txd = &txq->desc[i & (255)];
3241 		WR = (uint32_t *)txd->flit;
3242 		wr_hi = ntohl(WR[0]);
3243 		wr_lo = ntohl(WR[1]);
3244 		gen = G_WR_GEN(wr_lo);
3245 
3246 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3247 		    wr_hi, wr_lo, gen);
3248 		for (j = 2; j < 30; j += 4)
3249 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3250 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3251 
3252 	}
3253 	if (sbuf_overflowed(sb)) {
3254 		sbuf_delete(sb);
3255 		multiplier++;
3256 		goto retry_sbufops;
3257 	}
3258 	sbuf_finish(sb);
3259 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3260 	sbuf_delete(sb);
3261 	return (err);
3262 }
3263 
3264 static int
3265 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3266 {
3267 	adapter_t *sc = arg1;
3268 	struct qset_params *qsp = &sc->params.sge.qset[0];
3269 	int coalesce_usecs;
3270 	struct sge_qset *qs;
3271 	int i, j, err, nqsets = 0;
3272 	struct mtx *lock;
3273 
3274 	if ((sc->flags & FULL_INIT_DONE) == 0)
3275 		return (ENXIO);
3276 
3277 	coalesce_usecs = qsp->coalesce_usecs;
3278 	err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3279 
3280 	if (err != 0) {
3281 		return (err);
3282 	}
3283 	if (coalesce_usecs == qsp->coalesce_usecs)
3284 		return (0);
3285 
3286 	for (i = 0; i < sc->params.nports; i++)
3287 		for (j = 0; j < sc->port[i].nqsets; j++)
3288 			nqsets++;
3289 
3290 	coalesce_usecs = max(1, coalesce_usecs);
3291 
3292 	for (i = 0; i < nqsets; i++) {
3293 		qs = &sc->sge.qs[i];
3294 		qsp = &sc->params.sge.qset[i];
3295 		qsp->coalesce_usecs = coalesce_usecs;
3296 
3297 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3298 			    &sc->sge.qs[0].rspq.lock;
3299 
3300 		mtx_lock(lock);
3301 		t3_update_qset_coalesce(qs, qsp);
3302 		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3303 		    V_NEWTIMER(qs->rspq.holdoff_tmr));
3304 		mtx_unlock(lock);
3305 	}
3306 
3307 	return (0);
3308 }
3309 
3310 
3311 void
3312 t3_add_attach_sysctls(adapter_t *sc)
3313 {
3314 	struct sysctl_ctx_list *ctx;
3315 	struct sysctl_oid_list *children;
3316 
3317 	ctx = device_get_sysctl_ctx(sc->dev);
3318 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3319 
3320 	/* random information */
3321 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3322 	    "firmware_version",
3323 	    CTLFLAG_RD, &sc->fw_version,
3324 	    0, "firmware version");
3325 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3326 	    "hw_revision",
3327 	    CTLFLAG_RD, &sc->params.rev,
3328 	    0, "chip model");
3329 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3330 	    "enable_debug",
3331 	    CTLFLAG_RW, &cxgb_debug,
3332 	    0, "enable verbose debugging output");
3333 	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce",
3334 	    CTLFLAG_RD, &sc->tunq_coalesce,
3335 	    "#tunneled packets freed");
3336 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3337 	    "txq_overrun",
3338 	    CTLFLAG_RD, &txq_fills,
3339 	    0, "#times txq overrun");
3340 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3341 	    "pcpu_cache_enable",
3342 	    CTLFLAG_RW, &cxgb_pcpu_cache_enable,
3343 	    0, "#enable driver local pcpu caches");
3344 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3345 	    "cache_alloc",
3346 	    CTLFLAG_RD, &cxgb_cached_allocations,
3347 	    0, "#times a cluster was allocated from cache");
3348 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3349 	    "cached",
3350 	    CTLFLAG_RD, &cxgb_cached,
3351 	    0, "#times a cluster was cached");
3352 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3353 	    "ext_freed",
3354 	    CTLFLAG_RD, &cxgb_ext_freed,
3355 	    0, "#times a cluster was freed through ext_free");
3356 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3357 	    "ext_inited",
3358 	    CTLFLAG_RD, &cxgb_ext_inited,
3359 	    0, "#times a cluster was initialized for ext_free");
3360 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3361 	    "mbufs_outstanding",
3362 	    CTLFLAG_RD, &cxgb_mbufs_outstanding,
3363 	    0, "#mbufs in flight in the driver");
3364 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3365 	    "pack_outstanding",
3366 	    CTLFLAG_RD, &cxgb_pack_outstanding,
3367 	    0, "#packet in flight in the driver");
3368 }
3369 
3370 
3371 static const char *rspq_name = "rspq";
3372 static const char *txq_names[] =
3373 {
3374 	"txq_eth",
3375 	"txq_ofld",
3376 	"txq_ctrl"
3377 };
3378 
3379 void
3380 t3_add_configured_sysctls(adapter_t *sc)
3381 {
3382 	struct sysctl_ctx_list *ctx;
3383 	struct sysctl_oid_list *children;
3384 	int i, j;
3385 
3386 	ctx = device_get_sysctl_ctx(sc->dev);
3387 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3388 
3389 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3390 	    "intr_coal",
3391 	    CTLTYPE_INT|CTLFLAG_RW, sc,
3392 	    0, t3_set_coalesce_usecs,
3393 	    "I", "interrupt coalescing timer (us)");
3394 
3395 	for (i = 0; i < sc->params.nports; i++) {
3396 		struct port_info *pi = &sc->port[i];
3397 		struct sysctl_oid *poid;
3398 		struct sysctl_oid_list *poidlist;
3399 
3400 		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3401 		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3402 		    pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3403 		poidlist = SYSCTL_CHILDREN(poid);
3404 		SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3405 		    "nqsets", CTLFLAG_RD, &pi->nqsets,
3406 		    0, "#queue sets");
3407 
3408 		for (j = 0; j < pi->nqsets; j++) {
3409 			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3410 			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, *ctrlqpoid;
3411 			struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist, *ctrlqpoidlist;
3412 			struct sge_txq *txq = &qs->txq[TXQ_ETH];
3413 
3414 			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3415 
3416 			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3417 			    qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3418 			qspoidlist = SYSCTL_CHILDREN(qspoid);
3419 
3420 			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3421 			    rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3422 			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3423 
3424 			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3425 			    txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3426 			txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3427 
3428 			ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3429 			    txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3430 			ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3431 
3432 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3433 			    CTLFLAG_RD, &qs->rspq.size,
3434 			    0, "#entries in response queue");
3435 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3436 			    CTLFLAG_RD, &qs->rspq.cidx,
3437 			    0, "consumer index");
3438 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3439 			    CTLFLAG_RD, &qs->rspq.credits,
3440 			    0, "#credits");
3441 			SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3442 			    CTLFLAG_RD, &qs->rspq.phys_addr,
3443 			    "physical_address_of the queue");
3444 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3445 			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3446 			    0, "start rspq dump entry");
3447 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3448 			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3449 			    0, "#rspq entries to dump");
3450 			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3451 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3452 			    0, t3_dump_rspq, "A", "dump of the response queue");
3453 
3454 
3455 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
3456 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
3457 			    0, "#tunneled packets dropped");
3458 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3459 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3460 			    0, "#tunneled packets waiting to be sent");
3461 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3462 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3463 			    0, "#tunneled packets queue producer index");
3464 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3465 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3466 			    0, "#tunneled packets queue consumer index");
3467 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3468 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3469 			    0, "#tunneled packets processed by the card");
3470 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3471 			    CTLFLAG_RD, &txq->cleaned,
3472 			    0, "#tunneled packets cleaned");
3473 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3474 			    CTLFLAG_RD, &txq->in_use,
3475 			    0, "#tunneled packet slots in use");
3476 			SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3477 			    CTLFLAG_RD, &txq->txq_frees,
3478 			    "#tunneled packets freed");
3479 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3480 			    CTLFLAG_RD, &txq->txq_skipped,
3481 			    0, "#tunneled packet descriptors skipped");
3482 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced",
3483 			    CTLFLAG_RD, &txq->txq_coalesced,
3484 			    0, "#tunneled packets coalesced");
3485 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3486 			    CTLFLAG_RD, &txq->txq_enqueued,
3487 			    0, "#tunneled packets enqueued to hardware");
3488 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3489 			    CTLFLAG_RD, &qs->txq_stopped,
3490 			    0, "tx queues stopped");
3491 			SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3492 			    CTLFLAG_RD, &txq->phys_addr,
3493 			    "physical_address_of the queue");
3494 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3495 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3496 			    0, "txq generation");
3497 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3498 			    CTLFLAG_RD, &txq->cidx,
3499 			    0, "hardware queue cidx");
3500 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3501 			    CTLFLAG_RD, &txq->pidx,
3502 			    0, "hardware queue pidx");
3503 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3504 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3505 			    0, "txq start idx for dump");
3506 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3507 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3508 			    0, "txq #entries to dump");
3509 			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3510 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3511 			    0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3512 
3513 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3514 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3515 			    0, "ctrlq start idx for dump");
3516 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3517 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3518 			    0, "ctrl #entries to dump");
3519 			SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3520 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3521 	    0, t3_dump_txq_ctrl, "A", "dump of the control queue");
3522 
3527 		}
3528 	}
3529 }
3530 
3531 /**
3532  *	t3_get_desc - dump an SGE descriptor for debugging purposes
3533  *	@qs: the queue set
3534  *	@qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3535  *	@idx: the descriptor index in the queue
3536  *	@data: where to dump the descriptor contents
3537  *
3538  *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3539  *	size of the descriptor.
3540  */
3541 int
3542 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3543 		unsigned char *data)
3544 {
3545 	if (qnum >= 6)
3546 		return (EINVAL);
3547 
3548 	if (qnum < 3) {
3549 		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3550 			return (EINVAL);
3551 		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3552 		return sizeof(struct tx_desc);
3553 	}
3554 
3555 	if (qnum == 3) {
3556 		if (!qs->rspq.desc || idx >= qs->rspq.size)
3557 			return (EINVAL);
3558 		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3559 		return sizeof(struct rsp_desc);
3560 	}
3561 
3562 	qnum -= 4;
3563 	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3564 		return (EINVAL);
3565 	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3566 	return sizeof(struct rx_desc);
3567 }
3568