xref: /freebsd/sys/dev/cxgb/cxgb_sge.c (revision 4af83c8cff85c89629e629b6becbf9712fa691c0)
1 /**************************************************************************
2 
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 #define DEBUG_BUFRING
30 
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/bus.h>
40 #include <sys/conf.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/bus_dma.h>
44 #include <sys/rman.h>
45 #include <sys/queue.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48 
49 #include <sys/proc.h>
50 #include <sys/sbuf.h>
51 #include <sys/sched.h>
52 #include <sys/smp.h>
53 #include <sys/systm.h>
54 #include <sys/syslog.h>
55 
56 #include <netinet/in_systm.h>
57 #include <netinet/in.h>
58 #include <netinet/ip.h>
59 #include <netinet/tcp.h>
60 
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 
67 #ifdef CONFIG_DEFINED
68 #include <cxgb_include.h>
69 #include <sys/mvec.h>
70 #else
71 #include <dev/cxgb/cxgb_include.h>
72 #include <dev/cxgb/sys/mvec.h>
73 #endif
74 
75 int      txq_fills = 0;
76 /*
77  * XXX don't re-enable this until TOE stops assuming
78  * we have an m_ext
79  */
80 static int recycle_enable = 0;
81 extern int cxgb_txq_buf_ring_size;
82 int cxgb_cached_allocations;
83 int cxgb_cached;
84 int cxgb_ext_freed = 0;
85 int cxgb_ext_inited = 0;
86 int fl_q_size = 0;
87 int jumbo_q_size = 0;
88 
89 extern int cxgb_use_16k_clusters;
90 extern int cxgb_pcpu_cache_enable;
91 extern int nmbjumbo4;
92 extern int nmbjumbo9;
93 extern int nmbjumbo16;
94 
95 
96 
97 
98 #define USE_GTS 0
99 
100 #define SGE_RX_SM_BUF_SIZE	1536
101 #define SGE_RX_DROP_THRES	16
102 #define SGE_RX_COPY_THRES	128
103 
104 /*
105  * Period of the Tx buffer reclaim timer.  This timer does not need to run
106  * frequently as Tx buffers are usually reclaimed by new Tx packets.
107  */
108 #define TX_RECLAIM_PERIOD       (hz >> 1)
109 
110 /*
111  * Values for sge_txq.flags
112  */
113 enum {
114 	TXQ_RUNNING	= 1 << 0,  /* fetch engine is running */
115 	TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
116 };
117 
118 struct tx_desc {
119 	uint64_t	flit[TX_DESC_FLITS];
120 } __packed;
121 
122 struct rx_desc {
123 	uint32_t	addr_lo;
124 	uint32_t	len_gen;
125 	uint32_t	gen2;
126 	uint32_t	addr_hi;
127 } __packed;
128 
129 struct rsp_desc {               /* response queue descriptor */
130 	struct rss_header	rss_hdr;
131 	uint32_t		flags;
132 	uint32_t		len_cq;
133 	uint8_t			imm_data[47];
134 	uint8_t			intr_gen;
135 } __packed;
136 
137 #define RX_SW_DESC_MAP_CREATED	(1 << 0)
138 #define TX_SW_DESC_MAP_CREATED	(1 << 1)
139 #define RX_SW_DESC_INUSE        (1 << 3)
140 #define TX_SW_DESC_MAPPED       (1 << 4)
141 
142 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
143 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
144 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
145 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
146 
147 struct tx_sw_desc {                /* SW state per Tx descriptor */
148 	struct mbuf_iovec mi;
149 	bus_dmamap_t	map;
150 	int		flags;
151 };
152 
153 struct rx_sw_desc {                /* SW state per Rx descriptor */
154 	caddr_t	         rxsd_cl;
155 	caddr_t	         data;
156 	bus_dmamap_t	  map;
157 	int		  flags;
158 };
159 
160 struct txq_state {
161 	unsigned int compl;
162 	unsigned int gen;
163 	unsigned int pidx;
164 };
165 
166 struct refill_fl_cb_arg {
167 	int               error;
168 	bus_dma_segment_t seg;
169 	int               nseg;
170 };
171 
172 /*
173  * Maps a number of flits to the number of Tx descriptors that can hold them.
174  * The formula is
175  *
176  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
177  *
178  * HW allows up to 4 descriptors to be combined into a WR.
179  */
180 static uint8_t flit_desc_map[] = {
181 	0,
182 #if SGE_NUM_GENBITS == 1
183 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
184 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
185 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
186 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
187 #elif SGE_NUM_GENBITS == 2
188 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
189 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
190 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
191 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
192 #else
193 # error "SGE_NUM_GENBITS must be 1 or 2"
194 #endif
195 };
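/*
 * For example, with SGE_NUM_GENBITS == 2 the table above maps 1-15 flits to
 * one descriptor, 16-29 flits to two, 30-43 to three and 44-57 to four,
 * i.e. every descriptor past the first adds room for WR_FLITS - 1 flits,
 * matching the formula in the comment above.
 */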
196 
197 
198 static int lro_default = 0;
199 int cxgb_debug = 0;
200 
201 static void sge_timer_cb(void *arg);
202 static void sge_timer_reclaim(void *arg, int ncount);
203 static void sge_txq_reclaim_handler(void *arg, int ncount);
204 
205 /**
206  *	reclaim_completed_tx_ - reclaims completed Tx descriptors
207  *	@q: the Tx queue to reclaim completed descriptors from
208  *	@reclaim_min: don't reclaim unless at least this many descriptors are reclaimable
209  *
210  *	Reclaims Tx descriptors that the SGE has indicated it has processed,
211  *	and frees the associated buffers if possible.  Called with the Tx
212  *	queue's lock held.
213  */
214 static __inline int
215 reclaim_completed_tx_(struct sge_txq *q, int reclaim_min)
216 {
217 	int reclaim = desc_reclaimable(q);
218 
219 	if (reclaim < reclaim_min)
220 		return (0);
221 
222 	mtx_assert(&q->lock, MA_OWNED);
223 	if (reclaim > 0) {
224 		t3_free_tx_desc(q, reclaim);
225 		q->cleaned += reclaim;
226 		q->in_use -= reclaim;
227 	}
228 	return (reclaim);
229 }
230 
231 /**
232  *	should_restart_tx - are there enough resources to restart a Tx queue?
233  *	@q: the Tx queue
234  *
235  *	Checks if there are enough descriptors to restart a suspended Tx queue.
236  */
237 static __inline int
238 should_restart_tx(const struct sge_txq *q)
239 {
240 	unsigned int r = q->processed - q->cleaned;
241 
242 	return q->in_use - r < (q->size >> 1);
243 }
244 
245 /**
246  *	t3_sge_init - initialize SGE
247  *	@adap: the adapter
248  *	@p: the SGE parameters
249  *
250  *	Performs SGE initialization needed every time after a chip reset.
251  *	We do not initialize any of the queue sets here, instead the driver
252  *	top-level must request those individually.  We also do not enable DMA
253  *	here, that should be done after the queues have been set up.
254  */
255 void
256 t3_sge_init(adapter_t *adap, struct sge_params *p)
257 {
258 	u_int ctrl, ups;
259 
260 	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
261 
262 	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
263 	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
264 	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
265 	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
266 #if SGE_NUM_GENBITS == 1
267 	ctrl |= F_EGRGENCTRL;
268 #endif
269 	if (adap->params.rev > 0) {
270 		if (!(adap->flags & (USING_MSIX | USING_MSI)))
271 			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
272 	}
273 	t3_write_reg(adap, A_SG_CONTROL, ctrl);
274 	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
275 		     V_LORCQDRBTHRSH(512));
276 	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
277 	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
278 		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
279 	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
280 		     adap->params.rev < T3_REV_C ? 1000 : 500);
281 	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
282 	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
283 	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
284 	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
285 	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
286 }
287 
288 
289 /**
290  *	sgl_len - calculates the size of an SGL of the given capacity
291  *	@n: the number of SGL entries
292  *
293  *	Calculates the number of flits needed for a scatter/gather list that
294  *	can hold the given number of entries.
295  */
296 static __inline unsigned int
297 sgl_len(unsigned int n)
298 {
299 	return ((3 * n) / 2 + (n & 1));
300 }
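/* For reference: sgl_len(1) == 2, sgl_len(2) == 3 and sgl_len(3) == 5 flits. */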
301 
302 /**
303  *	get_imm_packet - return the next ingress packet buffer from a response
304  *	@resp: the response descriptor containing the packet data
305  *
306  *	Return a packet containing the immediate data of the given response.
307  */
308 static int
309 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
310 {
311 
312 	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
313 	m->m_ext.ext_buf = NULL;
314 	m->m_ext.ext_type = 0;
315 	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
316 	return (0);
317 }
318 
319 static __inline u_int
320 flits_to_desc(u_int n)
321 {
322 	return (flit_desc_map[n]);
323 }
324 
325 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
326 		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
327 		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
328 		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
329 		    F_HIRCQPARITYERROR)
330 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
331 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
332 		      F_RSPQDISABLED)
333 
334 /**
335  *	t3_sge_err_intr_handler - SGE async event interrupt handler
336  *	@adapter: the adapter
337  *
338  *	Interrupt handler for SGE asynchronous (non-data) events.
339  */
340 void
341 t3_sge_err_intr_handler(adapter_t *adapter)
342 {
343 	unsigned int v, status;
344 
345 	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
346 	if (status & SGE_PARERR)
347 		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
348 			 status & SGE_PARERR);
349 	if (status & SGE_FRAMINGERR)
350 		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
351 			 status & SGE_FRAMINGERR);
352 	if (status & F_RSPQCREDITOVERFOW)
353 		CH_ALERT(adapter, "SGE response queue credit overflow\n");
354 
355 	if (status & F_RSPQDISABLED) {
356 		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
357 
358 		CH_ALERT(adapter,
359 			 "packet delivered to disabled response queue (0x%x)\n",
360 			 (v >> S_RSPQ0DISABLED) & 0xff);
361 	}
362 
363 	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
364 	if (status & SGE_FATALERR)
365 		t3_fatal_err(adapter);
366 }
367 
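/**
 *	t3_sge_prep - initialize SGE parameters and queue set defaults
 *	@adap: the adapter
 *	@p: the SGE parameters to initialize
 *
 *	Sizes the free lists based on the number of queue sets and the
 *	available clusters, computes the maximum receivable packet size, and
 *	fills in default per-queue-set parameters (interrupt coalescing,
 *	ring sizes, congestion threshold).
 */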
368 void
369 t3_sge_prep(adapter_t *adap, struct sge_params *p)
370 {
371 	int i, nqsets;
372 
373 	nqsets = min(SGE_QSETS, mp_ncpus*4);
374 
375 	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
376 
377 	while (!powerof2(fl_q_size))
378 		fl_q_size--;
379 #if __FreeBSD_version > 800000
380 	if (cxgb_use_16k_clusters)
381 		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
382 	else
383 		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
384 #else
385 	jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE);
386 #endif
387 	while (!powerof2(jumbo_q_size))
388 		jumbo_q_size--;
389 
390 	/* XXX Does ETHER_ALIGN need to be accounted for here? */
391 	p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);
392 
393 	for (i = 0; i < SGE_QSETS; ++i) {
394 		struct qset_params *q = p->qset + i;
395 
396 		if (adap->params.nports > 2) {
397 			q->coalesce_usecs = 50;
398 		} else {
399 #ifdef INVARIANTS
400 			q->coalesce_usecs = 10;
401 #else
402 			q->coalesce_usecs = 5;
403 #endif
404 		}
405 		q->polling = adap->params.rev > 0;
406 		q->rspq_size = RSPQ_Q_SIZE;
407 		q->fl_size = fl_q_size;
408 		q->jumbo_size = jumbo_q_size;
409 		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
410 		q->txq_size[TXQ_OFLD] = 1024;
411 		q->txq_size[TXQ_CTRL] = 256;
412 		q->cong_thres = 0;
413 	}
414 }
415 
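/**
 *	t3_sge_alloc - create the DMA tags used by the SGE
 *	@sc: the adapter softc
 *
 *	Creates the parent DMA tag and the tags used to map normal and jumbo
 *	receive buffers and transmit frames.
 */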
416 int
417 t3_sge_alloc(adapter_t *sc)
418 {
419 
420 	/* The parent tag. */
421 	if (bus_dma_tag_create( NULL,			/* parent */
422 				1, 0,			/* algnmnt, boundary */
423 				BUS_SPACE_MAXADDR,	/* lowaddr */
424 				BUS_SPACE_MAXADDR,	/* highaddr */
425 				NULL, NULL,		/* filter, filterarg */
426 				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
427 				BUS_SPACE_UNRESTRICTED, /* nsegments */
428 				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
429 				0,			/* flags */
430 				NULL, NULL,		/* lock, lockarg */
431 				&sc->parent_dmat)) {
432 		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
433 		return (ENOMEM);
434 	}
435 
436 	/*
437 	 * DMA tag for normal sized RX frames
438 	 */
439 	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
440 		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
441 		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
442 		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
443 		return (ENOMEM);
444 	}
445 
446 	/*
447 	 * DMA tag for jumbo sized RX frames.
448 	 */
449 	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
450 		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
451 		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
452 		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
453 		return (ENOMEM);
454 	}
455 
456 	/*
457 	 * DMA tag for TX frames.
458 	 */
459 	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
460 		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
461 		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
462 		NULL, NULL, &sc->tx_dmat)) {
463 		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
464 		return (ENOMEM);
465 	}
466 
467 	return (0);
468 }
469 
470 int
471 t3_sge_free(struct adapter * sc)
472 {
473 
474 	if (sc->tx_dmat != NULL)
475 		bus_dma_tag_destroy(sc->tx_dmat);
476 
477 	if (sc->rx_jumbo_dmat != NULL)
478 		bus_dma_tag_destroy(sc->rx_jumbo_dmat);
479 
480 	if (sc->rx_dmat != NULL)
481 		bus_dma_tag_destroy(sc->rx_dmat);
482 
483 	if (sc->parent_dmat != NULL)
484 		bus_dma_tag_destroy(sc->parent_dmat);
485 
486 	return (0);
487 }
488 
489 void
490 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
491 {
492 
493 	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
494 	qs->rspq.polling = 0 /* p->polling */;
495 }
496 
497 #if !defined(__i386__) && !defined(__amd64__)
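/* busdma callback used by refill_fl() to record a cluster's DMA segment. */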
498 static void
499 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
500 {
501 	struct refill_fl_cb_arg *cb_arg = arg;
502 
503 	cb_arg->error = error;
504 	cb_arg->seg = segs[0];
505 	cb_arg->nseg = nseg;
506 
507 }
508 #endif
509 /**
510  *	refill_fl - refill an SGE free-buffer list
511  *	@sc: the controller softc
512  *	@q: the free-list to refill
513  *	@n: the number of new buffers to allocate
514  *
515  *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
516  *	The caller must assure that @n does not exceed the queue's capacity.
517  */
518 static void
519 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
520 {
521 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
522 	struct rx_desc *d = &q->desc[q->pidx];
523 	struct refill_fl_cb_arg cb_arg;
524 	caddr_t cl;
525 	int err, count = 0;
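	/*
	 * The first header_size bytes of each cluster are not mapped for DMA;
	 * judging from the sizeof terms below, they leave room for the mbuf
	 * header and reference count that are laid over the cluster when the
	 * packet is handed up after rx.
	 */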
526 	int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
527 
528 	cb_arg.error = 0;
529 	while (n--) {
530 		/*
531 		 * We only allocate a cluster, mbuf allocation happens after rx
532 		 */
533 		if ((cl = cxgb_cache_get(q->zone)) == NULL) {
534 			log(LOG_WARNING, "Failed to allocate cluster\n");
535 			goto done;
536 		}
537 
538 		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
539 			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
540 				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
541 				uma_zfree(q->zone, cl);
542 				goto done;
543 			}
544 			sd->flags |= RX_SW_DESC_MAP_CREATED;
545 		}
546 #if !defined(__i386__) && !defined(__amd64__)
547 		err = bus_dmamap_load(q->entry_tag, sd->map,
548 		    cl + header_size, q->buf_size,
549 		    refill_fl_cb, &cb_arg, 0);
550 
551 		if (err != 0 || cb_arg.error) {
552 			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
553 			/*
554 			 * XXX free cluster
555 			 */
556 			return;
557 		}
558 #else
559 		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size));
560 #endif
561 		sd->flags |= RX_SW_DESC_INUSE;
562 		sd->rxsd_cl = cl;
563 		sd->data = cl + header_size;
564 		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
565 		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
566 		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
567 		d->gen2 = htobe32(V_FLD_GEN2(q->gen));
568 
569 		d++;
570 		sd++;
571 
572 		if (++q->pidx == q->size) {
573 			q->pidx = 0;
574 			q->gen ^= 1;
575 			sd = q->sdesc;
576 			d = q->desc;
577 		}
578 		q->credits++;
579 		count++;
580 	}
581 
582 done:
583 	if (count)
584 		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
585 }
586 
587 
588 /**
589  *	free_rx_bufs - free the Rx buffers on an SGE free list
590  *	@sc: the controller softc
591  *	@q: the SGE free list to clean up
592  *
593  *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
594  *	this queue should be stopped before calling this function.
595  */
596 static void
597 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
598 {
599 	u_int cidx = q->cidx;
600 
601 	while (q->credits--) {
602 		struct rx_sw_desc *d = &q->sdesc[cidx];
603 
604 		if (d->flags & RX_SW_DESC_INUSE) {
605 			bus_dmamap_unload(q->entry_tag, d->map);
606 			bus_dmamap_destroy(q->entry_tag, d->map);
607 			uma_zfree(q->zone, d->rxsd_cl);
608 		}
609 		d->rxsd_cl = NULL;
610 		if (++cidx == q->size)
611 			cidx = 0;
612 	}
613 }
614 
615 static __inline void
616 __refill_fl(adapter_t *adap, struct sge_fl *fl)
617 {
618 	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
619 }
620 
621 static __inline void
622 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
623 {
624 	if ((fl->size - fl->credits) < max)
625 		refill_fl(adap, fl, min(max, fl->size - fl->credits));
626 }
627 
628 void
629 refill_fl_service(adapter_t *adap, struct sge_fl *fl)
630 {
631 	__refill_fl_lt(adap, fl, 512);
632 }
633 
634 /**
635  *	recycle_rx_buf - recycle a receive buffer
636  *	@adapter: the adapter
637  *	@q: the SGE free list
638  *	@idx: index of buffer to recycle
639  *
640  *	Recycles the specified buffer on the given free list by adding it at
641  *	the next available slot on the list.
642  */
643 static void
644 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
645 {
646 	struct rx_desc *from = &q->desc[idx];
647 	struct rx_desc *to   = &q->desc[q->pidx];
648 
649 	q->sdesc[q->pidx] = q->sdesc[idx];
650 	to->addr_lo = from->addr_lo;        // already big endian
651 	to->addr_hi = from->addr_hi;        // likewise
652 	wmb();
653 	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
654 	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
655 	q->credits++;
656 
657 	if (++q->pidx == q->size) {
658 		q->pidx = 0;
659 		q->gen ^= 1;
660 	}
661 	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
662 }
663 
664 static void
665 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
666 {
667 	uint32_t *addr;
668 
669 	addr = arg;
670 	*addr = segs[0].ds_addr;
671 }
672 
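/**
 *	alloc_ring - allocate a HW descriptor ring and optional SW state
 *	@sc: the adapter softc
 *	@nelem: number of ring entries
 *	@elem_size: size of each HW descriptor
 *	@sw_size: size of the per-entry SW state (0 for none)
 *	@phys: returns the bus address of the ring
 *	@desc: returns the KVA of the HW descriptors
 *	@sdesc: returns the KVA of the SW state array
 *	@tag: returns the descriptor DMA tag
 *	@map: returns the descriptor DMA map
 *	@parent_entry_tag: parent tag for the per-entry tag (NULL for none)
 *	@entry_tag: returns the per-entry DMA tag used to map buffers
 *
 *	Allocates and zeroes DMA-able memory for a descriptor ring, records
 *	its bus address, optionally allocates a software state array, and
 *	optionally creates a DMA tag for mapping the ring's buffers.
 */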
673 static int
674 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
675     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
676     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
677 {
678 	size_t len = nelem * elem_size;
679 	void *s = NULL;
680 	void *p = NULL;
681 	int err;
682 
683 	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
684 				      BUS_SPACE_MAXADDR_32BIT,
685 				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
686 				      len, 0, NULL, NULL, tag)) != 0) {
687 		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
688 		return (ENOMEM);
689 	}
690 
691 	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
692 				    map)) != 0) {
693 		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
694 		return (ENOMEM);
695 	}
696 
697 	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
698 	bzero(p, len);
699 	*(void **)desc = p;
700 
701 	if (sw_size) {
702 		len = nelem * sw_size;
703 		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
704 		*(void **)sdesc = s;
705 	}
706 	if (parent_entry_tag == NULL)
707 		return (0);
708 
709 	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
710 				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
711 		                      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
712 				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
713 		                      NULL, NULL, entry_tag)) != 0) {
714 		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
715 		return (ENOMEM);
716 	}
717 	return (0);
718 }
719 
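/*
 * Task handler that runs the common slow-path (non-data) interrupt code
 * from taskqueue context.
 */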
720 static void
721 sge_slow_intr_handler(void *arg, int ncount)
722 {
723 	adapter_t *sc = arg;
724 
725 	t3_slow_intr_handler(sc);
726 }
727 
728 /**
729  *	sge_timer_cb - perform periodic maintenance of the SGE queue sets
730  *	@arg: the adapter
731  *
732  *	Runs periodically from a timer to perform maintenance of the adapter's
733  *	SGE queue sets.  It performs the following tasks:
734  *
735  *	a) Cleans up any completed Tx descriptors that may still be pending.
736  *	Normal descriptor cleanup happens when new packets are added to a Tx
737  *	queue so this timer is relatively infrequent and does any cleanup only
738  *	if the Tx queue has not seen any new packets in a while.  We make a
739  *	best effort attempt to reclaim descriptors, in that we don't wait
740  *	around if we cannot get a queue's lock (which most likely is because
741  *	someone else is queueing new packets and so will also handle the clean
742  *	up).  Since control queues use immediate data exclusively we don't
743  *	bother cleaning them up here.
744  *
745  *	b) Replenishes Rx queues that have run out due to memory shortage.
746  *	Normally new Rx buffers are added when existing ones are consumed but
747  *	when out of memory a queue can become empty.  We try to add only a few
748  *	buffers here, the queue will be replenished fully as these new buffers
749  *	are used up if memory shortage has subsided.
750  *
751  *	c) Return coalesced response queue credits in case a response queue is
752  *	starved.
753  *
754  *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
755  *	fifo overflows and the FW doesn't implement any recovery scheme yet.
756  */
757 static void
758 sge_timer_cb(void *arg)
759 {
760 	adapter_t *sc = arg;
761 #ifndef IFNET_MULTIQUEUE
762 	struct port_info *pi;
763 	struct sge_qset *qs;
764 	struct sge_txq  *txq;
765 	int i, j;
766 	int reclaim_ofl, refill_rx;
767 
768 	for (i = 0; i < sc->params.nports; i++)
769 		for (j = 0; j < sc->port[i].nqsets; j++) {
770 			qs = &sc->sge.qs[i + j];
771 			qs = &sc->sge.qs[sc->port[i].first_qset + j];
772 			reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
773 			refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
774 			    (qs->fl[1].credits < qs->fl[1].size));
775 			if (reclaim_ofl || refill_rx) {
776 				pi = &sc->port[i];
777 				taskqueue_enqueue(pi->tq, &pi->timer_reclaim_task);
778 				break;
779 			}
780 		}
781 #endif
782 	if (sc->params.nports > 2) {
783 		int i;
784 
785 		for_each_port(sc, i) {
786 			struct port_info *pi = &sc->port[i];
787 
788 			t3_write_reg(sc, A_SG_KDOORBELL,
789 				     F_SELEGRCNTX |
790 				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
791 		}
792 	}
793 	if (sc->open_device_map != 0)
794 		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
795 }
796 
797 /*
798  * This is meant to be a catch-all function to keep sge state private
799  * to sge.c
800  *
801  */
802 int
803 t3_sge_init_adapter(adapter_t *sc)
804 {
805 	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
806 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
807 	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
808 	mi_init();
809 	cxgb_cache_init();
810 	return (0);
811 }
812 
813 int
814 t3_sge_reset_adapter(adapter_t *sc)
815 {
816 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
817 	return (0);
818 }
819 
820 int
821 t3_sge_init_port(struct port_info *pi)
822 {
823 	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
824 	return (0);
825 }
826 
827 void
828 t3_sge_deinit_sw(adapter_t *sc)
829 {
830 
831 	mi_deinit();
832 }
833 
834 /**
835  *	refill_rspq - replenish an SGE response queue
836  *	@adapter: the adapter
837  *	@q: the response queue to replenish
838  *	@credits: how many new responses to make available
839  *
840  *	Replenishes a response queue by making the supplied number of responses
841  *	available to HW.
842  */
843 static __inline void
844 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
845 {
846 
847 	/* mbufs are allocated on demand when a rspq entry is processed. */
848 	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
849 		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
850 }
851 
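/**
 *	sge_txq_reclaim_ - opportunistically reclaim completed Tx descriptors
 *	@txq: the Tx queue to reclaim
 *	@force: currently unused
 *
 *	Best-effort reclaim: does nothing if fewer than 16 descriptors are
 *	reclaimable or if the queue lock cannot be acquired without waiting.
 */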
852 static __inline void
853 sge_txq_reclaim_(struct sge_txq *txq, int force)
854 {
855 
856 	if (desc_reclaimable(txq) < 16)
857 		return;
858 	if (mtx_trylock(&txq->lock) == 0)
859 		return;
860 	reclaim_completed_tx_(txq, 16);
861 	mtx_unlock(&txq->lock);
862 
863 }
864 
865 static void
866 sge_txq_reclaim_handler(void *arg, int ncount)
867 {
868 	struct sge_txq *q = arg;
869 
870 	sge_txq_reclaim_(q, TRUE);
871 }
872 
873 
874 
875 static void
876 sge_timer_reclaim(void *arg, int ncount)
877 {
878 	struct port_info *pi = arg;
879 	int i, nqsets = pi->nqsets;
880 	adapter_t *sc = pi->adapter;
881 	struct sge_qset *qs;
882 	struct sge_txq *txq;
883 	struct mtx *lock;
884 
885 #ifdef IFNET_MULTIQUEUE
886 	panic("%s should not be called with multiqueue support\n", __FUNCTION__);
887 #endif
888 	for (i = 0; i < nqsets; i++) {
889 		qs = &sc->sge.qs[i];
890 
891 		txq = &qs->txq[TXQ_OFLD];
892 		sge_txq_reclaim_(txq, FALSE);
893 
894 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
895 			    &sc->sge.qs[0].rspq.lock;
896 
897 		if (mtx_trylock(lock)) {
898 			/* XXX currently assume that we are *NOT* polling */
899 			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
900 
901 			if (qs->fl[0].credits < qs->fl[0].size - 16)
902 				__refill_fl(sc, &qs->fl[0]);
903 			if (qs->fl[1].credits < qs->fl[1].size - 16)
904 				__refill_fl(sc, &qs->fl[1]);
905 
906 			if (status & (1 << qs->rspq.cntxt_id)) {
907 				if (qs->rspq.credits) {
908 					refill_rspq(sc, &qs->rspq, 1);
909 					qs->rspq.credits--;
910 					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
911 					    1 << qs->rspq.cntxt_id);
912 				}
913 			}
914 			mtx_unlock(lock);
915 		}
916 	}
917 }
918 
919 /**
920  *	init_qset_cntxt - initialize an SGE queue set context info
921  *	@qs: the queue set
922  *	@id: the queue set id
923  *
924  *	Initializes the TIDs and context ids for the queues of a queue set.
925  */
926 static void
927 init_qset_cntxt(struct sge_qset *qs, u_int id)
928 {
929 
930 	qs->rspq.cntxt_id = id;
931 	qs->fl[0].cntxt_id = 2 * id;
932 	qs->fl[1].cntxt_id = 2 * id + 1;
933 	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
934 	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
935 	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
936 	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
937 	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
938 
939 	mbufq_init(&qs->txq[TXQ_ETH].sendq);
940 	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
941 	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
942 }
943 
944 
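/**
 *	txq_prod - advance the producer state of a Tx queue
 *	@txq: the Tx queue
 *	@ndesc: number of descriptors the new work request will use
 *	@txqs: returns the generation, completion flag and producer index
 *	       to use when writing the work request
 *
 *	Reserves @ndesc descriptors, snapshots the state the caller needs to
 *	build the WR, requests a completion roughly every 32 descriptors, and
 *	wraps the producer index and flips the generation bit at the end of
 *	the ring.
 */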
945 static void
946 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
947 {
948 	txq->in_use += ndesc;
949 	/*
950 	 * XXX we don't handle stopping of queue
951 	 * presumably start handles this when we bump against the end
952 	 */
953 	txqs->gen = txq->gen;
954 	txq->unacked += ndesc;
955 	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
956 	txq->unacked &= 31;
957 	txqs->pidx = txq->pidx;
958 	txq->pidx += ndesc;
959 #ifdef INVARIANTS
960 	if (((txqs->pidx > txq->cidx) &&
961 		(txq->pidx < txqs->pidx) &&
962 		(txq->pidx >= txq->cidx)) ||
963 	    ((txqs->pidx < txq->cidx) &&
964 		(txq->pidx >= txq->cidx)) ||
965 	    ((txqs->pidx < txq->cidx) &&
966 		(txq->cidx < txqs->pidx)))
967 		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
968 		    txqs->pidx, txq->pidx, txq->cidx);
969 #endif
970 	if (txq->pidx >= txq->size) {
971 		txq->pidx -= txq->size;
972 		txq->gen ^= 1;
973 	}
974 
975 }
976 
977 /**
978  *	calc_tx_descs - calculate the number of Tx descriptors for a packet
979  *	@m: the packet mbufs
980  *      @nsegs: the number of segments
981  *
982  * 	Returns the number of Tx descriptors needed for the given Ethernet
983  * 	packet.  Ethernet packets require addition of WR and CPL headers.
984  */
985 static __inline unsigned int
986 calc_tx_descs(const struct mbuf *m, int nsegs)
987 {
988 	unsigned int flits;
989 
990 	if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
991 		return 1;
992 
993 	flits = sgl_len(nsegs) + 2;
994 #ifdef TSO_SUPPORTED
995 	if (m->m_pkthdr.csum_flags & CSUM_TSO)
996 		flits++;
997 #endif
998 	return flits_to_desc(flits);
999 }
1000 
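/**
 *	busdma_map_mbufs - load an mbuf chain for DMA
 *	@m: the mbuf chain (may be replaced if it has to be defragmented)
 *	@txq: the Tx queue whose entry tag is used for the mapping
 *	@txsd: the SW descriptor that will own the mapping
 *	@segs: returns the DMA segments
 *	@nsegs: returns the number of segments
 *
 *	Maps a packet for transmission.  If the chain has too many segments it
 *	is defragmented once and the load retried; ENOMEM is returned to the
 *	caller for a later retry, while other failures free the chain and
 *	return an error.
 */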
1001 static unsigned int
1002 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
1003     struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
1004 {
1005 	struct mbuf *m0;
1006 	int err, pktlen, pass = 0;
1007 
1008 retry:
1009 	err = 0;
1010 	m0 = *m;
1011 	pktlen = m0->m_pkthdr.len;
1012 #if defined(__i386__) || defined(__amd64__)
1013 	if (busdma_map_sg_collapse(m, segs, nsegs) == 0) {
1014 		goto done;
1015 	} else
1016 #endif
1017 		err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);
1018 
1019 	if (err == 0) {
1020 		goto done;
1021 	}
1022 	if (err == EFBIG && pass == 0) {
1023 		pass = 1;
1024 		/* Too many segments, try to defrag */
1025 		m0 = m_defrag(m0, M_DONTWAIT);
1026 		if (m0 == NULL) {
1027 			m_freem(*m);
1028 			*m = NULL;
1029 			return (ENOBUFS);
1030 		}
1031 		*m = m0;
1032 		goto retry;
1033 	} else if (err == ENOMEM) {
1034 		return (err);
1035 	} else if (err) {
1036 		if (cxgb_debug)
1037 			printf("map failure err=%d pktlen=%d\n", err, pktlen);
1038 		m_freem(m0);
1039 		*m = NULL;
1040 		return (err);
1041 	}
1042 done:
1043 #if !defined(__i386__) && !defined(__amd64__)
1044 	bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE);
1045 #endif
1046 	txsd->flags |= TX_SW_DESC_MAPPED;
1047 
1048 	return (0);
1049 }
1050 
1051 /**
1052  *	make_sgl - populate a scatter/gather list for a packet
1053  *	@sgp: the SGL to populate
1054  *	@segs: the packet dma segments
1055  *	@nsegs: the number of segments
1056  *
1057  *	Generates a scatter/gather list for the buffers that make up a packet
1058  *	in the SGL provided by the caller; the SGL size in 8-byte words is
1059  *	given by sgl_len(nsegs).  The caller must size the SGL appropriately.
1060  */
1061 static __inline void
1062 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1063 {
1064 	int i, idx;
1065 
1066 	for (idx = 0, i = 0; i < nsegs; i++) {
1067 		/*
1068 		 * firmware doesn't like empty segments
1069 		 */
1070 		if (segs[i].ds_len == 0)
1071 			continue;
1072 		if (i && idx == 0)
1073 			++sgp;
1074 
1075 		sgp->len[idx] = htobe32(segs[i].ds_len);
1076 		sgp->addr[idx] = htobe64(segs[i].ds_addr);
1077 		idx ^= 1;
1078 	}
1079 
1080 	if (idx) {
1081 		sgp->len[idx] = 0;
1082 		sgp->addr[idx] = 0;
1083 	}
1084 }
1085 
1086 /**
1087  *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1088  *	@adap: the adapter
1089  *	@q: the Tx queue
1090  *
1091  *	Ring the doorbell if a Tx queue is asleep.  There is a natural race
1092  *	where the HW may go to sleep just after we check; in that case the
1093  *	interrupt handler will detect the outstanding TX packet
1094  *	and ring the doorbell for us.
1095  *
1096  *	When GTS is disabled we unconditionally ring the doorbell.
1097  */
1098 static __inline void
1099 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1100 {
1101 #if USE_GTS
1102 	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1103 	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1104 		set_bit(TXQ_LAST_PKT_DB, &q->flags);
1105 #ifdef T3_TRACE
1106 		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1107 			  q->cntxt_id);
1108 #endif
1109 		t3_write_reg(adap, A_SG_KDOORBELL,
1110 			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1111 	}
1112 #else
1113 	wmb();            /* write descriptors before telling HW */
1114 	t3_write_reg(adap, A_SG_KDOORBELL,
1115 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1116 #endif
1117 }
1118 
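/*
 * When SGE_NUM_GENBITS is 2, writes the generation into the last flit of the
 * descriptor; otherwise this is a no-op.
 */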
1119 static __inline void
1120 wr_gen2(struct tx_desc *d, unsigned int gen)
1121 {
1122 #if SGE_NUM_GENBITS == 2
1123 	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1124 #endif
1125 }
1126 
1127 /**
1128  *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
1129  *	@ndesc: number of Tx descriptors spanned by the SGL
1130  *	@txd: first Tx descriptor to be written
1131  *	@txqs: txq state (generation and producer index)
1132  *	@txq: the SGE Tx queue
1133  *	@sgl: the SGL
1134  *	@flits: number of flits to the start of the SGL in the first descriptor
1135  *	@sgl_flits: the SGL size in flits
1136  *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
1137  *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
1138  *
1139  *	Write a work request header and an associated SGL.  If the SGL is
1140  *	small enough to fit into one Tx descriptor it has already been written
1141  *	and we just need to write the WR header.  Otherwise we distribute the
1142  *	SGL across the number of descriptors it spans.
1143  */
1144 static void
1145 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1146     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1147     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1148 {
1149 
1150 	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1151 	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1152 
1153 	if (__predict_true(ndesc == 1)) {
1154 		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1155 		    V_WR_SGLSFLT(flits)) | wr_hi;
1156 		wmb();
1157 		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1158 		    V_WR_GEN(txqs->gen)) | wr_lo;
1159 		/* XXX gen? */
1160 		wr_gen2(txd, txqs->gen);
1161 
1162 	} else {
1163 		unsigned int ogen = txqs->gen;
1164 		const uint64_t *fp = (const uint64_t *)sgl;
1165 		struct work_request_hdr *wp = wrp;
1166 
1167 		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1168 		    V_WR_SGLSFLT(flits)) | wr_hi;
1169 
1170 		while (sgl_flits) {
1171 			unsigned int avail = WR_FLITS - flits;
1172 
1173 			if (avail > sgl_flits)
1174 				avail = sgl_flits;
1175 			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1176 			sgl_flits -= avail;
1177 			ndesc--;
1178 			if (!sgl_flits)
1179 				break;
1180 
1181 			fp += avail;
1182 			txd++;
1183 			txsd++;
1184 			if (++txqs->pidx == txq->size) {
1185 				txqs->pidx = 0;
1186 				txqs->gen ^= 1;
1187 				txd = txq->desc;
1188 				txsd = txq->sdesc;
1189 			}
1190 
1191 			/*
1192 			 * when the head of the mbuf chain
1193 			 * is freed all clusters will be freed
1194 			 * with it
1195 			 */
1196 			KASSERT(txsd->mi.mi_base == NULL,
1197 			    ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1198 			wrp = (struct work_request_hdr *)txd;
1199 			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1200 			    V_WR_SGLSFLT(1)) | wr_hi;
1201 			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1202 				    sgl_flits + 1)) |
1203 			    V_WR_GEN(txqs->gen)) | wr_lo;
1204 			wr_gen2(txd, txqs->gen);
1205 			flits = 1;
1206 		}
1207 		wrp->wr_hi |= htonl(F_WR_EOP);
1208 		wmb();
1209 		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1210 		wr_gen2((struct tx_desc *)wp, ogen);
1211 	}
1212 }
1213 
1214 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1215 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
1216 
1217 #ifdef VLAN_SUPPORTED
1218 #define GET_VTAG(cntrl, m) \
1219 do { \
1220 	if ((m)->m_flags & M_VLANTAG)					            \
1221 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1222 } while (0)
1223 
1224 #define GET_VTAG_MI(cntrl, mi) \
1225 do { \
1226 	if ((mi)->mi_flags & M_VLANTAG)					\
1227 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \
1228 } while (0)
1229 #else
1230 #define GET_VTAG(cntrl, m)
1231 #define GET_VTAG_MI(cntrl, m)
1232 #endif
1233 
1234 int
1235 t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
1236 {
1237 	adapter_t *sc;
1238 	struct mbuf *m0;
1239 	struct sge_txq *txq;
1240 	struct txq_state txqs;
1241 	struct port_info *pi;
1242 	unsigned int ndesc, flits, cntrl, mlen;
1243 	int err, nsegs, tso_info = 0;
1244 
1245 	struct work_request_hdr *wrp;
1246 	struct tx_sw_desc *txsd;
1247 	struct sg_ent *sgp, *sgl;
1248 	uint32_t wr_hi, wr_lo, sgl_flits;
1249 	bus_dma_segment_t segs[TX_MAX_SEGS];
1250 
1251 	struct tx_desc *txd;
1252 	struct mbuf_vec *mv;
1253 	struct mbuf_iovec *mi;
1254 
1255 	DPRINTF("t3_encap cpu=%d ", curcpu);
1256 
1257 	mi = NULL;
1258 	pi = qs->port;
1259 	sc = pi->adapter;
1260 	txq = &qs->txq[TXQ_ETH];
1261 	txd = &txq->desc[txq->pidx];
1262 	txsd = &txq->sdesc[txq->pidx];
1263 	sgl = txq->txq_sgl;
1264 	m0 = *m;
1265 
1266 	DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
1267 	DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);
1268 	if (cxgb_debug)
1269 		printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx);
1270 
1271 	mtx_assert(&txq->lock, MA_OWNED);
1272 	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1273 /*
1274  * XXX need to add VLAN support for 6.x
1275  */
1276 #ifdef VLAN_SUPPORTED
1277 	if  (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1278 		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1279 #endif
1280 	KASSERT(txsd->mi.mi_base == NULL,
1281 	    ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1282 	if (count > 1) {
1283 		panic("count > 1 not supported in CVS\n");
1284 		if ((err = busdma_map_sg_vec(m, &m0, segs, count)))
1285 			return (err);
1286 		nsegs = count;
1287 	} else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) {
1288 		if (cxgb_debug)
1289 			printf("failed ... err=%d\n", err);
1290 		return (err);
1291 	}
1292 	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count));
1293 
1294 	if (!(m0->m_pkthdr.len <= PIO_LEN)) {
1295 		mi_collapse_mbuf(&txsd->mi, m0);
1296 		mi = &txsd->mi;
1297 	}
1298 	if (count > 1) {
1299 		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1300 		int i, fidx;
1301 		struct mbuf_iovec *batchmi;
1302 
1303 		mv = mtomv(m0);
1304 		batchmi = mv->mv_vec;
1305 
1306 		wrp = (struct work_request_hdr *)txd;
1307 
1308 		flits = count*2 + 1;
1309 		txq_prod(txq, 1, &txqs);
1310 
1311 		for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) {
1312 			struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i];
1313 
1314 			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1315 			GET_VTAG_MI(cntrl, batchmi);
1316 			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1317 			if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1318 				cntrl |= F_TXPKT_IPCSUM_DIS;
1319 			if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1320 				cntrl |= F_TXPKT_L4CSUM_DIS;
1321 			cbe->cntrl = htonl(cntrl);
1322 			cbe->len = htonl(batchmi->mi_len | 0x80000000);
1323 			cbe->addr = htobe64(segs[i].ds_addr);
1324 			txd->flit[fidx] |= htobe64(1 << 24);
1325 		}
1326 
1327 		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1328 		    V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1329 		wmb();
1330 		wrp->wr_lo = htonl(V_WR_LEN(flits) |
1331 		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1332 		/* XXX gen? */
1333 		wr_gen2(txd, txqs.gen);
1334 		check_ring_tx_db(sc, txq);
1335 
1336 		return (0);
1337 	} else if (tso_info) {
1338 		int undersized, eth_type;
1339 		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1340 		struct ip *ip;
1341 		struct tcphdr *tcp;
1342 		char *pkthdr, tmp[TCPPKTHDRSIZE];
1343 		struct mbuf_vec *mv;
1344 		struct mbuf_iovec *tmpmi;
1345 
1346 		mv = mtomv(m0);
1347 		tmpmi = mv->mv_vec;
1348 
1349 		txd->flit[2] = 0;
1350 		GET_VTAG(cntrl, m0);
1351 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1352 		hdr->cntrl = htonl(cntrl);
1353 		mlen = m0->m_pkthdr.len;
1354 		hdr->len = htonl(mlen | 0x80000000);
1355 
1356 		DPRINTF("tso buf len=%d\n", mlen);
1357 		undersized = (((tmpmi->mi_len < TCPPKTHDRSIZE) &&
1358 			(m0->m_flags & M_VLANTAG)) ||
1359 		    (tmpmi->mi_len < TCPPKTHDRSIZE - ETHER_VLAN_ENCAP_LEN));
1360 
1361 		if (__predict_false(undersized)) {
1362 			pkthdr = tmp;
1363 			if (mi)
1364 				dump_mi(mi);
1365 			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1366 			    m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
1367 			panic("discontig packet - fixxorz");
1368 		} else
1369 			pkthdr = m0->m_data;
1370 
1371 		if (__predict_false(m0->m_flags & M_VLANTAG)) {
1372 			eth_type = CPL_ETH_II_VLAN;
1373 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1374 			    ETHER_VLAN_ENCAP_LEN);
1375 		} else {
1376 			eth_type = CPL_ETH_II;
1377 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1378 		}
1379 		tcp = (struct tcphdr *)((uint8_t *)ip +
1380 		    sizeof(*ip));
1381 
1382 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
1383 			    V_LSO_IPHDR_WORDS(ip->ip_hl) |
1384 			    V_LSO_TCPHDR_WORDS(tcp->th_off);
1385 		hdr->lso_info = htonl(tso_info);
1386 
1387 		if (__predict_false(mlen <= PIO_LEN)) {
1388 			/* pkt not undersized but fits in PIO_LEN */
1389 			printf("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1390 			    m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
1391 			txq_prod(txq, 1, &txqs);
1392 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1393 			m_freem(m0);
1394 			m0 = NULL;
1395 			flits = (mlen + 7) / 8 + 3;
1396 			hdr->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1397 					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1398 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1399 			wmb();
1400 			hdr->wr.wr_lo = htonl(V_WR_LEN(flits) |
1401 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1402 
1403 			wr_gen2(txd, txqs.gen);
1404 			check_ring_tx_db(sc, txq);
1405 			return (0);
1406 		}
1407 		flits = 3;
1408 	} else {
1409 		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1410 
1411 		GET_VTAG(cntrl, m0);
1412 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1413 		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1414 			cntrl |= F_TXPKT_IPCSUM_DIS;
1415 		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1416 			cntrl |= F_TXPKT_L4CSUM_DIS;
1417 		cpl->cntrl = htonl(cntrl);
1418 		mlen = m0->m_pkthdr.len;
1419 		cpl->len = htonl(mlen | 0x80000000);
1420 
1421 		if (mlen <= PIO_LEN) {
1422 			txq_prod(txq, 1, &txqs);
1423 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1424 			m_freem(m0);
1425 			m0 = NULL;
1426 			flits = (mlen + 7) / 8 + 2;
1427 			cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1428 					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1429 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1430 			wmb();
1431 			cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1432 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1433 
1434 			wr_gen2(txd, txqs.gen);
1435 			check_ring_tx_db(sc, txq);
1436 			DPRINTF("pio buf\n");
1437 			return (0);
1438 		}
1439 		DPRINTF("regular buf\n");
1440 		flits = 2;
1441 	}
1442 	wrp = (struct work_request_hdr *)txd;
1443 
1444 #ifdef	nomore
1445 	/*
1446 	 * XXX need to move into one of the helper routines above
1447 	 *
1448 	 */
1449 	if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0)
1450 		return (err);
1451 	m0 = *m;
1452 #endif
1453 	ndesc = calc_tx_descs(m0, nsegs);
1454 
1455 	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1456 	make_sgl(sgp, segs, nsegs);
1457 
1458 	sgl_flits = sgl_len(nsegs);
1459 
1460 	DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1461 	txq_prod(txq, ndesc, &txqs);
1462 	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1463 	wr_lo = htonl(V_WR_TID(txq->token));
1464 	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1465 	check_ring_tx_db(pi->adapter, txq);
1466 
1467 	if ((m0->m_type == MT_DATA) &&
1468 	    ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) &&
1469 	    (m0->m_ext.ext_type != EXT_PACKET)) {
1470 		m0->m_flags &= ~M_EXT;
1471 		cxgb_mbufs_outstanding--;
1472 		m_free(m0);
1473 	}
1474 
1475 	return (0);
1476 }
1477 
1478 
1479 /**
1480  *	write_imm - write a packet into a Tx descriptor as immediate data
1481  *	@d: the Tx descriptor to write
1482  *	@m: the packet
1483  *	@len: the length of packet data to write as immediate data
1484  *	@gen: the generation bit value to write
1485  *
1486  *	Writes a packet as immediate data into a Tx descriptor.  The packet
1487  *	contains a work request at its beginning.  We must write the packet
1488  *	carefully so the SGE doesn't read accidentally before it's written in
1489  *	its entirety.
1490  */
1491 static __inline void
1492 write_imm(struct tx_desc *d, struct mbuf *m,
1493 	  unsigned int len, unsigned int gen)
1494 {
1495 	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1496 	struct work_request_hdr *to = (struct work_request_hdr *)d;
1497 
1498 	if (len > WR_LEN)
1499 		panic("len too big %d\n", len);
1500 	if (len < sizeof(*from))
1501 		panic("len too small %d", len);
1502 
1503 	memcpy(&to[1], &from[1], len - sizeof(*from));
1504 	to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1505 					V_WR_BCNTLFLT(len & 7));
1506 	wmb();
1507 	to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1508 					V_WR_LEN((len + 7) / 8));
1509 	wr_gen2(d, gen);
1510 
1511 	/*
1512 	 * This check is a hack we should really fix the logic so
1513 	 * that this can't happen
1514 	 */
1515 	if (m->m_type != MT_DONTFREE)
1516 		m_freem(m);
1517 
1518 }
1519 
1520 /**
1521  *	check_desc_avail - check descriptor availability on a send queue
1522  *	@adap: the adapter
1523  *	@q: the TX queue
1524  *	@m: the packet needing the descriptors
1525  *	@ndesc: the number of Tx descriptors needed
1526  *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1527  *
1528  *	Checks if the requested number of Tx descriptors is available on an
1529  *	SGE send queue.  If the queue is already suspended or not enough
1530  *	descriptors are available the packet is queued for later transmission.
1531  *	Must be called with the Tx queue locked.
1532  *
1533  *	Returns 0 if enough descriptors are available, 1 if there aren't
1534  *	enough descriptors and the packet has been queued, and 2 if the caller
1535  *	needs to retry because there weren't enough descriptors at the
1536  *	beginning of the call but some freed up in the meantime.
1537  */
1538 static __inline int
1539 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1540 		 struct mbuf *m, unsigned int ndesc,
1541 		 unsigned int qid)
1542 {
1543 	/*
1544 	 * XXX We currently only use this for checking the control queue
1545 	 * the control queue is only used for binding qsets which happens
1546 	 * at init time so we are guaranteed enough descriptors
1547 	 */
1548 	if (__predict_false(!mbufq_empty(&q->sendq))) {
1549 addq_exit:	mbufq_tail(&q->sendq, m);
1550 		return 1;
1551 	}
1552 	if (__predict_false(q->size - q->in_use < ndesc)) {
1553 
1554 		struct sge_qset *qs = txq_to_qset(q, qid);
1555 
1556 		printf("stopping q\n");
1557 
1558 		setbit(&qs->txq_stopped, qid);
1559 		smp_mb();
1560 
1561 		if (should_restart_tx(q) &&
1562 		    test_and_clear_bit(qid, &qs->txq_stopped))
1563 			return 2;
1564 
1565 		q->stops++;
1566 		goto addq_exit;
1567 	}
1568 	return 0;
1569 }
1570 
1571 
1572 /**
1573  *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1574  *	@q: the SGE control Tx queue
1575  *
1576  *	This is a variant of reclaim_completed_tx() that is used for Tx queues
1577  *	that send only immediate data (presently just the control queues) and
1578  *	thus do not have any mbufs.
1579  */
1580 static __inline void
1581 reclaim_completed_tx_imm(struct sge_txq *q)
1582 {
1583 	unsigned int reclaim = q->processed - q->cleaned;
1584 
1585 	mtx_assert(&q->lock, MA_OWNED);
1586 
1587 	q->in_use -= reclaim;
1588 	q->cleaned += reclaim;
1589 }
1590 
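/*
 * A packet can be sent as immediate data if it fits entirely within WR_LEN
 * bytes, i.e. within a single work request.
 */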
1591 static __inline int
1592 immediate(const struct mbuf *m)
1593 {
1594 	return (m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN);
1595 }
1596 
1597 /**
1598  *	ctrl_xmit - send a packet through an SGE control Tx queue
1599  *	@adap: the adapter
1600  *	@q: the control queue
1601  *	@m: the packet
1602  *
1603  *	Send a packet through an SGE control Tx queue.  Packets sent through
1604  *	a control queue must fit entirely as immediate data in a single Tx
1605  *	descriptor and have no page fragments.
1606  */
1607 static int
1608 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1609 {
1610 	int ret;
1611 	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1612 
1613 	if (__predict_false(!immediate(m))) {
1614 		m_freem(m);
1615 		return 0;
1616 	}
1617 
1618 	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1619 	wrp->wr_lo = htonl(V_WR_TID(q->token));
1620 
1621 	mtx_lock(&q->lock);
1622 again:	reclaim_completed_tx_imm(q);
1623 
1624 	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1625 	if (__predict_false(ret)) {
1626 		if (ret == 1) {
1627 			mtx_unlock(&q->lock);
1628 			log(LOG_ERR, "no desc available\n");
1629 			return (ENOSPC);
1630 		}
1631 		goto again;
1632 	}
1633 	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1634 
1635 	q->in_use++;
1636 	if (++q->pidx >= q->size) {
1637 		q->pidx = 0;
1638 		q->gen ^= 1;
1639 	}
1640 	mtx_unlock(&q->lock);
1641 	wmb();
1642 	t3_write_reg(adap, A_SG_KDOORBELL,
1643 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1644 	return (0);
1645 }
1646 
1647 
1648 /**
1649  *	restart_ctrlq - restart a suspended control queue
1650  *	@qs: the queue set containing the control queue
1651  *
1652  *	Resumes transmission on a suspended Tx control queue.
1653  */
1654 static void
1655 restart_ctrlq(void *data, int npending)
1656 {
1657 	struct mbuf *m;
1658 	struct sge_qset *qs = (struct sge_qset *)data;
1659 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1660 	adapter_t *adap = qs->port->adapter;
1661 
1662 	log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use);
1663 
1664 	mtx_lock(&q->lock);
1665 again:	reclaim_completed_tx_imm(q);
1666 
1667 	while (q->in_use < q->size &&
1668 	       (m = mbufq_dequeue(&q->sendq)) != NULL) {
1669 
1670 		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1671 
1672 		if (++q->pidx >= q->size) {
1673 			q->pidx = 0;
1674 			q->gen ^= 1;
1675 		}
1676 		q->in_use++;
1677 	}
1678 	if (!mbufq_empty(&q->sendq)) {
1679 		setbit(&qs->txq_stopped, TXQ_CTRL);
1680 		smp_mb();
1681 
1682 		if (should_restart_tx(q) &&
1683 		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1684 			goto again;
1685 		q->stops++;
1686 	}
1687 	mtx_unlock(&q->lock);
1688 	wmb();
1689 	t3_write_reg(adap, A_SG_KDOORBELL,
1690 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1691 }
1692 
1693 
1694 /*
1695  * Send a management message through control queue 0
1696  */
1697 int
1698 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1699 {
1700 	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1701 }
1702 
1703 
1704 /**
1705  *	free_qset - free the resources of an SGE queue set
1706  *	@sc: the controller owning the queue set
1707  *	@q: the queue set
1708  *
1709  *	Release the HW and SW resources associated with an SGE queue set, such
1710  *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
1711  *	queue set must be quiesced prior to calling this.
1712  */
1713 void
1714 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1715 {
1716 	int i;
1717 
1718 	t3_free_tx_desc_all(&q->txq[TXQ_ETH]);
1719 
1720 	for (i = 0; i < SGE_TXQ_PER_SET; i++)
1721 		if (q->txq[i].txq_mr.br_ring != NULL) {
1722 			free(q->txq[i].txq_mr.br_ring, M_DEVBUF);
1723 			mtx_destroy(&q->txq[i].txq_mr.br_lock);
1724 		}
1725 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1726 		if (q->fl[i].desc) {
1727 			mtx_lock_spin(&sc->sge.reg_lock);
1728 			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1729 			mtx_unlock_spin(&sc->sge.reg_lock);
1730 			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1731 			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1732 					q->fl[i].desc_map);
1733 			bus_dma_tag_destroy(q->fl[i].desc_tag);
1734 			bus_dma_tag_destroy(q->fl[i].entry_tag);
1735 		}
1736 		if (q->fl[i].sdesc) {
1737 			free_rx_bufs(sc, &q->fl[i]);
1738 			free(q->fl[i].sdesc, M_DEVBUF);
1739 		}
1740 	}
1741 
1742 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1743 		if (q->txq[i].desc) {
1744 			mtx_lock_spin(&sc->sge.reg_lock);
1745 			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1746 			mtx_unlock_spin(&sc->sge.reg_lock);
1747 			bus_dmamap_unload(q->txq[i].desc_tag,
1748 					q->txq[i].desc_map);
1749 			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1750 					q->txq[i].desc_map);
1751 			bus_dma_tag_destroy(q->txq[i].desc_tag);
1752 			bus_dma_tag_destroy(q->txq[i].entry_tag);
1753 			MTX_DESTROY(&q->txq[i].lock);
1754 		}
1755 		if (q->txq[i].sdesc) {
1756 			free(q->txq[i].sdesc, M_DEVBUF);
1757 		}
1758 	}
1759 
1760 	if (q->rspq.desc) {
1761 		mtx_lock_spin(&sc->sge.reg_lock);
1762 		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1763 		mtx_unlock_spin(&sc->sge.reg_lock);
1764 
1765 		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1766 		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1767 			        q->rspq.desc_map);
1768 		bus_dma_tag_destroy(q->rspq.desc_tag);
1769 		MTX_DESTROY(&q->rspq.lock);
1770 	}
1771 
1772 	bzero(q, sizeof(*q));
1773 }
1774 
1775 /**
1776  *	t3_free_sge_resources - free SGE resources
1777  *	@sc: the adapter softc
1778  *
1779  *	Frees resources used by the SGE queue sets.
1780  */
1781 void
1782 t3_free_sge_resources(adapter_t *sc)
1783 {
1784 	int i, nqsets;
1785 
1786 #ifdef IFNET_MULTIQUEUE
1787 	panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__);
1788 #endif
1789 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1790 		nqsets += sc->port[i].nqsets;
1791 
1792 	for (i = 0; i < nqsets; ++i)
1793 		t3_free_qset(sc, &sc->sge.qs[i]);
1794 }
1795 
1796 /**
1797  *	t3_sge_start - enable SGE
1798  *	@sc: the controller softc
1799  *
1800  *	Enables the SGE for DMAs.  This is the last step in starting packet
1801  *	transfers.
1802  */
1803 void
1804 t3_sge_start(adapter_t *sc)
1805 {
1806 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1807 }
1808 
1809 /**
1810  *	t3_sge_stop - disable SGE operation
1811  *	@sc: the adapter
1812  *
1813  *	Disables the DMA engine.  This can be called in emergencies (e.g.,
1814  *	from error interrupts) or from normal process context.  In the latter
1815  *	case it also disables any pending queue restart tasklets.  Note that
1816  *	if it is called in interrupt context it cannot disable the restart
1817  *	tasklets as it cannot wait, however the tasklets will have no effect
1818  *	since the doorbells are disabled and the driver will call this again
1819  *	later from process context, at which time the tasklets will be stopped
1820  *	if they are still running.
1821  */
1822 void
1823 t3_sge_stop(adapter_t *sc)
1824 {
1825 	int i, nqsets;
1826 
1827 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1828 
1829 	if (sc->tq == NULL)
1830 		return;
1831 
1832 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1833 		nqsets += sc->port[i].nqsets;
1834 #ifdef notyet
1835 	/*
1836 	 *
1837 	 * XXX
1838 	 */
1839 	for (i = 0; i < nqsets; ++i) {
1840 		struct sge_qset *qs = &sc->sge.qs[i];
1841 
1842 		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
1843 		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
1844 	}
1845 #endif
1846 }
1847 
1848 /**
1849  *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
1850  *	@q: the Tx queue to reclaim descriptors from
1851  *	@reclaimable: the number of descriptors to reclaim
1852  *
1853  *	Reclaims the given number of Tx descriptors from an SGE Tx queue and
1854  *	frees the associated Tx buffers.  Called with the Tx queue lock held.
1855  */
1861 void
1862 t3_free_tx_desc(struct sge_txq *q, int reclaimable)
1863 {
1864 	struct tx_sw_desc *txsd;
1865 	unsigned int cidx;
1866 
1867 #ifdef T3_TRACE
1868 	T3_TRACE2(sc->tb[q->cntxt_id & 7],
1869 		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
1870 #endif
1871 	cidx = q->cidx;
1872 	txsd = &q->sdesc[cidx];
1873 	DPRINTF("reclaiming %d WR\n", reclaimable);
1874 	mtx_assert(&q->lock, MA_OWNED);
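	/*
	 * Walk the software descriptor ring from the consumer index: unload
	 * any DMA map still attached, free the mbuf iovec, and wrap cidx back
	 * to zero at the end of the ring.  Entries with no mbuf attached are
	 * counted as skipped.
	 */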
1875 	while (reclaimable--) {
1876 		DPRINTF("cidx=%d d=%p\n", cidx, txsd);
1877 		if (txsd->mi.mi_base != NULL) {
1878 			if (txsd->flags & TX_SW_DESC_MAPPED) {
1879 				bus_dmamap_unload(q->entry_tag, txsd->map);
1880 				txsd->flags &= ~TX_SW_DESC_MAPPED;
1881 			}
1882 			m_freem_iovec(&txsd->mi);
1883 			buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
1884 			txsd->mi.mi_base = NULL;
1885 
1886 #if defined(DIAGNOSTIC) && 0
1887 			if (m_get_priority(txsd->m[0]) != cidx)
1888 				printf("pri=%d cidx=%d\n",
1889 				    (int)m_get_priority(txsd->m[0]), cidx);
1890 #endif
1891 
1892 		} else
1893 			q->txq_skipped++;
1894 
1895 		++txsd;
1896 		if (++cidx == q->size) {
1897 			cidx = 0;
1898 			txsd = q->sdesc;
1899 		}
1900 	}
1901 	q->cidx = cidx;
1902 
1903 }
1904 
1905 void
1906 t3_free_tx_desc_all(struct sge_txq *q)
1907 {
1908 	int i;
1909 	struct tx_sw_desc *txsd;
1910 
1911 	for (i = 0; i < q->size; i++) {
1912 		txsd = &q->sdesc[i];
1913 		if (txsd->mi.mi_base != NULL) {
1914 			if (txsd->flags & TX_SW_DESC_MAPPED) {
1915 				bus_dmamap_unload(q->entry_tag, txsd->map);
1916 				txsd->flags &= ~TX_SW_DESC_MAPPED;
1917 			}
1918 			m_freem_iovec(&txsd->mi);
1919 			bzero(&txsd->mi, sizeof(txsd->mi));
1920 		}
1921 	}
1922 }
1923 
1924 /**
1925  *	is_new_response - check if a response is newly written
1926  *	@r: the response descriptor
1927  *	@q: the response queue
1928  *
1929  *	Returns true if a response descriptor contains an as-yet unprocessed
1930  *	response.
1931  */
1932 static __inline int
1933 is_new_response(const struct rsp_desc *r,
1934     const struct sge_rspq *q)
1935 {
1936 	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1937 }
1938 
1939 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1940 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1941 			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1942 			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1943 			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
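/*
 * RSPD_GTS_MASK covers the GTS indications for the two GTS-capable Tx queues;
 * RSPD_CTRL_MASK additionally covers the completion-credit fields returned
 * for all three Tx queues.
 */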
1944 
1945 /* How long to delay the next interrupt in case of memory shortage, in units of 0.1 us. */
1946 #define NOMEM_INTR_DELAY 2500
1947 
1948 /**
1949  *	write_ofld_wr - write an offload work request
1950  *	@adap: the adapter
1951  *	@m: the packet to send
1952  *	@q: the Tx queue
1953  *	@pidx: index of the first Tx descriptor to write
1954  *	@gen: the generation value to use
1955  *	@ndesc: number of descriptors the packet will occupy
1956  *
1957  *	Write an offload work request to send the supplied packet.  The packet
1958  *	data already carry the work request with most fields populated.
1959  */
1960 static void
1961 write_ofld_wr(adapter_t *adap, struct mbuf *m,
1962     struct sge_txq *q, unsigned int pidx,
1963     unsigned int gen, unsigned int ndesc,
1964     bus_dma_segment_t *segs, unsigned int nsegs)
1965 {
1966 	unsigned int sgl_flits, flits;
1967 	struct work_request_hdr *from;
1968 	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1969 	struct tx_desc *d = &q->desc[pidx];
1970 	struct txq_state txqs;
1971 
1972 	if (immediate(m) && nsegs == 0) {
1973 		write_imm(d, m, m->m_len, gen);
1974 		return;
1975 	}
1976 
1977 	/* Only TX_DATA builds SGLs */
1978 	from = mtod(m, struct work_request_hdr *);
1979 	memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
1980 
1981 	flits = m->m_len / 8;
1982 	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1983 
1984 	make_sgl(sgp, segs, nsegs);
1985 	sgl_flits = sgl_len(nsegs);
1986 
1987 	txqs.gen = gen;
1988 	txqs.pidx = pidx;
1989 	txqs.compl = 0;
1990 
1991 	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1992 	    from->wr_hi, from->wr_lo);
1993 }
1994 
1995 /**
1996  *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1997  *	@m: the packet
1998  *
1999  * 	Returns the number of Tx descriptors needed for the given offload
2000  * 	packet.  These packets are already fully constructed.
2001  */
2002 static __inline unsigned int
2003 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
2004 {
2005 	unsigned int flits, cnt = 0;
2006 	int ndescs;
2007 
2008 	if (m->m_len <= WR_LEN && nsegs == 0)
2009 		return (1);                 /* packet fits as immediate data */
2010 
2011 	if (m->m_flags & M_IOVEC)
2012 		cnt = mtomv(m)->mv_count;
2013 	else
2014 		cnt = nsegs;
2015 
2016 	/* headers */
2017 	flits = m->m_len / 8;
2018 
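	/*
	 * A flit is one 8-byte unit of a Tx descriptor: the WR header portion
	 * contributes m->m_len / 8 flits and the SGL contributes sgl_len(cnt)
	 * flits, which together determine the descriptor count.
	 */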
2019 	ndescs = flits_to_desc(flits + sgl_len(cnt));
2020 
2021 	CTR4(KTR_CXGB, "flits=%d sgl_len=%d nsegs=%d ndescs=%d",
2022 	    flits, sgl_len(cnt), nsegs, ndescs);
2023 
2024 	return (ndescs);
2025 }
2026 
2027 /**
2028  *	ofld_xmit - send a packet through an offload queue
2029  *	@adap: the adapter
2030  *	@q: the Tx offload queue
2031  *	@m: the packet
2032  *
2033  *	Send an offload packet through an SGE offload queue.
2034  */
2035 static int
2036 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
2037 {
2038 	int ret, nsegs;
2039 	unsigned int ndesc;
2040 	unsigned int pidx, gen;
2041 	bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
2042 	struct tx_sw_desc *stx;
2043 
2044 	nsegs = m_get_sgllen(m);
2045 	vsegs = m_get_sgl(m);
2046 	ndesc = calc_tx_descs_ofld(m, nsegs);
2047 	busdma_map_sgl(vsegs, segs, nsegs);
2048 
2049 	stx = &q->sdesc[q->pidx];
2050 	KASSERT(stx->mi.mi_base == NULL, ("mi_base set"));
2051 
2052 	mtx_lock(&q->lock);
2053 again:	reclaim_completed_tx_(q, 16);
2054 	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2055 	if (__predict_false(ret)) {
2056 		if (ret == 1) {
2057 			printf("no ofld desc avail\n");
2058 
2059 			m_set_priority(m, ndesc);     /* save for restart */
2060 			mtx_unlock(&q->lock);
2061 			return (EINTR);
2062 		}
2063 		goto again;
2064 	}
2065 
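	/*
	 * Claim ndesc descriptors: advance the producer index under the queue
	 * lock and flip the generation bit when the ring wraps.
	 */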
2066 	gen = q->gen;
2067 	q->in_use += ndesc;
2068 	pidx = q->pidx;
2069 	q->pidx += ndesc;
2070 	if (q->pidx >= q->size) {
2071 		q->pidx -= q->size;
2072 		q->gen ^= 1;
2073 	}
2074 #ifdef T3_TRACE
2075 	T3_TRACE5(adap->tb[q->cntxt_id & 7],
2076 		  "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
2077 		  ndesc, pidx, skb->len, skb->len - skb->data_len,
2078 		  skb_shinfo(skb)->nr_frags);
2079 #endif
2080 	mtx_unlock(&q->lock);
2081 
2082 	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2083 	check_ring_tx_db(adap, q);
2084 	return (0);
2085 }
2086 
2087 /**
2088  *	restart_offloadq - restart a suspended offload queue
2089  *	@qs: the queue set containing the offload queue
2090  *
2091  *	Resumes transmission on a suspended Tx offload queue.
2092  */
2093 static void
2094 restart_offloadq(void *data, int npending)
2095 {
2096 	struct mbuf *m;
2097 	struct sge_qset *qs = data;
2098 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
2099 	adapter_t *adap = qs->port->adapter;
2100 	bus_dma_segment_t segs[TX_MAX_SEGS];
2101 	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2102 	int nsegs, cleaned;
2103 
2104 	mtx_lock(&q->lock);
2105 again:	cleaned = reclaim_completed_tx_(q, 16);
2106 
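	/*
	 * Drain the deferred send queue.  Each queued mbuf carries its
	 * descriptor requirement in its priority field, saved by ofld_xmit
	 * when descriptors were exhausted.
	 */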
2107 	while ((m = mbufq_peek(&q->sendq)) != NULL) {
2108 		unsigned int gen, pidx;
2109 		unsigned int ndesc = m_get_priority(m);
2110 
2111 		if (__predict_false(q->size - q->in_use < ndesc)) {
2112 			setbit(&qs->txq_stopped, TXQ_OFLD);
2113 			smp_mb();
2114 
2115 			if (should_restart_tx(q) &&
2116 			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2117 				goto again;
2118 			q->stops++;
2119 			break;
2120 		}
2121 
2122 		gen = q->gen;
2123 		q->in_use += ndesc;
2124 		pidx = q->pidx;
2125 		q->pidx += ndesc;
2126 		if (q->pidx >= q->size) {
2127 			q->pidx -= q->size;
2128 			q->gen ^= 1;
2129 		}
2130 
2131 		(void)mbufq_dequeue(&q->sendq);
2132 		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2133 		mtx_unlock(&q->lock);
2134 		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2135 		mtx_lock(&q->lock);
2136 	}
2137 	mtx_unlock(&q->lock);
2138 
2139 #if USE_GTS
2140 	set_bit(TXQ_RUNNING, &q->flags);
2141 	set_bit(TXQ_LAST_PKT_DB, &q->flags);
2142 #endif
2143 	wmb();
2144 	t3_write_reg(adap, A_SG_KDOORBELL,
2145 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2146 }
2147 
2148 /**
2149  *	queue_set - return the queue set a packet should use
2150  *	@m: the packet
2151  *
2152  *	Maps a packet to the SGE queue set it should use.  The desired queue
2153  *	set is carried in bits 1-3 in the packet's priority.
2154  */
2155 static __inline int
2156 queue_set(const struct mbuf *m)
2157 {
2158 	return m_get_priority(m) >> 1;
2159 }
2160 
2161 /**
2162  *	is_ctrl_pkt - return whether an offload packet is a control packet
2163  *	@m: the packet
2164  *
2165  *	Determines whether an offload packet should use an OFLD or a CTRL
2166  *	Tx queue.  This is indicated by bit 0 in the packet's priority.
2167  */
2168 static __inline int
2169 is_ctrl_pkt(const struct mbuf *m)
2170 {
2171 	return m_get_priority(m) & 1;
2172 }
2173 
2174 /**
2175  *	t3_offload_tx - send an offload packet
2176  *	@tdev: the offload device to send to
2177  *	@m: the packet
2178  *
2179  *	Sends an offload packet.  We use the packet priority to select the
2180  *	appropriate Tx queue as follows: bit 0 indicates whether the packet
2181  *	should be sent as regular or control, bits 1-3 select the queue set.
2182  */
2183 int
2184 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2185 {
2186 	adapter_t *adap = tdev2adap(tdev);
2187 	struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2188 
2189 	if (__predict_false(is_ctrl_pkt(m)))
2190 		return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
2191 
2192 	return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
2193 }
2194 
2195 /**
2196  *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2197  *	@tdev: the offload device that will be receiving the packets
2198  *	@q: the SGE response queue that assembled the bundle
2199  *	@m: the partial bundle
2200  *	@n: the number of packets in the bundle
2201  *
2202  *	Delivers a (partial) bundle of Rx offload packets to an offload device.
2203  */
2204 static __inline void
2205 deliver_partial_bundle(struct t3cdev *tdev,
2206 			struct sge_rspq *q,
2207 			struct mbuf *mbufs[], int n)
2208 {
2209 	if (n) {
2210 		q->offload_bundles++;
2211 		cxgb_ofld_recv(tdev, mbufs, n);
2212 	}
2213 }
2214 
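/*
 * Add an Rx offload packet to the gather array.  Once RX_BUNDLE_SIZE packets
 * have accumulated, hand the complete bundle to the offload device and
 * restart the gather index at zero.
 */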
2215 static __inline int
2216 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2217     struct mbuf *m, struct mbuf *rx_gather[],
2218     unsigned int gather_idx)
2219 {
2220 
2221 	rq->offload_pkts++;
2222 	m->m_pkthdr.header = mtod(m, void *);
2223 	rx_gather[gather_idx++] = m;
2224 	if (gather_idx == RX_BUNDLE_SIZE) {
2225 		cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2226 		gather_idx = 0;
2227 		rq->offload_bundles++;
2228 	}
2229 	return (gather_idx);
2230 }
2231 
2232 static void
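/*
 * Restart any stopped offload or control Tx queue that has reclaimed enough
 * descriptors to make progress again, by enqueueing its resume task.
 */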
2233 restart_tx(struct sge_qset *qs)
2234 {
2235 	struct adapter *sc = qs->port->adapter;
2236 
2237 
2238 	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2239 	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2240 	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2241 		qs->txq[TXQ_OFLD].restarts++;
2242 		DPRINTF("restarting TXQ_OFLD\n");
2243 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2244 	}
2245 	DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2246 	    qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2247 	    qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2248 	    qs->txq[TXQ_CTRL].in_use);
2249 
2250 	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2251 	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2252 	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2253 		qs->txq[TXQ_CTRL].restarts++;
2254 		DPRINTF("restarting TXQ_CTRL\n");
2255 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2256 	}
2257 }
2258 
2259 /**
2260  *	t3_sge_alloc_qset - initialize an SGE queue set
2261  *	@sc: the controller softc
2262  *	@id: the queue set id
2263  *	@nports: how many Ethernet ports will be using this queue set
2264  *	@irq_vec_idx: the IRQ vector index for response queue interrupts
2265  *	@p: configuration parameters for this queue set
2266  *	@ntxq: number of Tx queues for the queue set
2267  *	@pi: port info for queue set
2268  *
2269  *	Allocate resources and initialize an SGE queue set.  A queue set
2270  *	comprises a response queue, two Rx free-buffer queues, and up to 3
2271  *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
2272  *	queue, offload queue, and control queue.
2273  */
2274 int
2275 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2276 		  const struct qset_params *p, int ntxq, struct port_info *pi)
2277 {
2278 	struct sge_qset *q = &sc->sge.qs[id];
2279 	int i, header_size, ret = 0;
2280 
2281 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
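	/*
	 * Allocate a software mbuf staging ring (producer/consumer indices
	 * protected by a per-ring mutex) for each Tx queue in the set.
	 */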
2282 		if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *),
2283 			    M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
2284 			device_printf(sc->dev, "failed to allocate mbuf ring\n");
2285 			goto err;
2286 		}
2287 		q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0;
2288 		q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size;
2289 		mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF);
2290 	}
2291 
2292 	init_qset_cntxt(q, id);
2293 	q->idx = id;
2294 
2295 	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2296 		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2297 		    &q->fl[0].desc, &q->fl[0].sdesc,
2298 		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
2299 		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2300 		printf("error %d from alloc ring fl0\n", ret);
2301 		goto err;
2302 	}
2303 
2304 	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2305 		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2306 		    &q->fl[1].desc, &q->fl[1].sdesc,
2307 		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
2308 		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2309 		printf("error %d from alloc ring fl1\n", ret);
2310 		goto err;
2311 	}
2312 
2313 	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2314 		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
2315 		    &q->rspq.desc_tag, &q->rspq.desc_map,
2316 		    NULL, NULL)) != 0) {
2317 		printf("error %d from alloc ring rspq\n", ret);
2318 		goto err;
2319 	}
2320 
2321 	for (i = 0; i < ntxq; ++i) {
2322 		/*
2323 		 * The control queue always uses immediate data so does not
2324 		 * need to keep track of any mbufs.
2325 		 * XXX Placeholder for future TOE support.
2326 		 */
2327 		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2328 
2329 		if ((ret = alloc_ring(sc, p->txq_size[i],
2330 			    sizeof(struct tx_desc), sz,
2331 			    &q->txq[i].phys_addr, &q->txq[i].desc,
2332 			    &q->txq[i].sdesc, &q->txq[i].desc_tag,
2333 			    &q->txq[i].desc_map,
2334 			    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2335 			printf("error %d from alloc ring tx %i\n", ret, i);
2336 			goto err;
2337 		}
2338 		mbufq_init(&q->txq[i].sendq);
2339 		q->txq[i].gen = 1;
2340 		q->txq[i].size = p->txq_size[i];
2341 		snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2342 		    device_get_unit(sc->dev), irq_vec_idx, i);
2343 		MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2344 	}
2345 
2346 	q->txq[TXQ_ETH].port = pi;
2347 
2348 	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2349 	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2350 	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
2351 	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
2352 
2353 	q->fl[0].gen = q->fl[1].gen = 1;
2354 	q->fl[0].size = p->fl_size;
2355 	q->fl[1].size = p->jumbo_size;
2356 
2357 	q->rspq.gen = 1;
2358 	q->rspq.cidx = 0;
2359 	q->rspq.size = p->rspq_size;
2360 
2361 
2362 	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
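	/*
	 * Receive clusters are handed up with an mbuf header, packet header,
	 * external-storage descriptor, and reference count embedded at the
	 * front of the cluster (see init_cluster_mbuf), so the usable buffer
	 * size is the cluster size minus header_size.
	 */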
2363 	q->txq[TXQ_ETH].stop_thres = nports *
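	/*
	 * Stop the Ethernet Tx queue when the free descriptor count falls
	 * below roughly what one maximally-fragmented packet per port could
	 * consume (an interpretation of the formula below).
	 */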
2364 	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2365 
2366 	q->fl[0].buf_size = (MCLBYTES - header_size);
2367 	q->fl[0].zone = zone_clust;
2368 	q->fl[0].type = EXT_CLUSTER;
2369 #if __FreeBSD_version > 800000
2370 	if (cxgb_use_16k_clusters) {
2371 		q->fl[1].buf_size = MJUM16BYTES - header_size;
2372 		q->fl[1].zone = zone_jumbo16;
2373 		q->fl[1].type = EXT_JUMBO16;
2374 	} else {
2375 		q->fl[1].buf_size = MJUM9BYTES - header_size;
2376 		q->fl[1].zone = zone_jumbo9;
2377 		q->fl[1].type = EXT_JUMBO9;
2378 	}
2379 #else
2380 	q->fl[1].buf_size = MJUMPAGESIZE - header_size;
2381 	q->fl[1].zone = zone_jumbop;
2382 	q->fl[1].type = EXT_JUMBOP;
2383 #endif
2384 	q->lro.enabled = lro_default;
2385 
2386 	mtx_lock_spin(&sc->sge.reg_lock);
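	/*
	 * Program the response queue, free list, and egress (Tx) contexts in
	 * hardware; reg_lock serializes these indirect context operations.
	 */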
2387 	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2388 				   q->rspq.phys_addr, q->rspq.size,
2389 				   q->fl[0].buf_size, 1, 0);
2390 	if (ret) {
2391 		printf("error %d from t3_sge_init_rspcntxt\n", ret);
2392 		goto err_unlock;
2393 	}
2394 
2395 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2396 		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2397 					  q->fl[i].phys_addr, q->fl[i].size,
2398 					  q->fl[i].buf_size, p->cong_thres, 1,
2399 					  0);
2400 		if (ret) {
2401 			printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2402 			goto err_unlock;
2403 		}
2404 	}
2405 
2406 	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2407 				 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2408 				 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2409 				 1, 0);
2410 	if (ret) {
2411 		printf("error %d from t3_sge_init_ecntxt\n", ret);
2412 		goto err_unlock;
2413 	}
2414 
2415 	if (ntxq > 1) {
2416 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2417 					 USE_GTS, SGE_CNTXT_OFLD, id,
2418 					 q->txq[TXQ_OFLD].phys_addr,
2419 					 q->txq[TXQ_OFLD].size, 0, 1, 0);
2420 		if (ret) {
2421 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2422 			goto err_unlock;
2423 		}
2424 	}
2425 
2426 	if (ntxq > 2) {
2427 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2428 					 SGE_CNTXT_CTRL, id,
2429 					 q->txq[TXQ_CTRL].phys_addr,
2430 					 q->txq[TXQ_CTRL].size,
2431 					 q->txq[TXQ_CTRL].token, 1, 0);
2432 		if (ret) {
2433 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2434 			goto err_unlock;
2435 		}
2436 	}
2437 
2438 	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2439 	    device_get_unit(sc->dev), irq_vec_idx);
2440 	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2441 
2442 	mtx_unlock_spin(&sc->sge.reg_lock);
2443 	t3_update_qset_coalesce(q, p);
2444 	q->port = pi;
2445 
2446 	refill_fl(sc, &q->fl[0], q->fl[0].size);
2447 	refill_fl(sc, &q->fl[1], q->fl[1].size);
2448 	refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2449 
2450 	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2451 		     V_NEWTIMER(q->rspq.holdoff_tmr));
2452 
2453 	return (0);
2454 
2455 err_unlock:
2456 	mtx_unlock_spin(&sc->sge.reg_lock);
2457 err:
2458 	t3_free_qset(sc, q);
2459 
2460 	return (ret);
2461 }
2462 
2463 void
2464 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2465 {
2466 	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2467 	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2468 	struct ifnet *ifp = pi->ifp;
2469 
2470 	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2471 
2472 	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2473 	    cpl->csum_valid && cpl->csum == 0xffff) {
2474 		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
2475 		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2476 		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2477 		m->m_pkthdr.csum_data = 0xffff;
2478 	}
2479 	/*
2480 	 * XXX need to add VLAN support for 6.x
2481 	 */
2482 #ifdef VLAN_SUPPORTED
2483 	if (__predict_false(cpl->vlan_valid)) {
2484 		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2485 		m->m_flags |= M_VLANTAG;
2486 	}
2487 #endif
2488 
2489 	m->m_pkthdr.rcvif = ifp;
2490 	m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2491 #ifndef DISABLE_MBUF_IOVEC
2492 	m_explode(m);
2493 #endif
2494 	/*
2495 	 * adjust after conversion to mbuf chain
2496 	 */
2497 	m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2498 	m->m_len -= (sizeof(*cpl) + ethpad);
2499 	m->m_data += (sizeof(*cpl) + ethpad);
2500 
2501 	(*ifp->if_input)(ifp, m);
2502 }
2503 
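/*
 * External-storage free routine for clusters set up by init_cluster_mbuf():
 * recover the UMA zone from the stashed cluster type and return the
 * cluster-embedded mbuf to the driver's cache.
 */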
2504 static void
2505 ext_free_handler(void *arg1, void * arg2)
2506 {
2507 	uintptr_t type = (uintptr_t)arg2;
2508 	uma_zone_t zone;
2509 	struct mbuf *m;
2510 
2511 	m = arg1;
2512 	zone = m_getzonefromtype(type);
2513 	m->m_ext.ext_type = (int)type;
2514 	cxgb_ext_freed++;
2515 	cxgb_cache_put(zone, m);
2516 }
2517 
2518 static void
2519 init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone)
2520 {
2521 	struct mbuf *m;
2522 	int header_size;
2523 
2524 	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) +
2525 	    sizeof(struct m_ext_) + sizeof(uint32_t);
2526 
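	/*
	 * Construct the mbuf header in place at the start of the cluster.  The
	 * reference count occupies the last word of the reserved header area
	 * and the payload begins immediately after it.
	 */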
2527 	bzero(cl, header_size);
2528 	m = (struct mbuf *)cl;
2529 
2530 	cxgb_ext_inited++;
2531 	SLIST_INIT(&m->m_pkthdr.tags);
2532 	m->m_type = MT_DATA;
2533 	m->m_flags = flags | M_NOFREE | M_EXT;
2534 	m->m_data = cl + header_size;
2535 	m->m_ext.ext_buf = cl;
2536 	m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t));
2537 	m->m_ext.ext_size = m_getsizefromtype(type);
2538 	m->m_ext.ext_free = ext_free_handler;
2539 	m->m_ext.ext_arg1 = cl;
2540 	m->m_ext.ext_arg2 = (void *)(uintptr_t)type;
2541 	m->m_ext.ext_type = EXT_EXTREF;
2542 	*(m->m_ext.ref_cnt) = 1;
2543 	DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt);
2544 }
2545 
2546 
2547 /**
2548  *	get_packet - return the next ingress packet buffer from a free list
2549  *	@adap: the adapter that received the packet
2550  *	@drop_thres: # of remaining buffers before we start dropping packets
2551  *	@qs: the qset that the SGE free list holding the packet belongs to
2552  *      @mh: the mbuf header, containing pointers to the head and tail of the mbuf chain
2553  *      @r: response descriptor
2554  *
2555  *	Get the next packet from a free list and complete setup of the
2556  *	mbuf.  If the packet is small, we make a copy and recycle the
2557  *	original buffer; otherwise we use the original buffer itself.  If a
2558  *	positive drop threshold is supplied packets are dropped and their
2559  *	buffers recycled if (a) the number of remaining buffers is under the
2560  *	threshold and the packet is too big to copy, or (b) the packet should
2561  *	be copied but there is no memory for the copy.
2562  */
2563 #ifdef DISABLE_MBUF_IOVEC
2564 
2565 static int
2566 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2567     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2568 {
2569 
2570 	unsigned int len_cq =  ntohl(r->len_cq);
2571 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2572 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2573 	uint32_t len = G_RSPD_LEN(len_cq);
2574 	uint32_t flags = ntohl(r->flags);
2575 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2576 	caddr_t cl;
2577 	struct mbuf *m, *m0;
2578 	int ret = 0;
2579 
2580 	prefetch(sd->rxsd_cl);
2581 
2582 	fl->credits--;
2583 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2584 
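	/*
	 * Small single-descriptor packets are copied into a freshly allocated
	 * mbuf so the DMA'd cluster can be recycled onto the free list; larger
	 * packets hand the cluster itself up the stack as external storage.
	 */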
2585 	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2586 		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2587 			goto skip_recycle;
2588 		cl = mtod(m0, void *);
2589 		memcpy(cl, sd->data, len);
2590 		recycle_rx_buf(adap, fl, fl->cidx);
2591 		m = m0;
2592 		m0->m_len = len;
2593 	} else {
2594 	skip_recycle:
2595 
2596 		bus_dmamap_unload(fl->entry_tag, sd->map);
2597 		cl = sd->rxsd_cl;
2598 		m = m0 = (struct mbuf *)cl;
2599 
2600 		if ((sopeop == RSPQ_SOP_EOP) ||
2601 		    (sopeop == RSPQ_SOP))
2602 			flags = M_PKTHDR;
2603 		init_cluster_mbuf(cl, flags, fl->type, fl->zone);
2604 		m0->m_len = len;
2605 	}
2606 	switch(sopeop) {
2607 	case RSPQ_SOP_EOP:
2608 		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2609 		mh->mh_head = mh->mh_tail = m;
2610 		m->m_pkthdr.len = len;
2611 		ret = 1;
2612 		break;
2613 	case RSPQ_NSOP_NEOP:
2614 		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2615 		if (mh->mh_tail == NULL) {
2616 			log(LOG_ERR, "discarding intermediate descriptor entry\n");
2617 			m_freem(m);
2618 			break;
2619 		}
2620 		mh->mh_tail->m_next = m;
2621 		mh->mh_tail = m;
2622 		mh->mh_head->m_pkthdr.len += len;
2623 		ret = 0;
2624 		break;
2625 	case RSPQ_SOP:
2626 		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2627 		m->m_pkthdr.len = len;
2628 		mh->mh_head = mh->mh_tail = m;
2629 		ret = 0;
2630 		break;
2631 	case RSPQ_EOP:
2632 		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2633 		mh->mh_head->m_pkthdr.len += len;
2634 		mh->mh_tail->m_next = m;
2635 		mh->mh_tail = m;
2636 		ret = 1;
2637 		break;
2638 	}
2639 	if (++fl->cidx == fl->size)
2640 		fl->cidx = 0;
2641 
2642 	return (ret);
2643 }
2644 
2645 #else
2646 
2647 static int
2648 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2649     struct mbuf **m, struct rsp_desc *r)
2650 {
2651 
2652 	unsigned int len_cq =  ntohl(r->len_cq);
2653 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2654 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2655 	uint32_t len = G_RSPD_LEN(len_cq);
2656 	uint32_t flags = ntohl(r->flags);
2657 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2658 	void *cl;
2659 	int ret = 0;
2660 	struct mbuf *m0;
2661 #if 0
2662 	if ((sd + 1 )->rxsd_cl)
2663 		prefetch((sd + 1)->rxsd_cl);
2664 	if ((sd + 2)->rxsd_cl)
2665 		prefetch((sd + 2)->rxsd_cl);
2666 #endif
2667 	DPRINTF("rx cpu=%d\n", curcpu);
2668 	fl->credits--;
2669 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2670 
2671 	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2672 		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2673 			goto skip_recycle;
2674 		cl = mtod(m0, void *);
2675 		memcpy(cl, sd->data, len);
2676 		recycle_rx_buf(adap, fl, fl->cidx);
2677 		*m = m0;
2678 	} else {
2679 	skip_recycle:
2680 		bus_dmamap_unload(fl->entry_tag, sd->map);
2681 		cl = sd->rxsd_cl;
2682 		*m = m0 = (struct mbuf *)cl;
2683 	}
2684 
2685 	switch(sopeop) {
2686 	case RSPQ_SOP_EOP:
2687 		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2688 		if (cl == sd->rxsd_cl)
2689 			init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone);
2690 		m0->m_len = m0->m_pkthdr.len = len;
2691 		ret = 1;
2692 		goto done;
2693 		break;
2694 	case RSPQ_NSOP_NEOP:
2695 		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2696 		panic("chaining unsupported");
2697 		ret = 0;
2698 		break;
2699 	case RSPQ_SOP:
2700 		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2701 		panic("chaining unsupported");
2702 		m_iovinit(m0);
2703 		ret = 0;
2704 		break;
2705 	case RSPQ_EOP:
2706 		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2707 		panic("chaining unsupported");
2708 		ret = 1;
2709 		break;
2710 	}
2711 	panic("append not supported");
2712 #if 0
2713 	m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref);
2714 #endif
2715 done:
2716 	if (++fl->cidx == fl->size)
2717 		fl->cidx = 0;
2718 
2719 	return (ret);
2720 }
2721 #endif
2722 /**
2723  *	handle_rsp_cntrl_info - handles control information in a response
2724  *	@qs: the queue set corresponding to the response
2725  *	@flags: the response control flags
2726  *
2727  *	Handles the control information of an SGE response, such as GTS
2728  *	indications and completion credits for the queue set's Tx queues.
2729  *	The HW coalesces credits; we don't do any extra SW coalescing.
2730  */
2731 static __inline void
2732 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2733 {
2734 	unsigned int credits;
2735 
2736 #if USE_GTS
2737 	if (flags & F_RSPD_TXQ0_GTS)
2738 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2739 #endif
2740 	credits = G_RSPD_TXQ0_CR(flags);
2741 	if (credits)
2742 		qs->txq[TXQ_ETH].processed += credits;
2743 
2744 	credits = G_RSPD_TXQ2_CR(flags);
2745 	if (credits)
2746 		qs->txq[TXQ_CTRL].processed += credits;
2747 
2748 # if USE_GTS
2749 	if (flags & F_RSPD_TXQ1_GTS)
2750 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2751 # endif
2752 	credits = G_RSPD_TXQ1_CR(flags);
2753 	if (credits)
2754 		qs->txq[TXQ_OFLD].processed += credits;
2755 
2756 }
2757 
2758 static void
2759 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2760     unsigned int sleeping)
2761 {
2762 	;
2763 }
2764 
2765 /**
2766  *	process_responses - process responses from an SGE response queue
2767  *	@adap: the adapter
2768  *	@qs: the queue set to which the response queue belongs
2769  *	@budget: how many responses can be processed in this round
2770  *
2771  *	Process responses from an SGE response queue up to the supplied budget.
2772  *	Responses include received packets as well as credits and other events
2773  *	for the queues that belong to the response queue's queue set.
2774  *	A negative budget is effectively unlimited.
2775  *
2776  *	Additionally, chooses the interrupt holdoff time for the next interrupt
2777  *	on this queue.  If the system is under memory shortage, a fairly
2778  *	long delay is used to help recovery.
2779  */
2780 int
2781 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2782 {
2783 	struct sge_rspq *rspq = &qs->rspq;
2784 	struct rsp_desc *r = &rspq->desc[rspq->cidx];
2785 	int budget_left = budget;
2786 	unsigned int sleeping = 0;
2787 	int lro = qs->lro.enabled;
2788 	struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2789 	int ngathered = 0;
2790 #ifdef DEBUG
2791 	static int last_holdoff = 0;
2792 	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2793 		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2794 		last_holdoff = rspq->holdoff_tmr;
2795 	}
2796 #endif
2797 	rspq->next_holdoff = rspq->holdoff_tmr;
2798 
2799 	while (__predict_true(budget_left && is_new_response(r, rspq))) {
2800 		int eth, eop = 0, ethpad = 0;
2801 		uint32_t flags = ntohl(r->flags);
2802 		uint32_t rss_csum = *(const uint32_t *)r;
2803 		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2804 
2805 		eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2806 
2807 		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2808 			struct mbuf *m;
2809 
2810 			if (cxgb_debug)
2811 				printf("async notification\n");
2812 
2813 			if (rspq->rspq_mh.mh_head == NULL) {
2814 				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2815 				m = rspq->rspq_mh.mh_head;
2816 			} else {
2817 				m = m_gethdr(M_DONTWAIT, MT_DATA);
2818 			}
2819 
2820 			/* XXX m is leaked here if rspq->rspq_mh.mh_head was already non-NULL */
2821 
2822 			if (m == NULL)
2823 				goto no_mem;
2824 
2825                         memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2826 			m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2827                         *mtod(m, char *) = CPL_ASYNC_NOTIF;
2828 			rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
2829 			eop = 1;
2830                         rspq->async_notif++;
2831 			goto skip;
2832 		} else if  (flags & F_RSPD_IMM_DATA_VALID) {
2833 			struct mbuf *m = NULL;
2834 
2835 			DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
2836 			    r->rss_hdr.opcode, rspq->cidx);
2837 			if (rspq->rspq_mh.mh_head == NULL)
2838 				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2839                         else
2840 				m = m_gethdr(M_DONTWAIT, MT_DATA);
2841 
2842 			if (rspq->rspq_mh.mh_head == NULL &&  m == NULL) {
2843 		no_mem:
2844 				rspq->next_holdoff = NOMEM_INTR_DELAY;
2845 				budget_left--;
2846 				break;
2847 			}
2848 			get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
2849 			eop = 1;
2850 			rspq->imm_data++;
2851 		} else if (r->len_cq) {
2852 			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2853 
2854 #ifdef DISABLE_MBUF_IOVEC
2855 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
2856 #else
2857 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r);
2858 #endif
2859 #ifdef IFNET_MULTIQUEUE
2860 			rspq->rspq_mh.mh_head->m_pkthdr.rss_hash = rss_hash;
2861 #endif
2862 			ethpad = 2;
2863 		} else {
2864 			DPRINTF("pure response\n");
2865 			rspq->pure_rsps++;
2866 		}
2867 	skip:
2868 		if (flags & RSPD_CTRL_MASK) {
2869 			sleeping |= flags & RSPD_GTS_MASK;
2870 			handle_rsp_cntrl_info(qs, flags);
2871 		}
2872 
2873 		r++;
2874 		if (__predict_false(++rspq->cidx == rspq->size)) {
2875 			rspq->cidx = 0;
2876 			rspq->gen ^= 1;
2877 			r = rspq->desc;
2878 		}
2879 		prefetch(r);
2880 		if (++rspq->credits >= (rspq->size / 4)) {
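		/*
		 * Return response queue credits to the hardware in batches of
		 * a quarter of the ring, presumably to limit doorbell writes.
		 */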
2881 			refill_rspq(adap, rspq, rspq->credits);
2882 			rspq->credits = 0;
2883 		}
2884 		DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags);
2885 
2886 		if (!eth && eop) {
2887 			rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
2888 			/*
2889 			 * XXX size mismatch
2890 			 */
2891 			m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
2892 
2893 
2894 			ngathered = rx_offload(&adap->tdev, rspq,
2895 			    rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
2896 			rspq->rspq_mh.mh_head = NULL;
2897 			DPRINTF("received offload packet\n");
2898 
2899 		} else if (eth && eop) {
2900 			prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *));
2901 			prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES);
2902 
2903 			t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad,
2904 			    rss_hash, rss_csum, lro);
2905 			DPRINTF("received tunnel packet\n");
2906 			rspq->rspq_mh.mh_head = NULL;
2907 
2908 		}
2909 		__refill_fl_lt(adap, &qs->fl[0], 32);
2910 		__refill_fl_lt(adap, &qs->fl[1], 32);
2911 		--budget_left;
2912 	}
2913 
2914 	deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
2915 	t3_lro_flush(adap, qs, &qs->lro);
2916 
2917 	if (sleeping)
2918 		check_ring_db(adap, qs, sleeping);
2919 
2920 	smp_mb();  /* commit Tx queue processed updates */
2921 	if (__predict_false(qs->txq_stopped > 1)) {
2922 		printf("restarting tx on %p\n", qs);
2923 
2924 		restart_tx(qs);
2925 	}
2926 
2927 	__refill_fl_lt(adap, &qs->fl[0], 512);
2928 	__refill_fl_lt(adap, &qs->fl[1], 512);
2929 	budget -= budget_left;
2930 	return (budget);
2931 }
2932 
2933 /*
2934  * A helper function that processes responses and issues GTS.
2935  */
2936 static __inline int
2937 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2938 {
2939 	int work;
2940 	static int last_holdoff = 0;
2941 
2942 	work = process_responses(adap, rspq_to_qset(rq), -1);
2943 
2944 	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
2945 		printf("next_holdoff=%d\n", rq->next_holdoff);
2946 		last_holdoff = rq->next_holdoff;
2947 	}
2948 	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2949 	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2950 
2951 	return (work);
2952 }
2953 
2954 
2955 /*
2956  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2957  * Handles data events from SGE response queues as well as error and other
2958  * async events as they all use the same interrupt pin.  We use one SGE
2959  * response queue per port in this mode and protect all response queues with
2960  * queue 0's lock.
2961  */
2962 void
2963 t3b_intr(void *data)
2964 {
2965 	uint32_t i, map;
2966 	adapter_t *adap = data;
2967 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2968 
2969 	t3_write_reg(adap, A_PL_CLI, 0);
2970 	map = t3_read_reg(adap, A_SG_DATA_INTR);
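	/*
	 * As used here, A_SG_DATA_INTR reports one bit per port's response
	 * queue with pending work, plus F_ERRINTR for slow-path/error events.
	 */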
2971 
2972 	if (!map)
2973 		return;
2974 
2975 	if (__predict_false(map & F_ERRINTR))
2976 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2977 
2978 	mtx_lock(&q0->lock);
2979 	for_each_port(adap, i)
2980 	    if (map & (1 << i))
2981 			process_responses_gts(adap, &adap->sge.qs[i].rspq);
2982 	mtx_unlock(&q0->lock);
2983 }
2984 
2985 /*
2986  * The MSI interrupt handler.  This needs to handle data events from SGE
2987  * response queues as well as error and other async events as they all use
2988  * the same MSI vector.  We use one SGE response queue per port in this mode
2989  * and protect all response queues with queue 0's lock.
2990  */
2991 void
2992 t3_intr_msi(void *data)
2993 {
2994 	adapter_t *adap = data;
2995 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2996 	int i, new_packets = 0;
2997 
2998 	mtx_lock(&q0->lock);
2999 
3000 	for_each_port(adap, i)
3001 	    if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3002 		    new_packets = 1;
3003 	mtx_unlock(&q0->lock);
3004 	if (new_packets == 0)
3005 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3006 }
3007 
3008 void
3009 t3_intr_msix(void *data)
3010 {
3011 	struct sge_qset *qs = data;
3012 	adapter_t *adap = qs->port->adapter;
3013 	struct sge_rspq *rspq = &qs->rspq;
3014 #ifndef IFNET_MULTIQUEUE
3015 	mtx_lock(&rspq->lock);
3016 #else
3017 	if (mtx_trylock(&rspq->lock))
3018 #endif
3019 	{
3020 
3021 		if (process_responses_gts(adap, rspq) == 0)
3022 			rspq->unhandled_irqs++;
3023 		mtx_unlock(&rspq->lock);
3024 	}
3025 }
3026 
3027 #define QDUMP_SBUF_SIZE		32 * 400
3028 static int
3029 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3030 {
3031 	struct sge_rspq *rspq;
3032 	struct sge_qset *qs;
3033 	int i, err, dump_end, idx;
3034 	static int multiplier = 1;
3035 	struct sbuf *sb;
3036 	struct rsp_desc *rspd;
3037 	uint32_t data[4];
3038 
3039 	rspq = arg1;
3040 	qs = rspq_to_qset(rspq);
3041 	if (rspq->rspq_dump_count == 0)
3042 		return (0);
3043 	if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3044 		log(LOG_WARNING,
3045 		    "dump count is too large %d\n", rspq->rspq_dump_count);
3046 		rspq->rspq_dump_count = 0;
3047 		return (EINVAL);
3048 	}
3049 	if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3050 		log(LOG_WARNING,
3051 		    "dump start of %d is greater than queue size\n",
3052 		    rspq->rspq_dump_start);
3053 		rspq->rspq_dump_start = 0;
3054 		return (EINVAL);
3055 	}
3056 	err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3057 	if (err)
3058 		return (err);
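	/*
	 * Format into a fixed-length sbuf; if it overflows, grow the buffer by
	 * bumping the multiplier and reformat from scratch.
	 */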
3059 retry_sbufops:
3060 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3061 
3062 	sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3063 	    (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3064 	    ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3065 	sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3066 	    ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3067 
3068 	sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3069 	    (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3070 
3071 	dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3072 	for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3073 		idx = i & (RSPQ_Q_SIZE-1);
3074 
3075 		rspd = &rspq->desc[idx];
3076 		sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3077 		    idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3078 		    rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3079 		sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3080 		    rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3081 		    be32toh(rspd->len_cq), rspd->intr_gen);
3082 	}
3083 	if (sbuf_overflowed(sb)) {
3084 		sbuf_delete(sb);
3085 		multiplier++;
3086 		goto retry_sbufops;
3087 	}
3088 	sbuf_finish(sb);
3089 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3090 	sbuf_delete(sb);
3091 	return (err);
3092 }
3093 
3094 static int
3095 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3096 {
3097 	struct sge_txq *txq;
3098 	struct sge_qset *qs;
3099 	int i, j, err, dump_end;
3100 	static int multiplier = 1;
3101 	struct sbuf *sb;
3102 	struct tx_desc *txd;
3103 	uint32_t *WR, wr_hi, wr_lo, gen;
3104 	uint32_t data[4];
3105 
3106 	txq = arg1;
3107 	qs = txq_to_qset(txq, TXQ_ETH);
3108 	if (txq->txq_dump_count == 0) {
3109 		return (0);
3110 	}
3111 	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3112 		log(LOG_WARNING,
3113 		    "dump count is too large %d\n", txq->txq_dump_count);
3114 		txq->txq_dump_count = 1;
3115 		return (EINVAL);
3116 	}
3117 	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3118 		log(LOG_WARNING,
3119 		    "dump start of %d is greater than queue size\n",
3120 		    txq->txq_dump_start);
3121 		txq->txq_dump_start = 0;
3122 		return (EINVAL);
3123 	}
3124 	err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3125 	if (err)
3126 		return (err);
3127 
3128 
3129 retry_sbufops:
3130 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3131 
3132 	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3133 	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3134 	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3135 	sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
3136 	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3137 	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3138 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3139 	    txq->txq_dump_start,
3140 	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3141 
3142 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3143 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3144 		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3145 		WR = (uint32_t *)txd->flit;
3146 		wr_hi = ntohl(WR[0]);
3147 		wr_lo = ntohl(WR[1]);
3148 		gen = G_WR_GEN(wr_lo);
3149 
3150 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3151 		    wr_hi, wr_lo, gen);
3152 		for (j = 2; j < 30; j += 4)
3153 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3154 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3155 
3156 	}
3157 	if (sbuf_overflowed(sb)) {
3158 		sbuf_delete(sb);
3159 		multiplier++;
3160 		goto retry_sbufops;
3161 	}
3162 	sbuf_finish(sb);
3163 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3164 	sbuf_delete(sb);
3165 	return (err);
3166 }
3167 
3168 static int
3169 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3170 {
3171 	struct sge_txq *txq;
3172 	struct sge_qset *qs;
3173 	int i, j, err, dump_end;
3174 	static int multiplier = 1;
3175 	struct sbuf *sb;
3176 	struct tx_desc *txd;
3177 	uint32_t *WR, wr_hi, wr_lo, gen;
3178 
3179 	txq = arg1;
3180 	qs = txq_to_qset(txq, TXQ_CTRL);
3181 	if (txq->txq_dump_count == 0) {
3182 		return (0);
3183 	}
3184 	if (txq->txq_dump_count > 256) {
3185 		log(LOG_WARNING,
3186 		    "dump count is too large %d\n", txq->txq_dump_count);
3187 		txq->txq_dump_count = 1;
3188 		return (EINVAL);
3189 	}
3190 	if (txq->txq_dump_start > 255) {
3191 		log(LOG_WARNING,
3192 		    "dump start of %d is greater than queue size\n",
3193 		    txq->txq_dump_start);
3194 		txq->txq_dump_start = 0;
3195 		return (EINVAL);
3196 	}
3197 
3198 retry_sbufops:
3199 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3200 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3201 	    txq->txq_dump_start,
3202 	    (txq->txq_dump_start + txq->txq_dump_count) & 255);
3203 
3204 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3205 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3206 		txd = &txq->desc[i & (255)];
3207 		WR = (uint32_t *)txd->flit;
3208 		wr_hi = ntohl(WR[0]);
3209 		wr_lo = ntohl(WR[1]);
3210 		gen = G_WR_GEN(wr_lo);
3211 
3212 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3213 		    wr_hi, wr_lo, gen);
3214 		for (j = 2; j < 30; j += 4)
3215 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3216 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3217 
3218 	}
3219 	if (sbuf_overflowed(sb)) {
3220 		sbuf_delete(sb);
3221 		multiplier++;
3222 		goto retry_sbufops;
3223 	}
3224 	sbuf_finish(sb);
3225 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3226 	sbuf_delete(sb);
3227 	return (err);
3228 }
3229 
3230 static int
3231 t3_lro_enable(SYSCTL_HANDLER_ARGS)
3232 {
3233 	adapter_t *sc;
3234 	int i, j, enabled, err, nqsets = 0;
3235 
3236 #ifndef LRO_WORKING
3237 	return (0);
3238 #endif
3239 	sc = arg1;
3240 	enabled = sc->sge.qs[0].lro.enabled;
3241         err = sysctl_handle_int(oidp, &enabled, arg2, req);
3242 
3243 	if (err != 0)
3244 		return (err);
3245 	if (enabled == sc->sge.qs[0].lro.enabled)
3246 		return (0);
3247 
3248 	for (i = 0; i < sc->params.nports; i++)
3249 		for (j = 0; j < sc->port[i].nqsets; j++)
3250 			nqsets++;
3251 
3252 	for (i = 0; i < nqsets; i++)
3253 		sc->sge.qs[i].lro.enabled = enabled;
3254 
3255 	return (0);
3256 }
3257 
3258 static int
3259 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3260 {
3261 	adapter_t *sc = arg1;
3262 	struct qset_params *qsp = &sc->params.sge.qset[0];
3263 	int coalesce_usecs;
3264 	struct sge_qset *qs;
3265 	int i, j, err, nqsets = 0;
3266 	struct mtx *lock;
3267 
3268 	if ((sc->flags & FULL_INIT_DONE) == 0)
3269 		return (ENXIO);
3270 
3271 	coalesce_usecs = qsp->coalesce_usecs;
3272         err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3273 
3274 	if (err != 0) {
3275 		return (err);
3276 	}
3277 	if (coalesce_usecs == qsp->coalesce_usecs)
3278 		return (0);
3279 
3280 	for (i = 0; i < sc->params.nports; i++)
3281 		for (j = 0; j < sc->port[i].nqsets; j++)
3282 			nqsets++;
3283 
3284 	coalesce_usecs = max(1, coalesce_usecs);
3285 
3286 	for (i = 0; i < nqsets; i++) {
3287 		qs = &sc->sge.qs[i];
3288 		qsp = &sc->params.sge.qset[i];
3289 		qsp->coalesce_usecs = coalesce_usecs;
3290 
3291 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3292 			    &sc->sge.qs[0].rspq.lock;
3293 
3294 		mtx_lock(lock);
3295 		t3_update_qset_coalesce(qs, qsp);
3296 		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3297 		    V_NEWTIMER(qs->rspq.holdoff_tmr));
3298 		mtx_unlock(lock);
3299 	}
3300 
3301 	return (0);
3302 }
3303 
3304 
3305 void
3306 t3_add_attach_sysctls(adapter_t *sc)
3307 {
3308 	struct sysctl_ctx_list *ctx;
3309 	struct sysctl_oid_list *children;
3310 
3311 	ctx = device_get_sysctl_ctx(sc->dev);
3312 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3313 
3314 	/* random information */
3315 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3316 	    "firmware_version",
3317 	    CTLFLAG_RD, &sc->fw_version,
3318 	    0, "firmware version");
3319 
3320 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3321 	    "enable_lro",
3322 	    CTLTYPE_INT|CTLFLAG_RW, sc,
3323 	    0, t3_lro_enable,
3324 	    "I", "enable large receive offload");
3325 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3326 	    "hw_revision",
3327 	    CTLFLAG_RD, &sc->params.rev,
3328 	    0, "chip model");
3329 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3330 	    "enable_debug",
3331 	    CTLFLAG_RW, &cxgb_debug,
3332 	    0, "enable verbose debugging output");
3333 	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce",
3334 	    CTLFLAG_RD, &sc->tunq_coalesce,
3335 	    "#tunneled packets freed");
3336 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3337 	    "txq_overrun",
3338 	    CTLFLAG_RD, &txq_fills,
3339 	    0, "#times txq overrun");
3340 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3341 	    "pcpu_cache_enable",
3342 	    CTLFLAG_RW, &cxgb_pcpu_cache_enable,
3343 	    0, "enable driver local pcpu caches");
3344 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3345 	    "cache_alloc",
3346 	    CTLFLAG_RD, &cxgb_cached_allocations,
3347 	    0, "#times a cluster was allocated from cache");
3348 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3349 	    "cached",
3350 	    CTLFLAG_RD, &cxgb_cached,
3351 	    0, "#times a cluster was cached");
3352 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3353 	    "ext_freed",
3354 	    CTLFLAG_RD, &cxgb_ext_freed,
3355 	    0, "#times a cluster was freed through ext_free");
3356 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3357 	    "ext_inited",
3358 	    CTLFLAG_RD, &cxgb_ext_inited,
3359 	    0, "#times a cluster was initialized for ext_free");
3360 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3361 	    "mbufs_outstanding",
3362 	    CTLFLAG_RD, &cxgb_mbufs_outstanding,
3363 	    0, "#mbufs in flight in the driver");
3364 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3365 	    "pack_outstanding",
3366 	    CTLFLAG_RD, &cxgb_pack_outstanding,
3367 	    0, "#packets in flight in the driver");
3368 }
3369 
3370 
3371 static const char *rspq_name = "rspq";
3372 static const char *txq_names[] =
3373 {
3374 	"txq_eth",
3375 	"txq_ofld",
3376 	"txq_ctrl"
3377 };
3378 
3379 void
3380 t3_add_configured_sysctls(adapter_t *sc)
3381 {
3382 	struct sysctl_ctx_list *ctx;
3383 	struct sysctl_oid_list *children;
3384 	int i, j;
3385 
3386 	ctx = device_get_sysctl_ctx(sc->dev);
3387 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3388 
3389 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3390 	    "intr_coal",
3391 	    CTLTYPE_INT|CTLFLAG_RW, sc,
3392 	    0, t3_set_coalesce_usecs,
3393 	    "I", "interrupt coalescing timer (us)");
3394 
3395 	for (i = 0; i < sc->params.nports; i++) {
3396 		struct port_info *pi = &sc->port[i];
3397 		struct sysctl_oid *poid;
3398 		struct sysctl_oid_list *poidlist;
3399 
3400 		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3401 		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3402 		    pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3403 		poidlist = SYSCTL_CHILDREN(poid);
3404 		SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3405 		    "nqsets", CTLFLAG_RD, &pi->nqsets,
3406 		    0, "#queue sets");
3407 
3408 		for (j = 0; j < pi->nqsets; j++) {
3409 			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3410 			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, *ctrlqpoid;
3411 			struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist, *ctrlqpoidlist;
3412 			struct sge_txq *txq = &qs->txq[TXQ_ETH];
3413 
3414 			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3415 
3416 			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3417 			    qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3418 			qspoidlist = SYSCTL_CHILDREN(qspoid);
3419 
3420 			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3421 			    rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3422 			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3423 
3424 			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3425 			    txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3426 			txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3427 
3428 			ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3429 			    txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3430 			ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3431 
3432 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3433 			    CTLFLAG_RD, &qs->rspq.size,
3434 			    0, "#entries in response queue");
3435 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3436 			    CTLFLAG_RD, &qs->rspq.cidx,
3437 			    0, "consumer index");
3438 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3439 			    CTLFLAG_RD, &qs->rspq.credits,
3440 			    0, "#credits");
3441 			SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3442 			    CTLFLAG_RD, &qs->rspq.phys_addr,
3443 			    "physical address of the queue");
3444 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3445 			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3446 			    0, "start rspq dump entry");
3447 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3448 			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3449 			    0, "#rspq entries to dump");
3450 			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3451 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3452 			    0, t3_dump_rspq, "A", "dump of the response queue");
3453 
3454 
3455 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
3456 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
3457 			    0, "#tunneled packets dropped");
3458 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3459 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3460 			    0, "#tunneled packets waiting to be sent");
3461 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3462 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3463 			    0, "#tunneled packets queue producer index");
3464 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3465 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3466 			    0, "#tunneled packets queue consumer index");
3467 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3468 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3469 			    0, "#tunneled packets processed by the card");
3470 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3471 			    CTLFLAG_RD, &txq->cleaned,
3472 			    0, "#tunneled packets cleaned");
3473 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3474 			    CTLFLAG_RD, &txq->in_use,
3475 			    0, "#tunneled packet slots in use");
3476 			SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3477 			    CTLFLAG_RD, &txq->txq_frees,
3478 			    "#tunneled packets freed");
3479 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3480 			    CTLFLAG_RD, &txq->txq_skipped,
3481 			    0, "#tunneled packet descriptors skipped");
3482 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced",
3483 			    CTLFLAG_RD, &txq->txq_coalesced,
3484 			    0, "#tunneled packets coalesced");
3485 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3486 			    CTLFLAG_RD, &txq->txq_enqueued,
3487 			    0, "#tunneled packets enqueued to hardware");
3488 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3489 			    CTLFLAG_RD, &qs->txq_stopped,
3490 			    0, "tx queues stopped");
3491 			SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3492 			    CTLFLAG_RD, &txq->phys_addr,
3493 			    "physical address of the queue");
3494 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3495 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3496 			    0, "txq generation");
3497 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3498 			    CTLFLAG_RD, &txq->cidx,
3499 			    0, "hardware queue cidx");
3500 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3501 			    CTLFLAG_RD, &txq->pidx,
3502 			    0, "hardware queue pidx");
3503 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3504 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3505 			    0, "txq start idx for dump");
3506 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3507 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3508 			    0, "txq #entries to dump");
3509 			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3510 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3511 			    0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3512 
3513 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3514 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3515 			    0, "ctrlq start idx for dump");
3516 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3517 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3518 			    0, "ctrl #entries to dump");
3519 			SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3520 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3521 			    0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
3522 
3523 
3524 
3525 
3526 
3527 		}
3528 	}
3529 }
3530 
3531 /**
3532  *	t3_get_desc - dump an SGE descriptor for debugging purposes
3533  *	@qs: the queue set
3534  *	@qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3535  *	@idx: the descriptor index in the queue
3536  *	@data: where to dump the descriptor contents
3537  *
3538  *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3539  *	size of the descriptor.
3540  */
3541 int
3542 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3543 		unsigned char *data)
3544 {
3545 	if (qnum >= 6)
3546 		return (EINVAL);
3547 
3548 	if (qnum < 3) {
3549 		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3550 			return (EINVAL);
3551 		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3552 		return sizeof(struct tx_desc);
3553 	}
3554 
3555 	if (qnum == 3) {
3556 		if (!qs->rspq.desc || idx >= qs->rspq.size)
3557 			return (EINVAL);
3558 		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3559 		return sizeof(struct rsp_desc);
3560 	}
3561 
3562 	qnum -= 4;
3563 	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3564 		return (EINVAL);
3565 	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3566 	return sizeof(struct rx_desc);
3567 }
3568