xref: /freebsd/sys/dev/cxgb/cxgb_sge.c (revision 995dc984471c92c03daad19a1d35af46c086ef3e)
1 /**************************************************************************
2 
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 #define DEBUG_BUFRING
30 
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/bus.h>
40 #include <sys/conf.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/bus_dma.h>
44 #include <sys/rman.h>
45 #include <sys/queue.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48 
49 #include <sys/proc.h>
50 #include <sys/sbuf.h>
51 #include <sys/sched.h>
52 #include <sys/smp.h>
53 #include <sys/systm.h>
54 #include <sys/syslog.h>
55 
56 #include <netinet/in_systm.h>
57 #include <netinet/in.h>
58 #include <netinet/ip.h>
59 #include <netinet/tcp.h>
60 
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 
67 #ifdef CONFIG_DEFINED
68 #include <cxgb_include.h>
69 #include <sys/mvec.h>
70 #else
71 #include <dev/cxgb/cxgb_include.h>
72 #include <dev/cxgb/sys/mvec.h>
73 #endif
74 
75 int      txq_fills = 0;
76 static int recycle_enable = 1;
77 extern int cxgb_txq_buf_ring_size;
78 int cxgb_cached_allocations;
79 int cxgb_cached;
80 int cxgb_ext_freed;
81 extern int cxgb_use_16k_clusters;
82 extern int cxgb_pcpu_cache_enable;
83 
84 
85 #define USE_GTS 0
86 
87 #define SGE_RX_SM_BUF_SIZE	1536
88 #define SGE_RX_DROP_THRES	16
89 #define SGE_RX_COPY_THRES	128
90 
91 /*
92  * Period of the Tx buffer reclaim timer.  This timer does not need to run
93  * frequently as Tx buffers are usually reclaimed by new Tx packets.
94  */
95 #define TX_RECLAIM_PERIOD       (hz >> 1)
96 
97 /*
98  * Values for sge_txq.flags
99  */
100 enum {
101 	TXQ_RUNNING	= 1 << 0,  /* fetch engine is running */
102 	TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
103 };
104 
105 struct tx_desc {
106 	uint64_t	flit[TX_DESC_FLITS];
107 } __packed;
108 
109 struct rx_desc {
110 	uint32_t	addr_lo;
111 	uint32_t	len_gen;
112 	uint32_t	gen2;
113 	uint32_t	addr_hi;
114 } __packed;;
115 
116 struct rsp_desc {               /* response queue descriptor */
117 	struct rss_header	rss_hdr;
118 	uint32_t		flags;
119 	uint32_t		len_cq;
120 	uint8_t			imm_data[47];
121 	uint8_t			intr_gen;
122 } __packed;
123 
124 #define RX_SW_DESC_MAP_CREATED	(1 << 0)
125 #define TX_SW_DESC_MAP_CREATED	(1 << 1)
126 #define RX_SW_DESC_INUSE        (1 << 3)
127 #define TX_SW_DESC_MAPPED       (1 << 4)
128 
129 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
130 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
131 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
132 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
133 
134 struct tx_sw_desc {                /* SW state per Tx descriptor */
135 	struct mbuf_iovec mi;
136 	bus_dmamap_t	map;
137 	int		flags;
138 };
139 
140 struct rx_sw_desc {                /* SW state per Rx descriptor */
141 	caddr_t	         rxsd_cl;
142 	caddr_t	         data;
143 	bus_dmamap_t	  map;
144 	int		  flags;
145 };
146 
147 struct txq_state {
148 	unsigned int compl;
149 	unsigned int gen;
150 	unsigned int pidx;
151 };
152 
153 struct refill_fl_cb_arg {
154 	int               error;
155 	bus_dma_segment_t seg;
156 	int               nseg;
157 };
158 
159 /*
160  * Maps a number of flits to the number of Tx descriptors that can hold them.
161  * The formula is
162  *
163  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
164  *
165  * HW allows up to 4 descriptors to be combined into a WR.
166  */
167 static uint8_t flit_desc_map[] = {
168 	0,
169 #if SGE_NUM_GENBITS == 1
170 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
171 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
172 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
173 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
174 #elif SGE_NUM_GENBITS == 2
175 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
176 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
177 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
178 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
179 #else
180 # error "SGE_NUM_GENBITS must be 1 or 2"
181 #endif
182 };
183 
184 
185 static int lro_default = 0;
186 int cxgb_debug = 0;
187 
188 static void sge_timer_cb(void *arg);
189 static void sge_timer_reclaim(void *arg, int ncount);
190 static void sge_txq_reclaim_handler(void *arg, int ncount);
191 
192 /**
193  *	reclaim_completed_tx - reclaims completed Tx descriptors
194  *	@adapter: the adapter
195  *	@q: the Tx queue to reclaim completed descriptors from
196  *
197  *	Reclaims Tx descriptors that the SGE has indicated it has processed,
198  *	and frees the associated buffers if possible.  Called with the Tx
199  *	queue's lock held.
200  */
201 static __inline int
202 reclaim_completed_tx_(struct sge_txq *q, int reclaim_min)
203 {
204 	int reclaim = desc_reclaimable(q);
205 
206 	if (reclaim < reclaim_min)
207 		return (0);
208 
209 	mtx_assert(&q->lock, MA_OWNED);
210 	if (reclaim > 0) {
211 		t3_free_tx_desc(q, reclaim);
212 		q->cleaned += reclaim;
213 		q->in_use -= reclaim;
214 	}
215 	return (reclaim);
216 }
217 
218 /**
219  *	should_restart_tx - are there enough resources to restart a Tx queue?
220  *	@q: the Tx queue
221  *
222  *	Checks if there are enough descriptors to restart a suspended Tx queue.
223  */
224 static __inline int
225 should_restart_tx(const struct sge_txq *q)
226 {
227 	unsigned int r = q->processed - q->cleaned;
228 
229 	return q->in_use - r < (q->size >> 1);
230 }
231 
232 /**
233  *	t3_sge_init - initialize SGE
234  *	@adap: the adapter
235  *	@p: the SGE parameters
236  *
237  *	Performs SGE initialization needed every time after a chip reset.
238  *	We do not initialize any of the queue sets here, instead the driver
239  *	top-level must request those individually.  We also do not enable DMA
240  *	here, that should be done after the queues have been set up.
241  */
242 void
243 t3_sge_init(adapter_t *adap, struct sge_params *p)
244 {
245 	u_int ctrl, ups;
246 
247 	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
248 
249 	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
250 	       F_CQCRDTCTRL |
251 	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
252 	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
253 #if SGE_NUM_GENBITS == 1
254 	ctrl |= F_EGRGENCTRL;
255 #endif
256 	if (adap->params.rev > 0) {
257 		if (!(adap->flags & (USING_MSIX | USING_MSI)))
258 			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
259 		ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
260 	}
261 	t3_write_reg(adap, A_SG_CONTROL, ctrl);
262 	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
263 		     V_LORCQDRBTHRSH(512));
264 	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
265 	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
266 		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
267 	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
268 	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
269 	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
270 	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
271 	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
272 	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
273 }
274 
275 
276 /**
277  *	sgl_len - calculates the size of an SGL of the given capacity
278  *	@n: the number of SGL entries
279  *
280  *	Calculates the number of flits needed for a scatter/gather list that
281  *	can hold the given number of entries.
282  */
283 static __inline unsigned int
284 sgl_len(unsigned int n)
285 {
286 	return ((3 * n) / 2 + (n & 1));
287 }
288 
289 /**
290  *	get_imm_packet - return the next ingress packet buffer from a response
291  *	@resp: the response descriptor containing the packet data
292  *
293  *	Return a packet containing the immediate data of the given response.
294  */
295 static int
296 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags)
297 {
298 
299 	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
300 	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
301 	return (0);
302 
303 }
304 
305 static __inline u_int
306 flits_to_desc(u_int n)
307 {
308 	return (flit_desc_map[n]);
309 }
310 
311 void
312 t3_sge_err_intr_handler(adapter_t *adapter)
313 {
314 	unsigned int v, status;
315 
316 
317 	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
318 
319 	if (status & F_RSPQCREDITOVERFOW)
320 		CH_ALERT(adapter, "SGE response queue credit overflow\n");
321 
322 	if (status & F_RSPQDISABLED) {
323 		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
324 
325 		CH_ALERT(adapter,
326 			 "packet delivered to disabled response queue (0x%x)\n",
327 			 (v >> S_RSPQ0DISABLED) & 0xff);
328 	}
329 
330 	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
331 	if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
332 		t3_fatal_err(adapter);
333 }
334 
335 void
336 t3_sge_prep(adapter_t *adap, struct sge_params *p)
337 {
338 	int i;
339 
340 	/* XXX Does ETHER_ALIGN need to be accounted for here? */
341 	p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);
342 
343 	for (i = 0; i < SGE_QSETS; ++i) {
344 		struct qset_params *q = p->qset + i;
345 
346 		q->polling = adap->params.rev > 0;
347 
348 		if (adap->params.nports > 2) {
349 			q->coalesce_nsecs = 50000;
350 		} else {
351 #ifdef INVARIANTS
352 			q->coalesce_nsecs = 10000;
353 #else
354 			q->coalesce_nsecs = 5000;
355 #endif
356 		}
357 		q->rspq_size = RSPQ_Q_SIZE;
358 		q->fl_size = FL_Q_SIZE;
359 		q->jumbo_size = JUMBO_Q_SIZE;
360 		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
361 		q->txq_size[TXQ_OFLD] = 1024;
362 		q->txq_size[TXQ_CTRL] = 256;
363 		q->cong_thres = 0;
364 	}
365 }
366 
367 int
368 t3_sge_alloc(adapter_t *sc)
369 {
370 
371 	/* The parent tag. */
372 	if (bus_dma_tag_create( NULL,			/* parent */
373 				1, 0,			/* algnmnt, boundary */
374 				BUS_SPACE_MAXADDR,	/* lowaddr */
375 				BUS_SPACE_MAXADDR,	/* highaddr */
376 				NULL, NULL,		/* filter, filterarg */
377 				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
378 				BUS_SPACE_UNRESTRICTED, /* nsegments */
379 				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
380 				0,			/* flags */
381 				NULL, NULL,		/* lock, lockarg */
382 				&sc->parent_dmat)) {
383 		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
384 		return (ENOMEM);
385 	}
386 
387 	/*
388 	 * DMA tag for normal sized RX frames
389 	 */
390 	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
391 		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
392 		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
393 		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
394 		return (ENOMEM);
395 	}
396 
397 	/*
398 	 * DMA tag for jumbo sized RX frames.
399 	 */
400 	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
401 		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
402 		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
403 		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
404 		return (ENOMEM);
405 	}
406 
407 	/*
408 	 * DMA tag for TX frames.
409 	 */
410 	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
411 		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
412 		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
413 		NULL, NULL, &sc->tx_dmat)) {
414 		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
415 		return (ENOMEM);
416 	}
417 
418 	return (0);
419 }
420 
421 int
422 t3_sge_free(struct adapter * sc)
423 {
424 
425 	if (sc->tx_dmat != NULL)
426 		bus_dma_tag_destroy(sc->tx_dmat);
427 
428 	if (sc->rx_jumbo_dmat != NULL)
429 		bus_dma_tag_destroy(sc->rx_jumbo_dmat);
430 
431 	if (sc->rx_dmat != NULL)
432 		bus_dma_tag_destroy(sc->rx_dmat);
433 
434 	if (sc->parent_dmat != NULL)
435 		bus_dma_tag_destroy(sc->parent_dmat);
436 
437 	return (0);
438 }
439 
440 void
441 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
442 {
443 
444 	qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
445 	qs->rspq.polling = 0 /* p->polling */;
446 }
447 
448 #if !defined(__i386__) && !defined(__amd64__)
449 static void
450 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
451 {
452 	struct refill_fl_cb_arg *cb_arg = arg;
453 
454 	cb_arg->error = error;
455 	cb_arg->seg = segs[0];
456 	cb_arg->nseg = nseg;
457 
458 }
459 #endif
460 /**
461  *	refill_fl - refill an SGE free-buffer list
462  *	@sc: the controller softc
463  *	@q: the free-list to refill
464  *	@n: the number of new buffers to allocate
465  *
466  *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
467  *	The caller must assure that @n does not exceed the queue's capacity.
468  */
469 static void
470 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
471 {
472 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
473 	struct rx_desc *d = &q->desc[q->pidx];
474 	struct refill_fl_cb_arg cb_arg;
475 	caddr_t cl;
476 	int err;
477 	int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
478 
479 	cb_arg.error = 0;
480 	while (n--) {
481 		/*
482 		 * We only allocate a cluster, mbuf allocation happens after rx
483 		 */
484 		if ((cl = cxgb_cache_get(q->zone)) == NULL) {
485 			log(LOG_WARNING, "Failed to allocate cluster\n");
486 			goto done;
487 		}
488 
489 		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
490 			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
491 				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
492 				uma_zfree(q->zone, cl);
493 				goto done;
494 			}
495 			sd->flags |= RX_SW_DESC_MAP_CREATED;
496 		}
497 #if !defined(__i386__) && !defined(__amd64__)
498 		err = bus_dmamap_load(q->entry_tag, sd->map,
499 		    cl + header_size, q->buf_size,
500 		    refill_fl_cb, &cb_arg, 0);
501 
502 		if (err != 0 || cb_arg.error) {
503 			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
504 			/*
505 			 * XXX free cluster
506 			 */
507 			return;
508 		}
509 #else
510 		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size));
511 #endif
512 		sd->flags |= RX_SW_DESC_INUSE;
513 		sd->rxsd_cl = cl;
514 		sd->data = cl + header_size;
515 		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
516 		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
517 		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
518 		d->gen2 = htobe32(V_FLD_GEN2(q->gen));
519 
520 		d++;
521 		sd++;
522 
523 		if (++q->pidx == q->size) {
524 			q->pidx = 0;
525 			q->gen ^= 1;
526 			sd = q->sdesc;
527 			d = q->desc;
528 		}
529 		q->credits++;
530 	}
531 
532 done:
533 	t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
534 }
535 
536 
537 /**
538  *	free_rx_bufs - free the Rx buffers on an SGE free list
539  *	@sc: the controle softc
540  *	@q: the SGE free list to clean up
541  *
542  *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
543  *	this queue should be stopped before calling this function.
544  */
545 static void
546 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
547 {
548 	u_int cidx = q->cidx;
549 
550 	while (q->credits--) {
551 		struct rx_sw_desc *d = &q->sdesc[cidx];
552 
553 		if (d->flags & RX_SW_DESC_INUSE) {
554 			bus_dmamap_unload(q->entry_tag, d->map);
555 			bus_dmamap_destroy(q->entry_tag, d->map);
556 			uma_zfree(q->zone, d->rxsd_cl);
557 		}
558 		d->rxsd_cl = NULL;
559 		if (++cidx == q->size)
560 			cidx = 0;
561 	}
562 }
563 
564 static __inline void
565 __refill_fl(adapter_t *adap, struct sge_fl *fl)
566 {
567 	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
568 }
569 
570 static __inline void
571 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
572 {
573 	if ((fl->size - fl->credits) < max)
574 		refill_fl(adap, fl, min(max, fl->size - fl->credits));
575 }
576 
577 void
578 refill_fl_service(adapter_t *adap, struct sge_fl *fl)
579 {
580 	__refill_fl_lt(adap, fl, 512);
581 }
582 
583 /**
584  *	recycle_rx_buf - recycle a receive buffer
585  *	@adapter: the adapter
586  *	@q: the SGE free list
587  *	@idx: index of buffer to recycle
588  *
589  *	Recycles the specified buffer on the given free list by adding it at
590  *	the next available slot on the list.
591  */
592 static void
593 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
594 {
595 	struct rx_desc *from = &q->desc[idx];
596 	struct rx_desc *to   = &q->desc[q->pidx];
597 
598 	q->sdesc[q->pidx] = q->sdesc[idx];
599 	to->addr_lo = from->addr_lo;        // already big endian
600 	to->addr_hi = from->addr_hi;        // likewise
601 	wmb();
602 	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
603 	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
604 	q->credits++;
605 
606 	if (++q->pidx == q->size) {
607 		q->pidx = 0;
608 		q->gen ^= 1;
609 	}
610 	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
611 }
612 
613 static void
614 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
615 {
616 	uint32_t *addr;
617 
618 	addr = arg;
619 	*addr = segs[0].ds_addr;
620 }
621 
622 static int
623 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
624     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
625     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
626 {
627 	size_t len = nelem * elem_size;
628 	void *s = NULL;
629 	void *p = NULL;
630 	int err;
631 
632 	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
633 				      BUS_SPACE_MAXADDR_32BIT,
634 				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
635 				      len, 0, NULL, NULL, tag)) != 0) {
636 		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
637 		return (ENOMEM);
638 	}
639 
640 	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
641 				    map)) != 0) {
642 		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
643 		return (ENOMEM);
644 	}
645 
646 	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
647 	bzero(p, len);
648 	*(void **)desc = p;
649 
650 	if (sw_size) {
651 		len = nelem * sw_size;
652 		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
653 		*(void **)sdesc = s;
654 	}
655 	if (parent_entry_tag == NULL)
656 		return (0);
657 
658 	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
659 				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
660 		                      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
661 				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
662 		                      NULL, NULL, entry_tag)) != 0) {
663 		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
664 		return (ENOMEM);
665 	}
666 	return (0);
667 }
668 
669 static void
670 sge_slow_intr_handler(void *arg, int ncount)
671 {
672 	adapter_t *sc = arg;
673 
674 	t3_slow_intr_handler(sc);
675 }
676 
677 /**
678  *	sge_timer_cb - perform periodic maintenance of an SGE qset
679  *	@data: the SGE queue set to maintain
680  *
681  *	Runs periodically from a timer to perform maintenance of an SGE queue
682  *	set.  It performs two tasks:
683  *
684  *	a) Cleans up any completed Tx descriptors that may still be pending.
685  *	Normal descriptor cleanup happens when new packets are added to a Tx
686  *	queue so this timer is relatively infrequent and does any cleanup only
687  *	if the Tx queue has not seen any new packets in a while.  We make a
688  *	best effort attempt to reclaim descriptors, in that we don't wait
689  *	around if we cannot get a queue's lock (which most likely is because
690  *	someone else is queueing new packets and so will also handle the clean
691  *	up).  Since control queues use immediate data exclusively we don't
692  *	bother cleaning them up here.
693  *
694  *	b) Replenishes Rx queues that have run out due to memory shortage.
695  *	Normally new Rx buffers are added when existing ones are consumed but
696  *	when out of memory a queue can become empty.  We try to add only a few
697  *	buffers here, the queue will be replenished fully as these new buffers
698  *	are used up if memory shortage has subsided.
699  *
700  *	c) Return coalesced response queue credits in case a response queue is
701  *	starved.
702  *
703  *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
704  *	fifo overflows and the FW doesn't implement any recovery scheme yet.
705  */
706 static void
707 sge_timer_cb(void *arg)
708 {
709 	adapter_t *sc = arg;
710 #ifndef IFNET_MULTIQUEUE
711 	struct port_info *pi;
712 	struct sge_qset *qs;
713 	struct sge_txq  *txq;
714 	int i, j;
715 	int reclaim_ofl, refill_rx;
716 
717 	for (i = 0; i < sc->params.nports; i++)
718 		for (j = 0; j < sc->port[i].nqsets; j++) {
719 			qs = &sc->sge.qs[i + j];
720 			txq = &qs->txq[0];
721 			reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
722 			refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
723 			    (qs->fl[1].credits < qs->fl[1].size));
724 			if (reclaim_ofl || refill_rx) {
725 				pi = &sc->port[i];
726 				taskqueue_enqueue(pi->tq, &pi->timer_reclaim_task);
727 				break;
728 			}
729 		}
730 #endif
731 	if (sc->params.nports > 2) {
732 		int i;
733 
734 		for_each_port(sc, i) {
735 			struct port_info *pi = &sc->port[i];
736 
737 			t3_write_reg(sc, A_SG_KDOORBELL,
738 				     F_SELEGRCNTX |
739 				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
740 		}
741 	}
742 	if (sc->open_device_map != 0)
743 		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
744 }
745 
746 /*
747  * This is meant to be a catch-all function to keep sge state private
748  * to sge.c
749  *
750  */
751 int
752 t3_sge_init_adapter(adapter_t *sc)
753 {
754 	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
755 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
756 	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
757 	mi_init();
758 	cxgb_cache_init();
759 	return (0);
760 }
761 
762 int
763 t3_sge_reset_adapter(adapter_t *sc)
764 {
765 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
766 	return (0);
767 }
768 
769 int
770 t3_sge_init_port(struct port_info *pi)
771 {
772 	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
773 	return (0);
774 }
775 
776 void
777 t3_sge_deinit_sw(adapter_t *sc)
778 {
779 	int i;
780 
781 	callout_drain(&sc->sge_timer_ch);
782 	if (sc->tq)
783 		taskqueue_drain(sc->tq, &sc->slow_intr_task);
784 	for (i = 0; i < sc->params.nports; i++)
785 		if (sc->port[i].tq != NULL)
786 			taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
787 
788 	mi_deinit();
789 }
790 
791 /**
792  *	refill_rspq - replenish an SGE response queue
793  *	@adapter: the adapter
794  *	@q: the response queue to replenish
795  *	@credits: how many new responses to make available
796  *
797  *	Replenishes a response queue by making the supplied number of responses
798  *	available to HW.
799  */
800 static __inline void
801 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
802 {
803 
804 	/* mbufs are allocated on demand when a rspq entry is processed. */
805 	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
806 		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
807 }
808 
809 static __inline void
810 sge_txq_reclaim_(struct sge_txq *txq, int force)
811 {
812 
813 	if (desc_reclaimable(txq) < 16)
814 		return;
815 	if (mtx_trylock(&txq->lock) == 0)
816 		return;
817 	reclaim_completed_tx_(txq, 16);
818 	mtx_unlock(&txq->lock);
819 
820 }
821 
822 static void
823 sge_txq_reclaim_handler(void *arg, int ncount)
824 {
825 	struct sge_txq *q = arg;
826 
827 	sge_txq_reclaim_(q, TRUE);
828 }
829 
830 
831 
832 static void
833 sge_timer_reclaim(void *arg, int ncount)
834 {
835 	struct port_info *pi = arg;
836 	int i, nqsets = pi->nqsets;
837 	adapter_t *sc = pi->adapter;
838 	struct sge_qset *qs;
839 	struct sge_txq *txq;
840 	struct mtx *lock;
841 
842 #ifdef IFNET_MULTIQUEUE
843 	panic("%s should not be called with multiqueue support\n", __FUNCTION__);
844 #endif
845 	for (i = 0; i < nqsets; i++) {
846 		qs = &sc->sge.qs[i];
847 
848 		txq = &qs->txq[TXQ_OFLD];
849 		sge_txq_reclaim_(txq, FALSE);
850 
851 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
852 			    &sc->sge.qs[0].rspq.lock;
853 
854 		if (mtx_trylock(lock)) {
855 			/* XXX currently assume that we are *NOT* polling */
856 			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
857 
858 			if (qs->fl[0].credits < qs->fl[0].size - 16)
859 				__refill_fl(sc, &qs->fl[0]);
860 			if (qs->fl[1].credits < qs->fl[1].size - 16)
861 				__refill_fl(sc, &qs->fl[1]);
862 
863 			if (status & (1 << qs->rspq.cntxt_id)) {
864 				if (qs->rspq.credits) {
865 					refill_rspq(sc, &qs->rspq, 1);
866 					qs->rspq.credits--;
867 					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
868 					    1 << qs->rspq.cntxt_id);
869 				}
870 			}
871 			mtx_unlock(lock);
872 		}
873 	}
874 }
875 
876 /**
877  *	init_qset_cntxt - initialize an SGE queue set context info
878  *	@qs: the queue set
879  *	@id: the queue set id
880  *
881  *	Initializes the TIDs and context ids for the queues of a queue set.
882  */
883 static void
884 init_qset_cntxt(struct sge_qset *qs, u_int id)
885 {
886 
887 	qs->rspq.cntxt_id = id;
888 	qs->fl[0].cntxt_id = 2 * id;
889 	qs->fl[1].cntxt_id = 2 * id + 1;
890 	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
891 	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
892 	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
893 	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
894 	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
895 
896 	mbufq_init(&qs->txq[TXQ_ETH].sendq);
897 	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
898 	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
899 }
900 
901 
902 static void
903 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
904 {
905 	txq->in_use += ndesc;
906 	/*
907 	 * XXX we don't handle stopping of queue
908 	 * presumably start handles this when we bump against the end
909 	 */
910 	txqs->gen = txq->gen;
911 	txq->unacked += ndesc;
912 	txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
913 	txq->unacked &= 7;
914 	txqs->pidx = txq->pidx;
915 	txq->pidx += ndesc;
916 #ifdef INVARIANTS
917 	if (((txqs->pidx > txq->cidx) &&
918 		(txq->pidx < txqs->pidx) &&
919 		(txq->pidx >= txq->cidx)) ||
920 	    ((txqs->pidx < txq->cidx) &&
921 		(txq->pidx >= txq-> cidx)) ||
922 	    ((txqs->pidx < txq->cidx) &&
923 		(txq->cidx < txqs->pidx)))
924 		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
925 		    txqs->pidx, txq->pidx, txq->cidx);
926 #endif
927 	if (txq->pidx >= txq->size) {
928 		txq->pidx -= txq->size;
929 		txq->gen ^= 1;
930 	}
931 
932 }
933 
934 /**
935  *	calc_tx_descs - calculate the number of Tx descriptors for a packet
936  *	@m: the packet mbufs
937  *      @nsegs: the number of segments
938  *
939  * 	Returns the number of Tx descriptors needed for the given Ethernet
940  * 	packet.  Ethernet packets require addition of WR and CPL headers.
941  */
942 static __inline unsigned int
943 calc_tx_descs(const struct mbuf *m, int nsegs)
944 {
945 	unsigned int flits;
946 
947 	if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
948 		return 1;
949 
950 	flits = sgl_len(nsegs) + 2;
951 #ifdef TSO_SUPPORTED
952 	if (m->m_pkthdr.csum_flags & CSUM_TSO)
953 		flits++;
954 #endif
955 	return flits_to_desc(flits);
956 }
957 
958 static unsigned int
959 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
960     struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
961 {
962 	struct mbuf *m0;
963 	int err, pktlen, pass = 0;
964 
965 retry:
966 	err = 0;
967 	m0 = *m;
968 	pktlen = m0->m_pkthdr.len;
969 #if defined(__i386__) || defined(__amd64__)
970 	if (busdma_map_sg_collapse(m, segs, nsegs) == 0) {
971 		goto done;
972 	} else
973 #endif
974 		err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);
975 
976 	if (err == 0) {
977 		goto done;
978 	}
979 	if (err == EFBIG && pass == 0) {
980 		pass = 1;
981 		/* Too many segments, try to defrag */
982 		m0 = m_defrag(m0, M_DONTWAIT);
983 		if (m0 == NULL) {
984 			m_freem(*m);
985 			*m = NULL;
986 			return (ENOBUFS);
987 		}
988 		*m = m0;
989 		goto retry;
990 	} else if (err == ENOMEM) {
991 		return (err);
992 	} if (err) {
993 		if (cxgb_debug)
994 			printf("map failure err=%d pktlen=%d\n", err, pktlen);
995 		m_freem(m0);
996 		*m = NULL;
997 		return (err);
998 	}
999 done:
1000 #if !defined(__i386__) && !defined(__amd64__)
1001 	bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE);
1002 #endif
1003 	txsd->flags |= TX_SW_DESC_MAPPED;
1004 
1005 	return (0);
1006 }
1007 
1008 /**
1009  *	make_sgl - populate a scatter/gather list for a packet
1010  *	@sgp: the SGL to populate
1011  *	@segs: the packet dma segments
1012  *	@nsegs: the number of segments
1013  *
1014  *	Generates a scatter/gather list for the buffers that make up a packet
1015  *	and returns the SGL size in 8-byte words.  The caller must size the SGL
1016  *	appropriately.
1017  */
1018 static __inline void
1019 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1020 {
1021 	int i, idx;
1022 
1023 	for (idx = 0, i = 0; i < nsegs; i++) {
1024 		/*
1025 		 * firmware doesn't like empty segments
1026 		 */
1027 		if (segs[i].ds_len == 0)
1028 			continue;
1029 		if (i && idx == 0)
1030 			++sgp;
1031 
1032 		sgp->len[idx] = htobe32(segs[i].ds_len);
1033 		sgp->addr[idx] = htobe64(segs[i].ds_addr);
1034 		idx ^= 1;
1035 	}
1036 
1037 	if (idx) {
1038 		sgp->len[idx] = 0;
1039 		sgp->addr[idx] = 0;
1040 	}
1041 }
1042 
1043 /**
1044  *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1045  *	@adap: the adapter
1046  *	@q: the Tx queue
1047  *
1048  *	Ring the doorbel if a Tx queue is asleep.  There is a natural race,
1049  *	where the HW is going to sleep just after we checked, however,
1050  *	then the interrupt handler will detect the outstanding TX packet
1051  *	and ring the doorbell for us.
1052  *
1053  *	When GTS is disabled we unconditionally ring the doorbell.
1054  */
1055 static __inline void
1056 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1057 {
1058 #if USE_GTS
1059 	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1060 	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1061 		set_bit(TXQ_LAST_PKT_DB, &q->flags);
1062 #ifdef T3_TRACE
1063 		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1064 			  q->cntxt_id);
1065 #endif
1066 		t3_write_reg(adap, A_SG_KDOORBELL,
1067 			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1068 	}
1069 #else
1070 	wmb();            /* write descriptors before telling HW */
1071 	t3_write_reg(adap, A_SG_KDOORBELL,
1072 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1073 #endif
1074 }
1075 
1076 static __inline void
1077 wr_gen2(struct tx_desc *d, unsigned int gen)
1078 {
1079 #if SGE_NUM_GENBITS == 2
1080 	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1081 #endif
1082 }
1083 
1084 /**
1085  *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
1086  *	@ndesc: number of Tx descriptors spanned by the SGL
1087  *	@txd: first Tx descriptor to be written
1088  *	@txqs: txq state (generation and producer index)
1089  *	@txq: the SGE Tx queue
1090  *	@sgl: the SGL
1091  *	@flits: number of flits to the start of the SGL in the first descriptor
1092  *	@sgl_flits: the SGL size in flits
1093  *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
1094  *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
1095  *
1096  *	Write a work request header and an associated SGL.  If the SGL is
1097  *	small enough to fit into one Tx descriptor it has already been written
1098  *	and we just need to write the WR header.  Otherwise we distribute the
1099  *	SGL across the number of descriptors it spans.
1100  */
1101 static void
1102 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1103     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1104     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1105 {
1106 
1107 	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1108 	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1109 
1110 	if (__predict_true(ndesc == 1)) {
1111 		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1112 		    V_WR_SGLSFLT(flits)) | wr_hi;
1113 		wmb();
1114 		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1115 		    V_WR_GEN(txqs->gen)) | wr_lo;
1116 		/* XXX gen? */
1117 		wr_gen2(txd, txqs->gen);
1118 
1119 	} else {
1120 		unsigned int ogen = txqs->gen;
1121 		const uint64_t *fp = (const uint64_t *)sgl;
1122 		struct work_request_hdr *wp = wrp;
1123 
1124 		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1125 		    V_WR_SGLSFLT(flits)) | wr_hi;
1126 
1127 		while (sgl_flits) {
1128 			unsigned int avail = WR_FLITS - flits;
1129 
1130 			if (avail > sgl_flits)
1131 				avail = sgl_flits;
1132 			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1133 			sgl_flits -= avail;
1134 			ndesc--;
1135 			if (!sgl_flits)
1136 				break;
1137 
1138 			fp += avail;
1139 			txd++;
1140 			txsd++;
1141 			if (++txqs->pidx == txq->size) {
1142 				txqs->pidx = 0;
1143 				txqs->gen ^= 1;
1144 				txd = txq->desc;
1145 				txsd = txq->sdesc;
1146 			}
1147 
1148 			/*
1149 			 * when the head of the mbuf chain
1150 			 * is freed all clusters will be freed
1151 			 * with it
1152 			 */
1153 			KASSERT(txsd->mi.mi_base == NULL, ("overwrting valid entry mi_base==%p", txsd->mi.mi_base));
1154 			wrp = (struct work_request_hdr *)txd;
1155 			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1156 			    V_WR_SGLSFLT(1)) | wr_hi;
1157 			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1158 				    sgl_flits + 1)) |
1159 			    V_WR_GEN(txqs->gen)) | wr_lo;
1160 			wr_gen2(txd, txqs->gen);
1161 			flits = 1;
1162 		}
1163 		wrp->wr_hi |= htonl(F_WR_EOP);
1164 		wmb();
1165 		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1166 		wr_gen2((struct tx_desc *)wp, ogen);
1167 	}
1168 }
1169 
1170 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1171 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
1172 
1173 #ifdef VLAN_SUPPORTED
1174 #define GET_VTAG(cntrl, m) \
1175 do { \
1176 	if ((m)->m_flags & M_VLANTAG)					            \
1177 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1178 } while (0)
1179 
1180 #define GET_VTAG_MI(cntrl, mi) \
1181 do { \
1182 	if ((mi)->mi_flags & M_VLANTAG)					\
1183 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \
1184 } while (0)
1185 #else
1186 #define GET_VTAG(cntrl, m)
1187 #define GET_VTAG_MI(cntrl, m)
1188 #endif
1189 
1190 int
1191 t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
1192 {
1193 	adapter_t *sc;
1194 	struct mbuf *m0;
1195 	struct sge_txq *txq;
1196 	struct txq_state txqs;
1197 	struct port_info *pi;
1198 	unsigned int ndesc, flits, cntrl, mlen;
1199 	int err, nsegs, tso_info = 0;
1200 
1201 	struct work_request_hdr *wrp;
1202 	struct tx_sw_desc *txsd;
1203 	struct sg_ent *sgp, *sgl;
1204 	uint32_t wr_hi, wr_lo, sgl_flits;
1205 	bus_dma_segment_t segs[TX_MAX_SEGS];
1206 
1207 	struct tx_desc *txd;
1208 	struct mbuf_vec *mv;
1209 	struct mbuf_iovec *mi;
1210 
1211 	DPRINTF("t3_encap cpu=%d ", curcpu);
1212 	KASSERT(qs->idx == 0, ("invalid qs %d", qs->idx));
1213 
1214 	mi = NULL;
1215 	pi = qs->port;
1216 	sc = pi->adapter;
1217 	txq = &qs->txq[TXQ_ETH];
1218 	txd = &txq->desc[txq->pidx];
1219 	txsd = &txq->sdesc[txq->pidx];
1220 	sgl = txq->txq_sgl;
1221 	m0 = *m;
1222 
1223 	DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
1224 	DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);
1225 	if (cxgb_debug)
1226 		printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx);
1227 
1228 	mtx_assert(&txq->lock, MA_OWNED);
1229 	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1230 /*
1231  * XXX need to add VLAN support for 6.x
1232  */
1233 #ifdef VLAN_SUPPORTED
1234 	if  (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1235 		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1236 #endif
1237 	KASSERT(txsd->mi.mi_base == NULL, ("overwrting valid entry mi_base==%p",
1238 		txsd->mi.mi_base));
1239 	if (count > 1) {
1240 		panic("count > 1 not support in CVS\n");
1241 		if ((err = busdma_map_sg_vec(m, &m0, segs, count)))
1242 			return (err);
1243 		nsegs = count;
1244 	} else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) {
1245 		if (cxgb_debug)
1246 			printf("failed ... err=%d\n", err);
1247 		return (err);
1248 	}
1249 	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count));
1250 
1251 	if (!(m0->m_pkthdr.len <= PIO_LEN)) {
1252 		mi_collapse_mbuf(&txsd->mi, m0);
1253 		mi = &txsd->mi;
1254 	}
1255 	if (count > 1) {
1256 		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1257 		int i, fidx;
1258 		struct mbuf_iovec *batchmi;
1259 
1260 		mv = mtomv(m0);
1261 		batchmi = mv->mv_vec;
1262 
1263 		wrp = (struct work_request_hdr *)txd;
1264 
1265 		flits = count*2 + 1;
1266 		txq_prod(txq, 1, &txqs);
1267 
1268 		for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) {
1269 			struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i];
1270 
1271 			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1272 			GET_VTAG_MI(cntrl, batchmi);
1273 			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1274 			cbe->cntrl = htonl(cntrl);
1275 			cbe->len = htonl(batchmi->mi_len | 0x80000000);
1276 			cbe->addr = htobe64(segs[i].ds_addr);
1277 			txd->flit[fidx] |= htobe64(1 << 24);
1278 		}
1279 
1280 		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1281 		    V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1282 		wmb();
1283 		wrp->wr_lo = htonl(V_WR_LEN(flits) |
1284 		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1285 		/* XXX gen? */
1286 		wr_gen2(txd, txqs.gen);
1287 		check_ring_tx_db(sc, txq);
1288 
1289 		return (0);
1290 	} else if (tso_info) {
1291 		int undersized, eth_type;
1292 		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1293 		struct ip *ip;
1294 		struct tcphdr *tcp;
1295 		char *pkthdr, tmp[TCPPKTHDRSIZE];
1296 		struct mbuf_vec *mv;
1297 		struct mbuf_iovec *tmpmi;
1298 
1299 		mv = mtomv(m0);
1300 		tmpmi = mv->mv_vec;
1301 
1302 		txd->flit[2] = 0;
1303 		GET_VTAG_MI(cntrl, mi);
1304 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1305 		hdr->cntrl = htonl(cntrl);
1306 		mlen = m0->m_pkthdr.len;
1307 		hdr->len = htonl(mlen | 0x80000000);
1308 
1309 		DPRINTF("tso buf len=%d\n", mlen);
1310 		undersized = (((tmpmi->mi_len < TCPPKTHDRSIZE) &&
1311 			(m0->m_flags & M_VLANTAG)) ||
1312 		    (tmpmi->mi_len < TCPPKTHDRSIZE - ETHER_VLAN_ENCAP_LEN));
1313 		if (__predict_false(undersized)) {
1314 			pkthdr = tmp;
1315 			dump_mi(mi);
1316 			panic("discontig packet - fixxorz");
1317 		} else
1318 			pkthdr = m0->m_data;
1319 
1320 		if (__predict_false(m0->m_flags & M_VLANTAG)) {
1321 			eth_type = CPL_ETH_II_VLAN;
1322 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1323 			    ETHER_VLAN_ENCAP_LEN);
1324 		} else {
1325 			eth_type = CPL_ETH_II;
1326 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1327 		}
1328 		tcp = (struct tcphdr *)((uint8_t *)ip +
1329 		    sizeof(*ip));
1330 
1331 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
1332 			    V_LSO_IPHDR_WORDS(ip->ip_hl) |
1333 			    V_LSO_TCPHDR_WORDS(tcp->th_off);
1334 		hdr->lso_info = htonl(tso_info);
1335 		flits = 3;
1336 	} else {
1337 		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1338 
1339 		GET_VTAG(cntrl, m0);
1340 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1341 		cpl->cntrl = htonl(cntrl);
1342 		mlen = m0->m_pkthdr.len;
1343 		cpl->len = htonl(mlen | 0x80000000);
1344 
1345 		if (mlen <= PIO_LEN) {
1346 			txq_prod(txq, 1, &txqs);
1347 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1348 			m_freem(m0);
1349 			m0 = NULL;
1350 			flits = (mlen + 7) / 8 + 2;
1351 			cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1352 					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1353 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1354 			wmb();
1355 			cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1356 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1357 
1358 			wr_gen2(txd, txqs.gen);
1359 			check_ring_tx_db(sc, txq);
1360 			DPRINTF("pio buf\n");
1361 			return (0);
1362 		}
1363 		DPRINTF("regular buf\n");
1364 		flits = 2;
1365 	}
1366 	wrp = (struct work_request_hdr *)txd;
1367 
1368 #ifdef	nomore
1369 	/*
1370 	 * XXX need to move into one of the helper routines above
1371 	 *
1372 	 */
1373 	if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0)
1374 		return (err);
1375 	m0 = *m;
1376 #endif
1377 	ndesc = calc_tx_descs(m0, nsegs);
1378 
1379 	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1380 	make_sgl(sgp, segs, nsegs);
1381 
1382 	sgl_flits = sgl_len(nsegs);
1383 
1384 	DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1385 	txq_prod(txq, ndesc, &txqs);
1386 	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1387 	wr_lo = htonl(V_WR_TID(txq->token));
1388 	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1389 	check_ring_tx_db(pi->adapter, txq);
1390 
1391 	if ((m0->m_type == MT_DATA) &&
1392 	    ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) &&
1393 	    (m0->m_ext.ext_type != EXT_PACKET)) {
1394 		m0->m_flags &= ~M_EXT ;
1395 		cxgb_mbufs_outstanding--;
1396 		m_free(m0);
1397 	}
1398 
1399 	return (0);
1400 }
1401 
1402 
1403 /**
1404  *	write_imm - write a packet into a Tx descriptor as immediate data
1405  *	@d: the Tx descriptor to write
1406  *	@m: the packet
1407  *	@len: the length of packet data to write as immediate data
1408  *	@gen: the generation bit value to write
1409  *
1410  *	Writes a packet as immediate data into a Tx descriptor.  The packet
1411  *	contains a work request at its beginning.  We must write the packet
1412  *	carefully so the SGE doesn't read accidentally before it's written in
1413  *	its entirety.
1414  */
1415 static __inline void
1416 write_imm(struct tx_desc *d, struct mbuf *m,
1417 	  unsigned int len, unsigned int gen)
1418 {
1419 	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1420 	struct work_request_hdr *to = (struct work_request_hdr *)d;
1421 
1422 	if (len > WR_LEN)
1423 		panic("len too big %d\n", len);
1424 	if (len < sizeof(*from))
1425 		panic("len too small %d", len);
1426 
1427 	memcpy(&to[1], &from[1], len - sizeof(*from));
1428 	to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1429 					V_WR_BCNTLFLT(len & 7));
1430 	wmb();
1431 	to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1432 					V_WR_LEN((len + 7) / 8));
1433 	wr_gen2(d, gen);
1434 
1435 	/*
1436 	 * This check is a hack we should really fix the logic so
1437 	 * that this can't happen
1438 	 */
1439 	if (m->m_type != MT_DONTFREE)
1440 		m_freem(m);
1441 
1442 }
1443 
1444 /**
1445  *	check_desc_avail - check descriptor availability on a send queue
1446  *	@adap: the adapter
1447  *	@q: the TX queue
1448  *	@m: the packet needing the descriptors
1449  *	@ndesc: the number of Tx descriptors needed
1450  *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1451  *
1452  *	Checks if the requested number of Tx descriptors is available on an
1453  *	SGE send queue.  If the queue is already suspended or not enough
1454  *	descriptors are available the packet is queued for later transmission.
1455  *	Must be called with the Tx queue locked.
1456  *
1457  *	Returns 0 if enough descriptors are available, 1 if there aren't
1458  *	enough descriptors and the packet has been queued, and 2 if the caller
1459  *	needs to retry because there weren't enough descriptors at the
1460  *	beginning of the call but some freed up in the mean time.
1461  */
1462 static __inline int
1463 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1464 		 struct mbuf *m, unsigned int ndesc,
1465 		 unsigned int qid)
1466 {
1467 	/*
1468 	 * XXX We currently only use this for checking the control queue
1469 	 * the control queue is only used for binding qsets which happens
1470 	 * at init time so we are guaranteed enough descriptors
1471 	 */
1472 	if (__predict_false(!mbufq_empty(&q->sendq))) {
1473 addq_exit:	mbufq_tail(&q->sendq, m);
1474 		return 1;
1475 	}
1476 	if (__predict_false(q->size - q->in_use < ndesc)) {
1477 
1478 		struct sge_qset *qs = txq_to_qset(q, qid);
1479 
1480 		printf("stopping q\n");
1481 
1482 		setbit(&qs->txq_stopped, qid);
1483 		smp_mb();
1484 
1485 		if (should_restart_tx(q) &&
1486 		    test_and_clear_bit(qid, &qs->txq_stopped))
1487 			return 2;
1488 
1489 		q->stops++;
1490 		goto addq_exit;
1491 	}
1492 	return 0;
1493 }
1494 
1495 
1496 /**
1497  *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1498  *	@q: the SGE control Tx queue
1499  *
1500  *	This is a variant of reclaim_completed_tx() that is used for Tx queues
1501  *	that send only immediate data (presently just the control queues) and
1502  *	thus do not have any mbufs
1503  */
1504 static __inline void
1505 reclaim_completed_tx_imm(struct sge_txq *q)
1506 {
1507 	unsigned int reclaim = q->processed - q->cleaned;
1508 
1509 	mtx_assert(&q->lock, MA_OWNED);
1510 
1511 	q->in_use -= reclaim;
1512 	q->cleaned += reclaim;
1513 }
1514 
1515 static __inline int
1516 immediate(const struct mbuf *m)
1517 {
1518 	return m->m_len <= WR_LEN  && m->m_pkthdr.len <= WR_LEN ;
1519 }
1520 
1521 /**
1522  *	ctrl_xmit - send a packet through an SGE control Tx queue
1523  *	@adap: the adapter
1524  *	@q: the control queue
1525  *	@m: the packet
1526  *
1527  *	Send a packet through an SGE control Tx queue.  Packets sent through
1528  *	a control queue must fit entirely as immediate data in a single Tx
1529  *	descriptor and have no page fragments.
1530  */
1531 static int
1532 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1533 {
1534 	int ret;
1535 	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1536 
1537 	if (__predict_false(!immediate(m))) {
1538 		m_freem(m);
1539 		return 0;
1540 	}
1541 
1542 	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1543 	wrp->wr_lo = htonl(V_WR_TID(q->token));
1544 
1545 	mtx_lock(&q->lock);
1546 again:	reclaim_completed_tx_imm(q);
1547 
1548 	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1549 	if (__predict_false(ret)) {
1550 		if (ret == 1) {
1551 			mtx_unlock(&q->lock);
1552 			log(LOG_ERR, "no desc available\n");
1553 
1554 			return (ENOSPC);
1555 		}
1556 		goto again;
1557 	}
1558 	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1559 
1560 	q->in_use++;
1561 	if (++q->pidx >= q->size) {
1562 		q->pidx = 0;
1563 		q->gen ^= 1;
1564 	}
1565 	mtx_unlock(&q->lock);
1566 	wmb();
1567 	t3_write_reg(adap, A_SG_KDOORBELL,
1568 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1569 	return (0);
1570 }
1571 
1572 
1573 /**
1574  *	restart_ctrlq - restart a suspended control queue
1575  *	@qs: the queue set cotaining the control queue
1576  *
1577  *	Resumes transmission on a suspended Tx control queue.
1578  */
1579 static void
1580 restart_ctrlq(void *data, int npending)
1581 {
1582 	struct mbuf *m;
1583 	struct sge_qset *qs = (struct sge_qset *)data;
1584 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1585 	adapter_t *adap = qs->port->adapter;
1586 
1587 	log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use);
1588 
1589 	mtx_lock(&q->lock);
1590 again:	reclaim_completed_tx_imm(q);
1591 
1592 	while (q->in_use < q->size &&
1593 	       (m = mbufq_dequeue(&q->sendq)) != NULL) {
1594 
1595 		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1596 
1597 		if (++q->pidx >= q->size) {
1598 			q->pidx = 0;
1599 			q->gen ^= 1;
1600 		}
1601 		q->in_use++;
1602 	}
1603 	if (!mbufq_empty(&q->sendq)) {
1604 		setbit(&qs->txq_stopped, TXQ_CTRL);
1605 		smp_mb();
1606 
1607 		if (should_restart_tx(q) &&
1608 		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1609 			goto again;
1610 		q->stops++;
1611 	}
1612 	mtx_unlock(&q->lock);
1613 	t3_write_reg(adap, A_SG_KDOORBELL,
1614 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1615 }
1616 
1617 
1618 /*
1619  * Send a management message through control queue 0
1620  */
1621 int
1622 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1623 {
1624 	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1625 }
1626 
1627 
1628 /**
1629  *	free_qset - free the resources of an SGE queue set
1630  *	@sc: the controller owning the queue set
1631  *	@q: the queue set
1632  *
1633  *	Release the HW and SW resources associated with an SGE queue set, such
1634  *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
1635  *	queue set must be quiesced prior to calling this.
1636  */
1637 void
1638 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1639 {
1640 	int i;
1641 
1642 	t3_free_tx_desc_all(&q->txq[TXQ_ETH]);
1643 
1644 	for (i = 0; i < SGE_TXQ_PER_SET; i++)
1645 		if (q->txq[i].txq_mr.br_ring != NULL) {
1646 			free(q->txq[i].txq_mr.br_ring, M_DEVBUF);
1647 			mtx_destroy(&q->txq[i].txq_mr.br_lock);
1648 		}
1649 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1650 		if (q->fl[i].desc) {
1651 			mtx_lock(&sc->sge.reg_lock);
1652 			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1653 			mtx_unlock(&sc->sge.reg_lock);
1654 			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1655 			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1656 					q->fl[i].desc_map);
1657 			bus_dma_tag_destroy(q->fl[i].desc_tag);
1658 			bus_dma_tag_destroy(q->fl[i].entry_tag);
1659 		}
1660 		if (q->fl[i].sdesc) {
1661 			free_rx_bufs(sc, &q->fl[i]);
1662 			free(q->fl[i].sdesc, M_DEVBUF);
1663 		}
1664 	}
1665 
1666 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1667 		if (q->txq[i].desc) {
1668 			mtx_lock(&sc->sge.reg_lock);
1669 			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1670 			mtx_unlock(&sc->sge.reg_lock);
1671 			bus_dmamap_unload(q->txq[i].desc_tag,
1672 					q->txq[i].desc_map);
1673 			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1674 					q->txq[i].desc_map);
1675 			bus_dma_tag_destroy(q->txq[i].desc_tag);
1676 			bus_dma_tag_destroy(q->txq[i].entry_tag);
1677 			MTX_DESTROY(&q->txq[i].lock);
1678 		}
1679 		if (q->txq[i].sdesc) {
1680 			free(q->txq[i].sdesc, M_DEVBUF);
1681 		}
1682 	}
1683 
1684 	if (q->rspq.desc) {
1685 		mtx_lock(&sc->sge.reg_lock);
1686 		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1687 		mtx_unlock(&sc->sge.reg_lock);
1688 
1689 		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1690 		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1691 			        q->rspq.desc_map);
1692 		bus_dma_tag_destroy(q->rspq.desc_tag);
1693 		MTX_DESTROY(&q->rspq.lock);
1694 	}
1695 
1696 	bzero(q, sizeof(*q));
1697 }
1698 
1699 /**
1700  *	t3_free_sge_resources - free SGE resources
1701  *	@sc: the adapter softc
1702  *
1703  *	Frees resources used by the SGE queue sets.
1704  */
1705 void
1706 t3_free_sge_resources(adapter_t *sc)
1707 {
1708 	int i, nqsets;
1709 
1710 #ifdef IFNET_MULTIQUEUE
1711 	panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__);
1712 #endif
1713 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1714 		nqsets += sc->port[i].nqsets;
1715 
1716 	for (i = 0; i < nqsets; ++i)
1717 		t3_free_qset(sc, &sc->sge.qs[i]);
1718 }
1719 
1720 /**
1721  *	t3_sge_start - enable SGE
1722  *	@sc: the controller softc
1723  *
1724  *	Enables the SGE for DMAs.  This is the last step in starting packet
1725  *	transfers.
1726  */
1727 void
1728 t3_sge_start(adapter_t *sc)
1729 {
1730 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1731 }
1732 
1733 /**
1734  *	t3_sge_stop - disable SGE operation
1735  *	@sc: the adapter
1736  *
1737  *	Disables the DMA engine.  This can be called in emeregencies (e.g.,
1738  *	from error interrupts) or from normal process context.  In the latter
1739  *	case it also disables any pending queue restart tasklets.  Note that
1740  *	if it is called in interrupt context it cannot disable the restart
1741  *	tasklets as it cannot wait, however the tasklets will have no effect
1742  *	since the doorbells are disabled and the driver will call this again
1743  *	later from process context, at which time the tasklets will be stopped
1744  *	if they are still running.
1745  */
1746 void
1747 t3_sge_stop(adapter_t *sc)
1748 {
1749 	int i, nqsets;
1750 
1751 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1752 
1753 	if (sc->tq == NULL)
1754 		return;
1755 
1756 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1757 		nqsets += sc->port[i].nqsets;
1758 #ifdef notyet
1759 	/*
1760 	 *
1761 	 * XXX
1762 	 */
1763 	for (i = 0; i < nqsets; ++i) {
1764 		struct sge_qset *qs = &sc->sge.qs[i];
1765 
1766 		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
1767 		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
1768 	}
1769 #endif
1770 }
1771 
1772 /**
1773  *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
1774  *	@adapter: the adapter
1775  *	@q: the Tx queue to reclaim descriptors from
1776  *	@reclaimable: the number of descriptors to reclaim
1777  *      @m_vec_size: maximum number of buffers to reclaim
1778  *      @desc_reclaimed: returns the number of descriptors reclaimed
1779  *
1780  *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
1781  *	Tx buffers.  Called with the Tx queue lock held.
1782  *
1783  *      Returns number of buffers of reclaimed
1784  */
1785 void
1786 t3_free_tx_desc(struct sge_txq *q, int reclaimable)
1787 {
1788 	struct tx_sw_desc *txsd;
1789 	unsigned int cidx;
1790 
1791 #ifdef T3_TRACE
1792 	T3_TRACE2(sc->tb[q->cntxt_id & 7],
1793 		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
1794 #endif
1795 	cidx = q->cidx;
1796 	txsd = &q->sdesc[cidx];
1797 	DPRINTF("reclaiming %d WR\n", reclaimable);
1798 	mtx_assert(&q->lock, MA_OWNED);
1799 	while (reclaimable--) {
1800 		DPRINTF("cidx=%d d=%p\n", cidx, txsd);
1801 		if (txsd->mi.mi_base != NULL) {
1802 			if (txsd->flags & TX_SW_DESC_MAPPED) {
1803 				bus_dmamap_unload(q->entry_tag, txsd->map);
1804 				txsd->flags &= ~TX_SW_DESC_MAPPED;
1805 			}
1806 			m_freem_iovec(&txsd->mi);
1807 			buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
1808 			txsd->mi.mi_base = NULL;
1809 
1810 #if defined(DIAGNOSTIC) && 0
1811 			if (m_get_priority(txsd->m[0]) != cidx)
1812 				printf("pri=%d cidx=%d\n",
1813 				    (int)m_get_priority(txsd->m[0]), cidx);
1814 #endif
1815 
1816 		} else
1817 			q->txq_skipped++;
1818 
1819 		++txsd;
1820 		if (++cidx == q->size) {
1821 			cidx = 0;
1822 			txsd = q->sdesc;
1823 		}
1824 	}
1825 	q->cidx = cidx;
1826 
1827 }
1828 
1829 void
1830 t3_free_tx_desc_all(struct sge_txq *q)
1831 {
1832 	int i;
1833 	struct tx_sw_desc *txsd;
1834 
1835 	for (i = 0; i < q->size; i++) {
1836 		txsd = &q->sdesc[i];
1837 		if (txsd->mi.mi_base != NULL) {
1838 			if (txsd->flags & TX_SW_DESC_MAPPED) {
1839 				bus_dmamap_unload(q->entry_tag, txsd->map);
1840 				txsd->flags &= ~TX_SW_DESC_MAPPED;
1841 			}
1842 			m_freem_iovec(&txsd->mi);
1843 			bzero(&txsd->mi, sizeof(txsd->mi));
1844 		}
1845 	}
1846 }
1847 
1848 /**
1849  *	is_new_response - check if a response is newly written
1850  *	@r: the response descriptor
1851  *	@q: the response queue
1852  *
1853  *	Returns true if a response descriptor contains a yet unprocessed
1854  *	response.
1855  */
1856 static __inline int
1857 is_new_response(const struct rsp_desc *r,
1858     const struct sge_rspq *q)
1859 {
1860 	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1861 }
1862 
1863 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1864 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1865 			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1866 			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1867 			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1868 
1869 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1870 #define NOMEM_INTR_DELAY 2500
1871 
1872 /**
1873  *	write_ofld_wr - write an offload work request
1874  *	@adap: the adapter
1875  *	@m: the packet to send
1876  *	@q: the Tx queue
1877  *	@pidx: index of the first Tx descriptor to write
1878  *	@gen: the generation value to use
1879  *	@ndesc: number of descriptors the packet will occupy
1880  *
1881  *	Write an offload work request to send the supplied packet.  The packet
1882  *	data already carry the work request with most fields populated.
1883  */
1884 static void
1885 write_ofld_wr(adapter_t *adap, struct mbuf *m,
1886     struct sge_txq *q, unsigned int pidx,
1887     unsigned int gen, unsigned int ndesc,
1888     bus_dma_segment_t *segs, unsigned int nsegs)
1889 {
1890 	unsigned int sgl_flits, flits;
1891 	struct work_request_hdr *from;
1892 	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1893 	struct tx_desc *d = &q->desc[pidx];
1894 	struct txq_state txqs;
1895 
1896 	if (immediate(m) && segs == NULL) {
1897 		write_imm(d, m, m->m_len, gen);
1898 		return;
1899 	}
1900 
1901 	/* Only TX_DATA builds SGLs */
1902 	from = mtod(m, struct work_request_hdr *);
1903 	memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
1904 
1905 	flits = m->m_len / 8;
1906 	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1907 
1908 	make_sgl(sgp, segs, nsegs);
1909 	sgl_flits = sgl_len(nsegs);
1910 
1911 	txqs.gen = gen;
1912 	txqs.pidx = pidx;
1913 	txqs.compl = 0;
1914 
1915 	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1916 	    from->wr_hi, from->wr_lo);
1917 }
1918 
1919 /**
1920  *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1921  *	@m: the packet
1922  *
1923  * 	Returns the number of Tx descriptors needed for the given offload
1924  * 	packet.  These packets are already fully constructed.
1925  */
1926 static __inline unsigned int
1927 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
1928 {
1929 	unsigned int flits, cnt = 0;
1930 
1931 
1932 	if (m->m_len <= WR_LEN)
1933 		return 1;                 /* packet fits as immediate data */
1934 
1935 	if (m->m_flags & M_IOVEC)
1936 		cnt = mtomv(m)->mv_count;
1937 
1938 	/* headers */
1939 	flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8;
1940 
1941 	return flits_to_desc(flits + sgl_len(cnt));
1942 }
1943 
1944 /**
1945  *	ofld_xmit - send a packet through an offload queue
1946  *	@adap: the adapter
1947  *	@q: the Tx offload queue
1948  *	@m: the packet
1949  *
1950  *	Send an offload packet through an SGE offload queue.
1951  */
1952 static int
1953 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1954 {
1955 	int ret, nsegs;
1956 	unsigned int ndesc;
1957 	unsigned int pidx, gen;
1958 	bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
1959 	struct tx_sw_desc *stx;
1960 
1961 	nsegs = m_get_sgllen(m);
1962 	vsegs = m_get_sgl(m);
1963 	ndesc = calc_tx_descs_ofld(m, nsegs);
1964 	busdma_map_sgl(vsegs, segs, nsegs);
1965 
1966 	stx = &q->sdesc[q->pidx];
1967 	KASSERT(stx->mi.mi_base == NULL, ("mi_base set"));
1968 
1969 	mtx_lock(&q->lock);
1970 again:	reclaim_completed_tx_(q, 16);
1971 	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
1972 	if (__predict_false(ret)) {
1973 		if (ret == 1) {
1974 			printf("no ofld desc avail\n");
1975 
1976 			m_set_priority(m, ndesc);     /* save for restart */
1977 			mtx_unlock(&q->lock);
1978 			return (EINTR);
1979 		}
1980 		goto again;
1981 	}
1982 
1983 	gen = q->gen;
1984 	q->in_use += ndesc;
1985 	pidx = q->pidx;
1986 	q->pidx += ndesc;
1987 	if (q->pidx >= q->size) {
1988 		q->pidx -= q->size;
1989 		q->gen ^= 1;
1990 	}
1991 #ifdef T3_TRACE
1992 	T3_TRACE5(adap->tb[q->cntxt_id & 7],
1993 		  "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
1994 		  ndesc, pidx, skb->len, skb->len - skb->data_len,
1995 		  skb_shinfo(skb)->nr_frags);
1996 #endif
1997 	mtx_unlock(&q->lock);
1998 
1999 	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2000 	check_ring_tx_db(adap, q);
2001 
2002 	return (0);
2003 }
2004 
2005 /**
2006  *	restart_offloadq - restart a suspended offload queue
2007  *	@qs: the queue set cotaining the offload queue
2008  *
2009  *	Resumes transmission on a suspended Tx offload queue.
2010  */
2011 static void
2012 restart_offloadq(void *data, int npending)
2013 {
2014 	struct mbuf *m;
2015 	struct sge_qset *qs = data;
2016 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
2017 	adapter_t *adap = qs->port->adapter;
2018 	bus_dma_segment_t segs[TX_MAX_SEGS];
2019 	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2020 	int nsegs, cleaned;
2021 
2022 	mtx_lock(&q->lock);
2023 again:	cleaned = reclaim_completed_tx_(q, 16);
2024 
2025 	while ((m = mbufq_peek(&q->sendq)) != NULL) {
2026 		unsigned int gen, pidx;
2027 		unsigned int ndesc = m_get_priority(m);
2028 
2029 		if (__predict_false(q->size - q->in_use < ndesc)) {
2030 			setbit(&qs->txq_stopped, TXQ_OFLD);
2031 			smp_mb();
2032 
2033 			if (should_restart_tx(q) &&
2034 			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2035 				goto again;
2036 			q->stops++;
2037 			break;
2038 		}
2039 
2040 		gen = q->gen;
2041 		q->in_use += ndesc;
2042 		pidx = q->pidx;
2043 		q->pidx += ndesc;
2044 		if (q->pidx >= q->size) {
2045 			q->pidx -= q->size;
2046 			q->gen ^= 1;
2047 		}
2048 
2049 		(void)mbufq_dequeue(&q->sendq);
2050 		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2051 		mtx_unlock(&q->lock);
2052 		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2053 		mtx_lock(&q->lock);
2054 	}
2055 	mtx_unlock(&q->lock);
2056 
2057 #if USE_GTS
2058 	set_bit(TXQ_RUNNING, &q->flags);
2059 	set_bit(TXQ_LAST_PKT_DB, &q->flags);
2060 #endif
2061 	t3_write_reg(adap, A_SG_KDOORBELL,
2062 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2063 }
2064 
2065 /**
2066  *	queue_set - return the queue set a packet should use
2067  *	@m: the packet
2068  *
2069  *	Maps a packet to the SGE queue set it should use.  The desired queue
2070  *	set is carried in bits 1-3 in the packet's priority.
2071  */
2072 static __inline int
2073 queue_set(const struct mbuf *m)
2074 {
2075 	return m_get_priority(m) >> 1;
2076 }
2077 
2078 /**
2079  *	is_ctrl_pkt - return whether an offload packet is a control packet
2080  *	@m: the packet
2081  *
2082  *	Determines whether an offload packet should use an OFLD or a CTRL
2083  *	Tx queue.  This is indicated by bit 0 in the packet's priority.
2084  */
2085 static __inline int
2086 is_ctrl_pkt(const struct mbuf *m)
2087 {
2088 	return m_get_priority(m) & 1;
2089 }
2090 
2091 /**
2092  *	t3_offload_tx - send an offload packet
2093  *	@tdev: the offload device to send to
2094  *	@m: the packet
2095  *
2096  *	Sends an offload packet.  We use the packet priority to select the
2097  *	appropriate Tx queue as follows: bit 0 indicates whether the packet
2098  *	should be sent as regular or control, bits 1-3 select the queue set.
2099  */
2100 int
2101 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2102 {
2103 	adapter_t *adap = tdev2adap(tdev);
2104 	struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2105 
2106 	if (__predict_false(is_ctrl_pkt(m)))
2107 		return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
2108 
2109 	return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
2110 }
2111 
2112 /**
2113  *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2114  *	@tdev: the offload device that will be receiving the packets
2115  *	@q: the SGE response queue that assembled the bundle
2116  *	@m: the partial bundle
2117  *	@n: the number of packets in the bundle
2118  *
2119  *	Delivers a (partial) bundle of Rx offload packets to an offload device.
2120  */
2121 static __inline void
2122 deliver_partial_bundle(struct t3cdev *tdev,
2123 			struct sge_rspq *q,
2124 			struct mbuf *mbufs[], int n)
2125 {
2126 	if (n) {
2127 		q->offload_bundles++;
2128 		cxgb_ofld_recv(tdev, mbufs, n);
2129 	}
2130 }
2131 
2132 static __inline int
2133 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2134     struct mbuf *m, struct mbuf *rx_gather[],
2135     unsigned int gather_idx)
2136 {
2137 
2138 	rq->offload_pkts++;
2139 	m->m_pkthdr.header = mtod(m, void *);
2140 	rx_gather[gather_idx++] = m;
2141 	if (gather_idx == RX_BUNDLE_SIZE) {
2142 		cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2143 		gather_idx = 0;
2144 		rq->offload_bundles++;
2145 	}
2146 	return (gather_idx);
2147 }
2148 
2149 static void
2150 restart_tx(struct sge_qset *qs)
2151 {
2152 	struct adapter *sc = qs->port->adapter;
2153 
2154 
2155 	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2156 	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2157 	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2158 		qs->txq[TXQ_OFLD].restarts++;
2159 		DPRINTF("restarting TXQ_OFLD\n");
2160 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2161 	}
2162 	DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2163 	    qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2164 	    qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2165 	    qs->txq[TXQ_CTRL].in_use);
2166 
2167 	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2168 	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2169 	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2170 		qs->txq[TXQ_CTRL].restarts++;
2171 		DPRINTF("restarting TXQ_CTRL\n");
2172 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2173 	}
2174 }
2175 
2176 /**
2177  *	t3_sge_alloc_qset - initialize an SGE queue set
2178  *	@sc: the controller softc
2179  *	@id: the queue set id
2180  *	@nports: how many Ethernet ports will be using this queue set
2181  *	@irq_vec_idx: the IRQ vector index for response queue interrupts
2182  *	@p: configuration parameters for this queue set
2183  *	@ntxq: number of Tx queues for the queue set
2184  *	@pi: port info for queue set
2185  *
2186  *	Allocate resources and initialize an SGE queue set.  A queue set
2187  *	comprises a response queue, two Rx free-buffer queues, and up to 3
2188  *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
2189  *	queue, offload queue, and control queue.
2190  */
2191 int
2192 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2193 		  const struct qset_params *p, int ntxq, struct port_info *pi)
2194 {
2195 	struct sge_qset *q = &sc->sge.qs[id];
2196 	int i, header_size, ret = 0;
2197 
2198 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2199 		if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *),
2200 			    M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
2201 			device_printf(sc->dev, "failed to allocate mbuf ring\n");
2202 			goto err;
2203 		}
2204 		q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0;
2205 		q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size;
2206 		mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF);
2207 	}
2208 
2209 	init_qset_cntxt(q, id);
2210 	q->idx = id;
2211 
2212 	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2213 		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2214 		    &q->fl[0].desc, &q->fl[0].sdesc,
2215 		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
2216 		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2217 		printf("error %d from alloc ring fl0\n", ret);
2218 		goto err;
2219 	}
2220 
2221 	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2222 		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2223 		    &q->fl[1].desc, &q->fl[1].sdesc,
2224 		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
2225 		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2226 		printf("error %d from alloc ring fl1\n", ret);
2227 		goto err;
2228 	}
2229 
2230 	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2231 		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
2232 		    &q->rspq.desc_tag, &q->rspq.desc_map,
2233 		    NULL, NULL)) != 0) {
2234 		printf("error %d from alloc ring rspq\n", ret);
2235 		goto err;
2236 	}
2237 
2238 	for (i = 0; i < ntxq; ++i) {
2239 		/*
2240 		 * The control queue always uses immediate data so does not
2241 		 * need to keep track of any mbufs.
2242 		 * XXX Placeholder for future TOE support.
2243 		 */
2244 		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2245 
2246 		if ((ret = alloc_ring(sc, p->txq_size[i],
2247 			    sizeof(struct tx_desc), sz,
2248 			    &q->txq[i].phys_addr, &q->txq[i].desc,
2249 			    &q->txq[i].sdesc, &q->txq[i].desc_tag,
2250 			    &q->txq[i].desc_map,
2251 			    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2252 			printf("error %d from alloc ring tx %i\n", ret, i);
2253 			goto err;
2254 		}
2255 		mbufq_init(&q->txq[i].sendq);
2256 		q->txq[i].gen = 1;
2257 		q->txq[i].size = p->txq_size[i];
2258 		snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2259 		    device_get_unit(sc->dev), irq_vec_idx, i);
2260 		MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2261 	}
2262 
2263 	q->txq[TXQ_ETH].port = pi;
2264 
2265 	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2266 	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2267 	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
2268 	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
2269 
2270 	q->fl[0].gen = q->fl[1].gen = 1;
2271 	q->fl[0].size = p->fl_size;
2272 	q->fl[1].size = p->jumbo_size;
2273 
2274 	q->rspq.gen = 1;
2275 	q->rspq.cidx = 0;
2276 	q->rspq.size = p->rspq_size;
2277 
2278 
2279 	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
2280 	q->txq[TXQ_ETH].stop_thres = nports *
2281 	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2282 
2283 	q->fl[0].buf_size = (MCLBYTES - header_size);
2284 	q->fl[0].zone = zone_clust;
2285 	q->fl[0].type = EXT_CLUSTER;
2286 #if __FreeBSD_version > 800000
2287 	if (cxgb_use_16k_clusters) {
2288 		q->fl[1].buf_size = MJUM16BYTES - header_size;
2289 		q->fl[1].zone = zone_jumbo16;
2290 		q->fl[1].type = EXT_JUMBO16;
2291 	} else {
2292 		q->fl[1].buf_size = MJUM9BYTES - header_size;
2293 		q->fl[1].zone = zone_jumbo9;
2294 		q->fl[1].type = EXT_JUMBO9;
2295 	}
2296 #else
2297 	q->fl[1].buf_size = MJUMPAGESIZE - header_size;
2298 	q->fl[1].zone = zone_jumbop;
2299 	q->fl[1].type = EXT_JUMBOP;
2300 #endif
2301 	q->lro.enabled = lro_default;
2302 
2303 	mtx_lock(&sc->sge.reg_lock);
2304 	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2305 				   q->rspq.phys_addr, q->rspq.size,
2306 				   q->fl[0].buf_size, 1, 0);
2307 	if (ret) {
2308 		printf("error %d from t3_sge_init_rspcntxt\n", ret);
2309 		goto err_unlock;
2310 	}
2311 
2312 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2313 		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2314 					  q->fl[i].phys_addr, q->fl[i].size,
2315 					  q->fl[i].buf_size, p->cong_thres, 1,
2316 					  0);
2317 		if (ret) {
2318 			printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2319 			goto err_unlock;
2320 		}
2321 	}
2322 
2323 	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2324 				 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2325 				 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2326 				 1, 0);
2327 	if (ret) {
2328 		printf("error %d from t3_sge_init_ecntxt\n", ret);
2329 		goto err_unlock;
2330 	}
2331 
2332 	if (ntxq > 1) {
2333 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2334 					 USE_GTS, SGE_CNTXT_OFLD, id,
2335 					 q->txq[TXQ_OFLD].phys_addr,
2336 					 q->txq[TXQ_OFLD].size, 0, 1, 0);
2337 		if (ret) {
2338 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2339 			goto err_unlock;
2340 		}
2341 	}
2342 
2343 	if (ntxq > 2) {
2344 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2345 					 SGE_CNTXT_CTRL, id,
2346 					 q->txq[TXQ_CTRL].phys_addr,
2347 					 q->txq[TXQ_CTRL].size,
2348 					 q->txq[TXQ_CTRL].token, 1, 0);
2349 		if (ret) {
2350 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2351 			goto err_unlock;
2352 		}
2353 	}
2354 
2355 	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2356 	    device_get_unit(sc->dev), irq_vec_idx);
2357 	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2358 
2359 	mtx_unlock(&sc->sge.reg_lock);
2360 	t3_update_qset_coalesce(q, p);
2361 	q->port = pi;
2362 
2363 	refill_fl(sc, &q->fl[0], q->fl[0].size);
2364 	refill_fl(sc, &q->fl[1], q->fl[1].size);
2365 	refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2366 
2367 	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2368 		     V_NEWTIMER(q->rspq.holdoff_tmr));
2369 
2370 	return (0);
2371 
2372 err_unlock:
2373 	mtx_unlock(&sc->sge.reg_lock);
2374 err:
2375 	t3_free_qset(sc, q);
2376 
2377 	return (ret);
2378 }
2379 
2380 void
2381 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2382 {
2383 	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2384 	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2385 	struct ifnet *ifp = pi->ifp;
2386 
2387 	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2388 
2389 	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2390 	    cpl->csum_valid && cpl->csum == 0xffff) {
2391 		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
2392 		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2393 		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2394 		m->m_pkthdr.csum_data = 0xffff;
2395 	}
2396 	/*
2397 	 * XXX need to add VLAN support for 6.x
2398 	 */
2399 #ifdef VLAN_SUPPORTED
2400 	if (__predict_false(cpl->vlan_valid)) {
2401 		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2402 		m->m_flags |= M_VLANTAG;
2403 	}
2404 #endif
2405 
2406 	m->m_pkthdr.rcvif = ifp;
2407 	m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2408 #ifndef DISABLE_MBUF_IOVEC
2409 	m_explode(m);
2410 #endif
2411 	/*
2412 	 * adjust after conversion to mbuf chain
2413 	 */
2414 	m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2415 	m->m_len -= (sizeof(*cpl) + ethpad);
2416 	m->m_data += (sizeof(*cpl) + ethpad);
2417 
2418 	(*ifp->if_input)(ifp, m);
2419 }
2420 
2421 static void
2422 ext_free_handler(void *cl, void * arg)
2423 {
2424 	uintptr_t type = (uintptr_t)arg;
2425 	uma_zone_t zone;
2426 	struct mbuf *m;
2427 
2428 	m = cl;
2429 	zone = m_getzonefromtype(type);
2430 	m->m_ext.ext_type = (int)type;
2431 	cxgb_ext_freed++;
2432 	cxgb_cache_put(zone, cl);
2433 }
2434 
2435 static void
2436 init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone)
2437 {
2438 	struct mbuf *m;
2439 	int header_size;
2440 
2441 	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) +
2442 	    sizeof(struct m_ext_) + sizeof(uint32_t);
2443 
2444 	bzero(cl, header_size);
2445 	m = (struct mbuf *)cl;
2446 
2447 	SLIST_INIT(&m->m_pkthdr.tags);
2448 	m->m_type = MT_DATA;
2449 	m->m_flags = flags | M_NOFREE | M_EXT;
2450 	m->m_data = cl + header_size;
2451 	m->m_ext.ext_buf = cl;
2452 	m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t));
2453 	m->m_ext.ext_size = m_getsizefromtype(type);
2454 	m->m_ext.ext_free = ext_free_handler;
2455 	m->m_ext.ext_arg1 = cl;
2456 	m->m_ext.ext_arg2 = (void *)(uintptr_t)type;
2457 	m->m_ext.ext_type = EXT_EXTREF;
2458 	*(m->m_ext.ref_cnt) = 1;
2459 	DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt);
2460 }
2461 
2462 
2463 /**
2464  *	get_packet - return the next ingress packet buffer from a free list
2465  *	@adap: the adapter that received the packet
2466  *	@drop_thres: # of remaining buffers before we start dropping packets
2467  *	@qs: the qset that the SGE free list holding the packet belongs to
2468  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2469  *      @r: response descriptor
2470  *
2471  *	Get the next packet from a free list and complete setup of the
2472  *	sk_buff.  If the packet is small we make a copy and recycle the
2473  *	original buffer, otherwise we use the original buffer itself.  If a
2474  *	positive drop threshold is supplied packets are dropped and their
2475  *	buffers recycled if (a) the number of remaining buffers is under the
2476  *	threshold and the packet is too big to copy, or (b) the packet should
2477  *	be copied but there is no memory for the copy.
2478  */
2479 #ifdef DISABLE_MBUF_IOVEC
2480 
2481 static int
2482 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2483     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2484 {
2485 
2486 	unsigned int len_cq =  ntohl(r->len_cq);
2487 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2488 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2489 	uint32_t len = G_RSPD_LEN(len_cq);
2490 	uint32_t flags = ntohl(r->flags);
2491 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2492 	caddr_t cl;
2493 	struct mbuf *m, *m0;
2494 	int ret = 0;
2495 
2496 	prefetch(sd->rxsd_cl);
2497 
2498 	fl->credits--;
2499 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2500 
2501 	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2502 		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2503 			goto skip_recycle;
2504 		cl = mtod(m0, void *);
2505 		memcpy(cl, sd->data, len);
2506 		recycle_rx_buf(adap, fl, fl->cidx);
2507 		m = m0;
2508 		m0->m_len = len;
2509 	} else {
2510 	skip_recycle:
2511 
2512 		bus_dmamap_unload(fl->entry_tag, sd->map);
2513 		cl = sd->rxsd_cl;
2514 		m = m0 = (struct mbuf *)cl;
2515 
2516 		if ((sopeop == RSPQ_SOP_EOP) ||
2517 		    (sopeop == RSPQ_SOP))
2518 			flags = M_PKTHDR;
2519 		init_cluster_mbuf(cl, flags, fl->type, fl->zone);
2520 		m0->m_len = len;
2521 	}
2522 	switch(sopeop) {
2523 	case RSPQ_SOP_EOP:
2524 		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2525 		mh->mh_head = mh->mh_tail = m;
2526 		m->m_pkthdr.len = len;
2527 		ret = 1;
2528 		break;
2529 	case RSPQ_NSOP_NEOP:
2530 		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2531 		if (mh->mh_tail == NULL) {
2532 			log(LOG_ERR, "discarding intermediate descriptor entry\n");
2533 			m_freem(m);
2534 			break;
2535 		}
2536 		mh->mh_tail->m_next = m;
2537 		mh->mh_tail = m;
2538 		mh->mh_head->m_pkthdr.len += len;
2539 		ret = 0;
2540 		break;
2541 	case RSPQ_SOP:
2542 		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2543 		m->m_pkthdr.len = len;
2544 		mh->mh_head = mh->mh_tail = m;
2545 		ret = 0;
2546 		break;
2547 	case RSPQ_EOP:
2548 		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2549 		mh->mh_head->m_pkthdr.len += len;
2550 		mh->mh_tail->m_next = m;
2551 		mh->mh_tail = m;
2552 		ret = 1;
2553 		break;
2554 	}
2555 	if (++fl->cidx == fl->size)
2556 		fl->cidx = 0;
2557 
2558 	return (ret);
2559 }
2560 
2561 #else
2562 
2563 static int
2564 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2565     struct mbuf **m, struct rsp_desc *r)
2566 {
2567 
2568 	unsigned int len_cq =  ntohl(r->len_cq);
2569 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2570 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2571 	uint32_t len = G_RSPD_LEN(len_cq);
2572 	uint32_t flags = ntohl(r->flags);
2573 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2574 	void *cl;
2575 	int ret = 0;
2576 	struct mbuf *m0;
2577 #if 0
2578 	if ((sd + 1 )->rxsd_cl)
2579 		prefetch((sd + 1)->rxsd_cl);
2580 	if ((sd + 2)->rxsd_cl)
2581 		prefetch((sd + 2)->rxsd_cl);
2582 #endif
2583 	DPRINTF("rx cpu=%d\n", curcpu);
2584 	fl->credits--;
2585 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2586 
2587 	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2588 		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2589 			goto skip_recycle;
2590 		cl = mtod(m0, void *);
2591 		memcpy(cl, sd->data, len);
2592 		recycle_rx_buf(adap, fl, fl->cidx);
2593 		*m = m0;
2594 	} else {
2595 	skip_recycle:
2596 		bus_dmamap_unload(fl->entry_tag, sd->map);
2597 		cl = sd->rxsd_cl;
2598 		*m = m0 = (struct mbuf *)cl;
2599 	}
2600 
2601 	switch(sopeop) {
2602 	case RSPQ_SOP_EOP:
2603 		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2604 		if (cl == sd->rxsd_cl)
2605 			init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone);
2606 		m0->m_len = m0->m_pkthdr.len = len;
2607 		ret = 1;
2608 		goto done;
2609 		break;
2610 	case RSPQ_NSOP_NEOP:
2611 		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2612 		panic("chaining unsupported");
2613 		ret = 0;
2614 		break;
2615 	case RSPQ_SOP:
2616 		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2617 		panic("chaining unsupported");
2618 		m_iovinit(m0);
2619 		ret = 0;
2620 		break;
2621 	case RSPQ_EOP:
2622 		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2623 		panic("chaining unsupported");
2624 		ret = 1;
2625 		break;
2626 	}
2627 	panic("append not supported");
2628 #if 0
2629 	m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref);
2630 #endif
2631 done:
2632 	if (++fl->cidx == fl->size)
2633 		fl->cidx = 0;
2634 
2635 	return (ret);
2636 }
2637 #endif
2638 /**
2639  *	handle_rsp_cntrl_info - handles control information in a response
2640  *	@qs: the queue set corresponding to the response
2641  *	@flags: the response control flags
2642  *
2643  *	Handles the control information of an SGE response, such as GTS
2644  *	indications and completion credits for the queue set's Tx queues.
2645  *	HW coalesces credits, we don't do any extra SW coalescing.
2646  */
2647 static __inline void
2648 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2649 {
2650 	unsigned int credits;
2651 
2652 #if USE_GTS
2653 	if (flags & F_RSPD_TXQ0_GTS)
2654 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2655 #endif
2656 	credits = G_RSPD_TXQ0_CR(flags);
2657 	if (credits)
2658 		qs->txq[TXQ_ETH].processed += credits;
2659 
2660 	credits = G_RSPD_TXQ2_CR(flags);
2661 	if (credits)
2662 		qs->txq[TXQ_CTRL].processed += credits;
2663 
2664 # if USE_GTS
2665 	if (flags & F_RSPD_TXQ1_GTS)
2666 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2667 # endif
2668 	credits = G_RSPD_TXQ1_CR(flags);
2669 	if (credits)
2670 		qs->txq[TXQ_OFLD].processed += credits;
2671 
2672 }
2673 
2674 static void
2675 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2676     unsigned int sleeping)
2677 {
2678 	;
2679 }
2680 
2681 /**
2682  *	process_responses - process responses from an SGE response queue
2683  *	@adap: the adapter
2684  *	@qs: the queue set to which the response queue belongs
2685  *	@budget: how many responses can be processed in this round
2686  *
2687  *	Process responses from an SGE response queue up to the supplied budget.
2688  *	Responses include received packets as well as credits and other events
2689  *	for the queues that belong to the response queue's queue set.
2690  *	A negative budget is effectively unlimited.
2691  *
2692  *	Additionally choose the interrupt holdoff time for the next interrupt
2693  *	on this queue.  If the system is under memory shortage use a fairly
2694  *	long delay to help recovery.
2695  */
2696 int
2697 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2698 {
2699 	struct sge_rspq *rspq = &qs->rspq;
2700 	struct rsp_desc *r = &rspq->desc[rspq->cidx];
2701 	int budget_left = budget;
2702 	unsigned int sleeping = 0;
2703 	int lro = qs->lro.enabled;
2704 	struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2705 	int ngathered = 0;
2706 #ifdef DEBUG
2707 	static int last_holdoff = 0;
2708 	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2709 		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2710 		last_holdoff = rspq->holdoff_tmr;
2711 	}
2712 #endif
2713 	rspq->next_holdoff = rspq->holdoff_tmr;
2714 
2715 	while (__predict_true(budget_left && is_new_response(r, rspq))) {
2716 		int eth, eop = 0, ethpad = 0;
2717 		uint32_t flags = ntohl(r->flags);
2718 		uint32_t rss_csum = *(const uint32_t *)r;
2719 		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2720 
2721 		eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2722 
2723 		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2724 			/* XXX */
2725 			printf("async notification\n");
2726 
2727 		} else if  (flags & F_RSPD_IMM_DATA_VALID) {
2728 			struct mbuf *m = NULL;
2729 
2730 			DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
2731 			    r->rss_hdr.opcode, rspq->cidx);
2732 			if (rspq->rspq_mh.mh_head == NULL)
2733 				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2734                         else
2735 				m = m_gethdr(M_DONTWAIT, MT_DATA);
2736 
2737 			/*
2738 			 * XXX revisit me
2739 			 */
2740 			if (rspq->rspq_mh.mh_head == NULL &&  m == NULL) {
2741 				rspq->next_holdoff = NOMEM_INTR_DELAY;
2742 				budget_left--;
2743 				break;
2744 			}
2745 			get_imm_packet(adap, r, rspq->rspq_mh.mh_head, m, flags);
2746 
2747 			eop = 1;
2748 			rspq->imm_data++;
2749 		} else if (r->len_cq) {
2750 			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2751 
2752 #ifdef DISABLE_MBUF_IOVEC
2753 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
2754 #else
2755 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r);
2756 #ifdef IFNET_MULTIQUEUE
2757 			rspq->rspq_mbuf->m_pkthdr.rss_hash = rss_hash;
2758 #endif
2759 #endif
2760 			ethpad = 2;
2761 		} else {
2762 			DPRINTF("pure response\n");
2763 			rspq->pure_rsps++;
2764 		}
2765 
2766 		if (flags & RSPD_CTRL_MASK) {
2767 			sleeping |= flags & RSPD_GTS_MASK;
2768 			handle_rsp_cntrl_info(qs, flags);
2769 		}
2770 
2771 		r++;
2772 		if (__predict_false(++rspq->cidx == rspq->size)) {
2773 			rspq->cidx = 0;
2774 			rspq->gen ^= 1;
2775 			r = rspq->desc;
2776 		}
2777 		prefetch(r);
2778 		if (++rspq->credits >= (rspq->size / 4)) {
2779 			refill_rspq(adap, rspq, rspq->credits);
2780 			rspq->credits = 0;
2781 		}
2782 		DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags);
2783 
2784 		if (!eth && eop) {
2785 			rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
2786 			/*
2787 			 * XXX size mismatch
2788 			 */
2789 			m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
2790 
2791 			ngathered = rx_offload(&adap->tdev, rspq,
2792 			    rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
2793 			rspq->rspq_mh.mh_head = NULL;
2794 			DPRINTF("received offload packet\n");
2795 
2796 		} else if (eth && eop) {
2797 			prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *));
2798 			prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES);
2799 
2800 			t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad,
2801 			    rss_hash, rss_csum, lro);
2802 			DPRINTF("received tunnel packet\n");
2803 				rspq->rspq_mh.mh_head = NULL;
2804 
2805 		}
2806 		__refill_fl_lt(adap, &qs->fl[0], 32);
2807 		__refill_fl_lt(adap, &qs->fl[1], 32);
2808 		--budget_left;
2809 	}
2810 
2811 	deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
2812 	t3_lro_flush(adap, qs, &qs->lro);
2813 
2814 	if (sleeping)
2815 		check_ring_db(adap, qs, sleeping);
2816 
2817 	smp_mb();  /* commit Tx queue processed updates */
2818 	if (__predict_false(qs->txq_stopped > 1)) {
2819 		printf("restarting tx on %p\n", qs);
2820 
2821 		restart_tx(qs);
2822 	}
2823 
2824 	__refill_fl_lt(adap, &qs->fl[0], 512);
2825 	__refill_fl_lt(adap, &qs->fl[1], 512);
2826 	budget -= budget_left;
2827 	return (budget);
2828 }
2829 
2830 /*
2831  * A helper function that processes responses and issues GTS.
2832  */
2833 static __inline int
2834 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2835 {
2836 	int work;
2837 	static int last_holdoff = 0;
2838 
2839 	work = process_responses(adap, rspq_to_qset(rq), -1);
2840 
2841 	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
2842 		printf("next_holdoff=%d\n", rq->next_holdoff);
2843 		last_holdoff = rq->next_holdoff;
2844 	}
2845 	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2846 	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2847 
2848 	return (work);
2849 }
2850 
2851 
2852 /*
2853  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2854  * Handles data events from SGE response queues as well as error and other
2855  * async events as they all use the same interrupt pin.  We use one SGE
2856  * response queue per port in this mode and protect all response queues with
2857  * queue 0's lock.
2858  */
2859 void
2860 t3b_intr(void *data)
2861 {
2862 	uint32_t i, map;
2863 	adapter_t *adap = data;
2864 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2865 
2866 	t3_write_reg(adap, A_PL_CLI, 0);
2867 	map = t3_read_reg(adap, A_SG_DATA_INTR);
2868 
2869 	if (!map)
2870 		return;
2871 
2872 	if (__predict_false(map & F_ERRINTR))
2873 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2874 
2875 	mtx_lock(&q0->lock);
2876 	for_each_port(adap, i)
2877 	    if (map & (1 << i))
2878 			process_responses_gts(adap, &adap->sge.qs[i].rspq);
2879 	mtx_unlock(&q0->lock);
2880 }
2881 
2882 /*
2883  * The MSI interrupt handler.  This needs to handle data events from SGE
2884  * response queues as well as error and other async events as they all use
2885  * the same MSI vector.  We use one SGE response queue per port in this mode
2886  * and protect all response queues with queue 0's lock.
2887  */
2888 void
2889 t3_intr_msi(void *data)
2890 {
2891 	adapter_t *adap = data;
2892 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2893 	int i, new_packets = 0;
2894 
2895 	mtx_lock(&q0->lock);
2896 
2897 	for_each_port(adap, i)
2898 	    if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
2899 		    new_packets = 1;
2900 	mtx_unlock(&q0->lock);
2901 	if (new_packets == 0)
2902 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2903 }
2904 
2905 void
2906 t3_intr_msix(void *data)
2907 {
2908 	struct sge_qset *qs = data;
2909 	adapter_t *adap = qs->port->adapter;
2910 	struct sge_rspq *rspq = &qs->rspq;
2911 #ifndef IFNET_MULTIQUEUE
2912 	mtx_lock(&rspq->lock);
2913 #else
2914 	if (mtx_trylock(&rspq->lock))
2915 #endif
2916 	{
2917 
2918 		if (process_responses_gts(adap, rspq) == 0)
2919 			rspq->unhandled_irqs++;
2920 		mtx_unlock(&rspq->lock);
2921 	}
2922 }
2923 
2924 #define QDUMP_SBUF_SIZE		32 * 400
2925 static int
2926 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
2927 {
2928 	struct sge_rspq *rspq;
2929 	struct sge_qset *qs;
2930 	int i, err, dump_end, idx;
2931 	static int multiplier = 1;
2932 	struct sbuf *sb;
2933 	struct rsp_desc *rspd;
2934 	uint32_t data[4];
2935 
2936 	rspq = arg1;
2937 	qs = rspq_to_qset(rspq);
2938 	if (rspq->rspq_dump_count == 0)
2939 		return (0);
2940 	if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
2941 		log(LOG_WARNING,
2942 		    "dump count is too large %d\n", rspq->rspq_dump_count);
2943 		rspq->rspq_dump_count = 0;
2944 		return (EINVAL);
2945 	}
2946 	if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
2947 		log(LOG_WARNING,
2948 		    "dump start of %d is greater than queue size\n",
2949 		    rspq->rspq_dump_start);
2950 		rspq->rspq_dump_start = 0;
2951 		return (EINVAL);
2952 	}
2953 	err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
2954 	if (err)
2955 		return (err);
2956 retry_sbufops:
2957 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
2958 
2959 	sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
2960 	    (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
2961 	    ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
2962 	sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
2963 	    ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
2964 
2965 	sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
2966 	    (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
2967 
2968 	dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
2969 	for (i = rspq->rspq_dump_start; i < dump_end; i++) {
2970 		idx = i & (RSPQ_Q_SIZE-1);
2971 
2972 		rspd = &rspq->desc[idx];
2973 		sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
2974 		    idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
2975 		    rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
2976 		sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
2977 		    rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
2978 		    be32toh(rspd->len_cq), rspd->intr_gen);
2979 	}
2980 	if (sbuf_overflowed(sb)) {
2981 		sbuf_delete(sb);
2982 		multiplier++;
2983 		goto retry_sbufops;
2984 	}
2985 	sbuf_finish(sb);
2986 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
2987 	sbuf_delete(sb);
2988 	return (err);
2989 }
2990 
2991 
2992 /*
2993  * broken by recent mbuf changes
2994  */
2995 static int
2996 t3_dump_txq(SYSCTL_HANDLER_ARGS)
2997 {
2998 	struct sge_txq *txq;
2999 	struct sge_qset *qs;
3000 	int i, j, err, dump_end;
3001 	static int multiplier = 1;
3002 	struct sbuf *sb;
3003 	struct tx_desc *txd;
3004 	uint32_t *WR, wr_hi, wr_lo, gen;
3005 	uint32_t data[4];
3006 
3007 	txq = arg1;
3008 	qs = txq_to_qset(txq, TXQ_ETH);
3009 	if (txq->txq_dump_count == 0) {
3010 		return (0);
3011 	}
3012 	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3013 		log(LOG_WARNING,
3014 		    "dump count is too large %d\n", txq->txq_dump_count);
3015 		txq->txq_dump_count = 1;
3016 		return (EINVAL);
3017 	}
3018 	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3019 		log(LOG_WARNING,
3020 		    "dump start of %d is greater than queue size\n",
3021 		    txq->txq_dump_start);
3022 		txq->txq_dump_start = 0;
3023 		return (EINVAL);
3024 	}
3025 	err = t3_sge_read_ecntxt(qs->port->adapter, txq->cntxt_id, data);
3026 	if (err)
3027 		return (err);
3028 
3029 
3030 retry_sbufops:
3031 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3032 
3033 	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3034 	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3035 	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3036 	sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
3037 	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3038 	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3039 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3040 	    txq->txq_dump_start,
3041 	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3042 
3043 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3044 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3045 		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3046 		WR = (uint32_t *)txd->flit;
3047 		wr_hi = ntohl(WR[0]);
3048 		wr_lo = ntohl(WR[1]);
3049 		gen = G_WR_GEN(wr_lo);
3050 
3051 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3052 		    wr_hi, wr_lo, gen);
3053 		for (j = 2; j < 30; j += 4)
3054 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3055 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3056 
3057 	}
3058 	if (sbuf_overflowed(sb)) {
3059 		sbuf_delete(sb);
3060 		multiplier++;
3061 		goto retry_sbufops;
3062 	}
3063 	sbuf_finish(sb);
3064 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3065 	sbuf_delete(sb);
3066 	return (err);
3067 }
3068 
3069 
3070 static int
3071 t3_lro_enable(SYSCTL_HANDLER_ARGS)
3072 {
3073 	adapter_t *sc;
3074 	int i, j, enabled, err, nqsets = 0;
3075 
3076 #ifndef LRO_WORKING
3077 	return (0);
3078 #endif
3079 	sc = arg1;
3080 	enabled = sc->sge.qs[0].lro.enabled;
3081         err = sysctl_handle_int(oidp, &enabled, arg2, req);
3082 
3083 	if (err != 0)
3084 		return (err);
3085 	if (enabled == sc->sge.qs[0].lro.enabled)
3086 		return (0);
3087 
3088 	for (i = 0; i < sc->params.nports; i++)
3089 		for (j = 0; j < sc->port[i].nqsets; j++)
3090 			nqsets++;
3091 
3092 	for (i = 0; i < nqsets; i++)
3093 		sc->sge.qs[i].lro.enabled = enabled;
3094 
3095 	return (0);
3096 }
3097 
3098 static int
3099 t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
3100 {
3101 	adapter_t *sc = arg1;
3102 	struct qset_params *qsp = &sc->params.sge.qset[0];
3103 	int coalesce_nsecs;
3104 	struct sge_qset *qs;
3105 	int i, j, err, nqsets = 0;
3106 	struct mtx *lock;
3107 
3108 	if ((sc->flags & FULL_INIT_DONE) == 0)
3109 		return (ENXIO);
3110 
3111 	coalesce_nsecs = qsp->coalesce_nsecs;
3112         err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);
3113 
3114 	if (err != 0) {
3115 		return (err);
3116 	}
3117 	if (coalesce_nsecs == qsp->coalesce_nsecs)
3118 		return (0);
3119 
3120 	for (i = 0; i < sc->params.nports; i++)
3121 		for (j = 0; j < sc->port[i].nqsets; j++)
3122 			nqsets++;
3123 
3124 	coalesce_nsecs = max(100, coalesce_nsecs);
3125 
3126 	for (i = 0; i < nqsets; i++) {
3127 		qs = &sc->sge.qs[i];
3128 		qsp = &sc->params.sge.qset[i];
3129 		qsp->coalesce_nsecs = coalesce_nsecs;
3130 
3131 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3132 			    &sc->sge.qs[0].rspq.lock;
3133 
3134 		mtx_lock(lock);
3135 		t3_update_qset_coalesce(qs, qsp);
3136 		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3137 		    V_NEWTIMER(qs->rspq.holdoff_tmr));
3138 		mtx_unlock(lock);
3139 	}
3140 
3141 	return (0);
3142 }
3143 
3144 
3145 void
3146 t3_add_attach_sysctls(adapter_t *sc)
3147 {
3148 	struct sysctl_ctx_list *ctx;
3149 	struct sysctl_oid_list *children;
3150 
3151 	ctx = device_get_sysctl_ctx(sc->dev);
3152 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3153 
3154 	/* random information */
3155 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3156 	    "firmware_version",
3157 	    CTLFLAG_RD, &sc->fw_version,
3158 	    0, "firmware version");
3159 
3160 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3161 	    "enable_lro",
3162 	    CTLTYPE_INT|CTLFLAG_RW, sc,
3163 	    0, t3_lro_enable,
3164 	    "I", "enable large receive offload");
3165 
3166 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3167 	    "enable_debug",
3168 	    CTLFLAG_RW, &cxgb_debug,
3169 	    0, "enable verbose debugging output");
3170 	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce",
3171 	    CTLFLAG_RD, &sc->tunq_coalesce,
3172 	    "#tunneled packets freed");
3173 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3174 	    "txq_overrun",
3175 	    CTLFLAG_RD, &txq_fills,
3176 	    0, "#times txq overrun");
3177 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3178 	    "pcpu_cache_enable",
3179 	    CTLFLAG_RW, &cxgb_pcpu_cache_enable,
3180 	    0, "#enable driver local pcpu caches");
3181 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3182 	    "cache_alloc",
3183 	    CTLFLAG_RD, &cxgb_cached_allocations,
3184 	    0, "#times a cluster was allocated from cache");
3185 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3186 	    "cached",
3187 	    CTLFLAG_RD, &cxgb_cached,
3188 	    0, "#times a cluster was cached");
3189 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3190 	    "ext_freed",
3191 	    CTLFLAG_RD, &cxgb_ext_freed,
3192 	    0, "#times a cluster was freed through ext_free");
3193 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3194 	    "mbufs_outstanding",
3195 	    CTLFLAG_RD, &cxgb_mbufs_outstanding,
3196 	    0, "#mbufs in flight in the driver");
3197 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3198 	    "pack_outstanding",
3199 	    CTLFLAG_RD, &cxgb_pack_outstanding,
3200 	    0, "#packet in flight in the driver");
3201 }
3202 
3203 
3204 static const char *rspq_name = "rspq";
3205 static const char *txq_names[] =
3206 {
3207 	"txq_eth",
3208 	"txq_ofld",
3209 	"txq_ctrl"
3210 };
3211 
3212 void
3213 t3_add_configured_sysctls(adapter_t *sc)
3214 {
3215 	struct sysctl_ctx_list *ctx;
3216 	struct sysctl_oid_list *children;
3217 	int i, j;
3218 
3219 	ctx = device_get_sysctl_ctx(sc->dev);
3220 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3221 
3222 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3223 	    "intr_coal",
3224 	    CTLTYPE_INT|CTLFLAG_RW, sc,
3225 	    0, t3_set_coalesce_nsecs,
3226 	    "I", "interrupt coalescing timer (ns)");
3227 
3228 	for (i = 0; i < sc->params.nports; i++) {
3229 		struct port_info *pi = &sc->port[i];
3230 		struct sysctl_oid *poid;
3231 		struct sysctl_oid_list *poidlist;
3232 
3233 		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3234 		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3235 		    pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3236 		poidlist = SYSCTL_CHILDREN(poid);
3237 		SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3238 		    "nqsets", CTLFLAG_RD, &pi->nqsets,
3239 		    0, "#queue sets");
3240 
3241 		for (j = 0; j < pi->nqsets; j++) {
3242 			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3243 			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid;
3244 			struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist;
3245 			struct sge_txq *txq = &qs->txq[TXQ_ETH];
3246 
3247 			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3248 
3249 			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3250 			    qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3251 			qspoidlist = SYSCTL_CHILDREN(qspoid);
3252 
3253 			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3254 			    rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3255 			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3256 
3257 			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3258 			    txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3259 			txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3260 
3261 
3262 
3263 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3264 			    CTLFLAG_RD, &qs->rspq.size,
3265 			    0, "#entries in response queue");
3266 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3267 			    CTLFLAG_RD, &qs->rspq.cidx,
3268 			    0, "consumer index");
3269 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3270 			    CTLFLAG_RD, &qs->rspq.credits,
3271 			    0, "#credits");
3272 			SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3273 			    CTLFLAG_RD, &qs->rspq.phys_addr,
3274 			    "physical_address_of the queue");
3275 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3276 			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3277 			    0, "start rspq dump entry");
3278 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3279 			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3280 			    0, "#rspq entries to dump");
3281 			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3282 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3283 			    0, t3_dump_rspq, "A", "dump of the response queue");
3284 
3285 
3286 
3287 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
3288 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
3289 			    0, "#tunneled packets dropped");
3290 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3291 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3292 			    0, "#tunneled packets waiting to be sent");
3293 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3294 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3295 			    0, "#tunneled packets queue producer index");
3296 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3297 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3298 			    0, "#tunneled packets queue consumer index");
3299 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3300 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3301 			    0, "#tunneled packets processed by the card");
3302 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3303 			    CTLFLAG_RD, &txq->cleaned,
3304 			    0, "#tunneled packets cleaned");
3305 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3306 			    CTLFLAG_RD, &txq->in_use,
3307 			    0, "#tunneled packet slots in use");
3308 			SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3309 			    CTLFLAG_RD, &txq->txq_frees,
3310 			    "#tunneled packets freed");
3311 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3312 			    CTLFLAG_RD, &txq->txq_skipped,
3313 			    0, "#tunneled packet descriptors skipped");
3314 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced",
3315 			    CTLFLAG_RD, &txq->txq_coalesced,
3316 			    0, "#tunneled packets coalesced");
3317 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3318 			    CTLFLAG_RD, &txq->txq_enqueued,
3319 			    0, "#tunneled packets enqueued to hardware");
3320 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3321 			    CTLFLAG_RD, &qs->txq_stopped,
3322 			    0, "tx queues stopped");
3323 			SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3324 			    CTLFLAG_RD, &txq->phys_addr,
3325 			    "physical_address_of the queue");
3326 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3327 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3328 			    0, "txq generation");
3329 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3330 			    CTLFLAG_RD, &txq->cidx,
3331 			    0, "hardware queue cidx");
3332 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3333 			    CTLFLAG_RD, &txq->pidx,
3334 			    0, "hardware queue pidx");
3335 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3336 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3337 			    0, "txq start idx for dump");
3338 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3339 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3340 			    0, "txq #entries to dump");
3341 			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3342 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3343 			    0, t3_dump_txq, "A", "dump of the transmit queue");
3344 		}
3345 	}
3346 }
3347 
3348 /**
3349  *	t3_get_desc - dump an SGE descriptor for debugging purposes
3350  *	@qs: the queue set
3351  *	@qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3352  *	@idx: the descriptor index in the queue
3353  *	@data: where to dump the descriptor contents
3354  *
3355  *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3356  *	size of the descriptor.
3357  */
3358 int
3359 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3360 		unsigned char *data)
3361 {
3362 	if (qnum >= 6)
3363 		return (EINVAL);
3364 
3365 	if (qnum < 3) {
3366 		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3367 			return -EINVAL;
3368 		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3369 		return sizeof(struct tx_desc);
3370 	}
3371 
3372 	if (qnum == 3) {
3373 		if (!qs->rspq.desc || idx >= qs->rspq.size)
3374 			return (EINVAL);
3375 		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3376 		return sizeof(struct rsp_desc);
3377 	}
3378 
3379 	qnum -= 4;
3380 	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3381 		return (EINVAL);
3382 	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3383 	return sizeof(struct rx_desc);
3384 }
3385