xref: /freebsd/sys/dev/cxgb/cxgb_sge.c (revision db612abe8df3355d1eb23bb3b50fdd97bc21e979)
1 /**************************************************************************
2 
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 #define DEBUG_BUFRING
30 
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/bus.h>
40 #include <sys/conf.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/bus_dma.h>
44 #include <sys/rman.h>
45 #include <sys/queue.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48 
49 #include <sys/proc.h>
50 #include <sys/sbuf.h>
51 #include <sys/sched.h>
52 #include <sys/smp.h>
53 #include <sys/systm.h>
54 #include <sys/syslog.h>
55 
56 #include <netinet/in_systm.h>
57 #include <netinet/in.h>
58 #include <netinet/ip.h>
59 #include <netinet/tcp.h>
60 
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 
67 #ifdef CONFIG_DEFINED
68 #include <cxgb_include.h>
69 #include <sys/mvec.h>
70 #else
71 #include <dev/cxgb/cxgb_include.h>
72 #include <dev/cxgb/sys/mvec.h>
73 #endif
74 
75 int      txq_fills = 0;
76 /*
77  * XXX don't re-enable this until TOE stops assuming
78  * we have an m_ext
79  */
80 static int recycle_enable = 0;
81 extern int cxgb_txq_buf_ring_size;
82 int cxgb_cached_allocations;
83 int cxgb_cached;
84 int cxgb_ext_freed = 0;
85 int cxgb_ext_inited = 0;
86 int fl_q_size = 0;
87 int jumbo_q_size = 0;
88 
89 extern int cxgb_use_16k_clusters;
90 extern int cxgb_pcpu_cache_enable;
91 extern int nmbjumbo4;
92 extern int nmbjumbo9;
93 extern int nmbjumbo16;
94 
95 
96 
97 
98 #define USE_GTS 0
99 
100 #define SGE_RX_SM_BUF_SIZE	1536
101 #define SGE_RX_DROP_THRES	16
102 #define SGE_RX_COPY_THRES	128
103 
104 /*
105  * Period of the Tx buffer reclaim timer.  This timer does not need to run
106  * frequently as Tx buffers are usually reclaimed by new Tx packets.
107  */
108 #define TX_RECLAIM_PERIOD       (hz >> 1)
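/*
 * Illustrative note (not in the original source): with the common default of
 * hz = 1000 this evaluates to 500 ticks, i.e. the reclaim timer fires roughly
 * every half second.
 */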
109 
110 /*
111  * Values for sge_txq.flags
112  */
113 enum {
114 	TXQ_RUNNING	= 1 << 0,  /* fetch engine is running */
115 	TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
116 };
117 
118 struct tx_desc {
119 	uint64_t	flit[TX_DESC_FLITS];
120 } __packed;
121 
122 struct rx_desc {
123 	uint32_t	addr_lo;
124 	uint32_t	len_gen;
125 	uint32_t	gen2;
126 	uint32_t	addr_hi;
127 } __packed;
128 
129 struct rsp_desc {               /* response queue descriptor */
130 	struct rss_header	rss_hdr;
131 	uint32_t		flags;
132 	uint32_t		len_cq;
133 	uint8_t			imm_data[47];
134 	uint8_t			intr_gen;
135 } __packed;
136 
137 #define RX_SW_DESC_MAP_CREATED	(1 << 0)
138 #define TX_SW_DESC_MAP_CREATED	(1 << 1)
139 #define RX_SW_DESC_INUSE        (1 << 3)
140 #define TX_SW_DESC_MAPPED       (1 << 4)
141 
142 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
143 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
144 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
145 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
146 
147 struct tx_sw_desc {                /* SW state per Tx descriptor */
148 	struct mbuf_iovec mi;
149 	bus_dmamap_t	map;
150 	int		flags;
151 };
152 
153 struct rx_sw_desc {                /* SW state per Rx descriptor */
154 	caddr_t	         rxsd_cl;
155 	caddr_t	         data;
156 	bus_dmamap_t	  map;
157 	int		  flags;
158 };
159 
160 struct txq_state {
161 	unsigned int compl;
162 	unsigned int gen;
163 	unsigned int pidx;
164 };
165 
166 struct refill_fl_cb_arg {
167 	int               error;
168 	bus_dma_segment_t seg;
169 	int               nseg;
170 };
171 
172 /*
173  * Maps a number of flits to the number of Tx descriptors that can hold them.
174  * The formula is
175  *
176  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
177  *
178  * HW allows up to 4 descriptors to be combined into a WR.
179  */
180 static uint8_t flit_desc_map[] = {
181 	0,
182 #if SGE_NUM_GENBITS == 1
183 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
184 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
185 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
186 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
187 #elif SGE_NUM_GENBITS == 2
188 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
189 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
190 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
191 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
192 #else
193 # error "SGE_NUM_GENBITS must be 1 or 2"
194 #endif
195 };
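/*
 * Worked example, derived from the table above (illustrative): with
 * SGE_NUM_GENBITS == 2 the table implies WR_FLITS == 15, so 16 flits need
 * 1 + (16 - 2) / (15 - 1) = 2 descriptors and 30 flits need
 * 1 + 28 / 14 = 3, matching the entries above.
 */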
196 
197 
198 static int lro_default = 0;
199 int cxgb_debug = 0;
200 
201 static void sge_timer_cb(void *arg);
202 static void sge_timer_reclaim(void *arg, int ncount);
203 static void sge_txq_reclaim_handler(void *arg, int ncount);
204 
205 /**
206  *	reclaim_completed_tx_ - reclaims completed Tx descriptors
207  *	@q: the Tx queue to reclaim completed descriptors from
208  *	@reclaim_min: don't reclaim unless at least this many descriptors can be freed
209  *
210  *	Reclaims Tx descriptors that the SGE has indicated it has processed,
211  *	and frees the associated buffers if possible.  Called with the Tx
212  *	queue's lock held.
213  */
214 static __inline int
215 reclaim_completed_tx_(struct sge_txq *q, int reclaim_min)
216 {
217 	int reclaim = desc_reclaimable(q);
218 
219 	if (reclaim < reclaim_min)
220 		return (0);
221 
222 	mtx_assert(&q->lock, MA_OWNED);
223 	if (reclaim > 0) {
224 		t3_free_tx_desc(q, reclaim);
225 		q->cleaned += reclaim;
226 		q->in_use -= reclaim;
227 	}
228 	return (reclaim);
229 }
230 
231 /**
232  *	should_restart_tx - are there enough resources to restart a Tx queue?
233  *	@q: the Tx queue
234  *
235  *	Checks if there are enough descriptors to restart a suspended Tx queue.
236  */
237 static __inline int
238 should_restart_tx(const struct sge_txq *q)
239 {
240 	unsigned int r = q->processed - q->cleaned;
241 
242 	return q->in_use - r < (q->size >> 1);
243 }
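/*
 * Example (illustrative): for a 1024-entry Tx queue the check above permits a
 * restart once fewer than 512 descriptors are still outstanding, where
 * "outstanding" excludes descriptors the SGE has already reported as
 * processed but that have not yet been cleaned.
 */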
244 
245 /**
246  *	t3_sge_init - initialize SGE
247  *	@adap: the adapter
248  *	@p: the SGE parameters
249  *
250  *	Performs SGE initialization needed every time after a chip reset.
251  *	We do not initialize any of the queue sets here, instead the driver
252  *	top-level must request those individually.  We also do not enable DMA
253  *	here, that should be done after the queues have been set up.
254  */
255 void
256 t3_sge_init(adapter_t *adap, struct sge_params *p)
257 {
258 	u_int ctrl, ups;
259 
260 	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
261 
262 	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
263 	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
264 	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
265 	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
266 #if SGE_NUM_GENBITS == 1
267 	ctrl |= F_EGRGENCTRL;
268 #endif
269 	if (adap->params.rev > 0) {
270 		if (!(adap->flags & (USING_MSIX | USING_MSI)))
271 			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
272 	}
273 	t3_write_reg(adap, A_SG_CONTROL, ctrl);
274 	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
275 		     V_LORCQDRBTHRSH(512));
276 	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
277 	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
278 		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
279 	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
280 		     adap->params.rev < T3_REV_C ? 1000 : 500);
281 	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
282 	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
283 	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
284 	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
285 	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
286 }
287 
288 
289 /**
290  *	sgl_len - calculates the size of an SGL of the given capacity
291  *	@n: the number of SGL entries
292  *
293  *	Calculates the number of flits needed for a scatter/gather list that
294  *	can hold the given number of entries.
295  */
296 static __inline unsigned int
297 sgl_len(unsigned int n)
298 {
299 	return ((3 * n) / 2 + (n & 1));
300 }
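/*
 * Illustrative arithmetic: SGL entries are packed two per struct sg_ent
 * (3 flits per pair, since each entry is a 4-byte length plus an 8-byte
 * address); an odd final entry needs 2 flits, so e.g.
 * sgl_len(3) = (3 * 3) / 2 + (3 & 1) = 5 flits.
 */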
301 
302 /**
303  *	get_imm_packet - return the next ingress packet buffer from a response
304  *	@resp: the response descriptor containing the packet data
305  *
306  *	Return a packet containing the immediate data of the given response.
307  */
308 static int
309 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
310 {
311 
312 	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
313 	m->m_ext.ext_buf = NULL;
314 	m->m_ext.ext_type = 0;
315 	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
316 	return (0);
317 }
318 
319 static __inline u_int
320 flits_to_desc(u_int n)
321 {
322 	return (flit_desc_map[n]);
323 }
324 
325 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
326 		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
327 		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
328 		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
329 		    F_HIRCQPARITYERROR)
330 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
331 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
332 		      F_RSPQDISABLED)
333 
334 /**
335  *	t3_sge_err_intr_handler - SGE async event interrupt handler
336  *	@adapter: the adapter
337  *
338  *	Interrupt handler for SGE asynchronous (non-data) events.
339  */
340 void
341 t3_sge_err_intr_handler(adapter_t *adapter)
342 {
343 	unsigned int v, status;
344 
345 	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
346 	if (status & SGE_PARERR)
347 		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
348 			 status & SGE_PARERR);
349 	if (status & SGE_FRAMINGERR)
350 		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
351 			 status & SGE_FRAMINGERR);
352 	if (status & F_RSPQCREDITOVERFOW)
353 		CH_ALERT(adapter, "SGE response queue credit overflow\n");
354 
355 	if (status & F_RSPQDISABLED) {
356 		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
357 
358 		CH_ALERT(adapter,
359 			 "packet delivered to disabled response queue (0x%x)\n",
360 			 (v >> S_RSPQ0DISABLED) & 0xff);
361 	}
362 
363 	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
364 	if (status & SGE_FATALERR)
365 		t3_fatal_err(adapter);
366 }
367 
368 void
369 t3_sge_prep(adapter_t *adap, struct sge_params *p)
370 {
371 	int i, nqsets;
372 
373 	nqsets = min(SGE_QSETS, mp_ncpus*4);
374 
375 	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
376 
377 	while (!powerof2(fl_q_size))
378 		fl_q_size--;
379 #if __FreeBSD_version > 800000
380 	if (cxgb_use_16k_clusters)
381 		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
382 	else
383 		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
384 #else
385 	jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE);
386 #endif
387 	while (!powerof2(jumbo_q_size))
388 		jumbo_q_size--;
389 
390 	/* XXX Does ETHER_ALIGN need to be accounted for here? */
391 	p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);
392 
393 	for (i = 0; i < SGE_QSETS; ++i) {
394 		struct qset_params *q = p->qset + i;
395 
396 		if (adap->params.nports > 2) {
397 			q->coalesce_nsecs = 50000;
398 		} else {
399 #ifdef INVARIANTS
400 			q->coalesce_nsecs = 10000;
401 #else
402 			q->coalesce_nsecs = 5000;
403 #endif
404 		}
405 		q->polling = adap->params.rev > 0;
406 		q->rspq_size = RSPQ_Q_SIZE;
407 		q->fl_size = fl_q_size;
408 		q->jumbo_size = jumbo_q_size;
409 		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
410 		q->txq_size[TXQ_OFLD] = 1024;
411 		q->txq_size[TXQ_CTRL] = 256;
412 		q->cong_thres = 0;
413 	}
414 }
415 
416 int
417 t3_sge_alloc(adapter_t *sc)
418 {
419 
420 	/* The parent tag. */
421 	if (bus_dma_tag_create( NULL,			/* parent */
422 				1, 0,			/* algnmnt, boundary */
423 				BUS_SPACE_MAXADDR,	/* lowaddr */
424 				BUS_SPACE_MAXADDR,	/* highaddr */
425 				NULL, NULL,		/* filter, filterarg */
426 				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
427 				BUS_SPACE_UNRESTRICTED, /* nsegments */
428 				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
429 				0,			/* flags */
430 				NULL, NULL,		/* lock, lockarg */
431 				&sc->parent_dmat)) {
432 		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
433 		return (ENOMEM);
434 	}
435 
436 	/*
437 	 * DMA tag for normal sized RX frames
438 	 */
439 	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
440 		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
441 		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
442 		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
443 		return (ENOMEM);
444 	}
445 
446 	/*
447 	 * DMA tag for jumbo sized RX frames.
448 	 */
449 	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
450 		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
451 		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
452 		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
453 		return (ENOMEM);
454 	}
455 
456 	/*
457 	 * DMA tag for TX frames.
458 	 */
459 	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
460 		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
461 		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
462 		NULL, NULL, &sc->tx_dmat)) {
463 		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
464 		return (ENOMEM);
465 	}
466 
467 	return (0);
468 }
469 
470 int
471 t3_sge_free(struct adapter * sc)
472 {
473 
474 	if (sc->tx_dmat != NULL)
475 		bus_dma_tag_destroy(sc->tx_dmat);
476 
477 	if (sc->rx_jumbo_dmat != NULL)
478 		bus_dma_tag_destroy(sc->rx_jumbo_dmat);
479 
480 	if (sc->rx_dmat != NULL)
481 		bus_dma_tag_destroy(sc->rx_dmat);
482 
483 	if (sc->parent_dmat != NULL)
484 		bus_dma_tag_destroy(sc->parent_dmat);
485 
486 	return (0);
487 }
488 
489 void
490 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
491 {
492 
493 	qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
494 	qs->rspq.polling = 0 /* p->polling */;
495 }
496 
497 #if !defined(__i386__) && !defined(__amd64__)
498 static void
499 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
500 {
501 	struct refill_fl_cb_arg *cb_arg = arg;
502 
503 	cb_arg->error = error;
504 	cb_arg->seg = segs[0];
505 	cb_arg->nseg = nseg;
506 
507 }
508 #endif
509 /**
510  *	refill_fl - refill an SGE free-buffer list
511  *	@sc: the controller softc
512  *	@q: the free-list to refill
513  *	@n: the number of new buffers to allocate
514  *
515  *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
516  *	The caller must assure that @n does not exceed the queue's capacity.
517  */
518 static void
519 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
520 {
521 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
522 	struct rx_desc *d = &q->desc[q->pidx];
523 	struct refill_fl_cb_arg cb_arg;
524 	caddr_t cl;
525 	int err, count = 0;
526 	int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
527 
528 	cb_arg.error = 0;
529 	while (n--) {
530 		/*
531 		 * We only allocate a cluster; mbuf allocation happens after rx
532 		 */
533 		if ((cl = cxgb_cache_get(q->zone)) == NULL) {
534 			log(LOG_WARNING, "Failed to allocate cluster\n");
535 			goto done;
536 		}
537 
538 		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
539 			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
540 				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
541 				uma_zfree(q->zone, cl);
542 				goto done;
543 			}
544 			sd->flags |= RX_SW_DESC_MAP_CREATED;
545 		}
546 #if !defined(__i386__) && !defined(__amd64__)
547 		err = bus_dmamap_load(q->entry_tag, sd->map,
548 		    cl + header_size, q->buf_size,
549 		    refill_fl_cb, &cb_arg, 0);
550 
551 		if (err != 0 || cb_arg.error) {
552 			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
553 			/*
554 			 * XXX free cluster
555 			 */
556 			return;
557 		}
558 #else
559 		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size));
560 #endif
561 		sd->flags |= RX_SW_DESC_INUSE;
562 		sd->rxsd_cl = cl;
563 		sd->data = cl + header_size;
564 		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
565 		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
566 		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
567 		d->gen2 = htobe32(V_FLD_GEN2(q->gen));
568 
569 		d++;
570 		sd++;
571 
572 		if (++q->pidx == q->size) {
573 			q->pidx = 0;
574 			q->gen ^= 1;
575 			sd = q->sdesc;
576 			d = q->desc;
577 		}
578 		q->credits++;
579 		count++;
580 	}
581 
582 done:
583 	if (count)
584 		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
585 }
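/*
 * Note (explanatory, based on the code above): header_size reserves room at
 * the front of each cluster for an mbuf header (m_hdr + pkthdr + m_ext_ plus
 * a 32-bit reference count), presumably so that an mbuf can be constructed in
 * place when the packet is received; the DMA mapping and the free-list
 * descriptor therefore point header_size bytes into the cluster (sd->data),
 * not at its base (sd->rxsd_cl).
 */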
586 
587 
588 /**
589  *	free_rx_bufs - free the Rx buffers on an SGE free list
590  *	@sc: the controller softc
591  *	@q: the SGE free list to clean up
592  *
593  *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
594  *	this queue should be stopped before calling this function.
595  */
596 static void
597 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
598 {
599 	u_int cidx = q->cidx;
600 
601 	while (q->credits--) {
602 		struct rx_sw_desc *d = &q->sdesc[cidx];
603 
604 		if (d->flags & RX_SW_DESC_INUSE) {
605 			bus_dmamap_unload(q->entry_tag, d->map);
606 			bus_dmamap_destroy(q->entry_tag, d->map);
607 			uma_zfree(q->zone, d->rxsd_cl);
608 		}
609 		d->rxsd_cl = NULL;
610 		if (++cidx == q->size)
611 			cidx = 0;
612 	}
613 }
614 
615 static __inline void
616 __refill_fl(adapter_t *adap, struct sge_fl *fl)
617 {
618 	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
619 }
620 
621 static __inline void
622 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
623 {
624 	if ((fl->size - fl->credits) < max)
625 		refill_fl(adap, fl, min(max, fl->size - fl->credits));
626 }
627 
628 void
629 refill_fl_service(adapter_t *adap, struct sge_fl *fl)
630 {
631 	__refill_fl_lt(adap, fl, 512);
632 }
633 
634 /**
635  *	recycle_rx_buf - recycle a receive buffer
636  *	@adapter: the adapter
637  *	@q: the SGE free list
638  *	@idx: index of buffer to recycle
639  *
640  *	Recycles the specified buffer on the given free list by adding it at
641  *	the next available slot on the list.
642  */
643 static void
644 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
645 {
646 	struct rx_desc *from = &q->desc[idx];
647 	struct rx_desc *to   = &q->desc[q->pidx];
648 
649 	q->sdesc[q->pidx] = q->sdesc[idx];
650 	to->addr_lo = from->addr_lo;        // already big endian
651 	to->addr_hi = from->addr_hi;        // likewise
652 	wmb();
653 	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
654 	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
655 	q->credits++;
656 
657 	if (++q->pidx == q->size) {
658 		q->pidx = 0;
659 		q->gen ^= 1;
660 	}
661 	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
662 }
663 
664 static void
665 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
666 {
667 	uint32_t *addr;
668 
669 	addr = arg;
670 	*addr = segs[0].ds_addr;
671 }
672 
673 static int
674 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
675     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
676     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
677 {
678 	size_t len = nelem * elem_size;
679 	void *s = NULL;
680 	void *p = NULL;
681 	int err;
682 
683 	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
684 				      BUS_SPACE_MAXADDR_32BIT,
685 				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
686 				      len, 0, NULL, NULL, tag)) != 0) {
687 		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
688 		return (ENOMEM);
689 	}
690 
691 	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
692 				    map)) != 0) {
693 		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
694 		return (ENOMEM);
695 	}
696 
697 	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
698 	bzero(p, len);
699 	*(void **)desc = p;
700 
701 	if (sw_size) {
702 		len = nelem * sw_size;
703 		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
704 		*(void **)sdesc = s;
705 	}
706 	if (parent_entry_tag == NULL)
707 		return (0);
708 
709 	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
710 				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
711 		                      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
712 				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
713 		                      NULL, NULL, entry_tag)) != 0) {
714 		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
715 		return (ENOMEM);
716 	}
717 	return (0);
718 }
719 
720 static void
721 sge_slow_intr_handler(void *arg, int ncount)
722 {
723 	adapter_t *sc = arg;
724 
725 	t3_slow_intr_handler(sc);
726 }
727 
728 /**
729  *	sge_timer_cb - perform periodic maintenance of an SGE qset
730  *	@data: the SGE queue set to maintain
731  *
732  *	Runs periodically from a timer to perform maintenance of an SGE queue
733  *	set.  It performs two tasks:
734  *	set.  It performs the following tasks:
735  *	a) Cleans up any completed Tx descriptors that may still be pending.
736  *	Normal descriptor cleanup happens when new packets are added to a Tx
737  *	queue so this timer is relatively infrequent and does any cleanup only
738  *	if the Tx queue has not seen any new packets in a while.  We make a
739  *	best effort attempt to reclaim descriptors, in that we don't wait
740  *	around if we cannot get a queue's lock (which most likely is because
741  *	someone else is queueing new packets and so will also handle the clean
742  *	up).  Since control queues use immediate data exclusively we don't
743  *	bother cleaning them up here.
744  *
745  *	b) Replenishes Rx queues that have run out due to memory shortage.
746  *	Normally new Rx buffers are added when existing ones are consumed but
747  *	when out of memory a queue can become empty.  We try to add only a few
748  *	buffers here, the queue will be replenished fully as these new buffers
749  *	are used up if memory shortage has subsided.
750  *
751  *	c) Return coalesced response queue credits in case a response queue is
752  *	starved.
753  *
754  *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
755  *	fifo overflows and the FW doesn't implement any recovery scheme yet.
756  */
757 static void
758 sge_timer_cb(void *arg)
759 {
760 	adapter_t *sc = arg;
761 #ifndef IFNET_MULTIQUEUE
762 	struct port_info *pi;
763 	struct sge_qset *qs;
764 	struct sge_txq  *txq;
765 	int i, j;
766 	int reclaim_ofl, refill_rx;
767 
768 	for (i = 0; i < sc->params.nports; i++)
769 		for (j = 0; j < sc->port[i].nqsets; j++) {
770 			qs = &sc->sge.qs[i + j];
771 			txq = &qs->txq[0];
772 			reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
773 			refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
774 			    (qs->fl[1].credits < qs->fl[1].size));
775 			if (reclaim_ofl || refill_rx) {
776 				pi = &sc->port[i];
777 				taskqueue_enqueue(pi->tq, &pi->timer_reclaim_task);
778 				break;
779 			}
780 		}
781 #endif
782 	if (sc->params.nports > 2) {
783 		int i;
784 
785 		for_each_port(sc, i) {
786 			struct port_info *pi = &sc->port[i];
787 
788 			t3_write_reg(sc, A_SG_KDOORBELL,
789 				     F_SELEGRCNTX |
790 				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
791 		}
792 	}
793 	if (sc->open_device_map != 0)
794 		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
795 }
796 
797 /*
798  * This is meant to be a catch-all function to keep sge state private
799  * to sge.c
800  *
801  */
802 int
803 t3_sge_init_adapter(adapter_t *sc)
804 {
805 	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
806 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
807 	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
808 	mi_init();
809 	cxgb_cache_init();
810 	return (0);
811 }
812 
813 int
814 t3_sge_reset_adapter(adapter_t *sc)
815 {
816 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
817 	return (0);
818 }
819 
820 int
821 t3_sge_init_port(struct port_info *pi)
822 {
823 	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
824 	return (0);
825 }
826 
827 void
828 t3_sge_deinit_sw(adapter_t *sc)
829 {
830 
831 	mi_deinit();
832 }
833 
834 /**
835  *	refill_rspq - replenish an SGE response queue
836  *	@adapter: the adapter
837  *	@q: the response queue to replenish
838  *	@credits: how many new responses to make available
839  *
840  *	Replenishes a response queue by making the supplied number of responses
841  *	available to HW.
842  */
843 static __inline void
844 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
845 {
846 
847 	/* mbufs are allocated on demand when a rspq entry is processed. */
848 	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
849 		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
850 }
851 
852 static __inline void
853 sge_txq_reclaim_(struct sge_txq *txq, int force)
854 {
855 
856 	if (desc_reclaimable(txq) < 16)
857 		return;
858 	if (mtx_trylock(&txq->lock) == 0)
859 		return;
860 	reclaim_completed_tx_(txq, 16);
861 	mtx_unlock(&txq->lock);
862 
863 }
864 
865 static void
866 sge_txq_reclaim_handler(void *arg, int ncount)
867 {
868 	struct sge_txq *q = arg;
869 
870 	sge_txq_reclaim_(q, TRUE);
871 }
872 
873 
874 
875 static void
876 sge_timer_reclaim(void *arg, int ncount)
877 {
878 	struct port_info *pi = arg;
879 	int i, nqsets = pi->nqsets;
880 	adapter_t *sc = pi->adapter;
881 	struct sge_qset *qs;
882 	struct sge_txq *txq;
883 	struct mtx *lock;
884 
885 #ifdef IFNET_MULTIQUEUE
886 	panic("%s should not be called with multiqueue support\n", __FUNCTION__);
887 #endif
888 	for (i = 0; i < nqsets; i++) {
889 		qs = &sc->sge.qs[i];
890 
891 		txq = &qs->txq[TXQ_OFLD];
892 		sge_txq_reclaim_(txq, FALSE);
893 
894 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
895 			    &sc->sge.qs[0].rspq.lock;
896 
897 		if (mtx_trylock(lock)) {
898 			/* XXX currently assume that we are *NOT* polling */
899 			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
900 
901 			if (qs->fl[0].credits < qs->fl[0].size - 16)
902 				__refill_fl(sc, &qs->fl[0]);
903 			if (qs->fl[1].credits < qs->fl[1].size - 16)
904 				__refill_fl(sc, &qs->fl[1]);
905 
906 			if (status & (1 << qs->rspq.cntxt_id)) {
907 				if (qs->rspq.credits) {
908 					refill_rspq(sc, &qs->rspq, 1);
909 					qs->rspq.credits--;
910 					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
911 					    1 << qs->rspq.cntxt_id);
912 				}
913 			}
914 			mtx_unlock(lock);
915 		}
916 	}
917 }
918 
919 /**
920  *	init_qset_cntxt - initialize an SGE queue set context info
921  *	@qs: the queue set
922  *	@id: the queue set id
923  *
924  *	Initializes the TIDs and context ids for the queues of a queue set.
925  */
926 static void
927 init_qset_cntxt(struct sge_qset *qs, u_int id)
928 {
929 
930 	qs->rspq.cntxt_id = id;
931 	qs->fl[0].cntxt_id = 2 * id;
932 	qs->fl[1].cntxt_id = 2 * id + 1;
933 	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
934 	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
935 	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
936 	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
937 	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
938 
939 	mbufq_init(&qs->txq[TXQ_ETH].sendq);
940 	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
941 	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
942 }
943 
944 
945 static void
946 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
947 {
948 	txq->in_use += ndesc;
949 	/*
950 	 * XXX we don't handle stopping of the queue; presumably start
951 	 * handles this when we bump against the end
952 	 */
953 	txqs->gen = txq->gen;
954 	txq->unacked += ndesc;
955 	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
956 	txq->unacked &= 31;
957 	txqs->pidx = txq->pidx;
958 	txq->pidx += ndesc;
959 #ifdef INVARIANTS
960 	if (((txqs->pidx > txq->cidx) &&
961 		(txq->pidx < txqs->pidx) &&
962 		(txq->pidx >= txq->cidx)) ||
963 	    ((txqs->pidx < txq->cidx) &&
964 		(txq->pidx >= txq-> cidx)) ||
965 	    ((txqs->pidx < txq->cidx) &&
966 		(txq->cidx < txqs->pidx)))
967 		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
968 		    txqs->pidx, txq->pidx, txq->cidx);
969 #endif
970 	if (txq->pidx >= txq->size) {
971 		txq->pidx -= txq->size;
972 		txq->gen ^= 1;
973 	}
974 
975 }
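/*
 * Note (explanatory): txq->unacked counts descriptors issued since the last
 * completion request; whenever bit 5 becomes set (i.e. every 32 descriptors)
 * the expression (txq->unacked & 32) << (S_WR_COMPL - 5) shifts that bit into
 * the WR_COMPL position so the work request asks the SGE for a completion,
 * after which the counter is masked back down with &= 31.
 */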
976 
977 /**
978  *	calc_tx_descs - calculate the number of Tx descriptors for a packet
979  *	@m: the packet mbufs
980  *      @nsegs: the number of segments
981  *
982  * 	Returns the number of Tx descriptors needed for the given Ethernet
983  * 	packet.  Ethernet packets require addition of WR and CPL headers.
984  */
985 static __inline unsigned int
986 calc_tx_descs(const struct mbuf *m, int nsegs)
987 {
988 	unsigned int flits;
989 
990 	if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
991 		return 1;
992 
993 	flits = sgl_len(nsegs) + 2;
994 #ifdef TSO_SUPPORTED
995 	if (m->m_pkthdr.csum_flags & CSUM_TSO)
996 		flits++;
997 #endif
998 	return flits_to_desc(flits);
999 }
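/*
 * Worked example (illustrative): a packet too large for immediate data that
 * maps to 4 DMA segments needs flits = sgl_len(4) + 2 = 8, and with
 * SGE_NUM_GENBITS == 2, flit_desc_map[8] == 1, i.e. a single Tx descriptor.
 */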
1000 
1001 static unsigned int
1002 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
1003     struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
1004 {
1005 	struct mbuf *m0;
1006 	int err, pktlen, pass = 0;
1007 
1008 retry:
1009 	err = 0;
1010 	m0 = *m;
1011 	pktlen = m0->m_pkthdr.len;
1012 #if defined(__i386__) || defined(__amd64__)
1013 	if (busdma_map_sg_collapse(m, segs, nsegs) == 0) {
1014 		goto done;
1015 	} else
1016 #endif
1017 		err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);
1018 
1019 	if (err == 0) {
1020 		goto done;
1021 	}
1022 	if (err == EFBIG && pass == 0) {
1023 		pass = 1;
1024 		/* Too many segments, try to defrag */
1025 		m0 = m_defrag(m0, M_DONTWAIT);
1026 		if (m0 == NULL) {
1027 			m_freem(*m);
1028 			*m = NULL;
1029 			return (ENOBUFS);
1030 		}
1031 		*m = m0;
1032 		goto retry;
1033 	} else if (err == ENOMEM) {
1034 		return (err);
1035 	} else if (err) {
1036 		if (cxgb_debug)
1037 			printf("map failure err=%d pktlen=%d\n", err, pktlen);
1038 		m_freem(m0);
1039 		*m = NULL;
1040 		return (err);
1041 	}
1042 done:
1043 #if !defined(__i386__) && !defined(__amd64__)
1044 	bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE);
1045 #endif
1046 	txsd->flags |= TX_SW_DESC_MAPPED;
1047 
1048 	return (0);
1049 }
1050 
1051 /**
1052  *	make_sgl - populate a scatter/gather list for a packet
1053  *	@sgp: the SGL to populate
1054  *	@segs: the packet dma segments
1055  *	@nsegs: the number of segments
1056  *
1057  *	Generates a scatter/gather list for the buffers that make up a packet.
1058  *	The caller must size the SGL appropriately; sgl_len() gives the number
1059  *	of 8-byte words needed.
1060  */
1061 static __inline void
1062 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1063 {
1064 	int i, idx;
1065 
1066 	for (idx = 0, i = 0; i < nsegs; i++) {
1067 		/*
1068 		 * firmware doesn't like empty segments
1069 		 */
1070 		if (segs[i].ds_len == 0)
1071 			continue;
1072 		if (i && idx == 0)
1073 			++sgp;
1074 
1075 		sgp->len[idx] = htobe32(segs[i].ds_len);
1076 		sgp->addr[idx] = htobe64(segs[i].ds_addr);
1077 		idx ^= 1;
1078 	}
1079 
1080 	if (idx) {
1081 		sgp->len[idx] = 0;
1082 		sgp->addr[idx] = 0;
1083 	}
1084 }
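/*
 * Note (explanatory): entries are packed two per struct sg_ent, with idx
 * toggling between the two len/addr slots; when the number of non-empty
 * segments is odd, the trailing slot is zeroed above so the hardware sees a
 * terminated list.
 */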
1085 
1086 /**
1087  *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1088  *	@adap: the adapter
1089  *	@q: the Tx queue
1090  *
1091  *	Ring the doorbell if a Tx queue is asleep.  There is a natural race,
1092  *	where the HW is going to sleep just after we checked; in that case
1093  *	the interrupt handler will detect the outstanding TX packet
1094  *	and ring the doorbell for us.
1095  *
1096  *	When GTS is disabled we unconditionally ring the doorbell.
1097  */
1098 static __inline void
1099 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1100 {
1101 #if USE_GTS
1102 	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1103 	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1104 		set_bit(TXQ_LAST_PKT_DB, &q->flags);
1105 #ifdef T3_TRACE
1106 		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1107 			  q->cntxt_id);
1108 #endif
1109 		t3_write_reg(adap, A_SG_KDOORBELL,
1110 			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1111 	}
1112 #else
1113 	wmb();            /* write descriptors before telling HW */
1114 	t3_write_reg(adap, A_SG_KDOORBELL,
1115 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1116 #endif
1117 }
1118 
1119 static __inline void
1120 wr_gen2(struct tx_desc *d, unsigned int gen)
1121 {
1122 #if SGE_NUM_GENBITS == 2
1123 	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1124 #endif
1125 }
1126 
1127 /**
1128  *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
1129  *	@ndesc: number of Tx descriptors spanned by the SGL
1130  *	@txd: first Tx descriptor to be written
1131  *	@txqs: txq state (generation and producer index)
1132  *	@txq: the SGE Tx queue
1133  *	@sgl: the SGL
1134  *	@flits: number of flits to the start of the SGL in the first descriptor
1135  *	@sgl_flits: the SGL size in flits
1136  *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
1137  *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
1138  *
1139  *	Write a work request header and an associated SGL.  If the SGL is
1140  *	small enough to fit into one Tx descriptor it has already been written
1141  *	and we just need to write the WR header.  Otherwise we distribute the
1142  *	SGL across the number of descriptors it spans.
1143  */
1144 static void
1145 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1146     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1147     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1148 {
1149 
1150 	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1151 	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1152 
1153 	if (__predict_true(ndesc == 1)) {
1154 		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1155 		    V_WR_SGLSFLT(flits)) | wr_hi;
1156 		wmb();
1157 		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1158 		    V_WR_GEN(txqs->gen)) | wr_lo;
1159 		/* XXX gen? */
1160 		wr_gen2(txd, txqs->gen);
1161 
1162 	} else {
1163 		unsigned int ogen = txqs->gen;
1164 		const uint64_t *fp = (const uint64_t *)sgl;
1165 		struct work_request_hdr *wp = wrp;
1166 
1167 		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1168 		    V_WR_SGLSFLT(flits)) | wr_hi;
1169 
1170 		while (sgl_flits) {
1171 			unsigned int avail = WR_FLITS - flits;
1172 
1173 			if (avail > sgl_flits)
1174 				avail = sgl_flits;
1175 			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1176 			sgl_flits -= avail;
1177 			ndesc--;
1178 			if (!sgl_flits)
1179 				break;
1180 
1181 			fp += avail;
1182 			txd++;
1183 			txsd++;
1184 			if (++txqs->pidx == txq->size) {
1185 				txqs->pidx = 0;
1186 				txqs->gen ^= 1;
1187 				txd = txq->desc;
1188 				txsd = txq->sdesc;
1189 			}
1190 
1191 			/*
1192 			 * when the head of the mbuf chain
1193 			 * is freed all clusters will be freed
1194 			 * with it
1195 			 */
1196 			KASSERT(txsd->mi.mi_base == NULL,
1197 			    ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1198 			wrp = (struct work_request_hdr *)txd;
1199 			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1200 			    V_WR_SGLSFLT(1)) | wr_hi;
1201 			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1202 				    sgl_flits + 1)) |
1203 			    V_WR_GEN(txqs->gen)) | wr_lo;
1204 			wr_gen2(txd, txqs->gen);
1205 			flits = 1;
1206 		}
1207 		wrp->wr_hi |= htonl(F_WR_EOP);
1208 		wmb();
1209 		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1210 		wr_gen2((struct tx_desc *)wp, ogen);
1211 	}
1212 }
1213 
1214 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1215 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
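/* Illustrative arithmetic: 14 (Ethernet) + 4 (VLAN tag) + 20 (IPv4) + 20 (TCP) = 58 bytes. */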
1216 
1217 #ifdef VLAN_SUPPORTED
1218 #define GET_VTAG(cntrl, m) \
1219 do { \
1220 	if ((m)->m_flags & M_VLANTAG)					            \
1221 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1222 } while (0)
1223 
1224 #define GET_VTAG_MI(cntrl, mi) \
1225 do { \
1226 	if ((mi)->mi_flags & M_VLANTAG)					\
1227 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \
1228 } while (0)
1229 #else
1230 #define GET_VTAG(cntrl, m)
1231 #define GET_VTAG_MI(cntrl, m)
1232 #endif
1233 
1234 int
1235 t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
1236 {
1237 	adapter_t *sc;
1238 	struct mbuf *m0;
1239 	struct sge_txq *txq;
1240 	struct txq_state txqs;
1241 	struct port_info *pi;
1242 	unsigned int ndesc, flits, cntrl, mlen;
1243 	int err, nsegs, tso_info = 0;
1244 
1245 	struct work_request_hdr *wrp;
1246 	struct tx_sw_desc *txsd;
1247 	struct sg_ent *sgp, *sgl;
1248 	uint32_t wr_hi, wr_lo, sgl_flits;
1249 	bus_dma_segment_t segs[TX_MAX_SEGS];
1250 
1251 	struct tx_desc *txd;
1252 	struct mbuf_vec *mv;
1253 	struct mbuf_iovec *mi;
1254 
1255 	DPRINTF("t3_encap cpu=%d ", curcpu);
1256 
1257 	mi = NULL;
1258 	pi = qs->port;
1259 	sc = pi->adapter;
1260 	txq = &qs->txq[TXQ_ETH];
1261 	txd = &txq->desc[txq->pidx];
1262 	txsd = &txq->sdesc[txq->pidx];
1263 	sgl = txq->txq_sgl;
1264 	m0 = *m;
1265 
1266 	DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
1267 	DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);
1268 	if (cxgb_debug)
1269 		printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx);
1270 
1271 	mtx_assert(&txq->lock, MA_OWNED);
1272 	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1273 /*
1274  * XXX need to add VLAN support for 6.x
1275  */
1276 #ifdef VLAN_SUPPORTED
1277 	if  (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1278 		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1279 #endif
1280 	KASSERT(txsd->mi.mi_base == NULL,
1281 	    ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1282 	if (count > 1) {
1283 		panic("count > 1 not supported in CVS\n");
1284 		if ((err = busdma_map_sg_vec(m, &m0, segs, count)))
1285 			return (err);
1286 		nsegs = count;
1287 	} else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) {
1288 		if (cxgb_debug)
1289 			printf("failed ... err=%d\n", err);
1290 		return (err);
1291 	}
1292 	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count));
1293 
1294 	if (!(m0->m_pkthdr.len <= PIO_LEN)) {
1295 		mi_collapse_mbuf(&txsd->mi, m0);
1296 		mi = &txsd->mi;
1297 	}
1298 	if (count > 1) {
1299 		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1300 		int i, fidx;
1301 		struct mbuf_iovec *batchmi;
1302 
1303 		mv = mtomv(m0);
1304 		batchmi = mv->mv_vec;
1305 
1306 		wrp = (struct work_request_hdr *)txd;
1307 
1308 		flits = count*2 + 1;
1309 		txq_prod(txq, 1, &txqs);
1310 
1311 		for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) {
1312 			struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i];
1313 
1314 			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1315 			GET_VTAG_MI(cntrl, batchmi);
1316 			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1317 			cbe->cntrl = htonl(cntrl);
1318 			cbe->len = htonl(batchmi->mi_len | 0x80000000);
1319 			cbe->addr = htobe64(segs[i].ds_addr);
1320 			txd->flit[fidx] |= htobe64(1 << 24);
1321 		}
1322 
1323 		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1324 		    V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1325 		wmb();
1326 		wrp->wr_lo = htonl(V_WR_LEN(flits) |
1327 		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1328 		/* XXX gen? */
1329 		wr_gen2(txd, txqs.gen);
1330 		check_ring_tx_db(sc, txq);
1331 
1332 		return (0);
1333 	} else if (tso_info) {
1334 		int undersized, eth_type;
1335 		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1336 		struct ip *ip;
1337 		struct tcphdr *tcp;
1338 		char *pkthdr, tmp[TCPPKTHDRSIZE];
1339 		struct mbuf_vec *mv;
1340 		struct mbuf_iovec *tmpmi;
1341 
1342 		mv = mtomv(m0);
1343 		tmpmi = mv->mv_vec;
1344 
1345 		txd->flit[2] = 0;
1346 		GET_VTAG_MI(cntrl, mi);
1347 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1348 		hdr->cntrl = htonl(cntrl);
1349 		mlen = m0->m_pkthdr.len;
1350 		hdr->len = htonl(mlen | 0x80000000);
1351 
1352 		DPRINTF("tso buf len=%d\n", mlen);
1353 		undersized = (((tmpmi->mi_len < TCPPKTHDRSIZE) &&
1354 			(m0->m_flags & M_VLANTAG)) ||
1355 		    (tmpmi->mi_len < TCPPKTHDRSIZE - ETHER_VLAN_ENCAP_LEN));
1356 
1357 		if (__predict_false(undersized)) {
1358 			pkthdr = tmp;
1359 			dump_mi(mi);
1360 			panic("discontig packet - fixxorz");
1361 		} else
1362 			pkthdr = m0->m_data;
1363 
1364 		if (__predict_false(m0->m_flags & M_VLANTAG)) {
1365 			eth_type = CPL_ETH_II_VLAN;
1366 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1367 			    ETHER_VLAN_ENCAP_LEN);
1368 		} else {
1369 			eth_type = CPL_ETH_II;
1370 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1371 		}
1372 		tcp = (struct tcphdr *)((uint8_t *)ip +
1373 		    sizeof(*ip));
1374 
1375 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
1376 			    V_LSO_IPHDR_WORDS(ip->ip_hl) |
1377 			    V_LSO_TCPHDR_WORDS(tcp->th_off);
1378 		hdr->lso_info = htonl(tso_info);
1379 		flits = 3;
1380 	} else {
1381 		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1382 
1383 		GET_VTAG(cntrl, m0);
1384 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1385 		cpl->cntrl = htonl(cntrl);
1386 		mlen = m0->m_pkthdr.len;
1387 		cpl->len = htonl(mlen | 0x80000000);
1388 
1389 		if (mlen <= PIO_LEN) {
1390 			txq_prod(txq, 1, &txqs);
1391 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1392 			m_freem(m0);
1393 			m0 = NULL;
1394 			flits = (mlen + 7) / 8 + 2;
1395 			cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1396 					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1397 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1398 			wmb();
1399 			cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1400 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1401 
1402 			wr_gen2(txd, txqs.gen);
1403 			check_ring_tx_db(sc, txq);
1404 			DPRINTF("pio buf\n");
1405 			return (0);
1406 		}
1407 		DPRINTF("regular buf\n");
1408 		flits = 2;
1409 	}
1410 	wrp = (struct work_request_hdr *)txd;
1411 
1412 #ifdef	nomore
1413 	/*
1414 	 * XXX need to move into one of the helper routines above
1415 	 *
1416 	 */
1417 	if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0)
1418 		return (err);
1419 	m0 = *m;
1420 #endif
1421 	ndesc = calc_tx_descs(m0, nsegs);
1422 
1423 	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1424 	make_sgl(sgp, segs, nsegs);
1425 
1426 	sgl_flits = sgl_len(nsegs);
1427 
1428 	DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1429 	txq_prod(txq, ndesc, &txqs);
1430 	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1431 	wr_lo = htonl(V_WR_TID(txq->token));
1432 	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1433 	check_ring_tx_db(pi->adapter, txq);
1434 
1435 	if ((m0->m_type == MT_DATA) &&
1436 	    ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) &&
1437 	    (m0->m_ext.ext_type != EXT_PACKET)) {
1438 		m0->m_flags &= ~M_EXT ;
1439 		cxgb_mbufs_outstanding--;
1440 		m_free(m0);
1441 	}
1442 
1443 	return (0);
1444 }
1445 
1446 
1447 /**
1448  *	write_imm - write a packet into a Tx descriptor as immediate data
1449  *	@d: the Tx descriptor to write
1450  *	@m: the packet
1451  *	@len: the length of packet data to write as immediate data
1452  *	@gen: the generation bit value to write
1453  *
1454  *	Writes a packet as immediate data into a Tx descriptor.  The packet
1455  *	contains a work request at its beginning.  We must write the packet
1456  *	carefully so the SGE doesn't read accidentally before it's written in
1457  *	its entirety.
1458  */
1459 static __inline void
1460 write_imm(struct tx_desc *d, struct mbuf *m,
1461 	  unsigned int len, unsigned int gen)
1462 {
1463 	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1464 	struct work_request_hdr *to = (struct work_request_hdr *)d;
1465 
1466 	if (len > WR_LEN)
1467 		panic("len too big %d\n", len);
1468 	if (len < sizeof(*from))
1469 		panic("len too small %d", len);
1470 
1471 	memcpy(&to[1], &from[1], len - sizeof(*from));
1472 	to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1473 					V_WR_BCNTLFLT(len & 7));
1474 	wmb();
1475 	to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1476 					V_WR_LEN((len + 7) / 8));
1477 	wr_gen2(d, gen);
1478 
1479 	/*
1480 	 * This check is a hack; we should really fix the logic so
1481 	 * that this can't happen.
1482 	 */
1483 	if (m->m_type != MT_DONTFREE)
1484 		m_freem(m);
1485 
1486 }
1487 
1488 /**
1489  *	check_desc_avail - check descriptor availability on a send queue
1490  *	@adap: the adapter
1491  *	@q: the TX queue
1492  *	@m: the packet needing the descriptors
1493  *	@ndesc: the number of Tx descriptors needed
1494  *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1495  *
1496  *	Checks if the requested number of Tx descriptors is available on an
1497  *	SGE send queue.  If the queue is already suspended or not enough
1498  *	descriptors are available the packet is queued for later transmission.
1499  *	Must be called with the Tx queue locked.
1500  *
1501  *	Returns 0 if enough descriptors are available, 1 if there aren't
1502  *	enough descriptors and the packet has been queued, and 2 if the caller
1503  *	needs to retry because there weren't enough descriptors at the
1504  *	beginning of the call but some freed up in the mean time.
1505  */
1506 static __inline int
1507 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1508 		 struct mbuf *m, unsigned int ndesc,
1509 		 unsigned int qid)
1510 {
1511 	/*
1512 	 * XXX We currently only use this for checking the control queue;
1513 	 * the control queue is only used for binding qsets, which happens
1514 	 * at init time, so we are guaranteed enough descriptors
1515 	 */
1516 	if (__predict_false(!mbufq_empty(&q->sendq))) {
1517 addq_exit:	mbufq_tail(&q->sendq, m);
1518 		return 1;
1519 	}
1520 	if (__predict_false(q->size - q->in_use < ndesc)) {
1521 
1522 		struct sge_qset *qs = txq_to_qset(q, qid);
1523 
1524 		printf("stopping q\n");
1525 
1526 		setbit(&qs->txq_stopped, qid);
1527 		smp_mb();
1528 
1529 		if (should_restart_tx(q) &&
1530 		    test_and_clear_bit(qid, &qs->txq_stopped))
1531 			return 2;
1532 
1533 		q->stops++;
1534 		goto addq_exit;
1535 	}
1536 	return 0;
1537 }
1538 
1539 
1540 /**
1541  *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1542  *	@q: the SGE control Tx queue
1543  *
1544  *	This is a variant of reclaim_completed_tx() that is used for Tx queues
1545  *	that send only immediate data (presently just the control queues) and
1546  *	thus do not have any mbufs.
1547  */
1548 static __inline void
1549 reclaim_completed_tx_imm(struct sge_txq *q)
1550 {
1551 	unsigned int reclaim = q->processed - q->cleaned;
1552 
1553 	mtx_assert(&q->lock, MA_OWNED);
1554 
1555 	q->in_use -= reclaim;
1556 	q->cleaned += reclaim;
1557 }
1558 
1559 static __inline int
1560 immediate(const struct mbuf *m)
1561 {
1562 	return (m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN);
1563 }
1564 
1565 /**
1566  *	ctrl_xmit - send a packet through an SGE control Tx queue
1567  *	@adap: the adapter
1568  *	@q: the control queue
1569  *	@m: the packet
1570  *
1571  *	Send a packet through an SGE control Tx queue.  Packets sent through
1572  *	a control queue must fit entirely as immediate data in a single Tx
1573  *	descriptor and have no page fragments.
1574  */
1575 static int
1576 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1577 {
1578 	int ret;
1579 	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1580 
1581 	if (__predict_false(!immediate(m))) {
1582 		m_freem(m);
1583 		return 0;
1584 	}
1585 
1586 	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1587 	wrp->wr_lo = htonl(V_WR_TID(q->token));
1588 
1589 	mtx_lock(&q->lock);
1590 again:	reclaim_completed_tx_imm(q);
1591 
1592 	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1593 	if (__predict_false(ret)) {
1594 		if (ret == 1) {
1595 			mtx_unlock(&q->lock);
1596 			log(LOG_ERR, "no desc available\n");
1597 			return (ENOSPC);
1598 		}
1599 		goto again;
1600 	}
1601 	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1602 
1603 	q->in_use++;
1604 	if (++q->pidx >= q->size) {
1605 		q->pidx = 0;
1606 		q->gen ^= 1;
1607 	}
1608 	mtx_unlock(&q->lock);
1609 	wmb();
1610 	t3_write_reg(adap, A_SG_KDOORBELL,
1611 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1612 	return (0);
1613 }
1614 
1615 
1616 /**
1617  *	restart_ctrlq - restart a suspended control queue
1618  *	@qs: the queue set containing the control queue
1619  *
1620  *	Resumes transmission on a suspended Tx control queue.
1621  */
1622 static void
1623 restart_ctrlq(void *data, int npending)
1624 {
1625 	struct mbuf *m;
1626 	struct sge_qset *qs = (struct sge_qset *)data;
1627 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1628 	adapter_t *adap = qs->port->adapter;
1629 
1630 	log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use);
1631 
1632 	mtx_lock(&q->lock);
1633 again:	reclaim_completed_tx_imm(q);
1634 
1635 	while (q->in_use < q->size &&
1636 	       (m = mbufq_dequeue(&q->sendq)) != NULL) {
1637 
1638 		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1639 
1640 		if (++q->pidx >= q->size) {
1641 			q->pidx = 0;
1642 			q->gen ^= 1;
1643 		}
1644 		q->in_use++;
1645 	}
1646 	if (!mbufq_empty(&q->sendq)) {
1647 		setbit(&qs->txq_stopped, TXQ_CTRL);
1648 		smp_mb();
1649 
1650 		if (should_restart_tx(q) &&
1651 		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1652 			goto again;
1653 		q->stops++;
1654 	}
1655 	mtx_unlock(&q->lock);
1656 	wmb();
1657 	t3_write_reg(adap, A_SG_KDOORBELL,
1658 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1659 }
1660 
1661 
1662 /*
1663  * Send a management message through control queue 0
1664  */
1665 int
1666 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1667 {
1668 	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1669 }
1670 
1671 
1672 /**
1673  *	free_qset - free the resources of an SGE queue set
1674  *	@sc: the controller owning the queue set
1675  *	@q: the queue set
1676  *
1677  *	Release the HW and SW resources associated with an SGE queue set, such
1678  *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
1679  *	queue set must be quiesced prior to calling this.
1680  */
1681 void
1682 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1683 {
1684 	int i;
1685 
1686 	t3_free_tx_desc_all(&q->txq[TXQ_ETH]);
1687 
1688 	for (i = 0; i < SGE_TXQ_PER_SET; i++)
1689 		if (q->txq[i].txq_mr.br_ring != NULL) {
1690 			free(q->txq[i].txq_mr.br_ring, M_DEVBUF);
1691 			mtx_destroy(&q->txq[i].txq_mr.br_lock);
1692 		}
1693 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1694 		if (q->fl[i].desc) {
1695 			mtx_lock_spin(&sc->sge.reg_lock);
1696 			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1697 			mtx_unlock_spin(&sc->sge.reg_lock);
1698 			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1699 			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1700 					q->fl[i].desc_map);
1701 			bus_dma_tag_destroy(q->fl[i].desc_tag);
1702 			bus_dma_tag_destroy(q->fl[i].entry_tag);
1703 		}
1704 		if (q->fl[i].sdesc) {
1705 			free_rx_bufs(sc, &q->fl[i]);
1706 			free(q->fl[i].sdesc, M_DEVBUF);
1707 		}
1708 	}
1709 
1710 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1711 		if (q->txq[i].desc) {
1712 			mtx_lock_spin(&sc->sge.reg_lock);
1713 			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1714 			mtx_unlock_spin(&sc->sge.reg_lock);
1715 			bus_dmamap_unload(q->txq[i].desc_tag,
1716 					q->txq[i].desc_map);
1717 			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1718 					q->txq[i].desc_map);
1719 			bus_dma_tag_destroy(q->txq[i].desc_tag);
1720 			bus_dma_tag_destroy(q->txq[i].entry_tag);
1721 			MTX_DESTROY(&q->txq[i].lock);
1722 		}
1723 		if (q->txq[i].sdesc) {
1724 			free(q->txq[i].sdesc, M_DEVBUF);
1725 		}
1726 	}
1727 
1728 	if (q->rspq.desc) {
1729 		mtx_lock_spin(&sc->sge.reg_lock);
1730 		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1731 		mtx_unlock_spin(&sc->sge.reg_lock);
1732 
1733 		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1734 		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1735 			        q->rspq.desc_map);
1736 		bus_dma_tag_destroy(q->rspq.desc_tag);
1737 		MTX_DESTROY(&q->rspq.lock);
1738 	}
1739 
1740 	bzero(q, sizeof(*q));
1741 }
1742 
1743 /**
1744  *	t3_free_sge_resources - free SGE resources
1745  *	@sc: the adapter softc
1746  *
1747  *	Frees resources used by the SGE queue sets.
1748  */
1749 void
1750 t3_free_sge_resources(adapter_t *sc)
1751 {
1752 	int i, nqsets;
1753 
1754 #ifdef IFNET_MULTIQUEUE
1755 	panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__);
1756 #endif
1757 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1758 		nqsets += sc->port[i].nqsets;
1759 
1760 	for (i = 0; i < nqsets; ++i)
1761 		t3_free_qset(sc, &sc->sge.qs[i]);
1762 }
1763 
1764 /**
1765  *	t3_sge_start - enable SGE
1766  *	@sc: the controller softc
1767  *
1768  *	Enables the SGE for DMAs.  This is the last step in starting packet
1769  *	transfers.
1770  */
1771 void
1772 t3_sge_start(adapter_t *sc)
1773 {
1774 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1775 }
1776 
1777 /**
1778  *	t3_sge_stop - disable SGE operation
1779  *	@sc: the adapter
1780  *
1781  *	Disables the DMA engine.  This can be called in emergencies (e.g.,
1782  *	from error interrupts) or from normal process context.  In the latter
1783  *	case it also disables any pending queue restart tasklets.  Note that
1784  *	if it is called in interrupt context it cannot disable the restart
1785  *	tasklets as it cannot wait, however the tasklets will have no effect
1786  *	since the doorbells are disabled and the driver will call this again
1787  *	later from process context, at which time the tasklets will be stopped
1788  *	if they are still running.
1789  */
1790 void
1791 t3_sge_stop(adapter_t *sc)
1792 {
1793 	int i, nqsets;
1794 
1795 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1796 
1797 	if (sc->tq == NULL)
1798 		return;
1799 
1800 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1801 		nqsets += sc->port[i].nqsets;
1802 #ifdef notyet
1803 	/*
1804 	 *
1805 	 * XXX
1806 	 */
1807 	for (i = 0; i < nqsets; ++i) {
1808 		struct sge_qset *qs = &sc->sge.qs[i];
1809 
1810 		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
1811 		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
1812 	}
1813 #endif
1814 }
1815 
1816 /**
1817  *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
1818  *	@q: the Tx queue to reclaim descriptors from
1819  *	@reclaimable: the number of descriptors to reclaim
1820  *
1821  *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
1822  *	Tx buffers.  Called with the Tx queue lock held.
1828  */
1829 void
1830 t3_free_tx_desc(struct sge_txq *q, int reclaimable)
1831 {
1832 	struct tx_sw_desc *txsd;
1833 	unsigned int cidx;
1834 
1835 #ifdef T3_TRACE
1836 	T3_TRACE2(sc->tb[q->cntxt_id & 7],
1837 		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
1838 #endif
1839 	cidx = q->cidx;
1840 	txsd = &q->sdesc[cidx];
1841 	DPRINTF("reclaiming %d WR\n", reclaimable);
1842 	mtx_assert(&q->lock, MA_OWNED);
1843 	while (reclaimable--) {
1844 		DPRINTF("cidx=%d d=%p\n", cidx, txsd);
1845 		if (txsd->mi.mi_base != NULL) {
1846 			if (txsd->flags & TX_SW_DESC_MAPPED) {
1847 				bus_dmamap_unload(q->entry_tag, txsd->map);
1848 				txsd->flags &= ~TX_SW_DESC_MAPPED;
1849 			}
1850 			m_freem_iovec(&txsd->mi);
1851 			buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
1852 			txsd->mi.mi_base = NULL;
1853 
1854 #if defined(DIAGNOSTIC) && 0
1855 			if (m_get_priority(txsd->m[0]) != cidx)
1856 				printf("pri=%d cidx=%d\n",
1857 				    (int)m_get_priority(txsd->m[0]), cidx);
1858 #endif
1859 
1860 		} else
1861 			q->txq_skipped++;
1862 
1863 		++txsd;
1864 		if (++cidx == q->size) {
1865 			cidx = 0;
1866 			txsd = q->sdesc;
1867 		}
1868 	}
1869 	q->cidx = cidx;
1870 
1871 }
1872 
1873 void
1874 t3_free_tx_desc_all(struct sge_txq *q)
1875 {
1876 	int i;
1877 	struct tx_sw_desc *txsd;
1878 
1879 	for (i = 0; i < q->size; i++) {
1880 		txsd = &q->sdesc[i];
1881 		if (txsd->mi.mi_base != NULL) {
1882 			if (txsd->flags & TX_SW_DESC_MAPPED) {
1883 				bus_dmamap_unload(q->entry_tag, txsd->map);
1884 				txsd->flags &= ~TX_SW_DESC_MAPPED;
1885 			}
1886 			m_freem_iovec(&txsd->mi);
1887 			bzero(&txsd->mi, sizeof(txsd->mi));
1888 		}
1889 	}
1890 }
1891 
1892 /**
1893  *	is_new_response - check if a response is newly written
1894  *	@r: the response descriptor
1895  *	@q: the response queue
1896  *
1897  *	Returns true if a response descriptor contains a yet unprocessed
1898  *	response.
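 *
 *	The descriptor's generation bit is compared against the queue's
 *	current generation; software toggles the queue's gen field each time
 *	the consumer index wraps, so a mismatch means hardware has not yet
 *	written this entry on the current pass over the ring.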
1899  */
1900 static __inline int
1901 is_new_response(const struct rsp_desc *r,
1902     const struct sge_rspq *q)
1903 {
1904 	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1905 }
1906 
1907 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1908 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1909 			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1910 			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1911 			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1912 
1913 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1914 #define NOMEM_INTR_DELAY 2500
1915 
1916 /**
1917  *	write_ofld_wr - write an offload work request
1918  *	@adap: the adapter
1919  *	@m: the packet to send
1920  *	@q: the Tx queue
1921  *	@pidx: index of the first Tx descriptor to write
1922  *	@gen: the generation value to use
 *	@ndesc: number of descriptors the packet will occupy
 *	@segs: the DMA segments for the packet payload
 *	@nsegs: number of DMA segments
1924  *
1925  *	Write an offload work request to send the supplied packet.  The packet
1926  *	data already carry the work request with most fields populated.
1927  */
1928 static void
1929 write_ofld_wr(adapter_t *adap, struct mbuf *m,
1930     struct sge_txq *q, unsigned int pidx,
1931     unsigned int gen, unsigned int ndesc,
1932     bus_dma_segment_t *segs, unsigned int nsegs)
1933 {
1934 	unsigned int sgl_flits, flits;
1935 	struct work_request_hdr *from;
1936 	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1937 	struct tx_desc *d = &q->desc[pidx];
1938 	struct txq_state txqs;
1939 
1940 	if (immediate(m) && nsegs == 0) {
1941 		write_imm(d, m, m->m_len, gen);
1942 		return;
1943 	}
1944 
1945 	/* Only TX_DATA builds SGLs */
1946 	from = mtod(m, struct work_request_hdr *);
1947 	memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
1948 
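	/*
	 * When the request fits in a single descriptor the SGL is written
	 * directly after the copied request data; otherwise it is staged in
	 * the local sgl[] array and laid out across descriptors by
	 * write_wr_hdr_sgl() below.
	 */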
1949 	flits = m->m_len / 8;
1950 	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1951 
1952 	make_sgl(sgp, segs, nsegs);
1953 	sgl_flits = sgl_len(nsegs);
1954 
1955 	txqs.gen = gen;
1956 	txqs.pidx = pidx;
1957 	txqs.compl = 0;
1958 
1959 	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1960 	    from->wr_hi, from->wr_lo);
1961 }
1962 
1963 /**
1964  *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
 *	@m: the packet
 *	@nsegs: number of DMA segments backing the packet
1966  *
1967  * 	Returns the number of Tx descriptors needed for the given offload
1968  * 	packet.  These packets are already fully constructed.
1969  */
1970 static __inline unsigned int
1971 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
1972 {
1973 	unsigned int flits, cnt = 0;
1974 	int ndescs;
1975 
1976 	if (m->m_len <= WR_LEN && nsegs == 0)
1977 		return (1);                 /* packet fits as immediate data */
1978 
1979 	if (m->m_flags & M_IOVEC)
1980 		cnt = mtomv(m)->mv_count;
1981 	else
1982 		cnt = nsegs;
1983 
1984 	/* headers */
1985 	flits = m->m_len / 8;
1986 
1987 	ndescs = flits_to_desc(flits + sgl_len(cnt));
1988 
1989 	CTR4(KTR_CXGB, "flits=%d sgl_len=%d nsegs=%d ndescs=%d",
1990 	    flits, sgl_len(cnt), nsegs, ndescs);
1991 
1992 	return (ndescs);
1993 }
1994 
1995 /**
1996  *	ofld_xmit - send a packet through an offload queue
1997  *	@adap: the adapter
1998  *	@q: the Tx offload queue
1999  *	@m: the packet
2000  *
2001  *	Send an offload packet through an SGE offload queue.
2002  */
2003 static int
2004 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
2005 {
2006 	int ret, nsegs;
2007 	unsigned int ndesc;
2008 	unsigned int pidx, gen;
2009 	bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
2010 	struct tx_sw_desc *stx;
2011 
2012 	nsegs = m_get_sgllen(m);
2013 	vsegs = m_get_sgl(m);
2014 	ndesc = calc_tx_descs_ofld(m, nsegs);
2015 	busdma_map_sgl(vsegs, segs, nsegs);
2016 
2017 	stx = &q->sdesc[q->pidx];
2018 	KASSERT(stx->mi.mi_base == NULL, ("mi_base set"));
2019 
2020 	mtx_lock(&q->lock);
2021 again:	reclaim_completed_tx_(q, 16);
2022 	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2023 	if (__predict_false(ret)) {
2024 		if (ret == 1) {
2025 			printf("no ofld desc avail\n");
2026 
2027 			m_set_priority(m, ndesc);     /* save for restart */
2028 			mtx_unlock(&q->lock);
2029 			return (EINTR);
2030 		}
2031 		goto again;
2032 	}
2033 
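	/*
	 * Reserve ndesc descriptors under the queue lock, wrapping the
	 * producer index and flipping the generation bit when the end of
	 * the ring is crossed; the work request itself is written after
	 * the lock is dropped.
	 */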
2034 	gen = q->gen;
2035 	q->in_use += ndesc;
2036 	pidx = q->pidx;
2037 	q->pidx += ndesc;
2038 	if (q->pidx >= q->size) {
2039 		q->pidx -= q->size;
2040 		q->gen ^= 1;
2041 	}
2042 #ifdef T3_TRACE
2043 	T3_TRACE5(adap->tb[q->cntxt_id & 7],
2044 		  "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
2045 		  ndesc, pidx, skb->len, skb->len - skb->data_len,
2046 		  skb_shinfo(skb)->nr_frags);
2047 #endif
2048 	mtx_unlock(&q->lock);
2049 
2050 	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2051 	check_ring_tx_db(adap, q);
2052 	return (0);
2053 }
2054 
2055 /**
2056  *	restart_offloadq - restart a suspended offload queue
 *	@qs: the queue set containing the offload queue
2058  *
2059  *	Resumes transmission on a suspended Tx offload queue.
2060  */
2061 static void
2062 restart_offloadq(void *data, int npending)
2063 {
2064 	struct mbuf *m;
2065 	struct sge_qset *qs = data;
2066 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
2067 	adapter_t *adap = qs->port->adapter;
2068 	bus_dma_segment_t segs[TX_MAX_SEGS];
2069 	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2070 	int nsegs, cleaned;
2071 
2072 	mtx_lock(&q->lock);
2073 again:	cleaned = reclaim_completed_tx_(q, 16);
2074 
2075 	while ((m = mbufq_peek(&q->sendq)) != NULL) {
2076 		unsigned int gen, pidx;
2077 		unsigned int ndesc = m_get_priority(m);
2078 
2079 		if (__predict_false(q->size - q->in_use < ndesc)) {
2080 			setbit(&qs->txq_stopped, TXQ_OFLD);
2081 			smp_mb();
2082 
2083 			if (should_restart_tx(q) &&
2084 			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2085 				goto again;
2086 			q->stops++;
2087 			break;
2088 		}
2089 
2090 		gen = q->gen;
2091 		q->in_use += ndesc;
2092 		pidx = q->pidx;
2093 		q->pidx += ndesc;
2094 		if (q->pidx >= q->size) {
2095 			q->pidx -= q->size;
2096 			q->gen ^= 1;
2097 		}
2098 
2099 		(void)mbufq_dequeue(&q->sendq);
2100 		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2101 		mtx_unlock(&q->lock);
2102 		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2103 		mtx_lock(&q->lock);
2104 	}
2105 	mtx_unlock(&q->lock);
2106 
2107 #if USE_GTS
2108 	set_bit(TXQ_RUNNING, &q->flags);
2109 	set_bit(TXQ_LAST_PKT_DB, &q->flags);
2110 #endif
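	/* Make sure the descriptor updates are visible before ringing the doorbell. */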
2111 	wmb();
2112 	t3_write_reg(adap, A_SG_KDOORBELL,
2113 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2114 }
2115 
2116 /**
2117  *	queue_set - return the queue set a packet should use
2118  *	@m: the packet
2119  *
2120  *	Maps a packet to the SGE queue set it should use.  The desired queue
2121  *	set is carried in bits 1-3 in the packet's priority.
2122  */
2123 static __inline int
2124 queue_set(const struct mbuf *m)
2125 {
2126 	return m_get_priority(m) >> 1;
2127 }
2128 
2129 /**
2130  *	is_ctrl_pkt - return whether an offload packet is a control packet
2131  *	@m: the packet
2132  *
2133  *	Determines whether an offload packet should use an OFLD or a CTRL
2134  *	Tx queue.  This is indicated by bit 0 in the packet's priority.
2135  */
2136 static __inline int
2137 is_ctrl_pkt(const struct mbuf *m)
2138 {
2139 	return m_get_priority(m) & 1;
2140 }
2141 
2142 /**
2143  *	t3_offload_tx - send an offload packet
2144  *	@tdev: the offload device to send to
2145  *	@m: the packet
2146  *
2147  *	Sends an offload packet.  We use the packet priority to select the
2148  *	appropriate Tx queue as follows: bit 0 indicates whether the packet
2149  *	should be sent as regular or control, bits 1-3 select the queue set.
2150  */
2151 int
2152 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2153 {
2154 	adapter_t *adap = tdev2adap(tdev);
2155 	struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2156 
2157 	if (__predict_false(is_ctrl_pkt(m)))
2158 		return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
2159 
2160 	return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
2161 }
2162 
2163 /**
2164  *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2165  *	@tdev: the offload device that will be receiving the packets
2166  *	@q: the SGE response queue that assembled the bundle
 *	@mbufs: the partial bundle
2168  *	@n: the number of packets in the bundle
2169  *
2170  *	Delivers a (partial) bundle of Rx offload packets to an offload device.
2171  */
2172 static __inline void
2173 deliver_partial_bundle(struct t3cdev *tdev,
2174 			struct sge_rspq *q,
2175 			struct mbuf *mbufs[], int n)
2176 {
2177 	if (n) {
2178 		q->offload_bundles++;
2179 		cxgb_ofld_recv(tdev, mbufs, n);
2180 	}
2181 }
2182 
2183 static __inline int
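/**
 *	rx_offload - queue an Rx offload packet for bundled delivery
 *	@tdev: the offload device that will receive the packet
 *	@rq: the SGE response queue on which the packet arrived
 *	@m: the packet
 *	@rx_gather: the array used to gather packets into a bundle
 *	@gather_idx: current number of packets in the bundle
 *
 *	Adds a packet to the current Rx offload bundle and hands the bundle
 *	to the offload layer once it reaches RX_BUNDLE_SIZE packets.  Returns
 *	the updated bundle index.
 */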
2184 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2185     struct mbuf *m, struct mbuf *rx_gather[],
2186     unsigned int gather_idx)
2187 {
2188 
2189 	rq->offload_pkts++;
2190 	m->m_pkthdr.header = mtod(m, void *);
2191 	rx_gather[gather_idx++] = m;
2192 	if (gather_idx == RX_BUNDLE_SIZE) {
2193 		cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2194 		gather_idx = 0;
2195 		rq->offload_bundles++;
2196 	}
2197 	return (gather_idx);
2198 }
2199 
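/**
 *	restart_tx - check whether to restart suspended Tx queues
 *	@qs: the queue set to resume
 *
 *	Restarts the offload and control queues of a queue set if they were
 *	stopped and now have enough free descriptors to make progress, by
 *	clearing their stopped bits and scheduling the resume tasks.
 */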
2200 static void
2201 restart_tx(struct sge_qset *qs)
2202 {
2203 	struct adapter *sc = qs->port->adapter;
2204 
2205 
2206 	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2207 	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2208 	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2209 		qs->txq[TXQ_OFLD].restarts++;
2210 		DPRINTF("restarting TXQ_OFLD\n");
2211 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2212 	}
2213 	DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2214 	    qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2215 	    qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2216 	    qs->txq[TXQ_CTRL].in_use);
2217 
2218 	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2219 	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2220 	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2221 		qs->txq[TXQ_CTRL].restarts++;
2222 		DPRINTF("restarting TXQ_CTRL\n");
2223 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2224 	}
2225 }
2226 
2227 /**
2228  *	t3_sge_alloc_qset - initialize an SGE queue set
2229  *	@sc: the controller softc
2230  *	@id: the queue set id
2231  *	@nports: how many Ethernet ports will be using this queue set
2232  *	@irq_vec_idx: the IRQ vector index for response queue interrupts
2233  *	@p: configuration parameters for this queue set
2234  *	@ntxq: number of Tx queues for the queue set
2235  *	@pi: port info for queue set
2236  *
2237  *	Allocate resources and initialize an SGE queue set.  A queue set
2238  *	comprises a response queue, two Rx free-buffer queues, and up to 3
2239  *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
2240  *	queue, offload queue, and control queue.
2241  */
2242 int
2243 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2244 		  const struct qset_params *p, int ntxq, struct port_info *pi)
2245 {
2246 	struct sge_qset *q = &sc->sge.qs[id];
2247 	int i, header_size, ret = 0;
2248 
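	/* Allocate the software mbuf staging ring used by each Tx queue. */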
2249 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
		if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *),
			    M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
			device_printf(sc->dev, "failed to allocate mbuf ring\n");
			ret = ENOMEM;
			goto err;
		}
2255 		q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0;
2256 		q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size;
2257 		mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF);
2258 	}
2259 
2260 	init_qset_cntxt(q, id);
2261 	q->idx = id;
2262 
2263 	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2264 		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2265 		    &q->fl[0].desc, &q->fl[0].sdesc,
2266 		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
2267 		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2268 		printf("error %d from alloc ring fl0\n", ret);
2269 		goto err;
2270 	}
2271 
2272 	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2273 		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2274 		    &q->fl[1].desc, &q->fl[1].sdesc,
2275 		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
2276 		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2277 		printf("error %d from alloc ring fl1\n", ret);
2278 		goto err;
2279 	}
2280 
2281 	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2282 		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
2283 		    &q->rspq.desc_tag, &q->rspq.desc_map,
2284 		    NULL, NULL)) != 0) {
2285 		printf("error %d from alloc ring rspq\n", ret);
2286 		goto err;
2287 	}
2288 
2289 	for (i = 0; i < ntxq; ++i) {
2290 		/*
2291 		 * The control queue always uses immediate data so does not
2292 		 * need to keep track of any mbufs.
2293 		 * XXX Placeholder for future TOE support.
2294 		 */
2295 		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2296 
2297 		if ((ret = alloc_ring(sc, p->txq_size[i],
2298 			    sizeof(struct tx_desc), sz,
2299 			    &q->txq[i].phys_addr, &q->txq[i].desc,
2300 			    &q->txq[i].sdesc, &q->txq[i].desc_tag,
2301 			    &q->txq[i].desc_map,
2302 			    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2303 			printf("error %d from alloc ring tx %i\n", ret, i);
2304 			goto err;
2305 		}
2306 		mbufq_init(&q->txq[i].sendq);
2307 		q->txq[i].gen = 1;
2308 		q->txq[i].size = p->txq_size[i];
2309 		snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2310 		    device_get_unit(sc->dev), irq_vec_idx, i);
2311 		MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2312 	}
2313 
2314 	q->txq[TXQ_ETH].port = pi;
2315 
2316 	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2317 	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2318 	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
2319 	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
2320 
2321 	q->fl[0].gen = q->fl[1].gen = 1;
2322 	q->fl[0].size = p->fl_size;
2323 	q->fl[1].size = p->jumbo_size;
2324 
2325 	q->rspq.gen = 1;
2326 	q->rspq.cidx = 0;
2327 	q->rspq.size = p->rspq_size;
2328 
2329 
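	/*
	 * header_size mirrors the embedded-mbuf layout set up by
	 * init_cluster_mbuf(): mbuf header, packet header, external storage
	 * descriptor, and a trailing reference count word.  The free-list
	 * buffer sizes below leave room for this header inside each cluster.
	 */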
2330 	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
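	/*
	 * stop_thres is the low-water mark for the Ethernet Tx queue:
	 * roughly one worst-case work request (maximum SGL plus header
	 * flits) per port.
	 */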
2331 	q->txq[TXQ_ETH].stop_thres = nports *
2332 	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2333 
2334 	q->fl[0].buf_size = (MCLBYTES - header_size);
2335 	q->fl[0].zone = zone_clust;
2336 	q->fl[0].type = EXT_CLUSTER;
2337 #if __FreeBSD_version > 800000
2338 	if (cxgb_use_16k_clusters) {
2339 		q->fl[1].buf_size = MJUM16BYTES - header_size;
2340 		q->fl[1].zone = zone_jumbo16;
2341 		q->fl[1].type = EXT_JUMBO16;
2342 	} else {
2343 		q->fl[1].buf_size = MJUM9BYTES - header_size;
2344 		q->fl[1].zone = zone_jumbo9;
2345 		q->fl[1].type = EXT_JUMBO9;
2346 	}
2347 #else
2348 	q->fl[1].buf_size = MJUMPAGESIZE - header_size;
2349 	q->fl[1].zone = zone_jumbop;
2350 	q->fl[1].type = EXT_JUMBOP;
2351 #endif
2352 	q->lro.enabled = lro_default;
2353 
2354 	mtx_lock_spin(&sc->sge.reg_lock);
2355 	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2356 				   q->rspq.phys_addr, q->rspq.size,
2357 				   q->fl[0].buf_size, 1, 0);
2358 	if (ret) {
2359 		printf("error %d from t3_sge_init_rspcntxt\n", ret);
2360 		goto err_unlock;
2361 	}
2362 
2363 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2364 		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2365 					  q->fl[i].phys_addr, q->fl[i].size,
2366 					  q->fl[i].buf_size, p->cong_thres, 1,
2367 					  0);
2368 		if (ret) {
2369 			printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2370 			goto err_unlock;
2371 		}
2372 	}
2373 
2374 	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2375 				 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2376 				 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2377 				 1, 0);
2378 	if (ret) {
2379 		printf("error %d from t3_sge_init_ecntxt\n", ret);
2380 		goto err_unlock;
2381 	}
2382 
2383 	if (ntxq > 1) {
2384 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2385 					 USE_GTS, SGE_CNTXT_OFLD, id,
2386 					 q->txq[TXQ_OFLD].phys_addr,
2387 					 q->txq[TXQ_OFLD].size, 0, 1, 0);
2388 		if (ret) {
2389 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2390 			goto err_unlock;
2391 		}
2392 	}
2393 
2394 	if (ntxq > 2) {
2395 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2396 					 SGE_CNTXT_CTRL, id,
2397 					 q->txq[TXQ_CTRL].phys_addr,
2398 					 q->txq[TXQ_CTRL].size,
2399 					 q->txq[TXQ_CTRL].token, 1, 0);
2400 		if (ret) {
2401 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2402 			goto err_unlock;
2403 		}
2404 	}
2405 
2406 	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2407 	    device_get_unit(sc->dev), irq_vec_idx);
2408 	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2409 
2410 	mtx_unlock_spin(&sc->sge.reg_lock);
2411 	t3_update_qset_coalesce(q, p);
2412 	q->port = pi;
2413 
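	/*
	 * Prime both free lists and return all but one of the response
	 * queue entries to the hardware as credits.
	 */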
2414 	refill_fl(sc, &q->fl[0], q->fl[0].size);
2415 	refill_fl(sc, &q->fl[1], q->fl[1].size);
2416 	refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2417 
2418 	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2419 		     V_NEWTIMER(q->rspq.holdoff_tmr));
2420 
2421 	return (0);
2422 
2423 err_unlock:
2424 	mtx_unlock_spin(&sc->sge.reg_lock);
2425 err:
2426 	t3_free_qset(sc, q);
2427 
2428 	return (ret);
2429 }
2430 
2431 void
2432 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2433 {
2434 	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2435 	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2436 	struct ifnet *ifp = pi->ifp;
2437 
2438 	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2439 
2440 	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2441 	    cpl->csum_valid && cpl->csum == 0xffff) {
2442 		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
2443 		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2444 		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2445 		m->m_pkthdr.csum_data = 0xffff;
2446 	}
2447 	/*
2448 	 * XXX need to add VLAN support for 6.x
2449 	 */
2450 #ifdef VLAN_SUPPORTED
2451 	if (__predict_false(cpl->vlan_valid)) {
2452 		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2453 		m->m_flags |= M_VLANTAG;
2454 	}
2455 #endif
2456 
2457 	m->m_pkthdr.rcvif = ifp;
2458 	m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2459 #ifndef DISABLE_MBUF_IOVEC
2460 	m_explode(m);
2461 #endif
2462 	/*
2463 	 * adjust after conversion to mbuf chain
2464 	 */
2465 	m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2466 	m->m_len -= (sizeof(*cpl) + ethpad);
2467 	m->m_data += (sizeof(*cpl) + ethpad);
2468 
2469 	(*ifp->if_input)(ifp, m);
2470 }
2471 
2472 static void
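/**
 *	ext_free_handler - free routine for cluster-embedded mbufs
 *	@arg1: the mbuf embedded at the start of the cluster
 *	@arg2: the cluster type, used to look up the owning zone
 *
 *	Called when the last reference to a cluster set up by
 *	init_cluster_mbuf() is dropped; restores the mbuf's original external
 *	type and returns the cluster to its zone through the driver's cache.
 */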
2473 ext_free_handler(void *arg1, void * arg2)
2474 {
2475 	uintptr_t type = (uintptr_t)arg2;
2476 	uma_zone_t zone;
2477 	struct mbuf *m;
2478 
2479 	m = arg1;
2480 	zone = m_getzonefromtype(type);
2481 	m->m_ext.ext_type = (int)type;
2482 	cxgb_ext_freed++;
2483 	cxgb_cache_put(zone, m);
2484 }
2485 
2486 static void
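/**
 *	init_cluster_mbuf - construct an mbuf header inside a receive cluster
 *	@cl: the cluster backing the received data
 *	@flags: mbuf flags (e.g. M_PKTHDR) to set on the new mbuf
 *	@type: the cluster type, recorded for the free routine
 *	@zone: the UMA zone the cluster was allocated from
 *
 *	The driver embeds the mbuf header at the start of the cluster itself:
 *	m_data points just past the header area and the external reference
 *	count occupies the last word of that area.  The cluster is released
 *	through ext_free_handler() when its reference count drops to zero.
 */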
2487 init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone)
2488 {
2489 	struct mbuf *m;
2490 	int header_size;
2491 
2492 	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) +
2493 	    sizeof(struct m_ext_) + sizeof(uint32_t);
2494 
2495 	bzero(cl, header_size);
2496 	m = (struct mbuf *)cl;
2497 
2498 	cxgb_ext_inited++;
2499 	SLIST_INIT(&m->m_pkthdr.tags);
2500 	m->m_type = MT_DATA;
2501 	m->m_flags = flags | M_NOFREE | M_EXT;
2502 	m->m_data = cl + header_size;
2503 	m->m_ext.ext_buf = cl;
2504 	m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t));
2505 	m->m_ext.ext_size = m_getsizefromtype(type);
2506 	m->m_ext.ext_free = ext_free_handler;
2507 	m->m_ext.ext_arg1 = cl;
2508 	m->m_ext.ext_arg2 = (void *)(uintptr_t)type;
2509 	m->m_ext.ext_type = EXT_EXTREF;
2510 	*(m->m_ext.ref_cnt) = 1;
2511 	DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt);
2512 }
2513 
2514 
2515 /**
2516  *	get_packet - return the next ingress packet buffer from a free list
2517  *	@adap: the adapter that received the packet
2518  *	@drop_thres: # of remaining buffers before we start dropping packets
2519  *	@qs: the qset that the SGE free list holding the packet belongs to
2520  *      @mh: the mbuf header, contains a pointer to the head and tail of the mbuf chain
2521  *      @r: response descriptor
2522  *
2523  *	Get the next packet from a free list and complete setup of the
2524  *	sk_buff.  If the packet is small we make a copy and recycle the
 *	mbuf.  If the packet is small we make a copy and recycle the
2526  *	positive drop threshold is supplied packets are dropped and their
2527  *	buffers recycled if (a) the number of remaining buffers is under the
2528  *	threshold and the packet is too big to copy, or (b) the packet should
2529  *	be copied but there is no memory for the copy.
2530  */
2531 #ifdef DISABLE_MBUF_IOVEC
2532 
2533 static int
2534 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2535     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2536 {
2537 
2538 	unsigned int len_cq =  ntohl(r->len_cq);
2539 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2540 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2541 	uint32_t len = G_RSPD_LEN(len_cq);
2542 	uint32_t flags = ntohl(r->flags);
2543 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2544 	caddr_t cl;
2545 	struct mbuf *m, *m0;
2546 	int ret = 0;
2547 
2548 	prefetch(sd->rxsd_cl);
2549 
2550 	fl->credits--;
2551 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2552 
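	/*
	 * Small single-descriptor packets are copied into a fresh mbuf so
	 * the DMA buffer can be recycled in place; otherwise the cluster
	 * itself is handed up by building an mbuf header inside it.
	 */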
2553 	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2554 		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2555 			goto skip_recycle;
2556 		cl = mtod(m0, void *);
2557 		memcpy(cl, sd->data, len);
2558 		recycle_rx_buf(adap, fl, fl->cidx);
2559 		m = m0;
2560 		m0->m_len = len;
2561 	} else {
2562 	skip_recycle:
2563 
2564 		bus_dmamap_unload(fl->entry_tag, sd->map);
2565 		cl = sd->rxsd_cl;
2566 		m = m0 = (struct mbuf *)cl;
2567 
2568 		if ((sopeop == RSPQ_SOP_EOP) ||
2569 		    (sopeop == RSPQ_SOP))
2570 			flags = M_PKTHDR;
2571 		init_cluster_mbuf(cl, flags, fl->type, fl->zone);
2572 		m0->m_len = len;
2573 	}
2574 	switch(sopeop) {
2575 	case RSPQ_SOP_EOP:
2576 		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2577 		mh->mh_head = mh->mh_tail = m;
2578 		m->m_pkthdr.len = len;
2579 		ret = 1;
2580 		break;
2581 	case RSPQ_NSOP_NEOP:
2582 		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2583 		if (mh->mh_tail == NULL) {
2584 			log(LOG_ERR, "discarding intermediate descriptor entry\n");
2585 			m_freem(m);
2586 			break;
2587 		}
2588 		mh->mh_tail->m_next = m;
2589 		mh->mh_tail = m;
2590 		mh->mh_head->m_pkthdr.len += len;
2591 		ret = 0;
2592 		break;
2593 	case RSPQ_SOP:
2594 		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2595 		m->m_pkthdr.len = len;
2596 		mh->mh_head = mh->mh_tail = m;
2597 		ret = 0;
2598 		break;
2599 	case RSPQ_EOP:
2600 		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2601 		mh->mh_head->m_pkthdr.len += len;
2602 		mh->mh_tail->m_next = m;
2603 		mh->mh_tail = m;
2604 		ret = 1;
2605 		break;
2606 	}
2607 	if (++fl->cidx == fl->size)
2608 		fl->cidx = 0;
2609 
2610 	return (ret);
2611 }
2612 
2613 #else
2614 
2615 static int
2616 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2617     struct mbuf **m, struct rsp_desc *r)
2618 {
2619 
2620 	unsigned int len_cq =  ntohl(r->len_cq);
2621 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2622 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2623 	uint32_t len = G_RSPD_LEN(len_cq);
2624 	uint32_t flags = ntohl(r->flags);
2625 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2626 	void *cl;
2627 	int ret = 0;
2628 	struct mbuf *m0;
2629 #if 0
2630 	if ((sd + 1 )->rxsd_cl)
2631 		prefetch((sd + 1)->rxsd_cl);
2632 	if ((sd + 2)->rxsd_cl)
2633 		prefetch((sd + 2)->rxsd_cl);
2634 #endif
2635 	DPRINTF("rx cpu=%d\n", curcpu);
2636 	fl->credits--;
2637 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2638 
2639 	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2640 		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2641 			goto skip_recycle;
2642 		cl = mtod(m0, void *);
2643 		memcpy(cl, sd->data, len);
2644 		recycle_rx_buf(adap, fl, fl->cidx);
2645 		*m = m0;
2646 	} else {
2647 	skip_recycle:
2648 		bus_dmamap_unload(fl->entry_tag, sd->map);
2649 		cl = sd->rxsd_cl;
2650 		*m = m0 = (struct mbuf *)cl;
2651 	}
2652 
2653 	switch(sopeop) {
2654 	case RSPQ_SOP_EOP:
2655 		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2656 		if (cl == sd->rxsd_cl)
2657 			init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone);
2658 		m0->m_len = m0->m_pkthdr.len = len;
2659 		ret = 1;
2660 		goto done;
2661 		break;
2662 	case RSPQ_NSOP_NEOP:
2663 		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2664 		panic("chaining unsupported");
2665 		ret = 0;
2666 		break;
2667 	case RSPQ_SOP:
2668 		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2669 		panic("chaining unsupported");
2670 		m_iovinit(m0);
2671 		ret = 0;
2672 		break;
2673 	case RSPQ_EOP:
2674 		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2675 		panic("chaining unsupported");
2676 		ret = 1;
2677 		break;
2678 	}
2679 	panic("append not supported");
2680 #if 0
2681 	m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref);
2682 #endif
2683 done:
2684 	if (++fl->cidx == fl->size)
2685 		fl->cidx = 0;
2686 
2687 	return (ret);
2688 }
2689 #endif
2690 /**
2691  *	handle_rsp_cntrl_info - handles control information in a response
2692  *	@qs: the queue set corresponding to the response
2693  *	@flags: the response control flags
2694  *
2695  *	Handles the control information of an SGE response, such as GTS
2696  *	indications and completion credits for the queue set's Tx queues.
2697  *	HW coalesces credits, we don't do any extra SW coalescing.
2698  */
2699 static __inline void
2700 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2701 {
2702 	unsigned int credits;
2703 
2704 #if USE_GTS
2705 	if (flags & F_RSPD_TXQ0_GTS)
2706 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2707 #endif
2708 	credits = G_RSPD_TXQ0_CR(flags);
2709 	if (credits)
2710 		qs->txq[TXQ_ETH].processed += credits;
2711 
2712 	credits = G_RSPD_TXQ2_CR(flags);
2713 	if (credits)
2714 		qs->txq[TXQ_CTRL].processed += credits;
2715 
2716 # if USE_GTS
2717 	if (flags & F_RSPD_TXQ1_GTS)
2718 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2719 # endif
2720 	credits = G_RSPD_TXQ1_CR(flags);
2721 	if (credits)
2722 		qs->txq[TXQ_OFLD].processed += credits;
2723 
2724 }
2725 
2726 static void
2727 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2728     unsigned int sleeping)
2729 {
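	/*
	 * Doorbell kicking for Tx queues that went to sleep (GTS) is not
	 * implemented; this is intentionally a no-op.
	 */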
2730 	;
2731 }
2732 
2733 /**
2734  *	process_responses - process responses from an SGE response queue
2735  *	@adap: the adapter
2736  *	@qs: the queue set to which the response queue belongs
2737  *	@budget: how many responses can be processed in this round
2738  *
2739  *	Process responses from an SGE response queue up to the supplied budget.
2740  *	Responses include received packets as well as credits and other events
2741  *	for the queues that belong to the response queue's queue set.
2742  *	A negative budget is effectively unlimited.
2743  *
2744  *	Additionally choose the interrupt holdoff time for the next interrupt
2745  *	on this queue.  If the system is under memory shortage use a fairly
2746  *	long delay to help recovery.
2747  */
2748 int
2749 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2750 {
2751 	struct sge_rspq *rspq = &qs->rspq;
2752 	struct rsp_desc *r = &rspq->desc[rspq->cidx];
2753 	int budget_left = budget;
2754 	unsigned int sleeping = 0;
2755 	int lro = qs->lro.enabled;
2756 	struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2757 	int ngathered = 0;
2758 #ifdef DEBUG
2759 	static int last_holdoff = 0;
2760 	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2761 		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2762 		last_holdoff = rspq->holdoff_tmr;
2763 	}
2764 #endif
2765 	rspq->next_holdoff = rspq->holdoff_tmr;
2766 
2767 	while (__predict_true(budget_left && is_new_response(r, rspq))) {
2768 		int eth, eop = 0, ethpad = 0;
2769 		uint32_t flags = ntohl(r->flags);
2770 		uint32_t rss_csum = *(const uint32_t *)r;
2771 		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2772 
2773 		eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2774 
2775 		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2776 			struct mbuf *m;
2777 
2778 			if (cxgb_debug)
2779 				printf("async notification\n");
2780 
2781 			if (rspq->rspq_mh.mh_head == NULL) {
2782 				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2783 				m = rspq->rspq_mh.mh_head;
2784 			} else {
2785 				m = m_gethdr(M_DONTWAIT, MT_DATA);
2786 			}
2787 
2788 			/* XXX m is lost here if rspq->rspq_mbuf is not NULL */
2789 
2790 			if (m == NULL)
2791 				goto no_mem;
2792 
			memcpy(mtod(m, char *), r, AN_PKT_SIZE);
			m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
			*mtod(m, char *) = CPL_ASYNC_NOTIF;
			rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
			eop = 1;
			rspq->async_notif++;
2799 			goto skip;
2800 		} else if  (flags & F_RSPD_IMM_DATA_VALID) {
2801 			struct mbuf *m = NULL;
2802 
2803 			DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
2804 			    r->rss_hdr.opcode, rspq->cidx);
2805 			if (rspq->rspq_mh.mh_head == NULL)
2806 				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
			else
2808 				m = m_gethdr(M_DONTWAIT, MT_DATA);
2809 
2810 			if (rspq->rspq_mh.mh_head == NULL &&  m == NULL) {
2811 		no_mem:
2812 				rspq->next_holdoff = NOMEM_INTR_DELAY;
2813 				budget_left--;
2814 				break;
2815 			}
2816 			get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
2817 			eop = 1;
2818 			rspq->imm_data++;
2819 		} else if (r->len_cq) {
2820 			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2821 
2822 #ifdef DISABLE_MBUF_IOVEC
2823 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
2824 #else
2825 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r);
2826 #endif
2827 #ifdef IFNET_MULTIQUEUE
2828 			rspq->rspq_mh.mh_head->m_pkthdr.rss_hash = rss_hash;
2829 #endif
2830 			ethpad = 2;
2831 		} else {
2832 			DPRINTF("pure response\n");
2833 			rspq->pure_rsps++;
2834 		}
2835 	skip:
2836 		if (flags & RSPD_CTRL_MASK) {
2837 			sleeping |= flags & RSPD_GTS_MASK;
2838 			handle_rsp_cntrl_info(qs, flags);
2839 		}
2840 
2841 		r++;
2842 		if (__predict_false(++rspq->cidx == rspq->size)) {
2843 			rspq->cidx = 0;
2844 			rspq->gen ^= 1;
2845 			r = rspq->desc;
2846 		}
2847 		prefetch(r);
2848 		if (++rspq->credits >= (rspq->size / 4)) {
2849 			refill_rspq(adap, rspq, rspq->credits);
2850 			rspq->credits = 0;
2851 		}
2852 		DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags);
2853 
2854 		if (!eth && eop) {
2855 			rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
2856 			/*
2857 			 * XXX size mismatch
2858 			 */
2859 			m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
2860 
2861 
2862 			ngathered = rx_offload(&adap->tdev, rspq,
2863 			    rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
2864 			rspq->rspq_mh.mh_head = NULL;
2865 			DPRINTF("received offload packet\n");
2866 
2867 		} else if (eth && eop) {
2868 			prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *));
2869 			prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES);
2870 
2871 			t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad,
2872 			    rss_hash, rss_csum, lro);
2873 			DPRINTF("received tunnel packet\n");
			rspq->rspq_mh.mh_head = NULL;
2875 
2876 		}
2877 		__refill_fl_lt(adap, &qs->fl[0], 32);
2878 		__refill_fl_lt(adap, &qs->fl[1], 32);
2879 		--budget_left;
2880 	}
2881 
2882 	deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
2883 	t3_lro_flush(adap, qs, &qs->lro);
2884 
2885 	if (sleeping)
2886 		check_ring_db(adap, qs, sleeping);
2887 
2888 	smp_mb();  /* commit Tx queue processed updates */
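	/*
	 * txq_stopped is a bitmask indexed by Tx queue; a value above 1
	 * means a queue other than TXQ_ETH (i.e. the offload or control
	 * queue) is stopped and may need restarting.
	 */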
2889 	if (__predict_false(qs->txq_stopped > 1)) {
2890 		printf("restarting tx on %p\n", qs);
2891 
2892 		restart_tx(qs);
2893 	}
2894 
2895 	__refill_fl_lt(adap, &qs->fl[0], 512);
2896 	__refill_fl_lt(adap, &qs->fl[1], 512);
2897 	budget -= budget_left;
2898 	return (budget);
2899 }
2900 
2901 /*
2902  * A helper function that processes responses and issues GTS.
2903  */
2904 static __inline int
2905 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2906 {
2907 	int work;
2908 	static int last_holdoff = 0;
2909 
2910 	work = process_responses(adap, rspq_to_qset(rq), -1);
2911 
2912 	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
2913 		printf("next_holdoff=%d\n", rq->next_holdoff);
2914 		last_holdoff = rq->next_holdoff;
2915 	}
2916 	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2917 	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2918 
2919 	return (work);
2920 }
2921 
2922 
2923 /*
2924  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2925  * Handles data events from SGE response queues as well as error and other
2926  * async events as they all use the same interrupt pin.  We use one SGE
2927  * response queue per port in this mode and protect all response queues with
2928  * queue 0's lock.
2929  */
2930 void
2931 t3b_intr(void *data)
2932 {
2933 	uint32_t i, map;
2934 	adapter_t *adap = data;
2935 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2936 
2937 	t3_write_reg(adap, A_PL_CLI, 0);
2938 	map = t3_read_reg(adap, A_SG_DATA_INTR);
2939 
2940 	if (!map)
2941 		return;
2942 
2943 	if (__predict_false(map & F_ERRINTR))
2944 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2945 
2946 	mtx_lock(&q0->lock);
2947 	for_each_port(adap, i)
2948 	    if (map & (1 << i))
2949 			process_responses_gts(adap, &adap->sge.qs[i].rspq);
2950 	mtx_unlock(&q0->lock);
2951 }
2952 
2953 /*
2954  * The MSI interrupt handler.  This needs to handle data events from SGE
2955  * response queues as well as error and other async events as they all use
2956  * the same MSI vector.  We use one SGE response queue per port in this mode
2957  * and protect all response queues with queue 0's lock.
2958  */
2959 void
2960 t3_intr_msi(void *data)
2961 {
2962 	adapter_t *adap = data;
2963 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2964 	int i, new_packets = 0;
2965 
2966 	mtx_lock(&q0->lock);
2967 
2968 	for_each_port(adap, i)
2969 	    if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
2970 		    new_packets = 1;
2971 	mtx_unlock(&q0->lock);
2972 	if (new_packets == 0)
2973 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2974 }
2975 
2976 void
2977 t3_intr_msix(void *data)
2978 {
2979 	struct sge_qset *qs = data;
2980 	adapter_t *adap = qs->port->adapter;
2981 	struct sge_rspq *rspq = &qs->rspq;
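	/*
	 * Without IFNET_MULTIQUEUE the response queue lock is taken
	 * unconditionally; with it, the handler runs only if the trylock
	 * succeeds, leaving a contended queue to its current owner.
	 */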
2982 #ifndef IFNET_MULTIQUEUE
2983 	mtx_lock(&rspq->lock);
2984 #else
2985 	if (mtx_trylock(&rspq->lock))
2986 #endif
2987 	{
2988 
2989 		if (process_responses_gts(adap, rspq) == 0)
2990 			rspq->unhandled_irqs++;
2991 		mtx_unlock(&rspq->lock);
2992 	}
2993 }
2994 
2995 #define QDUMP_SBUF_SIZE		32 * 400
2996 static int
2997 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
2998 {
2999 	struct sge_rspq *rspq;
3000 	struct sge_qset *qs;
3001 	int i, err, dump_end, idx;
3002 	static int multiplier = 1;
3003 	struct sbuf *sb;
3004 	struct rsp_desc *rspd;
3005 	uint32_t data[4];
3006 
3007 	rspq = arg1;
3008 	qs = rspq_to_qset(rspq);
3009 	if (rspq->rspq_dump_count == 0)
3010 		return (0);
3011 	if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3012 		log(LOG_WARNING,
3013 		    "dump count is too large %d\n", rspq->rspq_dump_count);
3014 		rspq->rspq_dump_count = 0;
3015 		return (EINVAL);
3016 	}
3017 	if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3018 		log(LOG_WARNING,
3019 		    "dump start of %d is greater than queue size\n",
3020 		    rspq->rspq_dump_start);
3021 		rspq->rspq_dump_start = 0;
3022 		return (EINVAL);
3023 	}
3024 	err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3025 	if (err)
3026 		return (err);
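	/*
	 * The sbuf is fixed-length; if the dump overflows it, grow the size
	 * estimate and retry.
	 */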
3027 retry_sbufops:
3028 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3029 
3030 	sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3031 	    (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3032 	    ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3033 	sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3034 	    ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3035 
3036 	sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3037 	    (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3038 
3039 	dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3040 	for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3041 		idx = i & (RSPQ_Q_SIZE-1);
3042 
3043 		rspd = &rspq->desc[idx];
3044 		sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3045 		    idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3046 		    rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3047 		sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3048 		    rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3049 		    be32toh(rspd->len_cq), rspd->intr_gen);
3050 	}
3051 	if (sbuf_overflowed(sb)) {
3052 		sbuf_delete(sb);
3053 		multiplier++;
3054 		goto retry_sbufops;
3055 	}
3056 	sbuf_finish(sb);
3057 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3058 	sbuf_delete(sb);
3059 	return (err);
3060 }
3061 
3062 static int
3063 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3064 {
3065 	struct sge_txq *txq;
3066 	struct sge_qset *qs;
3067 	int i, j, err, dump_end;
3068 	static int multiplier = 1;
3069 	struct sbuf *sb;
3070 	struct tx_desc *txd;
3071 	uint32_t *WR, wr_hi, wr_lo, gen;
3072 	uint32_t data[4];
3073 
3074 	txq = arg1;
3075 	qs = txq_to_qset(txq, TXQ_ETH);
3076 	if (txq->txq_dump_count == 0) {
3077 		return (0);
3078 	}
3079 	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3080 		log(LOG_WARNING,
3081 		    "dump count is too large %d\n", txq->txq_dump_count);
3082 		txq->txq_dump_count = 1;
3083 		return (EINVAL);
3084 	}
3085 	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3086 		log(LOG_WARNING,
3087 		    "dump start of %d is greater than queue size\n",
3088 		    txq->txq_dump_start);
3089 		txq->txq_dump_start = 0;
3090 		return (EINVAL);
3091 	}
3092 	err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3093 	if (err)
3094 		return (err);
3095 
3096 
3097 retry_sbufops:
3098 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3099 
3100 	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3101 	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3102 	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
	sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
3104 	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3105 	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3106 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3107 	    txq->txq_dump_start,
3108 	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3109 
3110 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3111 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3112 		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3113 		WR = (uint32_t *)txd->flit;
3114 		wr_hi = ntohl(WR[0]);
3115 		wr_lo = ntohl(WR[1]);
3116 		gen = G_WR_GEN(wr_lo);
3117 
3118 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3119 		    wr_hi, wr_lo, gen);
3120 		for (j = 2; j < 30; j += 4)
3121 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3122 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3123 
3124 	}
3125 	if (sbuf_overflowed(sb)) {
3126 		sbuf_delete(sb);
3127 		multiplier++;
3128 		goto retry_sbufops;
3129 	}
3130 	sbuf_finish(sb);
3131 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3132 	sbuf_delete(sb);
3133 	return (err);
3134 }
3135 
3136 static int
3137 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3138 {
3139 	struct sge_txq *txq;
3140 	struct sge_qset *qs;
3141 	int i, j, err, dump_end;
3142 	static int multiplier = 1;
3143 	struct sbuf *sb;
3144 	struct tx_desc *txd;
3145 	uint32_t *WR, wr_hi, wr_lo, gen;
3146 
3147 	txq = arg1;
3148 	qs = txq_to_qset(txq, TXQ_CTRL);
3149 	if (txq->txq_dump_count == 0) {
3150 		return (0);
3151 	}
3152 	if (txq->txq_dump_count > 256) {
3153 		log(LOG_WARNING,
3154 		    "dump count is too large %d\n", txq->txq_dump_count);
3155 		txq->txq_dump_count = 1;
3156 		return (EINVAL);
3157 	}
3158 	if (txq->txq_dump_start > 255) {
3159 		log(LOG_WARNING,
3160 		    "dump start of %d is greater than queue size\n",
3161 		    txq->txq_dump_start);
3162 		txq->txq_dump_start = 0;
3163 		return (EINVAL);
3164 	}
3165 
3166 retry_sbufops:
3167 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3168 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3169 	    txq->txq_dump_start,
3170 	    (txq->txq_dump_start + txq->txq_dump_count) & 255);
3171 
3172 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3173 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3174 		txd = &txq->desc[i & (255)];
3175 		WR = (uint32_t *)txd->flit;
3176 		wr_hi = ntohl(WR[0]);
3177 		wr_lo = ntohl(WR[1]);
3178 		gen = G_WR_GEN(wr_lo);
3179 
3180 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3181 		    wr_hi, wr_lo, gen);
3182 		for (j = 2; j < 30; j += 4)
3183 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3184 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3185 
3186 	}
3187 	if (sbuf_overflowed(sb)) {
3188 		sbuf_delete(sb);
3189 		multiplier++;
3190 		goto retry_sbufops;
3191 	}
3192 	sbuf_finish(sb);
3193 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3194 	sbuf_delete(sb);
3195 	return (err);
3196 }
3197 
3198 static int
3199 t3_lro_enable(SYSCTL_HANDLER_ARGS)
3200 {
3201 	adapter_t *sc;
3202 	int i, j, enabled, err, nqsets = 0;
3203 
3204 #ifndef LRO_WORKING
3205 	return (0);
3206 #endif
3207 	sc = arg1;
3208 	enabled = sc->sge.qs[0].lro.enabled;
	err = sysctl_handle_int(oidp, &enabled, arg2, req);
3210 
3211 	if (err != 0)
3212 		return (err);
3213 	if (enabled == sc->sge.qs[0].lro.enabled)
3214 		return (0);
3215 
3216 	for (i = 0; i < sc->params.nports; i++)
3217 		for (j = 0; j < sc->port[i].nqsets; j++)
3218 			nqsets++;
3219 
3220 	for (i = 0; i < nqsets; i++)
3221 		sc->sge.qs[i].lro.enabled = enabled;
3222 
3223 	return (0);
3224 }
3225 
3226 static int
3227 t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
3228 {
3229 	adapter_t *sc = arg1;
3230 	struct qset_params *qsp = &sc->params.sge.qset[0];
3231 	int coalesce_nsecs;
3232 	struct sge_qset *qs;
3233 	int i, j, err, nqsets = 0;
3234 	struct mtx *lock;
3235 
3236 	if ((sc->flags & FULL_INIT_DONE) == 0)
3237 		return (ENXIO);
3238 
3239 	coalesce_nsecs = qsp->coalesce_nsecs;
	err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);
3241 
3242 	if (err != 0) {
3243 		return (err);
3244 	}
3245 	if (coalesce_nsecs == qsp->coalesce_nsecs)
3246 		return (0);
3247 
3248 	for (i = 0; i < sc->params.nports; i++)
3249 		for (j = 0; j < sc->port[i].nqsets; j++)
3250 			nqsets++;
3251 
3252 	coalesce_nsecs = max(100, coalesce_nsecs);
3253 
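	/*
	 * Apply the clamped value to every queue set.  With MSI-X each
	 * response queue has its own lock; otherwise queue 0's lock
	 * protects them all.
	 */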
3254 	for (i = 0; i < nqsets; i++) {
3255 		qs = &sc->sge.qs[i];
3256 		qsp = &sc->params.sge.qset[i];
3257 		qsp->coalesce_nsecs = coalesce_nsecs;
3258 
3259 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3260 			    &sc->sge.qs[0].rspq.lock;
3261 
3262 		mtx_lock(lock);
3263 		t3_update_qset_coalesce(qs, qsp);
3264 		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3265 		    V_NEWTIMER(qs->rspq.holdoff_tmr));
3266 		mtx_unlock(lock);
3267 	}
3268 
3269 	return (0);
3270 }
3271 
3272 
3273 void
3274 t3_add_attach_sysctls(adapter_t *sc)
3275 {
3276 	struct sysctl_ctx_list *ctx;
3277 	struct sysctl_oid_list *children;
3278 
3279 	ctx = device_get_sysctl_ctx(sc->dev);
3280 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3281 
3282 	/* random information */
3283 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3284 	    "firmware_version",
3285 	    CTLFLAG_RD, &sc->fw_version,
3286 	    0, "firmware version");
3287 
3288 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3289 	    "enable_lro",
3290 	    CTLTYPE_INT|CTLFLAG_RW, sc,
3291 	    0, t3_lro_enable,
3292 	    "I", "enable large receive offload");
3293 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3294 	    "hw_revision",
3295 	    CTLFLAG_RD, &sc->params.rev,
3296 	    0, "chip model");
3297 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3298 	    "enable_debug",
3299 	    CTLFLAG_RW, &cxgb_debug,
3300 	    0, "enable verbose debugging output");
3301 	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce",
3302 	    CTLFLAG_RD, &sc->tunq_coalesce,
3303 	    "#tunneled packets freed");
3304 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3305 	    "txq_overrun",
3306 	    CTLFLAG_RD, &txq_fills,
3307 	    0, "#times txq overrun");
3308 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3309 	    "pcpu_cache_enable",
3310 	    CTLFLAG_RW, &cxgb_pcpu_cache_enable,
3311 	    0, "#enable driver local pcpu caches");
3312 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3313 	    "cache_alloc",
3314 	    CTLFLAG_RD, &cxgb_cached_allocations,
3315 	    0, "#times a cluster was allocated from cache");
3316 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3317 	    "cached",
3318 	    CTLFLAG_RD, &cxgb_cached,
3319 	    0, "#times a cluster was cached");
3320 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3321 	    "ext_freed",
3322 	    CTLFLAG_RD, &cxgb_ext_freed,
3323 	    0, "#times a cluster was freed through ext_free");
3324 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3325 	    "ext_inited",
3326 	    CTLFLAG_RD, &cxgb_ext_inited,
3327 	    0, "#times a cluster was initialized for ext_free");
3328 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3329 	    "mbufs_outstanding",
3330 	    CTLFLAG_RD, &cxgb_mbufs_outstanding,
3331 	    0, "#mbufs in flight in the driver");
3332 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3333 	    "pack_outstanding",
3334 	    CTLFLAG_RD, &cxgb_pack_outstanding,
	    0, "#packets in flight in the driver");
3336 }
3337 
3338 
3339 static const char *rspq_name = "rspq";
3340 static const char *txq_names[] =
3341 {
3342 	"txq_eth",
3343 	"txq_ofld",
3344 	"txq_ctrl"
3345 };
3346 
3347 void
3348 t3_add_configured_sysctls(adapter_t *sc)
3349 {
3350 	struct sysctl_ctx_list *ctx;
3351 	struct sysctl_oid_list *children;
3352 	int i, j;
3353 
3354 	ctx = device_get_sysctl_ctx(sc->dev);
3355 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3356 
3357 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3358 	    "intr_coal",
3359 	    CTLTYPE_INT|CTLFLAG_RW, sc,
3360 	    0, t3_set_coalesce_nsecs,
3361 	    "I", "interrupt coalescing timer (ns)");
3362 
3363 	for (i = 0; i < sc->params.nports; i++) {
3364 		struct port_info *pi = &sc->port[i];
3365 		struct sysctl_oid *poid;
3366 		struct sysctl_oid_list *poidlist;
3367 
3368 		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3369 		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3370 		    pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3371 		poidlist = SYSCTL_CHILDREN(poid);
3372 		SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3373 		    "nqsets", CTLFLAG_RD, &pi->nqsets,
3374 		    0, "#queue sets");
3375 
3376 		for (j = 0; j < pi->nqsets; j++) {
3377 			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3378 			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, *ctrlqpoid;
3379 			struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist, *ctrlqpoidlist;
3380 			struct sge_txq *txq = &qs->txq[TXQ_ETH];
3381 
3382 			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3383 
3384 			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3385 			    qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3386 			qspoidlist = SYSCTL_CHILDREN(qspoid);
3387 
3388 			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3389 			    rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3390 			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3391 
3392 			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3393 			    txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3394 			txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3395 
3396 			ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3397 			    txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3398 			ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3399 
3400 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3401 			    CTLFLAG_RD, &qs->rspq.size,
3402 			    0, "#entries in response queue");
3403 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3404 			    CTLFLAG_RD, &qs->rspq.cidx,
3405 			    0, "consumer index");
3406 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3407 			    CTLFLAG_RD, &qs->rspq.credits,
3408 			    0, "#credits");
3409 			SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3410 			    CTLFLAG_RD, &qs->rspq.phys_addr,
	    "physical address of the queue");
3412 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3413 			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3414 			    0, "start rspq dump entry");
3415 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3416 			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3417 			    0, "#rspq entries to dump");
3418 			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3419 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3420 			    0, t3_dump_rspq, "A", "dump of the response queue");
3421 
3422 
3423 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
3424 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
3425 			    0, "#tunneled packets dropped");
3426 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3427 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3428 			    0, "#tunneled packets waiting to be sent");
3429 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3430 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3431 			    0, "#tunneled packets queue producer index");
3432 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3433 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3434 			    0, "#tunneled packets queue consumer index");
3435 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3436 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3437 			    0, "#tunneled packets processed by the card");
3438 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3439 			    CTLFLAG_RD, &txq->cleaned,
3440 			    0, "#tunneled packets cleaned");
3441 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3442 			    CTLFLAG_RD, &txq->in_use,
3443 			    0, "#tunneled packet slots in use");
3444 			SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3445 			    CTLFLAG_RD, &txq->txq_frees,
3446 			    "#tunneled packets freed");
3447 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3448 			    CTLFLAG_RD, &txq->txq_skipped,
3449 			    0, "#tunneled packet descriptors skipped");
3450 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced",
3451 			    CTLFLAG_RD, &txq->txq_coalesced,
3452 			    0, "#tunneled packets coalesced");
3453 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3454 			    CTLFLAG_RD, &txq->txq_enqueued,
3455 			    0, "#tunneled packets enqueued to hardware");
3456 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3457 			    CTLFLAG_RD, &qs->txq_stopped,
3458 			    0, "tx queues stopped");
3459 			SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3460 			    CTLFLAG_RD, &txq->phys_addr,
	    "physical address of the queue");
3462 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3463 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3464 			    0, "txq generation");
3465 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3466 			    CTLFLAG_RD, &txq->cidx,
3467 			    0, "hardware queue cidx");
3468 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3469 			    CTLFLAG_RD, &txq->pidx,
3470 			    0, "hardware queue pidx");
3471 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3472 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3473 			    0, "txq start idx for dump");
3474 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3475 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3476 			    0, "txq #entries to dump");
3477 			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3478 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3479 			    0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3480 
3481 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3482 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3483 			    0, "ctrlq start idx for dump");
3484 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3485 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3486 			    0, "ctrl #entries to dump");
3487 			SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3488 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3489 			    0, t3_dump_txq_ctrl, "A", "dump of the transmit queue");
3490 
3491 
3492 
3493 
3494 
3495 		}
3496 	}
3497 }
3498 
3499 /**
3500  *	t3_get_desc - dump an SGE descriptor for debugging purposes
3501  *	@qs: the queue set
3502  *	@qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3503  *	@idx: the descriptor index in the queue
3504  *	@data: where to dump the descriptor contents
3505  *
3506  *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3507  *	size of the descriptor.
3508  */
3509 int
3510 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3511 		unsigned char *data)
3512 {
3513 	if (qnum >= 6)
3514 		return (EINVAL);
3515 
3516 	if (qnum < 3) {
3517 		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
			return (EINVAL);
3519 		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3520 		return sizeof(struct tx_desc);
3521 	}
3522 
3523 	if (qnum == 3) {
3524 		if (!qs->rspq.desc || idx >= qs->rspq.size)
3525 			return (EINVAL);
3526 		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3527 		return sizeof(struct rsp_desc);
3528 	}
3529 
3530 	qnum -= 4;
3531 	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3532 		return (EINVAL);
3533 	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3534 	return sizeof(struct rx_desc);
3535 }
3536