xref: /freebsd/sys/dev/cxgb/cxgb_sge.c (revision 531c890b8aecbf157fe3491503b5ca62c0b01093)
1 /**************************************************************************
2 
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 #define DEBUG_BUFRING
30 
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/bus.h>
40 #include <sys/conf.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/bus_dma.h>
44 #include <sys/rman.h>
45 #include <sys/queue.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48 
49 #include <sys/proc.h>
50 #include <sys/sbuf.h>
51 #include <sys/sched.h>
52 #include <sys/smp.h>
53 #include <sys/systm.h>
54 #include <sys/syslog.h>
55 
56 #include <netinet/in_systm.h>
57 #include <netinet/in.h>
58 #include <netinet/ip.h>
59 #include <netinet/tcp.h>
60 
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 
67 #ifdef CONFIG_DEFINED
68 #include <cxgb_include.h>
69 #include <sys/mvec.h>
70 #else
71 #include <dev/cxgb/cxgb_include.h>
72 #include <dev/cxgb/sys/mvec.h>
73 #endif
74 
75 int      txq_fills = 0;
76 /*
77  * XXX don't re-enable this until TOE stops assuming
78  * we have an m_ext
79  */
80 static int recycle_enable = 0;
81 extern int cxgb_txq_buf_ring_size;
82 int cxgb_cached_allocations;
83 int cxgb_cached;
84 int cxgb_ext_freed = 0;
85 int cxgb_ext_inited = 0;
86 extern int cxgb_use_16k_clusters;
87 extern int cxgb_pcpu_cache_enable;
88 
89 
90 #define USE_GTS 0
91 
92 #define SGE_RX_SM_BUF_SIZE	1536
93 #define SGE_RX_DROP_THRES	16
94 #define SGE_RX_COPY_THRES	128
95 
96 /*
97  * Period of the Tx buffer reclaim timer.  This timer does not need to run
98  * frequently as Tx buffers are usually reclaimed by new Tx packets.
99  */
100 #define TX_RECLAIM_PERIOD       (hz >> 1)
101 
102 /*
103  * Values for sge_txq.flags
104  */
105 enum {
106 	TXQ_RUNNING	= 1 << 0,  /* fetch engine is running */
107 	TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
108 };
109 
110 struct tx_desc {
111 	uint64_t	flit[TX_DESC_FLITS];
112 } __packed;
113 
114 struct rx_desc {
115 	uint32_t	addr_lo;
116 	uint32_t	len_gen;
117 	uint32_t	gen2;
118 	uint32_t	addr_hi;
119 } __packed;
120 
121 struct rsp_desc {               /* response queue descriptor */
122 	struct rss_header	rss_hdr;
123 	uint32_t		flags;
124 	uint32_t		len_cq;
125 	uint8_t			imm_data[47];
126 	uint8_t			intr_gen;
127 } __packed;
128 
129 #define RX_SW_DESC_MAP_CREATED	(1 << 0)
130 #define TX_SW_DESC_MAP_CREATED	(1 << 1)
131 #define RX_SW_DESC_INUSE        (1 << 3)
132 #define TX_SW_DESC_MAPPED       (1 << 4)
133 
134 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
135 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
136 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
137 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
138 
139 struct tx_sw_desc {                /* SW state per Tx descriptor */
140 	struct mbuf_iovec mi;
141 	bus_dmamap_t	map;
142 	int		flags;
143 };
144 
145 struct rx_sw_desc {                /* SW state per Rx descriptor */
146 	caddr_t	         rxsd_cl;
147 	caddr_t	         data;
148 	bus_dmamap_t	  map;
149 	int		  flags;
150 };
151 
152 struct txq_state {
153 	unsigned int compl;
154 	unsigned int gen;
155 	unsigned int pidx;
156 };
157 
158 struct refill_fl_cb_arg {
159 	int               error;
160 	bus_dma_segment_t seg;
161 	int               nseg;
162 };
163 
164 /*
165  * Maps a number of flits to the number of Tx descriptors that can hold them.
166  * The formula is
167  *
168  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
169  *
170  * HW allows up to 4 descriptors to be combined into a WR.
171  */
172 static uint8_t flit_desc_map[] = {
173 	0,
174 #if SGE_NUM_GENBITS == 1
175 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
176 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
177 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
178 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
179 #elif SGE_NUM_GENBITS == 2
180 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
181 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
182 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
183 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
184 #else
185 # error "SGE_NUM_GENBITS must be 1 or 2"
186 #endif
187 };
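/*
 * A worked example of the formula above (a sketch only -- WR_FLITS == 15 is
 * inferred from the SGE_NUM_GENBITS == 2 table rather than quoted from the
 * hardware documentation):
 *
 *	flits = 15: 1 + (15 - 2) / 14 = 1 descriptor
 *	flits = 16: 1 + (16 - 2) / 14 = 2 descriptors
 *	flits = 30: 1 + (30 - 2) / 14 = 3 descriptors
 *
 * which matches the table entries for the two-generation-bit case.
 */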
188 
189 
190 static int lro_default = 0;
191 int cxgb_debug = 0;
192 
193 static void sge_timer_cb(void *arg);
194 static void sge_timer_reclaim(void *arg, int ncount);
195 static void sge_txq_reclaim_handler(void *arg, int ncount);
196 
197 /**
198  *	reclaim_completed_tx_ - reclaims completed Tx descriptors
199  *	@q: the Tx queue to reclaim completed descriptors from
200  *	@reclaim_min: don't bother unless at least this many descriptors can be reclaimed
201  *
202  *	Reclaims Tx descriptors that the SGE has indicated it has processed,
203  *	and frees the associated buffers if possible.  Called with the Tx
204  *	queue's lock held.
205  */
206 static __inline int
207 reclaim_completed_tx_(struct sge_txq *q, int reclaim_min)
208 {
209 	int reclaim = desc_reclaimable(q);
210 
211 	if (reclaim < reclaim_min)
212 		return (0);
213 
214 	mtx_assert(&q->lock, MA_OWNED);
215 	if (reclaim > 0) {
216 		t3_free_tx_desc(q, reclaim);
217 		q->cleaned += reclaim;
218 		q->in_use -= reclaim;
219 	}
220 	return (reclaim);
221 }
222 
223 /**
224  *	should_restart_tx - are there enough resources to restart a Tx queue?
225  *	@q: the Tx queue
226  *
227  *	Checks if there are enough descriptors to restart a suspended Tx queue.
228  */
229 static __inline int
230 should_restart_tx(const struct sge_txq *q)
231 {
232 	unsigned int r = q->processed - q->cleaned;
233 
234 	return q->in_use - r < (q->size >> 1);
235 }
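/*
 * Illustrative numbers only: with q->size == 1024, q->in_use == 600, and
 * 200 descriptors processed by the SGE but not yet cleaned, the effective
 * usage is 600 - 200 = 400, which is below the half-size threshold of 512,
 * so the suspended queue may be restarted.
 */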
236 
237 /**
238  *	t3_sge_init - initialize SGE
239  *	@adap: the adapter
240  *	@p: the SGE parameters
241  *
242  *	Performs SGE initialization needed every time after a chip reset.
243  *	We do not initialize any of the queue sets here, instead the driver
244  *	We do not initialize any of the queue sets here; instead, the driver
245  *	here, that should be done after the queues have been set up.
246  */
247 void
248 t3_sge_init(adapter_t *adap, struct sge_params *p)
249 {
250 	u_int ctrl, ups;
251 
252 	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
253 
254 	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
255 	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
256 	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
257 	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
258 #if SGE_NUM_GENBITS == 1
259 	ctrl |= F_EGRGENCTRL;
260 #endif
261 	if (adap->params.rev > 0) {
262 		if (!(adap->flags & (USING_MSIX | USING_MSI)))
263 			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
264 	}
265 	t3_write_reg(adap, A_SG_CONTROL, ctrl);
266 	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
267 		     V_LORCQDRBTHRSH(512));
268 	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
269 	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
270 		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
271 	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
272 		     adap->params.rev < T3_REV_C ? 1000 : 500);
273 	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
274 	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
275 	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
276 	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
277 	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
278 }
279 
280 
281 /**
282  *	sgl_len - calculates the size of an SGL of the given capacity
283  *	@n: the number of SGL entries
284  *
285  *	Calculates the number of flits needed for a scatter/gather list that
286  *	can hold the given number of entries.
287  */
288 static __inline unsigned int
289 sgl_len(unsigned int n)
290 {
291 	return ((3 * n) / 2 + (n & 1));
292 }
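/*
 * A minimal sketch (not compiled) of how the flit accounting above behaves
 * for small entry counts; the values follow directly from the formula, with
 * each SGL entry costing 1.5 flits rounded up to a whole flit at the end.
 */
#if 0
static void
sgl_len_example(void)
{
	KASSERT(sgl_len(1) == 2, ("one entry needs two flits"));
	KASSERT(sgl_len(2) == 3, ("two entries pack into three flits"));
	KASSERT(sgl_len(3) == 5, ("three entries need five flits"));
	KASSERT(sgl_len(4) == 6, ("four entries pack into six flits"));
}
#endif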
293 
294 /**
295  *	get_imm_packet - extract the immediate data from a response
296  *	@resp: the response descriptor containing the packet data
297  *
298  *	Copies the immediate data of the given response into the supplied mbuf.
299  */
300 static int
301 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
302 {
303 
304 	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
305 	m->m_ext.ext_buf = NULL;
306 	m->m_ext.ext_type = 0;
307 	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
308 	return (0);
309 }
310 
311 static __inline u_int
312 flits_to_desc(u_int n)
313 {
314 	return (flit_desc_map[n]);
315 }
316 
317 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
318 		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
319 		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
320 		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
321 		    F_HIRCQPARITYERROR)
322 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
323 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
324 		      F_RSPQDISABLED)
325 
326 /**
327  *	t3_sge_err_intr_handler - SGE async event interrupt handler
328  *	@adapter: the adapter
329  *
330  *	Interrupt handler for SGE asynchronous (non-data) events.
331  */
332 void
333 t3_sge_err_intr_handler(adapter_t *adapter)
334 {
335 	unsigned int v, status;
336 
337 	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
338 	if (status & SGE_PARERR)
339 		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
340 			 status & SGE_PARERR);
341 	if (status & SGE_FRAMINGERR)
342 		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
343 			 status & SGE_FRAMINGERR);
344 	if (status & F_RSPQCREDITOVERFOW)
345 		CH_ALERT(adapter, "SGE response queue credit overflow\n");
346 
347 	if (status & F_RSPQDISABLED) {
348 		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
349 
350 		CH_ALERT(adapter,
351 			 "packet delivered to disabled response queue (0x%x)\n",
352 			 (v >> S_RSPQ0DISABLED) & 0xff);
353 	}
354 
355 	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
356 	if (status & SGE_FATALERR)
357 		t3_fatal_err(adapter);
358 }
359 
360 void
361 t3_sge_prep(adapter_t *adap, struct sge_params *p)
362 {
363 	int i;
364 
365 	/* XXX Does ETHER_ALIGN need to be accounted for here? */
366 	p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);
367 
368 	for (i = 0; i < SGE_QSETS; ++i) {
369 		struct qset_params *q = p->qset + i;
370 
371 		if (adap->params.nports > 2) {
372 			q->coalesce_nsecs = 50000;
373 		} else {
374 #ifdef INVARIANTS
375 			q->coalesce_nsecs = 10000;
376 #else
377 			q->coalesce_nsecs = 5000;
378 #endif
379 		}
380 		q->polling = adap->params.rev > 0;
381 		q->rspq_size = RSPQ_Q_SIZE;
382 		q->fl_size = FL_Q_SIZE;
383 		q->jumbo_size = JUMBO_Q_SIZE;
384 		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
385 		q->txq_size[TXQ_OFLD] = 1024;
386 		q->txq_size[TXQ_CTRL] = 256;
387 		q->cong_thres = 0;
388 	}
389 }
390 
391 int
392 t3_sge_alloc(adapter_t *sc)
393 {
394 
395 	/* The parent tag. */
396 	if (bus_dma_tag_create( NULL,			/* parent */
397 				1, 0,			/* algnmnt, boundary */
398 				BUS_SPACE_MAXADDR,	/* lowaddr */
399 				BUS_SPACE_MAXADDR,	/* highaddr */
400 				NULL, NULL,		/* filter, filterarg */
401 				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
402 				BUS_SPACE_UNRESTRICTED, /* nsegments */
403 				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
404 				0,			/* flags */
405 				NULL, NULL,		/* lock, lockarg */
406 				&sc->parent_dmat)) {
407 		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
408 		return (ENOMEM);
409 	}
410 
411 	/*
412 	 * DMA tag for normal sized RX frames
413 	 */
414 	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
415 		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
416 		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
417 		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
418 		return (ENOMEM);
419 	}
420 
421 	/*
422 	 * DMA tag for jumbo sized RX frames.
423 	 */
424 	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
425 		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
426 		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
427 		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
428 		return (ENOMEM);
429 	}
430 
431 	/*
432 	 * DMA tag for TX frames.
433 	 */
434 	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
435 		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
436 		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
437 		NULL, NULL, &sc->tx_dmat)) {
438 		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
439 		return (ENOMEM);
440 	}
441 
442 	return (0);
443 }
444 
445 int
446 t3_sge_free(struct adapter * sc)
447 {
448 
449 	if (sc->tx_dmat != NULL)
450 		bus_dma_tag_destroy(sc->tx_dmat);
451 
452 	if (sc->rx_jumbo_dmat != NULL)
453 		bus_dma_tag_destroy(sc->rx_jumbo_dmat);
454 
455 	if (sc->rx_dmat != NULL)
456 		bus_dma_tag_destroy(sc->rx_dmat);
457 
458 	if (sc->parent_dmat != NULL)
459 		bus_dma_tag_destroy(sc->parent_dmat);
460 
461 	return (0);
462 }
463 
464 void
465 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
466 {
467 
468 	qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
469 	qs->rspq.polling = 0 /* p->polling */;
470 }
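/*
 * A short worked example of the conversion above: t3_sge_init() programs the
 * SGE timer tick to one tenth of a microsecond, so dividing coalesce_nsecs
 * by 100 yields the holdoff in 100ns ticks.  With, say, coalesce_nsecs ==
 * 5000 (one of the defaults set in t3_sge_prep()) this gives
 * holdoff_tmr == 50, i.e. a 5us interrupt holdoff.
 */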
471 
472 #if !defined(__i386__) && !defined(__amd64__)
473 static void
474 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
475 {
476 	struct refill_fl_cb_arg *cb_arg = arg;
477 
478 	cb_arg->error = error;
479 	cb_arg->seg = segs[0];
480 	cb_arg->nseg = nseg;
481 
482 }
483 #endif
484 /**
485  *	refill_fl - refill an SGE free-buffer list
486  *	@sc: the controller softc
487  *	@q: the free-list to refill
488  *	@n: the number of new buffers to allocate
489  *
490  *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
491  *	The caller must ensure that @n does not exceed the queue's capacity.
492  */
493 static void
494 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
495 {
496 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
497 	struct rx_desc *d = &q->desc[q->pidx];
498 	struct refill_fl_cb_arg cb_arg;
499 	caddr_t cl;
500 	int err, count = 0;
501 	int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
502 
503 	cb_arg.error = 0;
504 	while (n--) {
505 		/*
506 		 * We only allocate a cluster; mbuf allocation happens after rx
507 		 */
508 		if ((cl = cxgb_cache_get(q->zone)) == NULL) {
509 			log(LOG_WARNING, "Failed to allocate cluster\n");
510 			goto done;
511 		}
512 
513 		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
514 			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
515 				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
516 				uma_zfree(q->zone, cl);
517 				goto done;
518 			}
519 			sd->flags |= RX_SW_DESC_MAP_CREATED;
520 		}
521 #if !defined(__i386__) && !defined(__amd64__)
522 		err = bus_dmamap_load(q->entry_tag, sd->map,
523 		    cl + header_size, q->buf_size,
524 		    refill_fl_cb, &cb_arg, 0);
525 
526 		if (err != 0 || cb_arg.error) {
527 			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
528 			/*
529 			 * XXX free cluster
530 			 */
531 			return;
532 		}
533 #else
534 		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size));
535 #endif
536 		sd->flags |= RX_SW_DESC_INUSE;
537 		sd->rxsd_cl = cl;
538 		sd->data = cl + header_size;
539 		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
540 		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
541 		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
542 		d->gen2 = htobe32(V_FLD_GEN2(q->gen));
543 
544 		d++;
545 		sd++;
546 
547 		if (++q->pidx == q->size) {
548 			q->pidx = 0;
549 			q->gen ^= 1;
550 			sd = q->sdesc;
551 			d = q->desc;
552 		}
553 		q->credits++;
554 		count++;
555 	}
556 
557 done:
558 	if (count)
559 		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
560 }
561 
562 
563 /**
564  *	free_rx_bufs - free the Rx buffers on an SGE free list
565  *	@sc: the controller softc
566  *	@q: the SGE free list to clean up
567  *
568  *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
569  *	this queue should be stopped before calling this function.
570  */
571 static void
572 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
573 {
574 	u_int cidx = q->cidx;
575 
576 	while (q->credits--) {
577 		struct rx_sw_desc *d = &q->sdesc[cidx];
578 
579 		if (d->flags & RX_SW_DESC_INUSE) {
580 			bus_dmamap_unload(q->entry_tag, d->map);
581 			bus_dmamap_destroy(q->entry_tag, d->map);
582 			uma_zfree(q->zone, d->rxsd_cl);
583 		}
584 		d->rxsd_cl = NULL;
585 		if (++cidx == q->size)
586 			cidx = 0;
587 	}
588 }
589 
590 static __inline void
591 __refill_fl(adapter_t *adap, struct sge_fl *fl)
592 {
593 	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
594 }
595 
596 static __inline void
597 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
598 {
599 	if ((fl->size - fl->credits) < max)
600 		refill_fl(adap, fl, min(max, fl->size - fl->credits));
601 }
602 
603 void
604 refill_fl_service(adapter_t *adap, struct sge_fl *fl)
605 {
606 	__refill_fl_lt(adap, fl, 512);
607 }
608 
609 /**
610  *	recycle_rx_buf - recycle a receive buffer
611  *	@adapter: the adapter
612  *	@q: the SGE free list
613  *	@idx: index of buffer to recycle
614  *
615  *	Recycles the specified buffer on the given free list by adding it at
616  *	the next available slot on the list.
617  */
618 static void
619 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
620 {
621 	struct rx_desc *from = &q->desc[idx];
622 	struct rx_desc *to   = &q->desc[q->pidx];
623 
624 	q->sdesc[q->pidx] = q->sdesc[idx];
625 	to->addr_lo = from->addr_lo;        // already big endian
626 	to->addr_hi = from->addr_hi;        // likewise
627 	wmb();
628 	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
629 	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
630 	q->credits++;
631 
632 	if (++q->pidx == q->size) {
633 		q->pidx = 0;
634 		q->gen ^= 1;
635 	}
636 	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
637 }
638 
639 static void
640 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
641 {
642 	uint32_t *addr;
643 
644 	addr = arg;
645 	*addr = segs[0].ds_addr;
646 }
647 
648 static int
649 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
650     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
651     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
652 {
653 	size_t len = nelem * elem_size;
654 	void *s = NULL;
655 	void *p = NULL;
656 	int err;
657 
658 	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
659 				      BUS_SPACE_MAXADDR_32BIT,
660 				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
661 				      len, 0, NULL, NULL, tag)) != 0) {
662 		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
663 		return (ENOMEM);
664 	}
665 
666 	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
667 				    map)) != 0) {
668 		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
669 		return (ENOMEM);
670 	}
671 
672 	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
673 	bzero(p, len);
674 	*(void **)desc = p;
675 
676 	if (sw_size) {
677 		len = nelem * sw_size;
678 		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
679 		*(void **)sdesc = s;
680 	}
681 	if (parent_entry_tag == NULL)
682 		return (0);
683 
684 	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
685 				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
686 		                      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
687 				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
688 		                      NULL, NULL, entry_tag)) != 0) {
689 		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
690 		return (ENOMEM);
691 	}
692 	return (0);
693 }
694 
695 static void
696 sge_slow_intr_handler(void *arg, int ncount)
697 {
698 	adapter_t *sc = arg;
699 
700 	t3_slow_intr_handler(sc);
701 }
702 
703 /**
704  *	sge_timer_cb - perform periodic maintenance of the SGE queue sets
705  *	@arg: the adapter
706  *
707  *	Runs periodically from a timer to perform maintenance of the SGE queue
708  *	sets.  It performs the following tasks:
709  *
710  *	a) Cleans up any completed Tx descriptors that may still be pending.
711  *	Normal descriptor cleanup happens when new packets are added to a Tx
712  *	queue so this timer is relatively infrequent and does any cleanup only
713  *	if the Tx queue has not seen any new packets in a while.  We make a
714  *	best effort attempt to reclaim descriptors, in that we don't wait
715  *	around if we cannot get a queue's lock (which most likely is because
716  *	someone else is queueing new packets and so will also handle the clean
717  *	up).  Since control queues use immediate data exclusively we don't
718  *	bother cleaning them up here.
719  *
720  *	b) Replenishes Rx queues that have run out due to memory shortage.
721  *	Normally new Rx buffers are added when existing ones are consumed but
722  *	when out of memory a queue can become empty.  We try to add only a few
723  *	buffers here, the queue will be replenished fully as these new buffers
724  *	are used up if memory shortage has subsided.
725  *
726  *	c) Return coalesced response queue credits in case a response queue is
727  *	starved.
728  *
729  *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
730  *	fifo overflows and the FW doesn't implement any recovery scheme yet.
731  */
732 static void
733 sge_timer_cb(void *arg)
734 {
735 	adapter_t *sc = arg;
736 #ifndef IFNET_MULTIQUEUE
737 	struct port_info *pi;
738 	struct sge_qset *qs;
739 	struct sge_txq  *txq;
740 	int i, j;
741 	int reclaim_ofl, refill_rx;
742 
743 	for (i = 0; i < sc->params.nports; i++)
744 		for (j = 0; j < sc->port[i].nqsets; j++) {
745 			qs = &sc->sge.qs[i + j];
746 			txq = &qs->txq[0];
747 			reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
748 			refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
749 			    (qs->fl[1].credits < qs->fl[1].size));
750 			if (reclaim_ofl || refill_rx) {
751 				pi = &sc->port[i];
752 				taskqueue_enqueue(pi->tq, &pi->timer_reclaim_task);
753 				break;
754 			}
755 		}
756 #endif
757 	if (sc->params.nports > 2) {
758 		int i;
759 
760 		for_each_port(sc, i) {
761 			struct port_info *pi = &sc->port[i];
762 
763 			t3_write_reg(sc, A_SG_KDOORBELL,
764 				     F_SELEGRCNTX |
765 				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
766 		}
767 	}
768 	if (sc->open_device_map != 0)
769 		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
770 }
771 
772 /*
773  * This is meant to be a catch-all function to keep sge state private
774  * to sge.c
775  *
776  */
777 int
778 t3_sge_init_adapter(adapter_t *sc)
779 {
780 	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
781 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
782 	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
783 	mi_init();
784 	cxgb_cache_init();
785 	return (0);
786 }
787 
788 int
789 t3_sge_reset_adapter(adapter_t *sc)
790 {
791 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
792 	return (0);
793 }
794 
795 int
796 t3_sge_init_port(struct port_info *pi)
797 {
798 	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
799 	return (0);
800 }
801 
802 void
803 t3_sge_deinit_sw(adapter_t *sc)
804 {
805 
806 	mi_deinit();
807 }
808 
809 /**
810  *	refill_rspq - replenish an SGE response queue
811  *	@adapter: the adapter
812  *	@q: the response queue to replenish
813  *	@credits: how many new responses to make available
814  *
815  *	Replenishes a response queue by making the supplied number of responses
816  *	available to HW.
817  */
818 static __inline void
819 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
820 {
821 
822 	/* mbufs are allocated on demand when a rspq entry is processed. */
823 	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
824 		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
825 }
826 
827 static __inline void
828 sge_txq_reclaim_(struct sge_txq *txq, int force)
829 {
830 
831 	if (desc_reclaimable(txq) < 16)
832 		return;
833 	if (mtx_trylock(&txq->lock) == 0)
834 		return;
835 	reclaim_completed_tx_(txq, 16);
836 	mtx_unlock(&txq->lock);
837 
838 }
839 
840 static void
841 sge_txq_reclaim_handler(void *arg, int ncount)
842 {
843 	struct sge_txq *q = arg;
844 
845 	sge_txq_reclaim_(q, TRUE);
846 }
847 
848 
849 
850 static void
851 sge_timer_reclaim(void *arg, int ncount)
852 {
853 	struct port_info *pi = arg;
854 	int i, nqsets = pi->nqsets;
855 	adapter_t *sc = pi->adapter;
856 	struct sge_qset *qs;
857 	struct sge_txq *txq;
858 	struct mtx *lock;
859 
860 #ifdef IFNET_MULTIQUEUE
861 	panic("%s should not be called with multiqueue support\n", __FUNCTION__);
862 #endif
863 	for (i = 0; i < nqsets; i++) {
864 		qs = &sc->sge.qs[i];
865 
866 		txq = &qs->txq[TXQ_OFLD];
867 		sge_txq_reclaim_(txq, FALSE);
868 
869 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
870 			    &sc->sge.qs[0].rspq.lock;
871 
872 		if (mtx_trylock(lock)) {
873 			/* XXX currently assume that we are *NOT* polling */
874 			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
875 
876 			if (qs->fl[0].credits < qs->fl[0].size - 16)
877 				__refill_fl(sc, &qs->fl[0]);
878 			if (qs->fl[1].credits < qs->fl[1].size - 16)
879 				__refill_fl(sc, &qs->fl[1]);
880 
881 			if (status & (1 << qs->rspq.cntxt_id)) {
882 				if (qs->rspq.credits) {
883 					refill_rspq(sc, &qs->rspq, 1);
884 					qs->rspq.credits--;
885 					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
886 					    1 << qs->rspq.cntxt_id);
887 				}
888 			}
889 			mtx_unlock(lock);
890 		}
891 	}
892 }
893 
894 /**
895  *	init_qset_cntxt - initialize an SGE queue set context info
896  *	@qs: the queue set
897  *	@id: the queue set id
898  *
899  *	Initializes the TIDs and context ids for the queues of a queue set.
900  */
901 static void
902 init_qset_cntxt(struct sge_qset *qs, u_int id)
903 {
904 
905 	qs->rspq.cntxt_id = id;
906 	qs->fl[0].cntxt_id = 2 * id;
907 	qs->fl[1].cntxt_id = 2 * id + 1;
908 	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
909 	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
910 	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
911 	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
912 	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
913 
914 	mbufq_init(&qs->txq[TXQ_ETH].sendq);
915 	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
916 	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
917 }
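/*
 * For illustration, queue set id 1 maps to: response queue context 1,
 * free-list contexts 2 and 3, Ethernet egress context
 * FW_TUNNEL_SGEEC_START + 1, offload egress context FW_OFLD_SGEEC_START + 1,
 * and control egress context FW_CTRL_SGEEC_START + 1, with the matching
 * tunnel and control TIDs offset by the same id.
 */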
918 
919 
920 static void
921 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
922 {
923 	txq->in_use += ndesc;
924 	/*
925 	 * XXX we don't handle stopping of the queue here;
926 	 * presumably start handles this when we bump against the end.
927 	 */
928 	txqs->gen = txq->gen;
929 	txq->unacked += ndesc;
930 	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
931 	txq->unacked &= 31;
932 	txqs->pidx = txq->pidx;
933 	txq->pidx += ndesc;
934 #ifdef INVARIANTS
935 	if (((txqs->pidx > txq->cidx) &&
936 		(txq->pidx < txqs->pidx) &&
937 		(txq->pidx >= txq->cidx)) ||
938 	    ((txqs->pidx < txq->cidx) &&
939 		(txq->pidx >= txq->cidx)) ||
940 	    ((txqs->pidx < txq->cidx) &&
941 		(txq->cidx < txqs->pidx)))
942 		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
943 		    txqs->pidx, txq->pidx, txq->cidx);
944 #endif
945 	if (txq->pidx >= txq->size) {
946 		txq->pidx -= txq->size;
947 		txq->gen ^= 1;
948 	}
949 
950 }
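/*
 * Worked example of the completion accounting above (illustrative numbers
 * only): unacked is kept modulo 64 by the "& 32" / "&= 31" pair, so a WR
 * completion request is generated roughly once per 32 descriptors.  If
 * unacked was 30 and ndesc is 4, unacked becomes 34, the & 32 test is
 * non-zero so txqs->compl is set, and unacked is reduced to 34 & 31 == 2.
 */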
951 
952 /**
953  *	calc_tx_descs - calculate the number of Tx descriptors for a packet
954  *	@m: the packet mbufs
955  *      @nsegs: the number of segments
956  *
957  * 	Returns the number of Tx descriptors needed for the given Ethernet
958  * 	packet.  Ethernet packets require addition of WR and CPL headers.
959  */
960 static __inline unsigned int
961 calc_tx_descs(const struct mbuf *m, int nsegs)
962 {
963 	unsigned int flits;
964 
965 	if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
966 		return 1;
967 
968 	flits = sgl_len(nsegs) + 2;
969 #ifdef TSO_SUPPORTED
970 	if (m->m_pkthdr.csum_flags & CSUM_TSO)
971 		flits++;
972 #endif
973 	return flits_to_desc(flits);
974 }
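/*
 * Example (illustrative): a non-TSO packet too large for immediate data that
 * maps to 3 DMA segments needs sgl_len(3) + 2 == 7 flits, and
 * flits_to_desc(7) == 1, so it still fits in a single Tx descriptor.
 */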
975 
976 static unsigned int
977 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
978     struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
979 {
980 	struct mbuf *m0;
981 	int err, pktlen, pass = 0;
982 
983 retry:
984 	err = 0;
985 	m0 = *m;
986 	pktlen = m0->m_pkthdr.len;
987 #if defined(__i386__) || defined(__amd64__)
988 	if (busdma_map_sg_collapse(m, segs, nsegs) == 0) {
989 		goto done;
990 	} else
991 #endif
992 		err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);
993 
994 	if (err == 0) {
995 		goto done;
996 	}
997 	if (err == EFBIG && pass == 0) {
998 		pass = 1;
999 		/* Too many segments, try to defrag */
1000 		m0 = m_defrag(m0, M_DONTWAIT);
1001 		if (m0 == NULL) {
1002 			m_freem(*m);
1003 			*m = NULL;
1004 			return (ENOBUFS);
1005 		}
1006 		*m = m0;
1007 		goto retry;
1008 	} else if (err == ENOMEM) {
1009 		return (err);
1010 	} else if (err) {
1011 		if (cxgb_debug)
1012 			printf("map failure err=%d pktlen=%d\n", err, pktlen);
1013 		m_freem(m0);
1014 		*m = NULL;
1015 		return (err);
1016 	}
1017 done:
1018 #if !defined(__i386__) && !defined(__amd64__)
1019 	bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE);
1020 #endif
1021 	txsd->flags |= TX_SW_DESC_MAPPED;
1022 
1023 	return (0);
1024 }
1025 
1026 /**
1027  *	make_sgl - populate a scatter/gather list for a packet
1028  *	@sgp: the SGL to populate
1029  *	@segs: the packet dma segments
1030  *	@nsegs: the number of segments
1031  *
1032  *	Generates a scatter/gather list for the buffers that make up a packet
1033  *	and returns the SGL size in 8-byte words.  The caller must size the SGL
1034  *	appropriately.
1035  */
1036 static __inline void
1037 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1038 {
1039 	int i, idx;
1040 
1041 	for (idx = 0, i = 0; i < nsegs; i++) {
1042 		/*
1043 		 * firmware doesn't like empty segments
1044 		 */
1045 		if (segs[i].ds_len == 0)
1046 			continue;
1047 		if (i && idx == 0)
1048 			++sgp;
1049 
1050 		sgp->len[idx] = htobe32(segs[i].ds_len);
1051 		sgp->addr[idx] = htobe64(segs[i].ds_addr);
1052 		idx ^= 1;
1053 	}
1054 
1055 	if (idx) {
1056 		sgp->len[idx] = 0;
1057 		sgp->addr[idx] = 0;
1058 	}
1059 }
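/*
 * Layout note: each struct sg_ent packs two length/address pairs, so idx
 * toggles between the two slots and sgp only advances every other non-empty
 * segment.  Three non-empty segments, for example, fill one sg_ent plus the
 * first slot of a second; the unused trailing slot is zeroed as padding.
 */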
1060 
1061 /**
1062  *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1063  *	@adap: the adapter
1064  *	@q: the Tx queue
1065  *
1066  *	Ring the doorbell if a Tx queue is asleep.  There is a natural race
1067  *	where the HW may go to sleep just after we checked; in that case
1068  *	the interrupt handler will detect the outstanding TX packet
1069  *	and ring the doorbell for us.
1070  *
1071  *	When GTS is disabled we unconditionally ring the doorbell.
1072  */
1073 static __inline void
1074 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1075 {
1076 #if USE_GTS
1077 	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1078 	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1079 		set_bit(TXQ_LAST_PKT_DB, &q->flags);
1080 #ifdef T3_TRACE
1081 		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1082 			  q->cntxt_id);
1083 #endif
1084 		t3_write_reg(adap, A_SG_KDOORBELL,
1085 			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1086 	}
1087 #else
1088 	wmb();            /* write descriptors before telling HW */
1089 	t3_write_reg(adap, A_SG_KDOORBELL,
1090 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1091 #endif
1092 }
1093 
1094 static __inline void
1095 wr_gen2(struct tx_desc *d, unsigned int gen)
1096 {
1097 #if SGE_NUM_GENBITS == 2
1098 	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1099 #endif
1100 }
1101 
1102 /**
1103  *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
1104  *	@ndesc: number of Tx descriptors spanned by the SGL
1105  *	@txd: first Tx descriptor to be written
1106  *	@txqs: txq state (generation and producer index)
1107  *	@txq: the SGE Tx queue
1108  *	@sgl: the SGL
1109  *	@flits: number of flits to the start of the SGL in the first descriptor
1110  *	@sgl_flits: the SGL size in flits
1111  *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
1112  *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
1113  *
1114  *	Write a work request header and an associated SGL.  If the SGL is
1115  *	small enough to fit into one Tx descriptor it has already been written
1116  *	and we just need to write the WR header.  Otherwise we distribute the
1117  *	SGL across the number of descriptors it spans.
1118  */
1119 static void
1120 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1121     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1122     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1123 {
1124 
1125 	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1126 	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1127 
1128 	if (__predict_true(ndesc == 1)) {
1129 		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1130 		    V_WR_SGLSFLT(flits)) | wr_hi;
1131 		wmb();
1132 		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1133 		    V_WR_GEN(txqs->gen)) | wr_lo;
1134 		/* XXX gen? */
1135 		wr_gen2(txd, txqs->gen);
1136 
1137 	} else {
1138 		unsigned int ogen = txqs->gen;
1139 		const uint64_t *fp = (const uint64_t *)sgl;
1140 		struct work_request_hdr *wp = wrp;
1141 
1142 		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1143 		    V_WR_SGLSFLT(flits)) | wr_hi;
1144 
1145 		while (sgl_flits) {
1146 			unsigned int avail = WR_FLITS - flits;
1147 
1148 			if (avail > sgl_flits)
1149 				avail = sgl_flits;
1150 			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1151 			sgl_flits -= avail;
1152 			ndesc--;
1153 			if (!sgl_flits)
1154 				break;
1155 
1156 			fp += avail;
1157 			txd++;
1158 			txsd++;
1159 			if (++txqs->pidx == txq->size) {
1160 				txqs->pidx = 0;
1161 				txqs->gen ^= 1;
1162 				txd = txq->desc;
1163 				txsd = txq->sdesc;
1164 			}
1165 
1166 			/*
1167 			 * when the head of the mbuf chain
1168 			 * is freed all clusters will be freed
1169 			 * with it
1170 			 */
1171 			KASSERT(txsd->mi.mi_base == NULL,
1172 			    ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1173 			wrp = (struct work_request_hdr *)txd;
1174 			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1175 			    V_WR_SGLSFLT(1)) | wr_hi;
1176 			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1177 				    sgl_flits + 1)) |
1178 			    V_WR_GEN(txqs->gen)) | wr_lo;
1179 			wr_gen2(txd, txqs->gen);
1180 			flits = 1;
1181 		}
1182 		wrp->wr_hi |= htonl(F_WR_EOP);
1183 		wmb();
1184 		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1185 		wr_gen2((struct tx_desc *)wp, ogen);
1186 	}
1187 }
1188 
1189 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1190 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
1191 
1192 #ifdef VLAN_SUPPORTED
1193 #define GET_VTAG(cntrl, m) \
1194 do { \
1195 	if ((m)->m_flags & M_VLANTAG)					            \
1196 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1197 } while (0)
1198 
1199 #define GET_VTAG_MI(cntrl, mi) \
1200 do { \
1201 	if ((mi)->mi_flags & M_VLANTAG)					\
1202 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \
1203 } while (0)
1204 #else
1205 #define GET_VTAG(cntrl, m)
1206 #define GET_VTAG_MI(cntrl, m)
1207 #endif
1208 
1209 int
1210 t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
1211 {
1212 	adapter_t *sc;
1213 	struct mbuf *m0;
1214 	struct sge_txq *txq;
1215 	struct txq_state txqs;
1216 	struct port_info *pi;
1217 	unsigned int ndesc, flits, cntrl, mlen;
1218 	int err, nsegs, tso_info = 0;
1219 
1220 	struct work_request_hdr *wrp;
1221 	struct tx_sw_desc *txsd;
1222 	struct sg_ent *sgp, *sgl;
1223 	uint32_t wr_hi, wr_lo, sgl_flits;
1224 	bus_dma_segment_t segs[TX_MAX_SEGS];
1225 
1226 	struct tx_desc *txd;
1227 	struct mbuf_vec *mv;
1228 	struct mbuf_iovec *mi;
1229 
1230 	DPRINTF("t3_encap cpu=%d ", curcpu);
1231 
1232 	mi = NULL;
1233 	pi = qs->port;
1234 	sc = pi->adapter;
1235 	txq = &qs->txq[TXQ_ETH];
1236 	txd = &txq->desc[txq->pidx];
1237 	txsd = &txq->sdesc[txq->pidx];
1238 	sgl = txq->txq_sgl;
1239 	m0 = *m;
1240 
1241 	DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
1242 	DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);
1243 	if (cxgb_debug)
1244 		printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx);
1245 
1246 	mtx_assert(&txq->lock, MA_OWNED);
1247 	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1248 /*
1249  * XXX need to add VLAN support for 6.x
1250  */
1251 #ifdef VLAN_SUPPORTED
1252 	if  (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1253 		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1254 #endif
1255 	KASSERT(txsd->mi.mi_base == NULL,
1256 	    ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1257 	if (count > 1) {
1258 		panic("count > 1 not supported in CVS\n");
1259 		if ((err = busdma_map_sg_vec(m, &m0, segs, count)))
1260 			return (err);
1261 		nsegs = count;
1262 	} else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) {
1263 		if (cxgb_debug)
1264 			printf("failed ... err=%d\n", err);
1265 		return (err);
1266 	}
1267 	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count));
1268 
1269 	if (!(m0->m_pkthdr.len <= PIO_LEN)) {
1270 		mi_collapse_mbuf(&txsd->mi, m0);
1271 		mi = &txsd->mi;
1272 	}
1273 	if (count > 1) {
1274 		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1275 		int i, fidx;
1276 		struct mbuf_iovec *batchmi;
1277 
1278 		mv = mtomv(m0);
1279 		batchmi = mv->mv_vec;
1280 
1281 		wrp = (struct work_request_hdr *)txd;
1282 
1283 		flits = count*2 + 1;
1284 		txq_prod(txq, 1, &txqs);
1285 
1286 		for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) {
1287 			struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i];
1288 
1289 			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1290 			GET_VTAG_MI(cntrl, batchmi);
1291 			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1292 			cbe->cntrl = htonl(cntrl);
1293 			cbe->len = htonl(batchmi->mi_len | 0x80000000);
1294 			cbe->addr = htobe64(segs[i].ds_addr);
1295 			txd->flit[fidx] |= htobe64(1 << 24);
1296 		}
1297 
1298 		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1299 		    V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1300 		wmb();
1301 		wrp->wr_lo = htonl(V_WR_LEN(flits) |
1302 		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1303 		/* XXX gen? */
1304 		wr_gen2(txd, txqs.gen);
1305 		check_ring_tx_db(sc, txq);
1306 
1307 		return (0);
1308 	} else if (tso_info) {
1309 		int undersized, eth_type;
1310 		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1311 		struct ip *ip;
1312 		struct tcphdr *tcp;
1313 		char *pkthdr, tmp[TCPPKTHDRSIZE];
1314 		struct mbuf_vec *mv;
1315 		struct mbuf_iovec *tmpmi;
1316 
1317 		mv = mtomv(m0);
1318 		tmpmi = mv->mv_vec;
1319 
1320 		txd->flit[2] = 0;
1321 		GET_VTAG_MI(cntrl, mi);
1322 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1323 		hdr->cntrl = htonl(cntrl);
1324 		mlen = m0->m_pkthdr.len;
1325 		hdr->len = htonl(mlen | 0x80000000);
1326 
1327 		DPRINTF("tso buf len=%d\n", mlen);
1328 		undersized = (((tmpmi->mi_len < TCPPKTHDRSIZE) &&
1329 			(m0->m_flags & M_VLANTAG)) ||
1330 		    (tmpmi->mi_len < TCPPKTHDRSIZE - ETHER_VLAN_ENCAP_LEN));
1331 
1332 		if (__predict_false(undersized)) {
1333 			pkthdr = tmp;
1334 			dump_mi(mi);
1335 			panic("discontig packet - fixxorz");
1336 		} else
1337 			pkthdr = m0->m_data;
1338 
1339 		if (__predict_false(m0->m_flags & M_VLANTAG)) {
1340 			eth_type = CPL_ETH_II_VLAN;
1341 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1342 			    ETHER_VLAN_ENCAP_LEN);
1343 		} else {
1344 			eth_type = CPL_ETH_II;
1345 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1346 		}
1347 		tcp = (struct tcphdr *)((uint8_t *)ip +
1348 		    sizeof(*ip));
1349 
1350 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
1351 			    V_LSO_IPHDR_WORDS(ip->ip_hl) |
1352 			    V_LSO_TCPHDR_WORDS(tcp->th_off);
1353 		hdr->lso_info = htonl(tso_info);
1354 		flits = 3;
1355 	} else {
1356 		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1357 
1358 		GET_VTAG(cntrl, m0);
1359 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1360 		cpl->cntrl = htonl(cntrl);
1361 		mlen = m0->m_pkthdr.len;
1362 		cpl->len = htonl(mlen | 0x80000000);
1363 
1364 		if (mlen <= PIO_LEN) {
1365 			txq_prod(txq, 1, &txqs);
1366 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1367 			m_freem(m0);
1368 			m0 = NULL;
1369 			flits = (mlen + 7) / 8 + 2;
1370 			cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1371 					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1372 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1373 			wmb();
1374 			cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1375 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1376 
1377 			wr_gen2(txd, txqs.gen);
1378 			check_ring_tx_db(sc, txq);
1379 			DPRINTF("pio buf\n");
1380 			return (0);
1381 		}
1382 		DPRINTF("regular buf\n");
1383 		flits = 2;
1384 	}
1385 	wrp = (struct work_request_hdr *)txd;
1386 
1387 #ifdef	nomore
1388 	/*
1389 	 * XXX need to move into one of the helper routines above
1390 	 *
1391 	 */
1392 	if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0)
1393 		return (err);
1394 	m0 = *m;
1395 #endif
1396 	ndesc = calc_tx_descs(m0, nsegs);
1397 
1398 	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1399 	make_sgl(sgp, segs, nsegs);
1400 
1401 	sgl_flits = sgl_len(nsegs);
1402 
1403 	DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1404 	txq_prod(txq, ndesc, &txqs);
1405 	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1406 	wr_lo = htonl(V_WR_TID(txq->token));
1407 	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1408 	check_ring_tx_db(pi->adapter, txq);
1409 
1410 	if ((m0->m_type == MT_DATA) &&
1411 	    ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) &&
1412 	    (m0->m_ext.ext_type != EXT_PACKET)) {
1413 		m0->m_flags &= ~M_EXT;
1414 		cxgb_mbufs_outstanding--;
1415 		m_free(m0);
1416 	}
1417 
1418 	return (0);
1419 }
1420 
1421 
1422 /**
1423  *	write_imm - write a packet into a Tx descriptor as immediate data
1424  *	@d: the Tx descriptor to write
1425  *	@m: the packet
1426  *	@len: the length of packet data to write as immediate data
1427  *	@gen: the generation bit value to write
1428  *
1429  *	Writes a packet as immediate data into a Tx descriptor.  The packet
1430  *	contains a work request at its beginning.  We must write the packet
1431  *	carefully so the SGE doesn't accidentally read it before it has been
1432  *	written in its entirety.
1433  */
1434 static __inline void
1435 write_imm(struct tx_desc *d, struct mbuf *m,
1436 	  unsigned int len, unsigned int gen)
1437 {
1438 	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1439 	struct work_request_hdr *to = (struct work_request_hdr *)d;
1440 
1441 	if (len > WR_LEN)
1442 		panic("len too big %d\n", len);
1443 	if (len < sizeof(*from))
1444 		panic("len too small %d", len);
1445 
1446 	memcpy(&to[1], &from[1], len - sizeof(*from));
1447 	to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1448 					V_WR_BCNTLFLT(len & 7));
1449 	wmb();
1450 	to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1451 					V_WR_LEN((len + 7) / 8));
1452 	wr_gen2(d, gen);
1453 
1454 	/*
1455 	 * This check is a hack; we should really fix the logic so
1456 	 * that this can't happen.
1457 	 */
1458 	if (m->m_type != MT_DONTFREE)
1459 		m_freem(m);
1460 
1461 }
1462 
1463 /**
1464  *	check_desc_avail - check descriptor availability on a send queue
1465  *	@adap: the adapter
1466  *	@q: the TX queue
1467  *	@m: the packet needing the descriptors
1468  *	@ndesc: the number of Tx descriptors needed
1469  *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1470  *
1471  *	Checks if the requested number of Tx descriptors is available on an
1472  *	SGE send queue.  If the queue is already suspended or not enough
1473  *	descriptors are available the packet is queued for later transmission.
1474  *	Must be called with the Tx queue locked.
1475  *
1476  *	Returns 0 if enough descriptors are available, 1 if there aren't
1477  *	enough descriptors and the packet has been queued, and 2 if the caller
1478  *	needs to retry because there weren't enough descriptors at the
1479  *	beginning of the call but some freed up in the mean time.
1480  *	beginning of the call but some freed up in the meantime.
1481 static __inline int
1482 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1483 		 struct mbuf *m, unsigned int ndesc,
1484 		 unsigned int qid)
1485 {
1486 	/*
1487 	 * XXX We currently only use this for checking the control queue;
1488 	 * the control queue is only used for binding qsets, which happens
1489 	 * at init time, so we are guaranteed enough descriptors.
1490 	 */
1491 	if (__predict_false(!mbufq_empty(&q->sendq))) {
1492 addq_exit:	mbufq_tail(&q->sendq, m);
1493 		return 1;
1494 	}
1495 	if (__predict_false(q->size - q->in_use < ndesc)) {
1496 
1497 		struct sge_qset *qs = txq_to_qset(q, qid);
1498 
1499 		printf("stopping q\n");
1500 
1501 		setbit(&qs->txq_stopped, qid);
1502 		smp_mb();
1503 
1504 		if (should_restart_tx(q) &&
1505 		    test_and_clear_bit(qid, &qs->txq_stopped))
1506 			return 2;
1507 
1508 		q->stops++;
1509 		goto addq_exit;
1510 	}
1511 	return 0;
1512 }
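/*
 * A sketch of the caller pattern for the tri-state return above (compare
 * ctrl_xmit() and restart_ctrlq() below): 0 means go ahead and write the
 * descriptor, 1 means the packet has already been queued on q->sendq, and 2
 * means descriptors were freed in the meantime, so reclaim and retry.
 */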
1513 
1514 
1515 /**
1516  *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1517  *	@q: the SGE control Tx queue
1518  *
1519  *	This is a variant of reclaim_completed_tx() that is used for Tx queues
1520  *	that send only immediate data (presently just the control queues) and
1521  *	thus do not have any mbufs.
1522  */
1523 static __inline void
1524 reclaim_completed_tx_imm(struct sge_txq *q)
1525 {
1526 	unsigned int reclaim = q->processed - q->cleaned;
1527 
1528 	mtx_assert(&q->lock, MA_OWNED);
1529 
1530 	q->in_use -= reclaim;
1531 	q->cleaned += reclaim;
1532 }
1533 
1534 static __inline int
1535 immediate(const struct mbuf *m)
1536 {
1537 	return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN;
1538 }
1539 
1540 /**
1541  *	ctrl_xmit - send a packet through an SGE control Tx queue
1542  *	@adap: the adapter
1543  *	@q: the control queue
1544  *	@m: the packet
1545  *
1546  *	Send a packet through an SGE control Tx queue.  Packets sent through
1547  *	a control queue must fit entirely as immediate data in a single Tx
1548  *	descriptor and have no page fragments.
1549  */
1550 static int
1551 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1552 {
1553 	int ret;
1554 	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1555 
1556 	if (__predict_false(!immediate(m))) {
1557 		m_freem(m);
1558 		return 0;
1559 	}
1560 
1561 	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1562 	wrp->wr_lo = htonl(V_WR_TID(q->token));
1563 
1564 	mtx_lock(&q->lock);
1565 again:	reclaim_completed_tx_imm(q);
1566 
1567 	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1568 	if (__predict_false(ret)) {
1569 		if (ret == 1) {
1570 			mtx_unlock(&q->lock);
1571 			log(LOG_ERR, "no desc available\n");
1572 			return (ENOSPC);
1573 		}
1574 		goto again;
1575 	}
1576 	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1577 
1578 	q->in_use++;
1579 	if (++q->pidx >= q->size) {
1580 		q->pidx = 0;
1581 		q->gen ^= 1;
1582 	}
1583 	mtx_unlock(&q->lock);
1584 	wmb();
1585 	t3_write_reg(adap, A_SG_KDOORBELL,
1586 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1587 	return (0);
1588 }
1589 
1590 
1591 /**
1592  *	restart_ctrlq - restart a suspended control queue
1593  *	@qs: the queue set containing the control queue
1594  *
1595  *	Resumes transmission on a suspended Tx control queue.
1596  */
1597 static void
1598 restart_ctrlq(void *data, int npending)
1599 {
1600 	struct mbuf *m;
1601 	struct sge_qset *qs = (struct sge_qset *)data;
1602 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1603 	adapter_t *adap = qs->port->adapter;
1604 
1605 	log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use);
1606 
1607 	mtx_lock(&q->lock);
1608 again:	reclaim_completed_tx_imm(q);
1609 
1610 	while (q->in_use < q->size &&
1611 	       (m = mbufq_dequeue(&q->sendq)) != NULL) {
1612 
1613 		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1614 
1615 		if (++q->pidx >= q->size) {
1616 			q->pidx = 0;
1617 			q->gen ^= 1;
1618 		}
1619 		q->in_use++;
1620 	}
1621 	if (!mbufq_empty(&q->sendq)) {
1622 		setbit(&qs->txq_stopped, TXQ_CTRL);
1623 		smp_mb();
1624 
1625 		if (should_restart_tx(q) &&
1626 		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1627 			goto again;
1628 		q->stops++;
1629 	}
1630 	mtx_unlock(&q->lock);
1631 	wmb();
1632 	t3_write_reg(adap, A_SG_KDOORBELL,
1633 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1634 }
1635 
1636 
1637 /*
1638  * Send a management message through control queue 0
1639  */
1640 int
1641 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1642 {
1643 	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1644 }
1645 
1646 
1647 /**
1648  *	free_qset - free the resources of an SGE queue set
1649  *	@sc: the controller owning the queue set
1650  *	@q: the queue set
1651  *
1652  *	Release the HW and SW resources associated with an SGE queue set, such
1653  *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
1654  *	queue set must be quiesced prior to calling this.
1655  */
1656 void
1657 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1658 {
1659 	int i;
1660 
1661 	t3_free_tx_desc_all(&q->txq[TXQ_ETH]);
1662 
1663 	for (i = 0; i < SGE_TXQ_PER_SET; i++)
1664 		if (q->txq[i].txq_mr.br_ring != NULL) {
1665 			free(q->txq[i].txq_mr.br_ring, M_DEVBUF);
1666 			mtx_destroy(&q->txq[i].txq_mr.br_lock);
1667 		}
1668 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1669 		if (q->fl[i].desc) {
1670 			mtx_lock_spin(&sc->sge.reg_lock);
1671 			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1672 			mtx_unlock_spin(&sc->sge.reg_lock);
1673 			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1674 			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1675 					q->fl[i].desc_map);
1676 			bus_dma_tag_destroy(q->fl[i].desc_tag);
1677 			bus_dma_tag_destroy(q->fl[i].entry_tag);
1678 		}
1679 		if (q->fl[i].sdesc) {
1680 			free_rx_bufs(sc, &q->fl[i]);
1681 			free(q->fl[i].sdesc, M_DEVBUF);
1682 		}
1683 	}
1684 
1685 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1686 		if (q->txq[i].desc) {
1687 			mtx_lock_spin(&sc->sge.reg_lock);
1688 			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1689 			mtx_unlock_spin(&sc->sge.reg_lock);
1690 			bus_dmamap_unload(q->txq[i].desc_tag,
1691 					q->txq[i].desc_map);
1692 			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1693 					q->txq[i].desc_map);
1694 			bus_dma_tag_destroy(q->txq[i].desc_tag);
1695 			bus_dma_tag_destroy(q->txq[i].entry_tag);
1696 			MTX_DESTROY(&q->txq[i].lock);
1697 		}
1698 		if (q->txq[i].sdesc) {
1699 			free(q->txq[i].sdesc, M_DEVBUF);
1700 		}
1701 	}
1702 
1703 	if (q->rspq.desc) {
1704 		mtx_lock_spin(&sc->sge.reg_lock);
1705 		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1706 		mtx_unlock_spin(&sc->sge.reg_lock);
1707 
1708 		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1709 		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1710 			        q->rspq.desc_map);
1711 		bus_dma_tag_destroy(q->rspq.desc_tag);
1712 		MTX_DESTROY(&q->rspq.lock);
1713 	}
1714 
1715 	bzero(q, sizeof(*q));
1716 }
1717 
1718 /**
1719  *	t3_free_sge_resources - free SGE resources
1720  *	@sc: the adapter softc
1721  *
1722  *	Frees resources used by the SGE queue sets.
1723  */
1724 void
1725 t3_free_sge_resources(adapter_t *sc)
1726 {
1727 	int i, nqsets;
1728 
1729 #ifdef IFNET_MULTIQUEUE
1730 	panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__);
1731 #endif
1732 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1733 		nqsets += sc->port[i].nqsets;
1734 
1735 	for (i = 0; i < nqsets; ++i)
1736 		t3_free_qset(sc, &sc->sge.qs[i]);
1737 }
1738 
1739 /**
1740  *	t3_sge_start - enable SGE
1741  *	@sc: the controller softc
1742  *
1743  *	Enables the SGE for DMAs.  This is the last step in starting packet
1744  *	transfers.
1745  */
1746 void
1747 t3_sge_start(adapter_t *sc)
1748 {
1749 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1750 }
1751 
1752 /**
1753  *	t3_sge_stop - disable SGE operation
1754  *	@sc: the adapter
1755  *
1756  *	Disables the DMA engine.  This can be called in emergencies (e.g.,
1757  *	from error interrupts) or from normal process context.  In the latter
1758  *	case it also disables any pending queue restart tasklets.  Note that
1759  *	if it is called in interrupt context it cannot disable the restart
1760  *	tasklets as it cannot wait, however the tasklets will have no effect
1761  *	since the doorbells are disabled and the driver will call this again
1762  *	later from process context, at which time the tasklets will be stopped
1763  *	if they are still running.
1764  */
1765 void
1766 t3_sge_stop(adapter_t *sc)
1767 {
1768 	int i, nqsets;
1769 
1770 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1771 
1772 	if (sc->tq == NULL)
1773 		return;
1774 
1775 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1776 		nqsets += sc->port[i].nqsets;
1777 #ifdef notyet
1778 	/*
1779 	 *
1780 	 * XXX
1781 	 */
1782 	for (i = 0; i < nqsets; ++i) {
1783 		struct sge_qset *qs = &sc->sge.qs[i];
1784 
1785 		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
1786 		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
1787 	}
1788 #endif
1789 }
1790 
1791 /**
1792  *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
1793  *	@q: the Tx queue to reclaim descriptors from
1794  *	@reclaimable: the number of descriptors to reclaim
1795  *
1796  *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
1797  *	Tx buffers.  Called with the Tx queue lock held.
1803  */
1804 void
1805 t3_free_tx_desc(struct sge_txq *q, int reclaimable)
1806 {
1807 	struct tx_sw_desc *txsd;
1808 	unsigned int cidx;
1809 
1810 #ifdef T3_TRACE
1811 	T3_TRACE2(sc->tb[q->cntxt_id & 7],
1812 		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
1813 #endif
1814 	cidx = q->cidx;
1815 	txsd = &q->sdesc[cidx];
1816 	DPRINTF("reclaiming %d WR\n", reclaimable);
1817 	mtx_assert(&q->lock, MA_OWNED);
1818 	while (reclaimable--) {
1819 		DPRINTF("cidx=%d d=%p\n", cidx, txsd);
1820 		if (txsd->mi.mi_base != NULL) {
1821 			if (txsd->flags & TX_SW_DESC_MAPPED) {
1822 				bus_dmamap_unload(q->entry_tag, txsd->map);
1823 				txsd->flags &= ~TX_SW_DESC_MAPPED;
1824 			}
1825 			m_freem_iovec(&txsd->mi);
1826 			buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
1827 			txsd->mi.mi_base = NULL;
1828 
1829 #if defined(DIAGNOSTIC) && 0
1830 			if (m_get_priority(txsd->m[0]) != cidx)
1831 				printf("pri=%d cidx=%d\n",
1832 				    (int)m_get_priority(txsd->m[0]), cidx);
1833 #endif
1834 
1835 		} else
1836 			q->txq_skipped++;
1837 
1838 		++txsd;
1839 		if (++cidx == q->size) {
1840 			cidx = 0;
1841 			txsd = q->sdesc;
1842 		}
1843 	}
1844 	q->cidx = cidx;
1845 
1846 }
1847 
1848 void
1849 t3_free_tx_desc_all(struct sge_txq *q)
1850 {
1851 	int i;
1852 	struct tx_sw_desc *txsd;
1853 
1854 	for (i = 0; i < q->size; i++) {
1855 		txsd = &q->sdesc[i];
1856 		if (txsd->mi.mi_base != NULL) {
1857 			if (txsd->flags & TX_SW_DESC_MAPPED) {
1858 				bus_dmamap_unload(q->entry_tag, txsd->map);
1859 				txsd->flags &= ~TX_SW_DESC_MAPPED;
1860 			}
1861 			m_freem_iovec(&txsd->mi);
1862 			bzero(&txsd->mi, sizeof(txsd->mi));
1863 		}
1864 	}
1865 }
1866 
1867 /**
1868  *	is_new_response - check if a response is newly written
1869  *	@r: the response descriptor
1870  *	@q: the response queue
1871  *
1872  *	Returns true if a response descriptor contains a yet unprocessed
1873  *	response.
1874  */
1875 static __inline int
1876 is_new_response(const struct rsp_desc *r,
1877     const struct sge_rspq *q)
1878 {
1879 	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1880 }
1881 
1882 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1883 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1884 			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1885 			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1886 			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1887 
1888 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1889 #define NOMEM_INTR_DELAY 2500
1890 
1891 /**
1892  *	write_ofld_wr - write an offload work request
1893  *	@adap: the adapter
1894  *	@m: the packet to send
1895  *	@q: the Tx queue
1896  *	@pidx: index of the first Tx descriptor to write
1897  *	@gen: the generation value to use
1898  *	@ndesc: number of descriptors the packet will occupy
1899  *
1900  *	Write an offload work request to send the supplied packet.  The packet
1901  *	data already carry the work request with most fields populated.
1902  */
1903 static void
1904 write_ofld_wr(adapter_t *adap, struct mbuf *m,
1905     struct sge_txq *q, unsigned int pidx,
1906     unsigned int gen, unsigned int ndesc,
1907     bus_dma_segment_t *segs, unsigned int nsegs)
1908 {
1909 	unsigned int sgl_flits, flits;
1910 	struct work_request_hdr *from;
1911 	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1912 	struct tx_desc *d = &q->desc[pidx];
1913 	struct txq_state txqs;
1914 
1915 	if (immediate(m) && nsegs == 0) {
1916 		write_imm(d, m, m->m_len, gen);
1917 		return;
1918 	}
1919 
1920 	/* Only TX_DATA builds SGLs */
1921 	from = mtod(m, struct work_request_hdr *);
1922 	memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
1923 
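	/*
	 * The copied WR header occupies m_len bytes, i.e. m_len / 8 flits.
	 * When the request fits in a single descriptor the SGL is appended in
	 * place after the header; otherwise it is built in the temporary sgl
	 * array and emitted by write_wr_hdr_sgl().
	 */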
1924 	flits = m->m_len / 8;
1925 	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1926 
1927 	make_sgl(sgp, segs, nsegs);
1928 	sgl_flits = sgl_len(nsegs);
1929 
1930 	txqs.gen = gen;
1931 	txqs.pidx = pidx;
1932 	txqs.compl = 0;
1933 
1934 	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1935 	    from->wr_hi, from->wr_lo);
1936 }
1937 
1938 /**
1939  *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1940  *	@m: the packet
1941  *
1942  * 	Returns the number of Tx descriptors needed for the given offload
1943  * 	packet.  These packets are already fully constructed.
1944  */
1945 static __inline unsigned int
1946 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
1947 {
1948 	unsigned int flits, cnt = 0;
1949 	int ndescs;
1950 
1951 	if (m->m_len <= WR_LEN && nsegs == 0)
1952 		return (1);                 /* packet fits as immediate data */
1953 
1954 	if (m->m_flags & M_IOVEC)
1955 		cnt = mtomv(m)->mv_count;
1956 	else
1957 		cnt = nsegs;
1958 
1959 	/* headers */
1960 	flits = m->m_len / 8;
1961 
1962 	ndescs = flits_to_desc(flits + sgl_len(cnt));
1963 
1964 	CTR4(KTR_CXGB, "flits=%d sgl_len=%d nsegs=%d ndescs=%d",
1965 	    flits, sgl_len(cnt), nsegs, ndescs);
1966 
1967 	return (ndescs);
1968 }
1969 
1970 /**
1971  *	ofld_xmit - send a packet through an offload queue
1972  *	@adap: the adapter
1973  *	@q: the Tx offload queue
1974  *	@m: the packet
1975  *
1976  *	Send an offload packet through an SGE offload queue.
1977  */
1978 static int
1979 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1980 {
1981 	int ret, nsegs;
1982 	unsigned int ndesc;
1983 	unsigned int pidx, gen;
1984 	bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
1985 	struct tx_sw_desc *stx;
1986 
1987 	nsegs = m_get_sgllen(m);
1988 	vsegs = m_get_sgl(m);
1989 	ndesc = calc_tx_descs_ofld(m, nsegs);
1990 	busdma_map_sgl(vsegs, segs, nsegs);
1991 
1992 	stx = &q->sdesc[q->pidx];
1993 	KASSERT(stx->mi.mi_base == NULL, ("mi_base set"));
1994 
1995 	mtx_lock(&q->lock);
1996 again:	reclaim_completed_tx_(q, 16);
1997 	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
1998 	if (__predict_false(ret)) {
1999 		if (ret == 1) {
2000 			printf("no ofld desc avail\n");
2001 
2002 			m_set_priority(m, ndesc);     /* save for restart */
2003 			mtx_unlock(&q->lock);
2004 			return (EINTR);
2005 		}
2006 		goto again;
2007 	}
2008 
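	/*
	 * Claim ndesc descriptors: record the current pidx/gen for the work
	 * request, then advance the producer index, toggling the generation
	 * bit when the index wraps past the end of the ring.
	 */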
2009 	gen = q->gen;
2010 	q->in_use += ndesc;
2011 	pidx = q->pidx;
2012 	q->pidx += ndesc;
2013 	if (q->pidx >= q->size) {
2014 		q->pidx -= q->size;
2015 		q->gen ^= 1;
2016 	}
2017 #ifdef T3_TRACE
2018 	T3_TRACE5(adap->tb[q->cntxt_id & 7],
2019 		  "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
2020 		  ndesc, pidx, skb->len, skb->len - skb->data_len,
2021 		  skb_shinfo(skb)->nr_frags);
2022 #endif
2023 	mtx_unlock(&q->lock);
2024 
2025 	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2026 	check_ring_tx_db(adap, q);
2027 	return (0);
2028 }
2029 
2030 /**
2031  *	restart_offloadq - restart a suspended offload queue
2032  *	@qs: the queue set containing the offload queue
2033  *
2034  *	Resumes transmission on a suspended Tx offload queue.
2035  */
2036 static void
2037 restart_offloadq(void *data, int npending)
2038 {
2039 	struct mbuf *m;
2040 	struct sge_qset *qs = data;
2041 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
2042 	adapter_t *adap = qs->port->adapter;
2043 	bus_dma_segment_t segs[TX_MAX_SEGS];
2044 	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2045 	int nsegs, cleaned;
2046 
2047 	mtx_lock(&q->lock);
2048 again:	cleaned = reclaim_completed_tx_(q, 16);
2049 
2050 	while ((m = mbufq_peek(&q->sendq)) != NULL) {
2051 		unsigned int gen, pidx;
2052 		unsigned int ndesc = m_get_priority(m);
2053 
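		/*
		 * Not enough room: mark the queue stopped, then re-check after
		 * the memory barrier in case completions freed descriptors in
		 * the meantime; if so, un-stop the queue and retry instead of
		 * bailing out.
		 */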
2054 		if (__predict_false(q->size - q->in_use < ndesc)) {
2055 			setbit(&qs->txq_stopped, TXQ_OFLD);
2056 			smp_mb();
2057 
2058 			if (should_restart_tx(q) &&
2059 			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2060 				goto again;
2061 			q->stops++;
2062 			break;
2063 		}
2064 
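		/* Claim the descriptors: advance pidx, toggling gen on wrap. */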
2065 		gen = q->gen;
2066 		q->in_use += ndesc;
2067 		pidx = q->pidx;
2068 		q->pidx += ndesc;
2069 		if (q->pidx >= q->size) {
2070 			q->pidx -= q->size;
2071 			q->gen ^= 1;
2072 		}
2073 
2074 		(void)mbufq_dequeue(&q->sendq);
2075 		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2076 		mtx_unlock(&q->lock);
2077 		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2078 		mtx_lock(&q->lock);
2079 	}
2080 	mtx_unlock(&q->lock);
2081 
2082 #if USE_GTS
2083 	set_bit(TXQ_RUNNING, &q->flags);
2084 	set_bit(TXQ_LAST_PKT_DB, &q->flags);
2085 #endif
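	/* Make the work request writes visible before ringing the doorbell. */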
2086 	wmb();
2087 	t3_write_reg(adap, A_SG_KDOORBELL,
2088 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2089 }
2090 
2091 /**
2092  *	queue_set - return the queue set a packet should use
2093  *	@m: the packet
2094  *
2095  *	Maps a packet to the SGE queue set it should use.  The desired queue
2096  *	set is carried in bits 1-3 in the packet's priority.
2097  */
2098 static __inline int
2099 queue_set(const struct mbuf *m)
2100 {
2101 	return m_get_priority(m) >> 1;
2102 }
2103 
2104 /**
2105  *	is_ctrl_pkt - return whether an offload packet is a control packet
2106  *	@m: the packet
2107  *
2108  *	Determines whether an offload packet should use an OFLD or a CTRL
2109  *	Tx queue.  This is indicated by bit 0 in the packet's priority.
2110  */
2111 static __inline int
2112 is_ctrl_pkt(const struct mbuf *m)
2113 {
2114 	return m_get_priority(m) & 1;
2115 }
2116 
2117 /**
2118  *	t3_offload_tx - send an offload packet
2119  *	@tdev: the offload device to send to
2120  *	@m: the packet
2121  *
2122  *	Sends an offload packet.  We use the packet priority to select the
2123  *	appropriate Tx queue as follows: bit 0 indicates whether the packet
2124  *	should be sent as regular or control, bits 1-3 select the queue set.
2125  */
2126 int
2127 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2128 {
2129 	adapter_t *adap = tdev2adap(tdev);
2130 	struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2131 
2132 	if (__predict_false(is_ctrl_pkt(m)))
2133 		return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
2134 
2135 	return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
2136 }
2137 
2138 /**
2139  *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2140  *	@tdev: the offload device that will be receiving the packets
2141  *	@q: the SGE response queue that assembled the bundle
2142  *	@m: the partial bundle
2143  *	@n: the number of packets in the bundle
2144  *
2145  *	Delivers a (partial) bundle of Rx offload packets to an offload device.
2146  */
2147 static __inline void
2148 deliver_partial_bundle(struct t3cdev *tdev,
2149 			struct sge_rspq *q,
2150 			struct mbuf *mbufs[], int n)
2151 {
2152 	if (n) {
2153 		q->offload_bundles++;
2154 		cxgb_ofld_recv(tdev, mbufs, n);
2155 	}
2156 }
2157 
2158 static __inline int
2159 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2160     struct mbuf *m, struct mbuf *rx_gather[],
2161     unsigned int gather_idx)
2162 {
2163 
2164 	rq->offload_pkts++;
2165 	m->m_pkthdr.header = mtod(m, void *);
2166 	rx_gather[gather_idx++] = m;
2167 	if (gather_idx == RX_BUNDLE_SIZE) {
2168 		cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2169 		gather_idx = 0;
2170 		rq->offload_bundles++;
2171 	}
2172 	return (gather_idx);
2173 }
2174 
2175 static void
2176 restart_tx(struct sge_qset *qs)
2177 {
2178 	struct adapter *sc = qs->port->adapter;
2179 
2180 
2181 	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2182 	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2183 	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2184 		qs->txq[TXQ_OFLD].restarts++;
2185 		DPRINTF("restarting TXQ_OFLD\n");
2186 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2187 	}
2188 	DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2189 	    qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2190 	    qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2191 	    qs->txq[TXQ_CTRL].in_use);
2192 
2193 	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2194 	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2195 	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2196 		qs->txq[TXQ_CTRL].restarts++;
2197 		DPRINTF("restarting TXQ_CTRL\n");
2198 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2199 	}
2200 }
2201 
2202 /**
2203  *	t3_sge_alloc_qset - initialize an SGE queue set
2204  *	@sc: the controller softc
2205  *	@id: the queue set id
2206  *	@nports: how many Ethernet ports will be using this queue set
2207  *	@irq_vec_idx: the IRQ vector index for response queue interrupts
2208  *	@p: configuration parameters for this queue set
2209  *	@ntxq: number of Tx queues for the queue set
2210  *	@pi: port info for queue set
2211  *
2212  *	Allocate resources and initialize an SGE queue set.  A queue set
2213  *	comprises a response queue, two Rx free-buffer queues, and up to 3
2214  *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
2215  *	queue, offload queue, and control queue.
2216  */
2217 int
2218 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2219 		  const struct qset_params *p, int ntxq, struct port_info *pi)
2220 {
2221 	struct sge_qset *q = &sc->sge.qs[id];
2222 	int i, header_size, ret = 0;
2223 
2224 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2225 		if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *),
2226 			    M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
2227 			device_printf(sc->dev, "failed to allocate mbuf ring\n");
2228 			goto err;
2229 		}
2230 		q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0;
2231 		q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size;
2232 		mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF);
2233 	}
2234 
2235 	init_qset_cntxt(q, id);
2236 	q->idx = id;
2237 
2238 	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2239 		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2240 		    &q->fl[0].desc, &q->fl[0].sdesc,
2241 		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
2242 		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2243 		printf("error %d from alloc ring fl0\n", ret);
2244 		goto err;
2245 	}
2246 
2247 	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2248 		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2249 		    &q->fl[1].desc, &q->fl[1].sdesc,
2250 		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
2251 		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2252 		printf("error %d from alloc ring fl1\n", ret);
2253 		goto err;
2254 	}
2255 
2256 	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2257 		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
2258 		    &q->rspq.desc_tag, &q->rspq.desc_map,
2259 		    NULL, NULL)) != 0) {
2260 		printf("error %d from alloc ring rspq\n", ret);
2261 		goto err;
2262 	}
2263 
2264 	for (i = 0; i < ntxq; ++i) {
2265 		/*
2266 		 * The control queue always uses immediate data so does not
2267 		 * need to keep track of any mbufs.
2268 		 * XXX Placeholder for future TOE support.
2269 		 */
2270 		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2271 
2272 		if ((ret = alloc_ring(sc, p->txq_size[i],
2273 			    sizeof(struct tx_desc), sz,
2274 			    &q->txq[i].phys_addr, &q->txq[i].desc,
2275 			    &q->txq[i].sdesc, &q->txq[i].desc_tag,
2276 			    &q->txq[i].desc_map,
2277 			    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2278 			printf("error %d from alloc ring tx %i\n", ret, i);
2279 			goto err;
2280 		}
2281 		mbufq_init(&q->txq[i].sendq);
2282 		q->txq[i].gen = 1;
2283 		q->txq[i].size = p->txq_size[i];
2284 		snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2285 		    device_get_unit(sc->dev), irq_vec_idx, i);
2286 		MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2287 	}
2288 
2289 	q->txq[TXQ_ETH].port = pi;
2290 
2291 	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2292 	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2293 	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
2294 	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
2295 
2296 	q->fl[0].gen = q->fl[1].gen = 1;
2297 	q->fl[0].size = p->fl_size;
2298 	q->fl[1].size = p->jumbo_size;
2299 
2300 	q->rspq.gen = 1;
2301 	q->rspq.cidx = 0;
2302 	q->rspq.size = p->rspq_size;
2303 
2304 
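	/*
	 * Each receive cluster has an mbuf header, packet header, external
	 * storage descriptor, and a refcount word embedded at its start (see
	 * init_cluster_mbuf()), so the usable buffer size is reduced by
	 * header_size.
	 */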
2305 	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
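	/*
	 * Stop the Ethernet Tx queue once fewer descriptors remain than the
	 * per-port worst case for a maximally fragmented packet.
	 */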
2306 	q->txq[TXQ_ETH].stop_thres = nports *
2307 	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2308 
2309 	q->fl[0].buf_size = (MCLBYTES - header_size);
2310 	q->fl[0].zone = zone_clust;
2311 	q->fl[0].type = EXT_CLUSTER;
2312 #if __FreeBSD_version > 800000
2313 	if (cxgb_use_16k_clusters) {
2314 		q->fl[1].buf_size = MJUM16BYTES - header_size;
2315 		q->fl[1].zone = zone_jumbo16;
2316 		q->fl[1].type = EXT_JUMBO16;
2317 	} else {
2318 		q->fl[1].buf_size = MJUM9BYTES - header_size;
2319 		q->fl[1].zone = zone_jumbo9;
2320 		q->fl[1].type = EXT_JUMBO9;
2321 	}
2322 #else
2323 	q->fl[1].buf_size = MJUMPAGESIZE - header_size;
2324 	q->fl[1].zone = zone_jumbop;
2325 	q->fl[1].type = EXT_JUMBOP;
2326 #endif
2327 	q->lro.enabled = lro_default;
2328 
2329 	mtx_lock_spin(&sc->sge.reg_lock);
2330 	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2331 				   q->rspq.phys_addr, q->rspq.size,
2332 				   q->fl[0].buf_size, 1, 0);
2333 	if (ret) {
2334 		printf("error %d from t3_sge_init_rspcntxt\n", ret);
2335 		goto err_unlock;
2336 	}
2337 
2338 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2339 		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2340 					  q->fl[i].phys_addr, q->fl[i].size,
2341 					  q->fl[i].buf_size, p->cong_thres, 1,
2342 					  0);
2343 		if (ret) {
2344 			printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2345 			goto err_unlock;
2346 		}
2347 	}
2348 
2349 	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2350 				 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2351 				 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2352 				 1, 0);
2353 	if (ret) {
2354 		printf("error %d from t3_sge_init_ecntxt\n", ret);
2355 		goto err_unlock;
2356 	}
2357 
2358 	if (ntxq > 1) {
2359 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2360 					 USE_GTS, SGE_CNTXT_OFLD, id,
2361 					 q->txq[TXQ_OFLD].phys_addr,
2362 					 q->txq[TXQ_OFLD].size, 0, 1, 0);
2363 		if (ret) {
2364 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2365 			goto err_unlock;
2366 		}
2367 	}
2368 
2369 	if (ntxq > 2) {
2370 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2371 					 SGE_CNTXT_CTRL, id,
2372 					 q->txq[TXQ_CTRL].phys_addr,
2373 					 q->txq[TXQ_CTRL].size,
2374 					 q->txq[TXQ_CTRL].token, 1, 0);
2375 		if (ret) {
2376 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2377 			goto err_unlock;
2378 		}
2379 	}
2380 
2381 	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2382 	    device_get_unit(sc->dev), irq_vec_idx);
2383 	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2384 
2385 	mtx_unlock_spin(&sc->sge.reg_lock);
2386 	t3_update_qset_coalesce(q, p);
2387 	q->port = pi;
2388 
2389 	refill_fl(sc, &q->fl[0], q->fl[0].size);
2390 	refill_fl(sc, &q->fl[1], q->fl[1].size);
2391 	refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2392 
2393 	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2394 		     V_NEWTIMER(q->rspq.holdoff_tmr));
2395 
2396 	return (0);
2397 
2398 err_unlock:
2399 	mtx_unlock_spin(&sc->sge.reg_lock);
2400 err:
2401 	t3_free_qset(sc, q);
2402 
2403 	return (ret);
2404 }
2405 
2406 void
2407 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2408 {
2409 	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2410 	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2411 	struct ifnet *ifp = pi->ifp;
2412 
2413 	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2414 
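	/*
	 * The hardware verified the IP and L4 checksums; mark the mbuf so the
	 * stack does not recompute them.
	 */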
2415 	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2416 	    cpl->csum_valid && cpl->csum == 0xffff) {
2417 		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID);
2418 		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2419 		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2420 		m->m_pkthdr.csum_data = 0xffff;
2421 	}
2422 	/*
2423 	 * XXX need to add VLAN support for 6.x
2424 	 */
2425 #ifdef VLAN_SUPPORTED
2426 	if (__predict_false(cpl->vlan_valid)) {
2427 		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2428 		m->m_flags |= M_VLANTAG;
2429 	}
2430 #endif
2431 
2432 	m->m_pkthdr.rcvif = ifp;
2433 	m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2434 #ifndef DISABLE_MBUF_IOVEC
2435 	m_explode(m);
2436 #endif
2437 	/*
2438 	 * adjust after conversion to mbuf chain
2439 	 */
2440 	m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2441 	m->m_len -= (sizeof(*cpl) + ethpad);
2442 	m->m_data += (sizeof(*cpl) + ethpad);
2443 
2444 	(*ifp->if_input)(ifp, m);
2445 }
2446 
2447 static void
2448 ext_free_handler(void *arg1, void * arg2)
2449 {
2450 	uintptr_t type = (uintptr_t)arg2;
2451 	uma_zone_t zone;
2452 	struct mbuf *m;
2453 
2454 	m = arg1;
2455 	zone = m_getzonefromtype(type);
2456 	m->m_ext.ext_type = (int)type;
2457 	cxgb_ext_freed++;
2458 	cxgb_cache_put(zone, m);
2459 }
2460 
2461 static void
2462 init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone)
2463 {
2464 	struct mbuf *m;
2465 	int header_size;
2466 
2467 	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) +
2468 	    sizeof(struct m_ext_) + sizeof(uint32_t);
2469 
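	/*
	 * Build an mbuf in place at the start of the cluster: the mbuf header,
	 * external-storage descriptor, and refcount word occupy the first
	 * header_size bytes, and the packet data begins right after them.
	 */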
2470 	bzero(cl, header_size);
2471 	m = (struct mbuf *)cl;
2472 
2473 	cxgb_ext_inited++;
2474 	SLIST_INIT(&m->m_pkthdr.tags);
2475 	m->m_type = MT_DATA;
2476 	m->m_flags = flags | M_NOFREE | M_EXT;
2477 	m->m_data = cl + header_size;
2478 	m->m_ext.ext_buf = cl;
2479 	m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t));
2480 	m->m_ext.ext_size = m_getsizefromtype(type);
2481 	m->m_ext.ext_free = ext_free_handler;
2482 	m->m_ext.ext_arg1 = cl;
2483 	m->m_ext.ext_arg2 = (void *)(uintptr_t)type;
2484 	m->m_ext.ext_type = EXT_EXTREF;
2485 	*(m->m_ext.ref_cnt) = 1;
2486 	DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt);
2487 }
2488 
2489 
2490 /**
2491  *	get_packet - return the next ingress packet buffer from a free list
2492  *	@adap: the adapter that received the packet
2493  *	@drop_thres: # of remaining buffers before we start dropping packets
2494  *	@qs: the qset that the SGE free list holding the packet belongs to
2495  *      @mh: the mbuf header, which holds pointers to the head and tail of the mbuf chain
2496  *      @r: response descriptor
2497  *
2498  *	Get the next packet from a free list and complete setup of the
2499  *	mbuf.  If the packet is small we make a copy and recycle the
2500  *	original buffer, otherwise we use the original buffer itself.  If a
2501  *	positive drop threshold is supplied packets are dropped and their
2502  *	buffers recycled if (a) the number of remaining buffers is under the
2503  *	threshold and the packet is too big to copy, or (b) the packet should
2504  *	be copied but there is no memory for the copy.
2505  */
2506 #ifdef DISABLE_MBUF_IOVEC
2507 
2508 static int
2509 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2510     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2511 {
2512 
2513 	unsigned int len_cq =  ntohl(r->len_cq);
2514 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2515 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2516 	uint32_t len = G_RSPD_LEN(len_cq);
2517 	uint32_t flags = ntohl(r->flags);
2518 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2519 	caddr_t cl;
2520 	struct mbuf *m, *m0;
2521 	int ret = 0;
2522 
2523 	prefetch(sd->rxsd_cl);
2524 
2525 	fl->credits--;
2526 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2527 
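	/*
	 * Small single-descriptor packets are copied into a fresh mbuf so the
	 * cluster can be recycled back to the free list; otherwise the cluster
	 * itself is turned into an mbuf and handed up the stack.
	 */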
2528 	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2529 		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2530 			goto skip_recycle;
2531 		cl = mtod(m0, void *);
2532 		memcpy(cl, sd->data, len);
2533 		recycle_rx_buf(adap, fl, fl->cidx);
2534 		m = m0;
2535 		m0->m_len = len;
2536 	} else {
2537 	skip_recycle:
2538 
2539 		bus_dmamap_unload(fl->entry_tag, sd->map);
2540 		cl = sd->rxsd_cl;
2541 		m = m0 = (struct mbuf *)cl;
2542 
2543 		if ((sopeop == RSPQ_SOP_EOP) ||
2544 		    (sopeop == RSPQ_SOP))
2545 			flags = M_PKTHDR;
2546 		init_cluster_mbuf(cl, flags, fl->type, fl->zone);
2547 		m0->m_len = len;
2548 	}
2549 	switch(sopeop) {
2550 	case RSPQ_SOP_EOP:
2551 		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2552 		mh->mh_head = mh->mh_tail = m;
2553 		m->m_pkthdr.len = len;
2554 		ret = 1;
2555 		break;
2556 	case RSPQ_NSOP_NEOP:
2557 		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2558 		if (mh->mh_tail == NULL) {
2559 			log(LOG_ERR, "discarding intermediate descriptor entry\n");
2560 			m_freem(m);
2561 			break;
2562 		}
2563 		mh->mh_tail->m_next = m;
2564 		mh->mh_tail = m;
2565 		mh->mh_head->m_pkthdr.len += len;
2566 		ret = 0;
2567 		break;
2568 	case RSPQ_SOP:
2569 		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2570 		m->m_pkthdr.len = len;
2571 		mh->mh_head = mh->mh_tail = m;
2572 		ret = 0;
2573 		break;
2574 	case RSPQ_EOP:
2575 		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2576 		mh->mh_head->m_pkthdr.len += len;
2577 		mh->mh_tail->m_next = m;
2578 		mh->mh_tail = m;
2579 		ret = 1;
2580 		break;
2581 	}
2582 	if (++fl->cidx == fl->size)
2583 		fl->cidx = 0;
2584 
2585 	return (ret);
2586 }
2587 
2588 #else
2589 
2590 static int
2591 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2592     struct mbuf **m, struct rsp_desc *r)
2593 {
2594 
2595 	unsigned int len_cq =  ntohl(r->len_cq);
2596 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2597 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2598 	uint32_t len = G_RSPD_LEN(len_cq);
2599 	uint32_t flags = ntohl(r->flags);
2600 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2601 	void *cl;
2602 	int ret = 0;
2603 	struct mbuf *m0;
2604 #if 0
2605 	if ((sd + 1 )->rxsd_cl)
2606 		prefetch((sd + 1)->rxsd_cl);
2607 	if ((sd + 2)->rxsd_cl)
2608 		prefetch((sd + 2)->rxsd_cl);
2609 #endif
2610 	DPRINTF("rx cpu=%d\n", curcpu);
2611 	fl->credits--;
2612 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2613 
2614 	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2615 		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2616 			goto skip_recycle;
2617 		cl = mtod(m0, void *);
2618 		memcpy(cl, sd->data, len);
2619 		recycle_rx_buf(adap, fl, fl->cidx);
2620 		*m = m0;
2621 	} else {
2622 	skip_recycle:
2623 		bus_dmamap_unload(fl->entry_tag, sd->map);
2624 		cl = sd->rxsd_cl;
2625 		*m = m0 = (struct mbuf *)cl;
2626 	}
2627 
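	/* Only single-descriptor (SOP_EOP) packets are supported in this path. */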
2628 	switch(sopeop) {
2629 	case RSPQ_SOP_EOP:
2630 		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2631 		if (cl == sd->rxsd_cl)
2632 			init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone);
2633 		m0->m_len = m0->m_pkthdr.len = len;
2634 		ret = 1;
2635 		goto done;
2636 		break;
2637 	case RSPQ_NSOP_NEOP:
2638 		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2639 		panic("chaining unsupported");
2640 		ret = 0;
2641 		break;
2642 	case RSPQ_SOP:
2643 		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2644 		panic("chaining unsupported");
2645 		m_iovinit(m0);
2646 		ret = 0;
2647 		break;
2648 	case RSPQ_EOP:
2649 		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2650 		panic("chaining unsupported");
2651 		ret = 1;
2652 		break;
2653 	}
2654 	panic("append not supported");
2655 #if 0
2656 	m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref);
2657 #endif
2658 done:
2659 	if (++fl->cidx == fl->size)
2660 		fl->cidx = 0;
2661 
2662 	return (ret);
2663 }
2664 #endif
2665 /**
2666  *	handle_rsp_cntrl_info - handles control information in a response
2667  *	@qs: the queue set corresponding to the response
2668  *	@flags: the response control flags
2669  *
2670  *	Handles the control information of an SGE response, such as GTS
2671  *	indications and completion credits for the queue set's Tx queues.
2672  *	The hardware coalesces credits; we do no extra software coalescing.
2673  */
2674 static __inline void
2675 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2676 {
2677 	unsigned int credits;
2678 
2679 #if USE_GTS
2680 	if (flags & F_RSPD_TXQ0_GTS)
2681 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2682 #endif
2683 	credits = G_RSPD_TXQ0_CR(flags);
2684 	if (credits)
2685 		qs->txq[TXQ_ETH].processed += credits;
2686 
2687 	credits = G_RSPD_TXQ2_CR(flags);
2688 	if (credits)
2689 		qs->txq[TXQ_CTRL].processed += credits;
2690 
2691 # if USE_GTS
2692 	if (flags & F_RSPD_TXQ1_GTS)
2693 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2694 # endif
2695 	credits = G_RSPD_TXQ1_CR(flags);
2696 	if (credits)
2697 		qs->txq[TXQ_OFLD].processed += credits;
2698 
2699 }
2700 
2701 static void
2702 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2703     unsigned int sleeping)
2704 {
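	/* Currently a no-op. */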
2705 	;
2706 }
2707 
2708 /**
2709  *	process_responses - process responses from an SGE response queue
2710  *	@adap: the adapter
2711  *	@qs: the queue set to which the response queue belongs
2712  *	@budget: how many responses can be processed in this round
2713  *
2714  *	Process responses from an SGE response queue up to the supplied budget.
2715  *	Responses include received packets as well as credits and other events
2716  *	for the queues that belong to the response queue's queue set.
2717  *	A negative budget is effectively unlimited.
2718  *
2719  *	Additionally choose the interrupt holdoff time for the next interrupt
2720  *	on this queue.  If the system is under memory shortage use a fairly
2721  *	long delay to help recovery.
2722  */
2723 int
2724 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2725 {
2726 	struct sge_rspq *rspq = &qs->rspq;
2727 	struct rsp_desc *r = &rspq->desc[rspq->cidx];
2728 	int budget_left = budget;
2729 	unsigned int sleeping = 0;
2730 	int lro = qs->lro.enabled;
2731 	struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2732 	int ngathered = 0;
2733 #ifdef DEBUG
2734 	static int last_holdoff = 0;
2735 	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2736 		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2737 		last_holdoff = rspq->holdoff_tmr;
2738 	}
2739 #endif
2740 	rspq->next_holdoff = rspq->holdoff_tmr;
2741 
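	/*
	 * Consume response descriptors until the budget runs out or the
	 * generation bit shows no further new entries.  Each iteration handles
	 * an async notification, immediate data, or a free-list packet, then
	 * returns queue credits and tops up the free lists as needed.
	 */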
2742 	while (__predict_true(budget_left && is_new_response(r, rspq))) {
2743 		int eth, eop = 0, ethpad = 0;
2744 		uint32_t flags = ntohl(r->flags);
2745 		uint32_t rss_csum = *(const uint32_t *)r;
2746 		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2747 
2748 		eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2749 
2750 		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2751 			struct mbuf *m;
2752 
2753 			if (cxgb_debug)
2754 				printf("async notification\n");
2755 
2756 			if (rspq->rspq_mh.mh_head == NULL) {
2757 				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2758 				m = rspq->rspq_mh.mh_head;
2759 			} else {
2760 				m = m_gethdr(M_DONTWAIT, MT_DATA);
2761 			}
2762 
2763 			/* XXX m is lost here if rspq->rspq_mh.mh_head is not NULL */
2764 
2765 			if (m == NULL)
2766 				goto no_mem;
2767 
2768 			memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2769 			m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2770 			*mtod(m, char *) = CPL_ASYNC_NOTIF;
2771 			rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
2772 			eop = 1;
2773 			rspq->async_notif++;
2774 			goto skip;
2775 		} else if  (flags & F_RSPD_IMM_DATA_VALID) {
2776 			struct mbuf *m = NULL;
2777 
2778 			DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
2779 			    r->rss_hdr.opcode, rspq->cidx);
2780 			if (rspq->rspq_mh.mh_head == NULL)
2781 				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2782 			else
2783 				m = m_gethdr(M_DONTWAIT, MT_DATA);
2784 
2785 			if (rspq->rspq_mh.mh_head == NULL &&  m == NULL) {
2786 		no_mem:
2787 				rspq->next_holdoff = NOMEM_INTR_DELAY;
2788 				budget_left--;
2789 				break;
2790 			}
2791 			get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
2792 			eop = 1;
2793 			rspq->imm_data++;
2794 		} else if (r->len_cq) {
2795 			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2796 
2797 #ifdef DISABLE_MBUF_IOVEC
2798 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
2799 #else
2800 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r);
2801 #endif
2802 #ifdef IFNET_MULTIQUEUE
2803 			rspq->rspq_mh.mh_head->m_pkthdr.rss_hash = rss_hash;
2804 #endif
2805 			ethpad = 2;
2806 		} else {
2807 			DPRINTF("pure response\n");
2808 			rspq->pure_rsps++;
2809 		}
2810 	skip:
2811 		if (flags & RSPD_CTRL_MASK) {
2812 			sleeping |= flags & RSPD_GTS_MASK;
2813 			handle_rsp_cntrl_info(qs, flags);
2814 		}
2815 
2816 		r++;
2817 		if (__predict_false(++rspq->cidx == rspq->size)) {
2818 			rspq->cidx = 0;
2819 			rspq->gen ^= 1;
2820 			r = rspq->desc;
2821 		}
2822 		prefetch(r);
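		/* Return response-queue credits to the hardware in quarter-ring batches. */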
2823 		if (++rspq->credits >= (rspq->size / 4)) {
2824 			refill_rspq(adap, rspq, rspq->credits);
2825 			rspq->credits = 0;
2826 		}
2827 		DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags);
2828 
2829 		if (!eth && eop) {
2830 			rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
2831 			/*
2832 			 * XXX size mismatch
2833 			 */
2834 			m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
2835 
2836 
2837 			ngathered = rx_offload(&adap->tdev, rspq,
2838 			    rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
2839 			rspq->rspq_mh.mh_head = NULL;
2840 			DPRINTF("received offload packet\n");
2841 
2842 		} else if (eth && eop) {
2843 			prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *));
2844 			prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES);
2845 
2846 			t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad,
2847 			    rss_hash, rss_csum, lro);
2848 			DPRINTF("received tunnel packet\n");
2849 			rspq->rspq_mh.mh_head = NULL;
2850 
2851 		}
2852 		__refill_fl_lt(adap, &qs->fl[0], 32);
2853 		__refill_fl_lt(adap, &qs->fl[1], 32);
2854 		--budget_left;
2855 	}
2856 
2857 	deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
2858 	t3_lro_flush(adap, qs, &qs->lro);
2859 
2860 	if (sleeping)
2861 		check_ring_db(adap, qs, sleeping);
2862 
2863 	smp_mb();  /* commit Tx queue processed updates */
2864 	if (__predict_false(qs->txq_stopped > 1)) {
2865 		printf("restarting tx on %p\n", qs);
2866 
2867 		restart_tx(qs);
2868 	}
2869 
2870 	__refill_fl_lt(adap, &qs->fl[0], 512);
2871 	__refill_fl_lt(adap, &qs->fl[1], 512);
2872 	budget -= budget_left;
2873 	return (budget);
2874 }
2875 
2876 /*
2877  * A helper function that processes responses and issues GTS.
2878  */
2879 static __inline int
2880 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2881 {
2882 	int work;
2883 	static int last_holdoff = 0;
2884 
2885 	work = process_responses(adap, rspq_to_qset(rq), -1);
2886 
2887 	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
2888 		printf("next_holdoff=%d\n", rq->next_holdoff);
2889 		last_holdoff = rq->next_holdoff;
2890 	}
2891 	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2892 	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2893 
2894 	return (work);
2895 }
2896 
2897 
2898 /*
2899  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2900  * Handles data events from SGE response queues as well as error and other
2901  * async events as they all use the same interrupt pin.  We use one SGE
2902  * response queue per port in this mode and protect all response queues with
2903  * queue 0's lock.
2904  */
2905 void
2906 t3b_intr(void *data)
2907 {
2908 	uint32_t i, map;
2909 	adapter_t *adap = data;
2910 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2911 
2912 	t3_write_reg(adap, A_PL_CLI, 0);
2913 	map = t3_read_reg(adap, A_SG_DATA_INTR);
2914 
2915 	if (!map)
2916 		return;
2917 
2918 	if (__predict_false(map & F_ERRINTR))
2919 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2920 
2921 	mtx_lock(&q0->lock);
2922 	for_each_port(adap, i)
2923 	    if (map & (1 << i))
2924 			process_responses_gts(adap, &adap->sge.qs[i].rspq);
2925 	mtx_unlock(&q0->lock);
2926 }
2927 
2928 /*
2929  * The MSI interrupt handler.  This needs to handle data events from SGE
2930  * response queues as well as error and other async events as they all use
2931  * the same MSI vector.  We use one SGE response queue per port in this mode
2932  * and protect all response queues with queue 0's lock.
2933  */
2934 void
2935 t3_intr_msi(void *data)
2936 {
2937 	adapter_t *adap = data;
2938 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2939 	int i, new_packets = 0;
2940 
2941 	mtx_lock(&q0->lock);
2942 
2943 	for_each_port(adap, i)
2944 	    if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
2945 		    new_packets = 1;
2946 	mtx_unlock(&q0->lock);
2947 	if (new_packets == 0)
2948 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2949 }
2950 
2951 void
2952 t3_intr_msix(void *data)
2953 {
2954 	struct sge_qset *qs = data;
2955 	adapter_t *adap = qs->port->adapter;
2956 	struct sge_rspq *rspq = &qs->rspq;
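	/*
	 * With IFNET_MULTIQUEUE the lock is only tried, not taken
	 * unconditionally, so a contended response queue is simply skipped.
	 */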
2957 #ifndef IFNET_MULTIQUEUE
2958 	mtx_lock(&rspq->lock);
2959 #else
2960 	if (mtx_trylock(&rspq->lock))
2961 #endif
2962 	{
2963 
2964 		if (process_responses_gts(adap, rspq) == 0)
2965 			rspq->unhandled_irqs++;
2966 		mtx_unlock(&rspq->lock);
2967 	}
2968 }
2969 
2970 #define QDUMP_SBUF_SIZE		(32 * 400)
2971 static int
2972 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
2973 {
2974 	struct sge_rspq *rspq;
2975 	struct sge_qset *qs;
2976 	int i, err, dump_end, idx;
2977 	static int multiplier = 1;
2978 	struct sbuf *sb;
2979 	struct rsp_desc *rspd;
2980 	uint32_t data[4];
2981 
2982 	rspq = arg1;
2983 	qs = rspq_to_qset(rspq);
2984 	if (rspq->rspq_dump_count == 0)
2985 		return (0);
2986 	if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
2987 		log(LOG_WARNING,
2988 		    "dump count is too large %d\n", rspq->rspq_dump_count);
2989 		rspq->rspq_dump_count = 0;
2990 		return (EINVAL);
2991 	}
2992 	if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
2993 		log(LOG_WARNING,
2994 		    "dump start of %d is greater than queue size\n",
2995 		    rspq->rspq_dump_start);
2996 		rspq->rspq_dump_start = 0;
2997 		return (EINVAL);
2998 	}
2999 	err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3000 	if (err)
3001 		return (err);
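	/* Grow the sbuf and retry if the fixed-size buffer overflows. */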
3002 retry_sbufops:
3003 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3004 
3005 	sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3006 	    (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3007 	    ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3008 	sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3009 	    ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3010 
3011 	sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3012 	    (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3013 
3014 	dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3015 	for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3016 		idx = i & (RSPQ_Q_SIZE-1);
3017 
3018 		rspd = &rspq->desc[idx];
3019 		sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3020 		    idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3021 		    rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3022 		sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3023 		    rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3024 		    be32toh(rspd->len_cq), rspd->intr_gen);
3025 	}
3026 	if (sbuf_overflowed(sb)) {
3027 		sbuf_delete(sb);
3028 		multiplier++;
3029 		goto retry_sbufops;
3030 	}
3031 	sbuf_finish(sb);
3032 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3033 	sbuf_delete(sb);
3034 	return (err);
3035 }
3036 
3037 static int
3038 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3039 {
3040 	struct sge_txq *txq;
3041 	struct sge_qset *qs;
3042 	int i, j, err, dump_end;
3043 	static int multiplier = 1;
3044 	struct sbuf *sb;
3045 	struct tx_desc *txd;
3046 	uint32_t *WR, wr_hi, wr_lo, gen;
3047 	uint32_t data[4];
3048 
3049 	txq = arg1;
3050 	qs = txq_to_qset(txq, TXQ_ETH);
3051 	if (txq->txq_dump_count == 0) {
3052 		return (0);
3053 	}
3054 	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3055 		log(LOG_WARNING,
3056 		    "dump count is too large %d\n", txq->txq_dump_count);
3057 		txq->txq_dump_count = 1;
3058 		return (EINVAL);
3059 	}
3060 	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3061 		log(LOG_WARNING,
3062 		    "dump start of %d is greater than queue size\n",
3063 		    txq->txq_dump_start);
3064 		txq->txq_dump_start = 0;
3065 		return (EINVAL);
3066 	}
3067 	err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3068 	if (err)
3069 		return (err);
3070 
3071 
3072 retry_sbufops:
3073 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3074 
3075 	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3076 	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3077 	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3078 	sbuf_printf(sb, " TUN=%u TOE=%u generation%u uP token=%u valid=%u\n",
3079 	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3080 	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3081 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3082 	    txq->txq_dump_start,
3083 	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3084 
3085 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3086 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3087 		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3088 		WR = (uint32_t *)txd->flit;
3089 		wr_hi = ntohl(WR[0]);
3090 		wr_lo = ntohl(WR[1]);
3091 		gen = G_WR_GEN(wr_lo);
3092 
3093 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3094 		    wr_hi, wr_lo, gen);
3095 		for (j = 2; j < 30; j += 4)
3096 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3097 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3098 
3099 	}
3100 	if (sbuf_overflowed(sb)) {
3101 		sbuf_delete(sb);
3102 		multiplier++;
3103 		goto retry_sbufops;
3104 	}
3105 	sbuf_finish(sb);
3106 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3107 	sbuf_delete(sb);
3108 	return (err);
3109 }
3110 
3111 static int
3112 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3113 {
3114 	struct sge_txq *txq;
3115 	struct sge_qset *qs;
3116 	int i, j, err, dump_end;
3117 	static int multiplier = 1;
3118 	struct sbuf *sb;
3119 	struct tx_desc *txd;
3120 	uint32_t *WR, wr_hi, wr_lo, gen;
3121 
3122 	txq = arg1;
3123 	qs = txq_to_qset(txq, TXQ_CTRL);
3124 	if (txq->txq_dump_count == 0) {
3125 		return (0);
3126 	}
3127 	if (txq->txq_dump_count > 256) {
3128 		log(LOG_WARNING,
3129 		    "dump count is too large %d\n", txq->txq_dump_count);
3130 		txq->txq_dump_count = 1;
3131 		return (EINVAL);
3132 	}
3133 	if (txq->txq_dump_start > 255) {
3134 		log(LOG_WARNING,
3135 		    "dump start of %d is greater than queue size\n",
3136 		    txq->txq_dump_start);
3137 		txq->txq_dump_start = 0;
3138 		return (EINVAL);
3139 	}
3140 
3141 retry_sbufops:
3142 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3143 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3144 	    txq->txq_dump_start,
3145 	    (txq->txq_dump_start + txq->txq_dump_count) & 255);
3146 
3147 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3148 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3149 		txd = &txq->desc[i & (255)];
3150 		WR = (uint32_t *)txd->flit;
3151 		wr_hi = ntohl(WR[0]);
3152 		wr_lo = ntohl(WR[1]);
3153 		gen = G_WR_GEN(wr_lo);
3154 
3155 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3156 		    wr_hi, wr_lo, gen);
3157 		for (j = 2; j < 30; j += 4)
3158 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3159 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3160 
3161 	}
3162 	if (sbuf_overflowed(sb)) {
3163 		sbuf_delete(sb);
3164 		multiplier++;
3165 		goto retry_sbufops;
3166 	}
3167 	sbuf_finish(sb);
3168 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3169 	sbuf_delete(sb);
3170 	return (err);
3171 }
3172 
3173 static int
3174 t3_lro_enable(SYSCTL_HANDLER_ARGS)
3175 {
3176 	adapter_t *sc;
3177 	int i, j, enabled, err, nqsets = 0;
3178 
3179 #ifndef LRO_WORKING
3180 	return (0);
3181 #endif
3182 	sc = arg1;
3183 	enabled = sc->sge.qs[0].lro.enabled;
3184 	err = sysctl_handle_int(oidp, &enabled, arg2, req);
3185 
3186 	if (err != 0)
3187 		return (err);
3188 	if (enabled == sc->sge.qs[0].lro.enabled)
3189 		return (0);
3190 
3191 	for (i = 0; i < sc->params.nports; i++)
3192 		for (j = 0; j < sc->port[i].nqsets; j++)
3193 			nqsets++;
3194 
3195 	for (i = 0; i < nqsets; i++)
3196 		sc->sge.qs[i].lro.enabled = enabled;
3197 
3198 	return (0);
3199 }
3200 
3201 static int
3202 t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
3203 {
3204 	adapter_t *sc = arg1;
3205 	struct qset_params *qsp = &sc->params.sge.qset[0];
3206 	int coalesce_nsecs;
3207 	struct sge_qset *qs;
3208 	int i, j, err, nqsets = 0;
3209 	struct mtx *lock;
3210 
3211 	if ((sc->flags & FULL_INIT_DONE) == 0)
3212 		return (ENXIO);
3213 
3214 	coalesce_nsecs = qsp->coalesce_nsecs;
3215 	err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);
3216 
3217 	if (err != 0) {
3218 		return (err);
3219 	}
3220 	if (coalesce_nsecs == qsp->coalesce_nsecs)
3221 		return (0);
3222 
3223 	for (i = 0; i < sc->params.nports; i++)
3224 		for (j = 0; j < sc->port[i].nqsets; j++)
3225 			nqsets++;
3226 
3227 	coalesce_nsecs = max(100, coalesce_nsecs);
3228 
3229 	for (i = 0; i < nqsets; i++) {
3230 		qs = &sc->sge.qs[i];
3231 		qsp = &sc->params.sge.qset[i];
3232 		qsp->coalesce_nsecs = coalesce_nsecs;
3233 
3234 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3235 			    &sc->sge.qs[0].rspq.lock;
3236 
3237 		mtx_lock(lock);
3238 		t3_update_qset_coalesce(qs, qsp);
3239 		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3240 		    V_NEWTIMER(qs->rspq.holdoff_tmr));
3241 		mtx_unlock(lock);
3242 	}
3243 
3244 	return (0);
3245 }
3246 
3247 
3248 void
3249 t3_add_attach_sysctls(adapter_t *sc)
3250 {
3251 	struct sysctl_ctx_list *ctx;
3252 	struct sysctl_oid_list *children;
3253 
3254 	ctx = device_get_sysctl_ctx(sc->dev);
3255 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3256 
3257 	/* random information */
3258 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3259 	    "firmware_version",
3260 	    CTLFLAG_RD, &sc->fw_version,
3261 	    0, "firmware version");
3262 
3263 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3264 	    "enable_lro",
3265 	    CTLTYPE_INT|CTLFLAG_RW, sc,
3266 	    0, t3_lro_enable,
3267 	    "I", "enable large receive offload");
3268 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3269 	    "hw_revision",
3270 	    CTLFLAG_RD, &sc->params.rev,
3271 	    0, "chip model");
3272 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3273 	    "enable_debug",
3274 	    CTLFLAG_RW, &cxgb_debug,
3275 	    0, "enable verbose debugging output");
3276 	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce",
3277 	    CTLFLAG_RD, &sc->tunq_coalesce,
3278 	    "#tunneled packets freed");
3279 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3280 	    "txq_overrun",
3281 	    CTLFLAG_RD, &txq_fills,
3282 	    0, "#times txq overrun");
3283 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3284 	    "pcpu_cache_enable",
3285 	    CTLFLAG_RW, &cxgb_pcpu_cache_enable,
3286 	    0, "#enable driver local pcpu caches");
3287 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3288 	    "cache_alloc",
3289 	    CTLFLAG_RD, &cxgb_cached_allocations,
3290 	    0, "#times a cluster was allocated from cache");
3291 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3292 	    "cached",
3293 	    CTLFLAG_RD, &cxgb_cached,
3294 	    0, "#times a cluster was cached");
3295 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3296 	    "ext_freed",
3297 	    CTLFLAG_RD, &cxgb_ext_freed,
3298 	    0, "#times a cluster was freed through ext_free");
3299 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3300 	    "ext_inited",
3301 	    CTLFLAG_RD, &cxgb_ext_inited,
3302 	    0, "#times a cluster was initialized for ext_free");
3303 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3304 	    "mbufs_outstanding",
3305 	    CTLFLAG_RD, &cxgb_mbufs_outstanding,
3306 	    0, "#mbufs in flight in the driver");
3307 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3308 	    "pack_outstanding",
3309 	    CTLFLAG_RD, &cxgb_pack_outstanding,
3310 	    0, "#packet in flight in the driver");
3311 }
3312 
3313 
3314 static const char *rspq_name = "rspq";
3315 static const char *txq_names[] =
3316 {
3317 	"txq_eth",
3318 	"txq_ofld",
3319 	"txq_ctrl"
3320 };
3321 
3322 void
3323 t3_add_configured_sysctls(adapter_t *sc)
3324 {
3325 	struct sysctl_ctx_list *ctx;
3326 	struct sysctl_oid_list *children;
3327 	int i, j;
3328 
3329 	ctx = device_get_sysctl_ctx(sc->dev);
3330 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3331 
3332 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3333 	    "intr_coal",
3334 	    CTLTYPE_INT|CTLFLAG_RW, sc,
3335 	    0, t3_set_coalesce_nsecs,
3336 	    "I", "interrupt coalescing timer (ns)");
3337 
3338 	for (i = 0; i < sc->params.nports; i++) {
3339 		struct port_info *pi = &sc->port[i];
3340 		struct sysctl_oid *poid;
3341 		struct sysctl_oid_list *poidlist;
3342 
3343 		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3344 		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3345 		    pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3346 		poidlist = SYSCTL_CHILDREN(poid);
3347 		SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3348 		    "nqsets", CTLFLAG_RD, &pi->nqsets,
3349 		    0, "#queue sets");
3350 
3351 		for (j = 0; j < pi->nqsets; j++) {
3352 			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3353 			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, *ctrlqpoid;
3354 			struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist, *ctrlqpoidlist;
3355 			struct sge_txq *txq = &qs->txq[TXQ_ETH];
3356 
3357 			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3358 
3359 			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3360 			    qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3361 			qspoidlist = SYSCTL_CHILDREN(qspoid);
3362 
3363 			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3364 			    rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3365 			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3366 
3367 			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3368 			    txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3369 			txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3370 
3371 			ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3372 			    txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3373 			ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3374 
3375 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3376 			    CTLFLAG_RD, &qs->rspq.size,
3377 			    0, "#entries in response queue");
3378 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3379 			    CTLFLAG_RD, &qs->rspq.cidx,
3380 			    0, "consumer index");
3381 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3382 			    CTLFLAG_RD, &qs->rspq.credits,
3383 			    0, "#credits");
3384 			SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3385 			    CTLFLAG_RD, &qs->rspq.phys_addr,
3386 			    "physical_address_of the queue");
3387 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3388 			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3389 			    0, "start rspq dump entry");
3390 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3391 			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3392 			    0, "#rspq entries to dump");
3393 			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3394 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3395 			    0, t3_dump_rspq, "A", "dump of the response queue");
3396 
3397 
3398 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
3399 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
3400 			    0, "#tunneled packets dropped");
3401 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3402 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3403 			    0, "#tunneled packets waiting to be sent");
3404 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3405 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3406 			    0, "#tunneled packets queue producer index");
3407 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3408 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3409 			    0, "#tunneled packets queue consumer index");
3410 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3411 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3412 			    0, "#tunneled packets processed by the card");
3413 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3414 			    CTLFLAG_RD, &txq->cleaned,
3415 			    0, "#tunneled packets cleaned");
3416 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3417 			    CTLFLAG_RD, &txq->in_use,
3418 			    0, "#tunneled packet slots in use");
3419 			SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3420 			    CTLFLAG_RD, &txq->txq_frees,
3421 			    "#tunneled packets freed");
3422 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3423 			    CTLFLAG_RD, &txq->txq_skipped,
3424 			    0, "#tunneled packet descriptors skipped");
3425 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced",
3426 			    CTLFLAG_RD, &txq->txq_coalesced,
3427 			    0, "#tunneled packets coalesced");
3428 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3429 			    CTLFLAG_RD, &txq->txq_enqueued,
3430 			    0, "#tunneled packets enqueued to hardware");
3431 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3432 			    CTLFLAG_RD, &qs->txq_stopped,
3433 			    0, "tx queues stopped");
3434 			SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3435 			    CTLFLAG_RD, &txq->phys_addr,
3436 			    "physical_address_of the queue");
3437 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3438 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3439 			    0, "txq generation");
3440 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3441 			    CTLFLAG_RD, &txq->cidx,
3442 			    0, "hardware queue cidx");
3443 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3444 			    CTLFLAG_RD, &txq->pidx,
3445 			    0, "hardware queue pidx");
3446 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3447 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3448 			    0, "txq start idx for dump");
3449 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3450 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3451 			    0, "txq #entries to dump");
3452 			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3453 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3454 			    0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3455 
3456 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3457 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3458 			    0, "ctrlq start idx for dump");
3459 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3460 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3461 			    0, "ctrl #entries to dump");
3462 			SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3463 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3464 	    0, t3_dump_txq_ctrl, "A", "dump of the control queue");
3470 		}
3471 	}
3472 }
3473 
3474 /**
3475  *	t3_get_desc - dump an SGE descriptor for debugging purposes
3476  *	@qs: the queue set
3477  *	@qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3478  *	@idx: the descriptor index in the queue
3479  *	@data: where to dump the descriptor contents
3480  *
3481  *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3482  *	size of the descriptor.
3483  */
3484 int
3485 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3486 		unsigned char *data)
3487 {
3488 	if (qnum >= 6)
3489 		return (EINVAL);
3490 
3491 	if (qnum < 3) {
3492 		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3493 			return -EINVAL;
3494 		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3495 		return sizeof(struct tx_desc);
3496 	}
3497 
3498 	if (qnum == 3) {
3499 		if (!qs->rspq.desc || idx >= qs->rspq.size)
3500 			return (EINVAL);
3501 		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3502 		return sizeof(struct rsp_desc);
3503 	}
3504 
3505 	qnum -= 4;
3506 	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3507 		return (EINVAL);
3508 	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3509 	return sizeof(struct rx_desc);
3510 }
3511