xref: /freebsd/sys/dev/cxgb/cxgb_sge.c (revision 1e413cf93298b5b97441a21d9a50fdcd0ee9945e)
1 /**************************************************************************
2 
3 Copyright (c) 2007, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 #define DEBUG_BUFRING
30 
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/bus.h>
40 #include <sys/conf.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/bus_dma.h>
44 #include <sys/rman.h>
45 #include <sys/queue.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48 
49 #include <sys/proc.h>
50 #include <sys/sbuf.h>
51 #include <sys/sched.h>
52 #include <sys/smp.h>
53 #include <sys/systm.h>
54 #include <sys/syslog.h>
55 
56 #include <netinet/in_systm.h>
57 #include <netinet/in.h>
58 #include <netinet/ip.h>
59 #include <netinet/tcp.h>
60 
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 
67 #ifdef CONFIG_DEFINED
68 #include <cxgb_include.h>
69 #include <sys/mvec.h>
70 #else
71 #include <dev/cxgb/cxgb_include.h>
72 #include <dev/cxgb/sys/mvec.h>
73 #endif
74 
75 int      txq_fills = 0;
76 static int bogus_imm = 0;
77 static int recycle_enable = 0;
78 extern int cxgb_txq_buf_ring_size;
79 int cxgb_cached_allocations;
80 int cxgb_cached;
81 int cxgb_ext_freed;
82 extern int cxgb_use_16k_clusters;
83 
84 
85 #define USE_GTS 0
86 
87 #define SGE_RX_SM_BUF_SIZE	1536
88 #define SGE_RX_DROP_THRES	16
89 #define SGE_RX_COPY_THRES	128
90 
91 /*
92  * Period of the Tx buffer reclaim timer.  This timer does not need to run
93  * frequently as Tx buffers are usually reclaimed by new Tx packets.
94  */
95 #define TX_RECLAIM_PERIOD       (hz >> 1)
96 
97 
98 /*
99  * Values for sge_txq.flags
100  */
101 enum {
102 	TXQ_RUNNING	= 1 << 0,  /* fetch engine is running */
103 	TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
104 };
105 
106 struct tx_desc {
107 	uint64_t	flit[TX_DESC_FLITS];
108 } __packed;
109 
110 struct rx_desc {
111 	uint32_t	addr_lo;
112 	uint32_t	len_gen;
113 	uint32_t	gen2;
114 	uint32_t	addr_hi;
115 } __packed;
116 
117 struct rsp_desc {               /* response queue descriptor */
118 	struct rss_header	rss_hdr;
119 	uint32_t		flags;
120 	uint32_t		len_cq;
121 	uint8_t			imm_data[47];
122 	uint8_t			intr_gen;
123 } __packed;
124 
125 #define RX_SW_DESC_MAP_CREATED	(1 << 0)
126 #define TX_SW_DESC_MAP_CREATED	(1 << 1)
127 #define RX_SW_DESC_INUSE        (1 << 3)
128 #define TX_SW_DESC_MAPPED       (1 << 4)
129 
130 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
131 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
132 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
133 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
134 
135 struct tx_sw_desc {                /* SW state per Tx descriptor */
136 	struct mbuf_iovec mi;
137 	bus_dmamap_t	map;
138 	int		flags;
139 };
140 
141 struct rx_sw_desc {                /* SW state per Rx descriptor */
142 	caddr_t	         rxsd_cl;
143 	caddr_t	         data;
144 	bus_dmamap_t	  map;
145 	int		  flags;
146 };
147 
148 struct txq_state {
149 	unsigned int compl;
150 	unsigned int gen;
151 	unsigned int pidx;
152 };
153 
154 struct refill_fl_cb_arg {
155 	int               error;
156 	bus_dma_segment_t seg;
157 	int               nseg;
158 };
159 
160 /*
161  * Maps a number of flits to the number of Tx descriptors that can hold them.
162  * The formula is
163  *
164  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
165  *
166  * HW allows up to 4 descriptors to be combined into a WR.
167  */
168 static uint8_t flit_desc_map[] = {
169 	0,
170 #if SGE_NUM_GENBITS == 1
171 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
172 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
173 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
174 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
175 #elif SGE_NUM_GENBITS == 2
176 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
177 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
178 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
179 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
180 #else
181 # error "SGE_NUM_GENBITS must be 1 or 2"
182 #endif
183 };
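/*
 * Illustrative note (an interpretation, not from the original source): with
 * SGE_NUM_GENBITS == 2 the last flit of a descriptor holds the second
 * generation bit, leaving WR_FLITS (presumably TX_DESC_FLITS - 1 == 15)
 * usable flits per descriptor, so the table above yields
 * flit_desc_map[15] == 1 and flit_desc_map[16] == 2, matching
 * 1 + (16 - 2) / (WR_FLITS - 1) == 2.
 */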
184 
185 
186 static int lro_default = 0;
187 int cxgb_debug = 0;
188 
189 static void sge_timer_cb(void *arg);
190 static void sge_timer_reclaim(void *arg, int ncount);
191 static void sge_txq_reclaim_handler(void *arg, int ncount);
192 
193 /**
194  *	reclaim_completed_tx_ - reclaims completed Tx descriptors
195  *	@q: the Tx queue to reclaim completed descriptors from
196  *	@reclaim_min: do not reclaim unless at least this many descriptors are pending
197  *
198  *	Reclaims Tx descriptors that the SGE has indicated it has processed,
199  *	and frees the associated buffers if possible.  Called with the Tx
200  *	queue's lock held.
201  */
202 static __inline int
203 reclaim_completed_tx_(struct sge_txq *q, int reclaim_min)
204 {
205 	int reclaim = desc_reclaimable(q);
206 
207 	if (reclaim < reclaim_min)
208 		return (0);
209 
210 	mtx_assert(&q->lock, MA_OWNED);
211 	if (reclaim > 0) {
212 		t3_free_tx_desc(q, reclaim);
213 		q->cleaned += reclaim;
214 		q->in_use -= reclaim;
215 	}
216 	return (reclaim);
217 }
218 
219 /**
220  *	should_restart_tx - are there enough resources to restart a Tx queue?
221  *	@q: the Tx queue
222  *
223  *	Checks if there are enough descriptors to restart a suspended Tx queue.
224  */
225 static __inline int
226 should_restart_tx(const struct sge_txq *q)
227 {
228 	unsigned int r = q->processed - q->cleaned;
229 
230 	return q->in_use - r < (q->size >> 1);
231 }
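/*
 * Reading of the test above (a gloss, not authoritative): processed - cleaned
 * is the number of descriptors the SGE has completed but we have not yet
 * reclaimed, so in_use - r is what is genuinely outstanding; the queue is
 * worth restarting once less than half of the ring is still held by the
 * hardware.
 */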
232 
233 /**
234  *	t3_sge_init - initialize SGE
235  *	@adap: the adapter
236  *	@p: the SGE parameters
237  *
238  *	Performs SGE initialization needed every time after a chip reset.
239  *	We do not initialize any of the queue sets here, instead the driver
240  *	top-level must request those individually.  We also do not enable DMA
241  *	here, that should be done after the queues have been set up.
242  */
243 void
244 t3_sge_init(adapter_t *adap, struct sge_params *p)
245 {
246 	u_int ctrl, ups;
247 
248 	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
249 
250 	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
251 	       F_CQCRDTCTRL |
252 	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
253 	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
254 #if SGE_NUM_GENBITS == 1
255 	ctrl |= F_EGRGENCTRL;
256 #endif
257 	if (adap->params.rev > 0) {
258 		if (!(adap->flags & (USING_MSIX | USING_MSI)))
259 			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
260 		ctrl |= F_CQCRDTCTRL | F_AVOIDCQOVFL;
261 	}
262 	t3_write_reg(adap, A_SG_CONTROL, ctrl);
263 	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
264 		     V_LORCQDRBTHRSH(512));
265 	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
266 	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
267 		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
268 	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH, 1000);
269 	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
270 	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
271 	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
272 	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
273 	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
274 }
275 
276 
277 /**
278  *	sgl_len - calculates the size of an SGL of the given capacity
279  *	@n: the number of SGL entries
280  *
281  *	Calculates the number of flits needed for a scatter/gather list that
282  *	can hold the given number of entries.
283  */
284 static __inline unsigned int
285 sgl_len(unsigned int n)
286 {
287 	return ((3 * n) / 2 + (n & 1));
288 }
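/*
 * Worked example (a sketch for clarity, not used by the code): a pair of SGL
 * entries packs into 3 flits -- two 8-byte addresses plus two 4-byte lengths,
 * see how make_sgl() fills struct sg_ent -- and an odd trailing entry needs
 * 2 flits, hence sgl_len(1) == 2, sgl_len(2) == 3, sgl_len(3) == 5.
 */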
289 
290 /**
291  *	get_imm_packet - return the next ingress packet buffer from a response
292  *	@resp: the response descriptor containing the packet data
293  *
294  *	Return a packet containing the immediate data of the given response.
295  */
296 #ifdef DISABLE_MBUF_IOVEC
297 static __inline int
298 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct t3_mbuf_hdr *mh)
299 {
300 	struct mbuf *m = mh->m_head;
301 
302 	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
303 	m->m_pkthdr.len = m->m_len = IMMED_PKT_SIZE;
304 	return (0);
305 }
306 
307 #else
308 static int
309 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m, void *cl, uint32_t flags)
310 {
311 
312 	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
313 	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
314 	return (0);
315 
316 }
317 #endif
318 
319 static __inline u_int
320 flits_to_desc(u_int n)
321 {
322 	return (flit_desc_map[n]);
323 }
324 
325 void
326 t3_sge_err_intr_handler(adapter_t *adapter)
327 {
328 	unsigned int v, status;
329 
330 
331 	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
332 
333 	if (status & F_RSPQCREDITOVERFOW)
334 		CH_ALERT(adapter, "SGE response queue credit overflow\n");
335 
336 	if (status & F_RSPQDISABLED) {
337 		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
338 
339 		CH_ALERT(adapter,
340 			 "packet delivered to disabled response queue (0x%x)\n",
341 			 (v >> S_RSPQ0DISABLED) & 0xff);
342 	}
343 
344 	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
345 	if (status & (F_RSPQCREDITOVERFOW | F_RSPQDISABLED))
346 		t3_fatal_err(adapter);
347 }
348 
349 void
350 t3_sge_prep(adapter_t *adap, struct sge_params *p)
351 {
352 	int i;
353 
354 	/* XXX Does ETHER_ALIGN need to be accounted for here? */
355 	p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);
356 
357 	for (i = 0; i < SGE_QSETS; ++i) {
358 		struct qset_params *q = p->qset + i;
359 
360 		q->polling = adap->params.rev > 0;
361 
362 		if (adap->params.nports > 2) {
363 			q->coalesce_nsecs = 50000;
364 		} else {
365 #ifdef INVARIANTS
366 			q->coalesce_nsecs = 10000;
367 #else
368 			q->coalesce_nsecs = 5000;
369 #endif
370 		}
371 		q->rspq_size = RSPQ_Q_SIZE;
372 		q->fl_size = FL_Q_SIZE;
373 		q->jumbo_size = JUMBO_Q_SIZE;
374 		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
375 		q->txq_size[TXQ_OFLD] = 1024;
376 		q->txq_size[TXQ_CTRL] = 256;
377 		q->cong_thres = 0;
378 	}
379 }
380 
381 int
382 t3_sge_alloc(adapter_t *sc)
383 {
384 
385 	/* The parent tag. */
386 	if (bus_dma_tag_create( NULL,			/* parent */
387 				1, 0,			/* algnmnt, boundary */
388 				BUS_SPACE_MAXADDR,	/* lowaddr */
389 				BUS_SPACE_MAXADDR,	/* highaddr */
390 				NULL, NULL,		/* filter, filterarg */
391 				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
392 				BUS_SPACE_UNRESTRICTED, /* nsegments */
393 				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
394 				0,			/* flags */
395 				NULL, NULL,		/* lock, lockarg */
396 				&sc->parent_dmat)) {
397 		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
398 		return (ENOMEM);
399 	}
400 
401 	/*
402 	 * DMA tag for normal sized RX frames
403 	 */
404 	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
405 		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
406 		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
407 		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
408 		return (ENOMEM);
409 	}
410 
411 	/*
412 	 * DMA tag for jumbo sized RX frames.
413 	 */
414 	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
415 		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
416 		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
417 		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
418 		return (ENOMEM);
419 	}
420 
421 	/*
422 	 * DMA tag for TX frames.
423 	 */
424 	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
425 		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
426 		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
427 		NULL, NULL, &sc->tx_dmat)) {
428 		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
429 		return (ENOMEM);
430 	}
431 
432 	return (0);
433 }
434 
435 int
436 t3_sge_free(struct adapter * sc)
437 {
438 
439 	if (sc->tx_dmat != NULL)
440 		bus_dma_tag_destroy(sc->tx_dmat);
441 
442 	if (sc->rx_jumbo_dmat != NULL)
443 		bus_dma_tag_destroy(sc->rx_jumbo_dmat);
444 
445 	if (sc->rx_dmat != NULL)
446 		bus_dma_tag_destroy(sc->rx_dmat);
447 
448 	if (sc->parent_dmat != NULL)
449 		bus_dma_tag_destroy(sc->parent_dmat);
450 
451 	return (0);
452 }
453 
454 void
455 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
456 {
457 
458 	qs->rspq.holdoff_tmr = max(p->coalesce_nsecs/100, 1U);
459 	qs->rspq.polling = 0 /* p->polling */;
460 }
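/*
 * Unit note (derived from t3_sge_init() programming A_SG_TIMER_TICK to
 * core_ticks_per_usec / 10): the holdoff timer counts roughly 100ns ticks,
 * so a coalesce_nsecs of 5000 becomes a holdoff_tmr of 50, i.e. about 5us
 * of interrupt coalescing.
 */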
461 
462 #if !defined(__i386__) && !defined(__amd64__)
463 static void
464 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
465 {
466 	struct refill_fl_cb_arg *cb_arg = arg;
467 
468 	cb_arg->error = error;
469 	cb_arg->seg = segs[0];
470 	cb_arg->nseg = nseg;
471 
472 }
473 #endif
474 /**
475  *	refill_fl - refill an SGE free-buffer list
476  *	@sc: the controller softc
477  *	@q: the free-list to refill
478  *	@n: the number of new buffers to allocate
479  *
480  *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
481  *	The caller must assure that @n does not exceed the queue's capacity.
482  */
483 static void
484 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
485 {
486 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
487 	struct rx_desc *d = &q->desc[q->pidx];
488 	struct refill_fl_cb_arg cb_arg;
489 	caddr_t cl;
490 	int err;
491 	int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
492 
493 	cb_arg.error = 0;
494 	while (n--) {
495 		/*
496 		 * We only allocate a cluster, mbuf allocation happens after rx
497 		 */
498 		if ((cl = cxgb_cache_get(q->zone)) == NULL) {
499 			log(LOG_WARNING, "Failed to allocate cluster\n");
500 			goto done;
501 		}
502 
503 		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
504 			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
505 				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
506 				uma_zfree(q->zone, cl);
507 				goto done;
508 			}
509 			sd->flags |= RX_SW_DESC_MAP_CREATED;
510 		}
511 #if !defined(__i386__) && !defined(__amd64__)
512 		err = bus_dmamap_load(q->entry_tag, sd->map,
513 		    cl + header_size, q->buf_size,
514 		    refill_fl_cb, &cb_arg, 0);
515 
516 		if (err != 0 || cb_arg.error) {
517 			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
518 			/*
519 			 * XXX free cluster
520 			 */
521 			return;
522 		}
523 #else
524 		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size));
525 #endif
526 		sd->flags |= RX_SW_DESC_INUSE;
527 		sd->rxsd_cl = cl;
528 		sd->data = cl + header_size;
529 		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
530 		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >>32) & 0xffffffff);
531 		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
532 		d->gen2 = htobe32(V_FLD_GEN2(q->gen));
533 
534 		d++;
535 		sd++;
536 
537 		if (++q->pidx == q->size) {
538 			q->pidx = 0;
539 			q->gen ^= 1;
540 			sd = q->sdesc;
541 			d = q->desc;
542 		}
543 		q->credits++;
544 	}
545 
546 done:
547 	t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
548 }
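/*
 * Layout note (inferred from the code above): each cluster reserves
 * header_size bytes at its start so that an mbuf header, packet header,
 * external-storage bookkeeping and a reference count can be constructed in
 * place when the buffer is actually received; only the region starting at
 * cl + header_size (sd->data) is mapped for DMA and handed to the SGE.
 */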
549 
550 
551 /**
552  *	free_rx_bufs - free the Rx buffers on an SGE free list
553  *	@sc: the controller softc
554  *	@q: the SGE free list to clean up
555  *
556  *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
557  *	this queue should be stopped before calling this function.
558  */
559 static void
560 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
561 {
562 	u_int cidx = q->cidx;
563 
564 	while (q->credits--) {
565 		struct rx_sw_desc *d = &q->sdesc[cidx];
566 
567 		if (d->flags & RX_SW_DESC_INUSE) {
568 			bus_dmamap_unload(q->entry_tag, d->map);
569 			bus_dmamap_destroy(q->entry_tag, d->map);
570 			uma_zfree(q->zone, d->rxsd_cl);
571 		}
572 		d->rxsd_cl = NULL;
573 		if (++cidx == q->size)
574 			cidx = 0;
575 	}
576 }
577 
578 static __inline void
579 __refill_fl(adapter_t *adap, struct sge_fl *fl)
580 {
581 	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
582 }
583 
584 static __inline void
585 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
586 {
587 	if ((fl->size - fl->credits) < max)
588 		refill_fl(adap, fl, min(max, fl->size - fl->credits));
589 }
590 
591 void
592 refill_fl_service(adapter_t *adap, struct sge_fl *fl)
593 {
594 	__refill_fl_lt(adap, fl, 512);
595 }
596 
597 /**
598  *	recycle_rx_buf - recycle a receive buffer
599  *	@adapter: the adapter
600  *	@q: the SGE free list
601  *	@idx: index of buffer to recycle
602  *
603  *	Recycles the specified buffer on the given free list by adding it at
604  *	the next available slot on the list.
605  */
606 static void
607 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
608 {
609 	struct rx_desc *from = &q->desc[idx];
610 	struct rx_desc *to   = &q->desc[q->pidx];
611 
612 	q->sdesc[q->pidx] = q->sdesc[idx];
613 	to->addr_lo = from->addr_lo;        // already big endian
614 	to->addr_hi = from->addr_hi;        // likewise
615 	wmb();
616 	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
617 	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
618 	q->credits++;
619 
620 	if (++q->pidx == q->size) {
621 		q->pidx = 0;
622 		q->gen ^= 1;
623 	}
624 	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
625 }
626 
627 static void
628 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
629 {
630 	uint32_t *addr;
631 
632 	addr = arg;
633 	*addr = segs[0].ds_addr;
634 }
635 
636 static int
637 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
638     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
639     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
640 {
641 	size_t len = nelem * elem_size;
642 	void *s = NULL;
643 	void *p = NULL;
644 	int err;
645 
646 	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
647 				      BUS_SPACE_MAXADDR_32BIT,
648 				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
649 				      len, 0, NULL, NULL, tag)) != 0) {
650 		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
651 		return (ENOMEM);
652 	}
653 
654 	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
655 				    map)) != 0) {
656 		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
657 		return (ENOMEM);
658 	}
659 
660 	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
661 	bzero(p, len);
662 	*(void **)desc = p;
663 
664 	if (sw_size) {
665 		len = nelem * sw_size;
666 		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
667 		*(void **)sdesc = s;
668 	}
669 	if (parent_entry_tag == NULL)
670 		return (0);
671 
672 	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
673 				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
674 		                      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
675 				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
676 		                      NULL, NULL, entry_tag)) != 0) {
677 		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
678 		return (ENOMEM);
679 	}
680 	return (0);
681 }
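/*
 * Hypothetical usage sketch (not part of the driver -- the real call sites
 * live in the queue-set setup code, and the free-list field names used here,
 * notably phys_addr, are assumptions based on how this file uses them):
 */
#if 0
	if (alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
	    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr, &q->fl[0].desc,
	    &q->fl[0].sdesc, &q->fl[0].desc_tag, &q->fl[0].desc_map,
	    sc->rx_dmat, &q->fl[0].entry_tag) != 0)
		goto err;	/* hypothetical error label */
#endif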
682 
683 static void
684 sge_slow_intr_handler(void *arg, int ncount)
685 {
686 	adapter_t *sc = arg;
687 
688 	t3_slow_intr_handler(sc);
689 }
690 
691 /**
692  *	sge_timer_cb - perform periodic maintenance of an SGE qset
693  *	@data: the SGE queue set to maintain
694  *
695  *	Runs periodically from a timer to perform maintenance of an SGE queue
696  *	set.  It performs two tasks:
697  *
698  *	a) Cleans up any completed Tx descriptors that may still be pending.
699  *	Normal descriptor cleanup happens when new packets are added to a Tx
700  *	queue so this timer is relatively infrequent and does any cleanup only
701  *	if the Tx queue has not seen any new packets in a while.  We make a
702  *	best effort attempt to reclaim descriptors, in that we don't wait
703  *	around if we cannot get a queue's lock (which most likely is because
704  *	someone else is queueing new packets and so will also handle the clean
705  *	up).  Since control queues use immediate data exclusively we don't
706  *	bother cleaning them up here.
707  *
708  *	b) Replenishes Rx queues that have run out due to memory shortage.
709  *	Normally new Rx buffers are added when existing ones are consumed but
710  *	when out of memory a queue can become empty.  We try to add only a few
711  *	buffers here, the queue will be replenished fully as these new buffers
712  *	are used up if memory shortage has subsided.
713  *
714  *	c) Return coalesced response queue credits in case a response queue is
715  *	starved.
716  *
717  *	d) Ring doorbells for T304 tunnel queues since we have seen doorbell
718  *	fifo overflows and the FW doesn't implement any recovery scheme yet.
719  */
720 static void
721 sge_timer_cb(void *arg)
722 {
723 	adapter_t *sc = arg;
724 #ifndef IFNET_MULTIQUEUE
725 	struct port_info *pi;
726 	struct sge_qset *qs;
727 	struct sge_txq  *txq;
728 	int i, j;
729 	int reclaim_ofl, refill_rx;
730 
731 	for (i = 0; i < sc->params.nports; i++)
732 		for (j = 0; j < sc->port[i].nqsets; j++) {
733 			qs = &sc->sge.qs[i + j];
734 			txq = &qs->txq[0];
735 			reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
736 			refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
737 			    (qs->fl[1].credits < qs->fl[1].size));
738 			if (reclaim_ofl || refill_rx) {
739 				pi = &sc->port[i];
740 				taskqueue_enqueue(pi->tq, &pi->timer_reclaim_task);
741 				break;
742 			}
743 		}
744 #endif
745 	if (sc->params.nports > 2) {
746 		int i;
747 
748 		for_each_port(sc, i) {
749 			struct port_info *pi = &sc->port[i];
750 
751 			t3_write_reg(sc, A_SG_KDOORBELL,
752 				     F_SELEGRCNTX |
753 				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
754 		}
755 	}
756 	if (sc->open_device_map != 0)
757 		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
758 }
759 
760 /*
761  * This is meant to be a catch-all function to keep sge state private
762  * to sge.c
763  *
764  */
765 int
766 t3_sge_init_adapter(adapter_t *sc)
767 {
768 	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
769 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
770 	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
771 	mi_init();
772 	cxgb_cache_init();
773 	return (0);
774 }
775 
776 int
777 t3_sge_reset_adapter(adapter_t *sc)
778 {
779 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
780 	return (0);
781 }
782 
783 int
784 t3_sge_init_port(struct port_info *pi)
785 {
786 	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
787 	return (0);
788 }
789 
790 void
791 t3_sge_deinit_sw(adapter_t *sc)
792 {
793 	int i;
794 
795 	callout_drain(&sc->sge_timer_ch);
796 	if (sc->tq)
797 		taskqueue_drain(sc->tq, &sc->slow_intr_task);
798 	for (i = 0; i < sc->params.nports; i++)
799 		if (sc->port[i].tq != NULL)
800 			taskqueue_drain(sc->port[i].tq, &sc->port[i].timer_reclaim_task);
801 
802 	mi_deinit();
803 }
804 
805 /**
806  *	refill_rspq - replenish an SGE response queue
807  *	@adapter: the adapter
808  *	@q: the response queue to replenish
809  *	@credits: how many new responses to make available
810  *
811  *	Replenishes a response queue by making the supplied number of responses
812  *	available to HW.
813  */
814 static __inline void
815 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
816 {
817 
818 	/* mbufs are allocated on demand when a rspq entry is processed. */
819 	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
820 		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
821 }
822 
823 static __inline void
824 sge_txq_reclaim_(struct sge_txq *txq, int force)
825 {
826 
827 	if (desc_reclaimable(txq) < 16)
828 		return;
829 	if (mtx_trylock(&txq->lock) == 0)
830 		return;
831 	reclaim_completed_tx_(txq, 16);
832 	mtx_unlock(&txq->lock);
833 
834 }
835 
836 static void
837 sge_txq_reclaim_handler(void *arg, int ncount)
838 {
839 	struct sge_txq *q = arg;
840 
841 	sge_txq_reclaim_(q, TRUE);
842 }
843 
844 
845 
846 static void
847 sge_timer_reclaim(void *arg, int ncount)
848 {
849 	struct port_info *pi = arg;
850 	int i, nqsets = pi->nqsets;
851 	adapter_t *sc = pi->adapter;
852 	struct sge_qset *qs;
853 	struct sge_txq *txq;
854 	struct mtx *lock;
855 
856 #ifdef IFNET_MULTIQUEUE
857 	panic("%s should not be called with multiqueue support\n", __FUNCTION__);
858 #endif
859 	for (i = 0; i < nqsets; i++) {
860 		qs = &sc->sge.qs[i];
861 
862 		txq = &qs->txq[TXQ_OFLD];
863 		sge_txq_reclaim_(txq, FALSE);
864 
865 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
866 			    &sc->sge.qs[0].rspq.lock;
867 
868 		if (mtx_trylock(lock)) {
869 			/* XXX currently assume that we are *NOT* polling */
870 			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
871 
872 			if (qs->fl[0].credits < qs->fl[0].size - 16)
873 				__refill_fl(sc, &qs->fl[0]);
874 			if (qs->fl[1].credits < qs->fl[1].size - 16)
875 				__refill_fl(sc, &qs->fl[1]);
876 
877 			if (status & (1 << qs->rspq.cntxt_id)) {
878 				if (qs->rspq.credits) {
879 					refill_rspq(sc, &qs->rspq, 1);
880 					qs->rspq.credits--;
881 					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
882 					    1 << qs->rspq.cntxt_id);
883 				}
884 			}
885 			mtx_unlock(lock);
886 		}
887 	}
888 }
889 
890 /**
891  *	init_qset_cntxt - initialize an SGE queue set context info
892  *	@qs: the queue set
893  *	@id: the queue set id
894  *
895  *	Initializes the TIDs and context ids for the queues of a queue set.
896  */
897 static void
898 init_qset_cntxt(struct sge_qset *qs, u_int id)
899 {
900 
901 	qs->rspq.cntxt_id = id;
902 	qs->fl[0].cntxt_id = 2 * id;
903 	qs->fl[1].cntxt_id = 2 * id + 1;
904 	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
905 	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
906 	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
907 	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
908 	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
909 
910 	mbufq_init(&qs->txq[TXQ_ETH].sendq);
911 	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
912 	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
913 }
914 
915 
916 static void
917 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
918 {
919 	txq->in_use += ndesc;
920 	/*
921 	 * XXX we don't handle stopping of queue
922 	 * presumably start handles this when we bump against the end
923 	 */
924 	txqs->gen = txq->gen;
925 	txq->unacked += ndesc;
926 	txqs->compl = (txq->unacked & 8) << (S_WR_COMPL - 3);
927 	txq->unacked &= 7;
928 	txqs->pidx = txq->pidx;
929 	txq->pidx += ndesc;
930 #ifdef INVARIANTS
931 	if (((txqs->pidx > txq->cidx) &&
932 		(txq->pidx < txqs->pidx) &&
933 		(txq->pidx >= txq->cidx)) ||
934 	    ((txqs->pidx < txq->cidx) &&
935 		(txq->pidx >= txq->cidx)) ||
936 	    ((txqs->pidx < txq->cidx) &&
937 		(txq->cidx < txqs->pidx)))
938 		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
939 		    txqs->pidx, txq->pidx, txq->cidx);
940 #endif
941 	if (txq->pidx >= txq->size) {
942 		txq->pidx -= txq->size;
943 		txq->gen ^= 1;
944 	}
945 
946 }
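/*
 * Note on the completion bookkeeping above (a gloss): unacked accumulates the
 * descriptors issued since the last completion request; once bit 3 (value 8)
 * is set, txqs->compl carries the WR completion flag and unacked is masked
 * back into the 0-7 range, so a completion is requested roughly every 8
 * descriptors.
 */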
947 
948 /**
949  *	calc_tx_descs - calculate the number of Tx descriptors for a packet
950  *	@m: the packet mbufs
951  *      @nsegs: the number of segments
952  *
953  * 	Returns the number of Tx descriptors needed for the given Ethernet
954  * 	packet.  Ethernet packets require addition of WR and CPL headers.
955  */
956 static __inline unsigned int
957 calc_tx_descs(const struct mbuf *m, int nsegs)
958 {
959 	unsigned int flits;
960 
961 	if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
962 		return 1;
963 
964 	flits = sgl_len(nsegs) + 2;
965 #ifdef TSO_SUPPORTED
966 	if (m->m_pkthdr.csum_flags & CSUM_TSO)
967 		flits++;
968 #endif
969 	return flits_to_desc(flits);
970 }
971 
972 static unsigned int
973 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
974     struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
975 {
976 	struct mbuf *m0;
977 	int err, pktlen, pass = 0;
978 
979 retry:
980 	err = 0;
981 	m0 = *m;
982 	pktlen = m0->m_pkthdr.len;
983 #if defined(__i386__) || defined(__amd64__)
984 	if (busdma_map_sg_collapse(m, segs, nsegs) == 0) {
985 		goto done;
986 	} else
987 #endif
988 		err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);
989 
990 	if (err == 0) {
991 		goto done;
992 	}
993 	if (err == EFBIG && pass == 0) {
994 		pass = 1;
995 		/* Too many segments, try to defrag */
996 		m0 = m_defrag(m0, M_DONTWAIT);
997 		if (m0 == NULL) {
998 			m_freem(*m);
999 			*m = NULL;
1000 			return (ENOBUFS);
1001 		}
1002 		*m = m0;
1003 		goto retry;
1004 	} else if (err == ENOMEM) {
1005 		return (err);
1006 	} else if (err) {
1007 		if (cxgb_debug)
1008 			printf("map failure err=%d pktlen=%d\n", err, pktlen);
1009 		m_freem(m0);
1010 		*m = NULL;
1011 		return (err);
1012 	}
1013 done:
1014 #if !defined(__i386__) && !defined(__amd64__)
1015 	bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE);
1016 #endif
1017 	txsd->flags |= TX_SW_DESC_MAPPED;
1018 
1019 	return (0);
1020 }
1021 
1022 /**
1023  *	make_sgl - populate a scatter/gather list for a packet
1024  *	@sgp: the SGL to populate
1025  *	@segs: the packet dma segments
1026  *	@nsegs: the number of segments
1027  *
1028  *	Generates a scatter/gather list for the buffers that make up a packet
1029  *	and returns the SGL size in 8-byte words.  The caller must size the SGL
1030  *	appropriately.
1031  */
1032 static __inline void
1033 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1034 {
1035 	int i, idx;
1036 
1037 	for (idx = 0, i = 0; i < nsegs; i++) {
1038 		/*
1039 		 * firmware doesn't like empty segments
1040 		 */
1041 		if (segs[i].ds_len == 0)
1042 			continue;
1043 		if (i && idx == 0)
1044 			++sgp;
1045 
1046 		sgp->len[idx] = htobe32(segs[i].ds_len);
1047 		sgp->addr[idx] = htobe64(segs[i].ds_addr);
1048 		idx ^= 1;
1049 	}
1050 
1051 	if (idx) {
1052 		sgp->len[idx] = 0;
1053 		sgp->addr[idx] = 0;
1054 	}
1055 }
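/*
 * Note (a gloss on the loop above): segments are packed two per struct sg_ent
 * (len[0]/addr[0] then len[1]/addr[1]), zero-length segments are skipped, and
 * when the count ends up odd the unused second slot is zeroed so the list is
 * cleanly terminated for the hardware.
 */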
1056 
1057 /**
1058  *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1059  *	@adap: the adapter
1060  *	@q: the Tx queue
1061  *
1062  *	Ring the doorbell if a Tx queue is asleep.  There is a natural race
1063  *	where the HW may go to sleep just after we check; in that case the
1064  *	interrupt handler will detect the outstanding Tx packet and ring the
1065  *	doorbell for us.
1066  *
1067  *	When GTS is disabled we unconditionally ring the doorbell.
1068  */
1069 static __inline void
1070 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1071 {
1072 #if USE_GTS
1073 	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1074 	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1075 		set_bit(TXQ_LAST_PKT_DB, &q->flags);
1076 #ifdef T3_TRACE
1077 		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1078 			  q->cntxt_id);
1079 #endif
1080 		t3_write_reg(adap, A_SG_KDOORBELL,
1081 			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1082 	}
1083 #else
1084 	wmb();            /* write descriptors before telling HW */
1085 	t3_write_reg(adap, A_SG_KDOORBELL,
1086 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1087 #endif
1088 }
1089 
1090 static __inline void
1091 wr_gen2(struct tx_desc *d, unsigned int gen)
1092 {
1093 #if SGE_NUM_GENBITS == 2
1094 	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1095 #endif
1096 }
1097 
1098 /**
1099  *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
1100  *	@ndesc: number of Tx descriptors spanned by the SGL
1101  *	@txd: first Tx descriptor to be written
1102  *	@txqs: txq state (generation and producer index)
1103  *	@txq: the SGE Tx queue
1104  *	@sgl: the SGL
1105  *	@flits: number of flits to the start of the SGL in the first descriptor
1106  *	@sgl_flits: the SGL size in flits
1107  *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
1108  *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
1109  *
1110  *	Write a work request header and an associated SGL.  If the SGL is
1111  *	small enough to fit into one Tx descriptor it has already been written
1112  *	and we just need to write the WR header.  Otherwise we distribute the
1113  *	SGL across the number of descriptors it spans.
1114  */
1115 static void
1116 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1117     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1118     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1119 {
1120 
1121 	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1122 	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1123 
1124 	if (__predict_true(ndesc == 1)) {
1125 		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1126 		    V_WR_SGLSFLT(flits)) | wr_hi;
1127 		wmb();
1128 		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1129 		    V_WR_GEN(txqs->gen)) | wr_lo;
1130 		/* XXX gen? */
1131 		wr_gen2(txd, txqs->gen);
1132 
1133 	} else {
1134 		unsigned int ogen = txqs->gen;
1135 		const uint64_t *fp = (const uint64_t *)sgl;
1136 		struct work_request_hdr *wp = wrp;
1137 
1138 		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1139 		    V_WR_SGLSFLT(flits)) | wr_hi;
1140 
1141 		while (sgl_flits) {
1142 			unsigned int avail = WR_FLITS - flits;
1143 
1144 			if (avail > sgl_flits)
1145 				avail = sgl_flits;
1146 			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1147 			sgl_flits -= avail;
1148 			ndesc--;
1149 			if (!sgl_flits)
1150 				break;
1151 
1152 			fp += avail;
1153 			txd++;
1154 			txsd++;
1155 			if (++txqs->pidx == txq->size) {
1156 				txqs->pidx = 0;
1157 				txqs->gen ^= 1;
1158 				txd = txq->desc;
1159 				txsd = txq->sdesc;
1160 			}
1161 
1162 			/*
1163 			 * when the head of the mbuf chain
1164 			 * is freed all clusters will be freed
1165 			 * with it
1166 			 */
1167 			KASSERT(txsd->mi.mi_base == NULL, ("overwrting valid entry mi_base==%p", txsd->mi.mi_base));
1168 			wrp = (struct work_request_hdr *)txd;
1169 			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1170 			    V_WR_SGLSFLT(1)) | wr_hi;
1171 			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1172 				    sgl_flits + 1)) |
1173 			    V_WR_GEN(txqs->gen)) | wr_lo;
1174 			wr_gen2(txd, txqs->gen);
1175 			flits = 1;
1176 		}
1177 		wrp->wr_hi |= htonl(F_WR_EOP);
1178 		wmb();
1179 		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1180 		wr_gen2((struct tx_desc *)wp, ogen);
1181 	}
1182 }
1183 
1184 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1185 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
1186 
1187 #ifdef VLAN_SUPPORTED
1188 #define GET_VTAG(cntrl, m) \
1189 do { \
1190 	if ((m)->m_flags & M_VLANTAG)					            \
1191 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1192 } while (0)
1193 
1194 #define GET_VTAG_MI(cntrl, mi) \
1195 do { \
1196 	if ((mi)->mi_flags & M_VLANTAG)					\
1197 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \
1198 } while (0)
1199 #else
1200 #define GET_VTAG(cntrl, m)
1201 #define GET_VTAG_MI(cntrl, m)
1202 #endif
1203 
1204 int
1205 t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
1206 {
1207 	adapter_t *sc;
1208 	struct mbuf *m0;
1209 	struct sge_txq *txq;
1210 	struct txq_state txqs;
1211 	struct port_info *pi;
1212 	unsigned int ndesc, flits, cntrl, mlen;
1213 	int err, nsegs, tso_info = 0;
1214 
1215 	struct work_request_hdr *wrp;
1216 	struct tx_sw_desc *txsd;
1217 	struct sg_ent *sgp, *sgl;
1218 	uint32_t wr_hi, wr_lo, sgl_flits;
1219 	bus_dma_segment_t segs[TX_MAX_SEGS];
1220 
1221 	struct tx_desc *txd;
1222 	struct mbuf_vec *mv;
1223 	struct mbuf_iovec *mi;
1224 
1225 	DPRINTF("t3_encap cpu=%d ", curcpu);
1226 	KASSERT(qs->idx == 0, ("invalid qs %d", qs->idx));
1227 
1228 	mi = NULL;
1229 	pi = qs->port;
1230 	sc = pi->adapter;
1231 	txq = &qs->txq[TXQ_ETH];
1232 	txd = &txq->desc[txq->pidx];
1233 	txsd = &txq->sdesc[txq->pidx];
1234 	sgl = txq->txq_sgl;
1235 	m0 = *m;
1236 
1237 	DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
1238 	DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);
1239 	if (cxgb_debug)
1240 		printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx);
1241 
1242 	mtx_assert(&txq->lock, MA_OWNED);
1243 	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1244 /*
1245  * XXX need to add VLAN support for 6.x
1246  */
1247 #ifdef VLAN_SUPPORTED
1248 	if  (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1249 		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1250 #endif
1251 	KASSERT(txsd->mi.mi_base == NULL, ("overwrting valid entry mi_base==%p",
1252 		txsd->mi.mi_base));
1253 	if (cxgb_debug)
1254 		printf("uipc_mvec PIO_LEN=%ld\n", PIO_LEN);
1255 
1256 	if (count > 1) {
1257 		panic("count > 1 not support in CVS\n");
1258 		if ((err = busdma_map_sg_vec(m, &m0, segs, count)))
1259 			return (err);
1260 		nsegs = count;
1261 	} else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) {
1262 		if (cxgb_debug)
1263 			printf("failed ... err=%d\n", err);
1264 		return (err);
1265 	}
1266 	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count));
1267 
1268 	if (!(m0->m_pkthdr.len <= PIO_LEN)) {
1269 		mi_collapse_mbuf(&txsd->mi, m0);
1270 		mi = &txsd->mi;
1271 	}
1272 	if (count > 1) {
1273 		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1274 		int i, fidx;
1275 		struct mbuf_iovec *batchmi;
1276 
1277 		mv = mtomv(m0);
1278 		batchmi = mv->mv_vec;
1279 
1280 		wrp = (struct work_request_hdr *)txd;
1281 
1282 		flits = count*2 + 1;
1283 		txq_prod(txq, 1, &txqs);
1284 
1285 		for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) {
1286 			struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i];
1287 
1288 			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1289 			GET_VTAG_MI(cntrl, batchmi);
1290 			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1291 			cbe->cntrl = htonl(cntrl);
1292 			cbe->len = htonl(batchmi->mi_len | 0x80000000);
1293 			cbe->addr = htobe64(segs[i].ds_addr);
1294 			txd->flit[fidx] |= htobe64(1 << 24);
1295 		}
1296 
1297 		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1298 		    V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1299 		wmb();
1300 		wrp->wr_lo = htonl(V_WR_LEN(flits) |
1301 		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1302 		/* XXX gen? */
1303 		wr_gen2(txd, txqs.gen);
1304 		check_ring_tx_db(sc, txq);
1305 
1306 		return (0);
1307 	} else if (tso_info) {
1308 		int undersized, eth_type;
1309 		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1310 		struct ip *ip;
1311 		struct tcphdr *tcp;
1312 		char *pkthdr, tmp[TCPPKTHDRSIZE];
1313 		struct mbuf_vec *mv;
1314 		struct mbuf_iovec *tmpmi;
1315 
1316 		mv = mtomv(m0);
1317 		tmpmi = mv->mv_vec;
1318 
1319 		txd->flit[2] = 0;
1320 		GET_VTAG_MI(cntrl, mi);
1321 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1322 		hdr->cntrl = htonl(cntrl);
1323 		mlen = m0->m_pkthdr.len;
1324 		hdr->len = htonl(mlen | 0x80000000);
1325 
1326 		DPRINTF("tso buf len=%d\n", mlen);
1327 		undersized = (((tmpmi->mi_len < TCPPKTHDRSIZE) &&
1328 			(m0->m_flags & M_VLANTAG)) ||
1329 		    (tmpmi->mi_len < TCPPKTHDRSIZE - ETHER_VLAN_ENCAP_LEN));
1330 		if (__predict_false(undersized)) {
1331 			pkthdr = tmp;
1332 			dump_mi(mi);
1333 			panic("discontig packet - fixxorz");
1334 		} else
1335 			pkthdr = m0->m_data;
1336 
1337 		if (__predict_false(m0->m_flags & M_VLANTAG)) {
1338 			eth_type = CPL_ETH_II_VLAN;
1339 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1340 			    ETHER_VLAN_ENCAP_LEN);
1341 		} else {
1342 			eth_type = CPL_ETH_II;
1343 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1344 		}
1345 		tcp = (struct tcphdr *)((uint8_t *)ip +
1346 		    sizeof(*ip));
1347 
1348 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
1349 			    V_LSO_IPHDR_WORDS(ip->ip_hl) |
1350 			    V_LSO_TCPHDR_WORDS(tcp->th_off);
1351 		hdr->lso_info = htonl(tso_info);
1352 		flits = 3;
1353 	} else {
1354 		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1355 
1356 		GET_VTAG(cntrl, m0);
1357 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1358 		cpl->cntrl = htonl(cntrl);
1359 		mlen = m0->m_pkthdr.len;
1360 		cpl->len = htonl(mlen | 0x80000000);
1361 
1362 		if (mlen <= PIO_LEN) {
1363 			txq_prod(txq, 1, &txqs);
1364 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1365 			m_freem(m0);
1366 			m0 = NULL;
1367 			flits = (mlen + 7) / 8 + 2;
1368 			cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1369 					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1370 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1371 			wmb();
1372 			cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1373 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1374 
1375 			wr_gen2(txd, txqs.gen);
1376 			check_ring_tx_db(sc, txq);
1377 			DPRINTF("pio buf\n");
1378 			return (0);
1379 		}
1380 		DPRINTF("regular buf\n");
1381 		flits = 2;
1382 	}
1383 	wrp = (struct work_request_hdr *)txd;
1384 
1385 #ifdef	nomore
1386 	/*
1387 	 * XXX need to move into one of the helper routines above
1388 	 *
1389 	 */
1390 	if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0)
1391 		return (err);
1392 	m0 = *m;
1393 #endif
1394 	ndesc = calc_tx_descs(m0, nsegs);
1395 
1396 	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1397 	make_sgl(sgp, segs, nsegs);
1398 
1399 	sgl_flits = sgl_len(nsegs);
1400 
1401 	DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1402 	txq_prod(txq, ndesc, &txqs);
1403 	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1404 	wr_lo = htonl(V_WR_TID(txq->token));
1405 	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1406 	check_ring_tx_db(pi->adapter, txq);
1407 
1408 	if ((m0->m_type == MT_DATA) &&
1409 	    ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) &&
1410 	    (m0->m_ext.ext_type != EXT_PACKET)) {
1411 		m0->m_flags &= ~M_EXT;
1412 		mbufs_outstanding--;
1413 		m_free(m0);
1414 	}
1415 
1416 	return (0);
1417 }
1418 
1419 
1420 /**
1421  *	write_imm - write a packet into a Tx descriptor as immediate data
1422  *	@d: the Tx descriptor to write
1423  *	@m: the packet
1424  *	@len: the length of packet data to write as immediate data
1425  *	@gen: the generation bit value to write
1426  *
1427  *	Writes a packet as immediate data into a Tx descriptor.  The packet
1428  *	contains a work request at its beginning.  We must write the packet
1429  *	carefully so the SGE doesn't accidentally read it before it has been
1430  *	written in its entirety.
1431  */
1432 static __inline void
1433 write_imm(struct tx_desc *d, struct mbuf *m,
1434 	  unsigned int len, unsigned int gen)
1435 {
1436 	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1437 	struct work_request_hdr *to = (struct work_request_hdr *)d;
1438 
1439 	if (len > WR_LEN)
1440 		panic("len too big %d\n", len);
1441 	if (len < sizeof(*from))
1442 		panic("len too small %d", len);
1443 
1444 	memcpy(&to[1], &from[1], len - sizeof(*from));
1445 	to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1446 					V_WR_BCNTLFLT(len & 7));
1447 	wmb();
1448 	to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1449 					V_WR_LEN((len + 7) / 8));
1450 	wr_gen2(d, gen);
1451 
1452 	/*
1453 	 * This check is a hack; we should really fix the logic so
1454 	 * that this can't happen.
1455 	 */
1456 	if (m->m_type != MT_DONTFREE)
1457 		m_freem(m);
1458 
1459 }
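/*
 * Encoding note (an interpretation of the fields above): the WR length is
 * written as whole flits, V_WR_LEN((len + 7) / 8), with V_WR_BCNTLFLT(len & 7)
 * presumably indicating how many bytes of the last flit are valid; wr_hi is
 * written first and wr_lo, which carries the generation bit, only after the
 * wmb(), so the SGE never sees a partially written request.
 */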
1460 
1461 /**
1462  *	check_desc_avail - check descriptor availability on a send queue
1463  *	@adap: the adapter
1464  *	@q: the TX queue
1465  *	@m: the packet needing the descriptors
1466  *	@ndesc: the number of Tx descriptors needed
1467  *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1468  *
1469  *	Checks if the requested number of Tx descriptors is available on an
1470  *	SGE send queue.  If the queue is already suspended or not enough
1471  *	descriptors are available the packet is queued for later transmission.
1472  *	Must be called with the Tx queue locked.
1473  *
1474  *	Returns 0 if enough descriptors are available, 1 if there aren't
1475  *	enough descriptors and the packet has been queued, and 2 if the caller
1476  *	needs to retry because there weren't enough descriptors at the
1477  *	beginning of the call but some freed up in the mean time.
1478  */
1479 static __inline int
1480 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1481 		 struct mbuf *m, unsigned int ndesc,
1482 		 unsigned int qid)
1483 {
1484 	/*
1485 	 * XXX We currently only use this for checking the control queue
1486 	 * the control queue is only used for binding qsets which happens
1487 	 * at init time so we are guaranteed enough descriptors
1488 	 */
1489 	if (__predict_false(!mbufq_empty(&q->sendq))) {
1490 addq_exit:	mbufq_tail(&q->sendq, m);
1491 		return 1;
1492 	}
1493 	if (__predict_false(q->size - q->in_use < ndesc)) {
1494 
1495 		struct sge_qset *qs = txq_to_qset(q, qid);
1496 
1497 		printf("stopping q\n");
1498 
1499 		setbit(&qs->txq_stopped, qid);
1500 		smp_mb();
1501 
1502 		if (should_restart_tx(q) &&
1503 		    test_and_clear_bit(qid, &qs->txq_stopped))
1504 			return 2;
1505 
1506 		q->stops++;
1507 		goto addq_exit;
1508 	}
1509 	return 0;
1510 }
1511 
1512 
1513 /**
1514  *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1515  *	@q: the SGE control Tx queue
1516  *
1517  *	This is a variant of reclaim_completed_tx() that is used for Tx queues
1518  *	that send only immediate data (presently just the control queues) and
1519  *	thus do not have any mbufs
1520  */
1521 static __inline void
1522 reclaim_completed_tx_imm(struct sge_txq *q)
1523 {
1524 	unsigned int reclaim = q->processed - q->cleaned;
1525 
1526 	mtx_assert(&q->lock, MA_OWNED);
1527 
1528 	q->in_use -= reclaim;
1529 	q->cleaned += reclaim;
1530 }
1531 
1532 static __inline int
1533 immediate(const struct mbuf *m)
1534 {
1535 	return (m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN);
1536 }
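/*
 * In other words (a gloss): a packet qualifies as immediate when both its
 * first mbuf and its total length fit within WR_LEN bytes, so ctrl_xmit() and
 * write_ofld_wr() can copy it directly into one Tx descriptor via write_imm()
 * instead of DMA-mapping it.
 */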
1537 
1538 /**
1539  *	ctrl_xmit - send a packet through an SGE control Tx queue
1540  *	@adap: the adapter
1541  *	@q: the control queue
1542  *	@m: the packet
1543  *
1544  *	Send a packet through an SGE control Tx queue.  Packets sent through
1545  *	a control queue must fit entirely as immediate data in a single Tx
1546  *	descriptor and have no page fragments.
1547  */
1548 static int
1549 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1550 {
1551 	int ret;
1552 	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1553 
1554 	if (__predict_false(!immediate(m))) {
1555 		m_freem(m);
1556 		return 0;
1557 	}
1558 
1559 	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1560 	wrp->wr_lo = htonl(V_WR_TID(q->token));
1561 
1562 	mtx_lock(&q->lock);
1563 again:	reclaim_completed_tx_imm(q);
1564 
1565 	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1566 	if (__predict_false(ret)) {
1567 		if (ret == 1) {
1568 			mtx_unlock(&q->lock);
1569 			log(LOG_ERR, "no desc available\n");
1570 
1571 			return (ENOSPC);
1572 		}
1573 		goto again;
1574 	}
1575 	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1576 
1577 	q->in_use++;
1578 	if (++q->pidx >= q->size) {
1579 		q->pidx = 0;
1580 		q->gen ^= 1;
1581 	}
1582 	mtx_unlock(&q->lock);
1583 	wmb();
1584 	t3_write_reg(adap, A_SG_KDOORBELL,
1585 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1586 	return (0);
1587 }
1588 
1589 
1590 /**
1591  *	restart_ctrlq - restart a suspended control queue
1592  *	@qs: the queue set containing the control queue
1593  *
1594  *	Resumes transmission on a suspended Tx control queue.
1595  */
1596 static void
1597 restart_ctrlq(void *data, int npending)
1598 {
1599 	struct mbuf *m;
1600 	struct sge_qset *qs = (struct sge_qset *)data;
1601 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1602 	adapter_t *adap = qs->port->adapter;
1603 
1604 	log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use);
1605 
1606 	mtx_lock(&q->lock);
1607 again:	reclaim_completed_tx_imm(q);
1608 
1609 	while (q->in_use < q->size &&
1610 	       (m = mbufq_dequeue(&q->sendq)) != NULL) {
1611 
1612 		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1613 
1614 		if (++q->pidx >= q->size) {
1615 			q->pidx = 0;
1616 			q->gen ^= 1;
1617 		}
1618 		q->in_use++;
1619 	}
1620 	if (!mbufq_empty(&q->sendq)) {
1621 		setbit(&qs->txq_stopped, TXQ_CTRL);
1622 		smp_mb();
1623 
1624 		if (should_restart_tx(q) &&
1625 		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1626 			goto again;
1627 		q->stops++;
1628 	}
1629 	mtx_unlock(&q->lock);
1630 	t3_write_reg(adap, A_SG_KDOORBELL,
1631 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1632 }
1633 
1634 
1635 /*
1636  * Send a management message through control queue 0
1637  */
1638 int
1639 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1640 {
1641 	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1642 }
1643 
1644 
1645 /**
1646  *	free_qset - free the resources of an SGE queue set
1647  *	@sc: the controller owning the queue set
1648  *	@q: the queue set
1649  *
1650  *	Release the HW and SW resources associated with an SGE queue set, such
1651  *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
1652  *	queue set must be quiesced prior to calling this.
1653  */
1654 void
1655 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1656 {
1657 	int i;
1658 
1659 	t3_free_tx_desc_all(&q->txq[TXQ_ETH]);
1660 
1661 	for (i = 0; i < SGE_TXQ_PER_SET; i++)
1662 		if (q->txq[i].txq_mr.br_ring != NULL) {
1663 			free(q->txq[i].txq_mr.br_ring, M_DEVBUF);
1664 			mtx_destroy(&q->txq[i].txq_mr.br_lock);
1665 		}
1666 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1667 		if (q->fl[i].desc) {
1668 			mtx_lock(&sc->sge.reg_lock);
1669 			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1670 			mtx_unlock(&sc->sge.reg_lock);
1671 			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1672 			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1673 					q->fl[i].desc_map);
1674 			bus_dma_tag_destroy(q->fl[i].desc_tag);
1675 			bus_dma_tag_destroy(q->fl[i].entry_tag);
1676 		}
1677 		if (q->fl[i].sdesc) {
1678 			free_rx_bufs(sc, &q->fl[i]);
1679 			free(q->fl[i].sdesc, M_DEVBUF);
1680 		}
1681 	}
1682 
1683 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1684 		if (q->txq[i].desc) {
1685 			mtx_lock(&sc->sge.reg_lock);
1686 			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1687 			mtx_unlock(&sc->sge.reg_lock);
1688 			bus_dmamap_unload(q->txq[i].desc_tag,
1689 					q->txq[i].desc_map);
1690 			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1691 					q->txq[i].desc_map);
1692 			bus_dma_tag_destroy(q->txq[i].desc_tag);
1693 			bus_dma_tag_destroy(q->txq[i].entry_tag);
1694 			MTX_DESTROY(&q->txq[i].lock);
1695 		}
1696 		if (q->txq[i].sdesc) {
1697 			free(q->txq[i].sdesc, M_DEVBUF);
1698 		}
1699 	}
1700 
1701 	if (q->rspq.desc) {
1702 		mtx_lock(&sc->sge.reg_lock);
1703 		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1704 		mtx_unlock(&sc->sge.reg_lock);
1705 
1706 		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1707 		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1708 			        q->rspq.desc_map);
1709 		bus_dma_tag_destroy(q->rspq.desc_tag);
1710 		MTX_DESTROY(&q->rspq.lock);
1711 	}
1712 
1713 	bzero(q, sizeof(*q));
1714 }
1715 
1716 /**
1717  *	t3_free_sge_resources - free SGE resources
1718  *	@sc: the adapter softc
1719  *
1720  *	Frees resources used by the SGE queue sets.
1721  */
1722 void
1723 t3_free_sge_resources(adapter_t *sc)
1724 {
1725 	int i, nqsets;
1726 
1727 #ifdef IFNET_MULTIQUEUE
1728 	panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__);
1729 #endif
1730 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1731 		nqsets += sc->port[i].nqsets;
1732 
1733 	for (i = 0; i < nqsets; ++i)
1734 		t3_free_qset(sc, &sc->sge.qs[i]);
1735 }
1736 
1737 /**
1738  *	t3_sge_start - enable SGE
1739  *	@sc: the controller softc
1740  *
1741  *	Enables the SGE for DMAs.  This is the last step in starting packet
1742  *	transfers.
1743  */
1744 void
1745 t3_sge_start(adapter_t *sc)
1746 {
1747 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1748 }
1749 
1750 /**
1751  *	t3_sge_stop - disable SGE operation
1752  *	@sc: the adapter
1753  *
1754  *	Disables the DMA engine.  This can be called in emergencies (e.g.,
1755  *	from error interrupts) or from normal process context.  In the latter
1756  *	case it also disables any pending queue restart tasklets.  Note that
1757  *	if it is called in interrupt context it cannot disable the restart
1758  *	tasklets as it cannot wait, however the tasklets will have no effect
1759  *	since the doorbells are disabled and the driver will call this again
1760  *	later from process context, at which time the tasklets will be stopped
1761  *	if they are still running.
1762  */
1763 void
1764 t3_sge_stop(adapter_t *sc)
1765 {
1766 	int i, nqsets;
1767 
1768 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1769 
1770 	if (sc->tq == NULL)
1771 		return;
1772 
1773 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1774 		nqsets += sc->port[i].nqsets;
1775 #ifdef notyet
1776 	/*
1777 	 *
1778 	 * XXX
1779 	 */
1780 	for (i = 0; i < nqsets; ++i) {
1781 		struct sge_qset *qs = &sc->sge.qs[i];
1782 
1783 		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
1784 		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
1785 	}
1786 #endif
1787 }
1788 
1789 /**
1790  *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
1791  *	@adapter: the adapter
1792  *	@q: the Tx queue to reclaim descriptors from
1793  *	@reclaimable: the number of descriptors to reclaim
1794  *      @m_vec_size: maximum number of buffers to reclaim
1795  *      @desc_reclaimed: returns the number of descriptors reclaimed
1796  *
1797  *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
1798  *	Tx buffers.  Called with the Tx queue lock held.
1799  *
1800  *      Returns number of buffers of reclaimed
1801  */
1802 void
1803 t3_free_tx_desc(struct sge_txq *q, int reclaimable)
1804 {
1805 	struct tx_sw_desc *txsd;
1806 	unsigned int cidx;
1807 
1808 #ifdef T3_TRACE
1809 	T3_TRACE2(sc->tb[q->cntxt_id & 7],
1810 		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
1811 #endif
1812 	cidx = q->cidx;
1813 	txsd = &q->sdesc[cidx];
1814 	DPRINTF("reclaiming %d WR\n", reclaimable);
1815 	mtx_assert(&q->lock, MA_OWNED);
1816 	while (reclaimable--) {
1817 		DPRINTF("cidx=%d d=%p\n", cidx, txsd);
1818 		if (txsd->mi.mi_base != NULL) {
1819 			if (txsd->flags & TX_SW_DESC_MAPPED) {
1820 				bus_dmamap_unload(q->entry_tag, txsd->map);
1821 				txsd->flags &= ~TX_SW_DESC_MAPPED;
1822 			}
1823 			m_freem_iovec(&txsd->mi);
1824 			buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
1825 			txsd->mi.mi_base = NULL;
1826 
1827 #if defined(DIAGNOSTIC) && 0
1828 			if (m_get_priority(txsd->m[0]) != cidx)
1829 				printf("pri=%d cidx=%d\n",
1830 				    (int)m_get_priority(txsd->m[0]), cidx);
1831 #endif
1832 
1833 		} else
1834 			q->txq_skipped++;
1835 
1836 		++txsd;
1837 		if (++cidx == q->size) {
1838 			cidx = 0;
1839 			txsd = q->sdesc;
1840 		}
1841 	}
1842 	q->cidx = cidx;
1843 
1844 }
1845 
1846 void
1847 t3_free_tx_desc_all(struct sge_txq *q)
1848 {
1849 	int i;
1850 	struct tx_sw_desc *txsd;
1851 
1852 	for (i = 0; i < q->size; i++) {
1853 		txsd = &q->sdesc[i];
1854 		if (txsd->mi.mi_base != NULL) {
1855 			if (txsd->flags & TX_SW_DESC_MAPPED) {
1856 				bus_dmamap_unload(q->entry_tag, txsd->map);
1857 				txsd->flags &= ~TX_SW_DESC_MAPPED;
1858 			}
1859 			m_freem_iovec(&txsd->mi);
1860 			bzero(&txsd->mi, sizeof(txsd->mi));
1861 		}
1862 	}
1863 }
1864 
1865 /**
1866  *	is_new_response - check if a response is newly written
1867  *	@r: the response descriptor
1868  *	@q: the response queue
1869  *
1870  *	Returns true if the response descriptor holds a response that has not
1871  *	yet been processed, i.e., its generation bit matches the queue's.
1872  */
1873 static __inline int
1874 is_new_response(const struct rsp_desc *r,
1875     const struct sge_rspq *q)
1876 {
1877 	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1878 }
1879 
1880 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1881 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1882 			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1883 			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1884 			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1885 
1886 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1887 #define NOMEM_INTR_DELAY 2500
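/*
 * process_responses() loads NOMEM_INTR_DELAY into rspq->next_holdoff when an
 * mbuf allocation fails; the subsequent GTS write then arms this longer
 * holdoff timer, giving the memory shortage a chance to clear before the
 * queue is serviced again.
 */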
1888 
1889 /**
1890  *	write_ofld_wr - write an offload work request
1891  *	@adap: the adapter
1892  *	@m: the packet to send
1893  *	@q: the Tx queue
1894  *	@pidx: index of the first Tx descriptor to write
1895  *	@gen: the generation value to use
1896  *	@ndesc: number of descriptors the packet will occupy
1897  *
1898  *	Write an offload work request to send the supplied packet.  The packet
1899  *	data already carry the work request with most fields populated.
1900  */
1901 static void
1902 write_ofld_wr(adapter_t *adap, struct mbuf *m,
1903     struct sge_txq *q, unsigned int pidx,
1904     unsigned int gen, unsigned int ndesc,
1905     bus_dma_segment_t *segs, unsigned int nsegs)
1906 {
1907 	unsigned int sgl_flits, flits;
1908 	struct work_request_hdr *from;
1909 	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1910 	struct tx_desc *d = &q->desc[pidx];
1911 	struct txq_state txqs;
1912 
1913 	if (immediate(m) && segs == NULL) {
1914 		write_imm(d, m, m->m_len, gen);
1915 		return;
1916 	}
1917 
1918 	/* Only TX_DATA builds SGLs */
1919 	from = mtod(m, struct work_request_hdr *);
1920 	memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
1921 
1922 	flits = m->m_len / 8;
1923 	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1924 
1925 	make_sgl(sgp, segs, nsegs);
1926 	sgl_flits = sgl_len(nsegs);
1927 
1928 	txqs.gen = gen;
1929 	txqs.pidx = pidx;
1930 	txqs.compl = 0;
1931 
1932 	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
1933 	    from->wr_hi, from->wr_lo);
1934 }
1935 
1936 /**
1937  *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
1938  *	@m: the packet
1939  *
1940  * 	Returns the number of Tx descriptors needed for the given offload
1941  * 	packet.  These packets are already fully constructed.
1942  */
1943 static __inline unsigned int
1944 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
1945 {
1946 	unsigned int flits, cnt = 0;
1947 
1948 
1949 	if (m->m_len <= WR_LEN)
1950 		return 1;                 /* packet fits as immediate data */
1951 
1952 	if (m->m_flags & M_IOVEC)
1953 		cnt = mtomv(m)->mv_count;
1954 
1955 	/* headers */
1956 	flits = ((uint8_t *)m->m_pkthdr.header - mtod(m, uint8_t *)) / 8;
1957 
1958 	return flits_to_desc(flits + sgl_len(cnt));
1959 }
1960 
1961 /**
1962  *	ofld_xmit - send a packet through an offload queue
1963  *	@adap: the adapter
1964  *	@q: the Tx offload queue
1965  *	@m: the packet
1966  *
1967  *	Send an offload packet through an SGE offload queue.
1968  */
1969 static int
1970 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1971 {
1972 	int ret, nsegs;
1973 	unsigned int ndesc;
1974 	unsigned int pidx, gen;
1975 	bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
1976 	struct tx_sw_desc *stx;
1977 
1978 	nsegs = m_get_sgllen(m);
1979 	vsegs = m_get_sgl(m);
1980 	ndesc = calc_tx_descs_ofld(m, nsegs);
1981 	busdma_map_sgl(vsegs, segs, nsegs);
1982 
1983 	stx = &q->sdesc[q->pidx];
1984 	KASSERT(stx->mi.mi_base == NULL, ("mi_base set"));
1985 
1986 	mtx_lock(&q->lock);
1987 again:	reclaim_completed_tx_(q, 16);
1988 	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
1989 	if (__predict_false(ret)) {
1990 		if (ret == 1) {
1991 			printf("no ofld desc avail\n");
1992 
1993 			m_set_priority(m, ndesc);     /* save for restart */
1994 			mtx_unlock(&q->lock);
1995 			return (EINTR);
1996 		}
1997 		goto again;
1998 	}
1999 
2000 	gen = q->gen;
2001 	q->in_use += ndesc;
2002 	pidx = q->pidx;
2003 	q->pidx += ndesc;
2004 	if (q->pidx >= q->size) {
2005 		q->pidx -= q->size;
2006 		q->gen ^= 1;
2007 	}
2008 #ifdef T3_TRACE
2009 	T3_TRACE5(adap->tb[q->cntxt_id & 7],
2010 		  "ofld_xmit: ndesc %u, pidx %u, len %u, main %u, frags %u",
2011 		  ndesc, pidx, m->m_pkthdr.len, m->m_len, nsegs);
2013 #endif
2014 	mtx_unlock(&q->lock);
2015 
2016 	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2017 	check_ring_tx_db(adap, q);
2018 
2019 	return (0);
2020 }
2021 
2022 /**
2023  *	restart_offloadq - restart a suspended offload queue
2024  *	@qs: the queue set containing the offload queue
2025  *
2026  *	Resumes transmission on a suspended Tx offload queue.
2027  */
2028 static void
2029 restart_offloadq(void *data, int npending)
2030 {
2031 	struct mbuf *m;
2032 	struct sge_qset *qs = data;
2033 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
2034 	adapter_t *adap = qs->port->adapter;
2035 	bus_dma_segment_t segs[TX_MAX_SEGS];
2036 	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2037 	int nsegs, cleaned;
2038 
2039 	mtx_lock(&q->lock);
2040 again:	cleaned = reclaim_completed_tx_(q, 16);
2041 
2042 	while ((m = mbufq_peek(&q->sendq)) != NULL) {
2043 		unsigned int gen, pidx;
2044 		unsigned int ndesc = m_get_priority(m);
2045 
2046 		if (__predict_false(q->size - q->in_use < ndesc)) {
2047 			setbit(&qs->txq_stopped, TXQ_OFLD);
2048 			smp_mb();
2049 
2050 			if (should_restart_tx(q) &&
2051 			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2052 				goto again;
2053 			q->stops++;
2054 			break;
2055 		}
2056 
2057 		gen = q->gen;
2058 		q->in_use += ndesc;
2059 		pidx = q->pidx;
2060 		q->pidx += ndesc;
2061 		if (q->pidx >= q->size) {
2062 			q->pidx -= q->size;
2063 			q->gen ^= 1;
2064 		}
2065 
2066 		(void)mbufq_dequeue(&q->sendq);
2067 		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2068 		mtx_unlock(&q->lock);
2069 		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2070 		mtx_lock(&q->lock);
2071 	}
2072 	mtx_unlock(&q->lock);
2073 
2074 #if USE_GTS
2075 	set_bit(TXQ_RUNNING, &q->flags);
2076 	set_bit(TXQ_LAST_PKT_DB, &q->flags);
2077 #endif
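	/*
	 * Ring the egress context doorbell so the SGE re-examines this
	 * queue's context and fetches the newly written work requests.
	 */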
2078 	t3_write_reg(adap, A_SG_KDOORBELL,
2079 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2080 }
2081 
2082 /**
2083  *	queue_set - return the queue set a packet should use
2084  *	@m: the packet
2085  *
2086  *	Maps a packet to the SGE queue set it should use.  The desired queue
2087  *	set is carried in bits 1-3 in the packet's priority.
2088  */
2089 static __inline int
2090 queue_set(const struct mbuf *m)
2091 {
2092 	return m_get_priority(m) >> 1;
2093 }
2094 
2095 /**
2096  *	is_ctrl_pkt - return whether an offload packet is a control packet
2097  *	@m: the packet
2098  *
2099  *	Determines whether an offload packet should use an OFLD or a CTRL
2100  *	Tx queue.  This is indicated by bit 0 in the packet's priority.
2101  */
2102 static __inline int
2103 is_ctrl_pkt(const struct mbuf *m)
2104 {
2105 	return m_get_priority(m) & 1;
2106 }
2107 
2108 /**
2109  *	t3_offload_tx - send an offload packet
2110  *	@tdev: the offload device to send to
2111  *	@m: the packet
2112  *
2113  *	Sends an offload packet.  We use the packet priority to select the
2114  *	appropriate Tx queue as follows: bit 0 indicates whether the packet
2115  *	should be sent as regular or control, bits 1-3 select the queue set.
2116  */
2117 int
2118 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2119 {
2120 	adapter_t *adap = tdev2adap(tdev);
2121 	struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2122 
2123 	if (__predict_false(is_ctrl_pkt(m)))
2124 		return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
2125 
2126 	return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
2127 }
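/*
 * Usage sketch (hypothetical caller): to send a control message on queue
 * set 2, the producer would encode the priority as (2 << 1) | 1 before
 * handing the mbuf to t3_offload_tx():
 *
 *	m_set_priority(m, (qset << 1) | is_ctrl);
 *	t3_offload_tx(tdev, m);
 *
 * queue_set() then recovers 2 and is_ctrl_pkt() routes the packet to the
 * CTRL queue.
 */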
2128 
2129 /**
2130  *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2131  *	@tdev: the offload device that will be receiving the packets
2132  *	@q: the SGE response queue that assembled the bundle
2133  *	@m: the partial bundle
2134  *	@n: the number of packets in the bundle
2135  *
2136  *	Delivers a (partial) bundle of Rx offload packets to an offload device.
2137  */
2138 static __inline void
2139 deliver_partial_bundle(struct t3cdev *tdev,
2140 			struct sge_rspq *q,
2141 			struct mbuf *mbufs[], int n)
2142 {
2143 	if (n) {
2144 		q->offload_bundles++;
2145 		cxgb_ofld_recv(tdev, mbufs, n);
2146 	}
2147 }
2148 
2149 static __inline int
2150 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2151     struct mbuf *m, struct mbuf *rx_gather[],
2152     unsigned int gather_idx)
2153 {
2154 
2155 	rq->offload_pkts++;
2156 	m->m_pkthdr.header = mtod(m, void *);
2157 	rx_gather[gather_idx++] = m;
2158 	if (gather_idx == RX_BUNDLE_SIZE) {
2159 		cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2160 		gather_idx = 0;
2161 		rq->offload_bundles++;
2162 	}
2163 	return (gather_idx);
2164 }
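/*
 * rx_offload() batches offload mbufs into bundles of up to RX_BUNDLE_SIZE
 * before handing them to cxgb_ofld_recv(); whatever remains at the end of a
 * processing round is flushed by deliver_partial_bundle() above.
 */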
2165 
2166 static void
2167 restart_tx(struct sge_qset *qs)
2168 {
2169 	struct adapter *sc = qs->port->adapter;
2170 
2171 
2172 	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2173 	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2174 	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2175 		qs->txq[TXQ_OFLD].restarts++;
2176 		DPRINTF("restarting TXQ_OFLD\n");
2177 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2178 	}
2179 	DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2180 	    qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2181 	    qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2182 	    qs->txq[TXQ_CTRL].in_use);
2183 
2184 	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2185 	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2186 	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2187 		qs->txq[TXQ_CTRL].restarts++;
2188 		DPRINTF("restarting TXQ_CTRL\n");
2189 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2190 	}
2191 }
2192 
2193 /**
2194  *	t3_sge_alloc_qset - initialize an SGE queue set
2195  *	@sc: the controller softc
2196  *	@id: the queue set id
2197  *	@nports: how many Ethernet ports will be using this queue set
2198  *	@irq_vec_idx: the IRQ vector index for response queue interrupts
2199  *	@p: configuration parameters for this queue set
2200  *	@ntxq: number of Tx queues for the queue set
2201  *	@pi: port info for queue set
2202  *
2203  *	Allocate resources and initialize an SGE queue set.  A queue set
2204  *	comprises a response queue, two Rx free-buffer queues, and up to 3
2205  *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
2206  *	queue, offload queue, and control queue.
2207  */
2208 int
2209 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2210 		  const struct qset_params *p, int ntxq, struct port_info *pi)
2211 {
2212 	struct sge_qset *q = &sc->sge.qs[id];
2213 	int i, header_size, ret = 0;
2214 
2215 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2216 		if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *),
2217 			    M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
2218 			device_printf(sc->dev, "failed to allocate mbuf ring\n");
2219 			goto err;
2220 		}
2221 		q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0;
2222 		q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size;
2223 		mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF);
2224 	}
2225 
2226 	init_qset_cntxt(q, id);
2227 	q->idx = id;
2228 
2229 	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2230 		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2231 		    &q->fl[0].desc, &q->fl[0].sdesc,
2232 		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
2233 		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2234 		printf("error %d from alloc ring fl0\n", ret);
2235 		goto err;
2236 	}
2237 
2238 	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2239 		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2240 		    &q->fl[1].desc, &q->fl[1].sdesc,
2241 		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
2242 		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2243 		printf("error %d from alloc ring fl1\n", ret);
2244 		goto err;
2245 	}
2246 
2247 	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2248 		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
2249 		    &q->rspq.desc_tag, &q->rspq.desc_map,
2250 		    NULL, NULL)) != 0) {
2251 		printf("error %d from alloc ring rspq\n", ret);
2252 		goto err;
2253 	}
2254 
2255 	for (i = 0; i < ntxq; ++i) {
2256 		/*
2257 		 * The control queue always uses immediate data so does not
2258 		 * need to keep track of any mbufs.
2259 		 * XXX Placeholder for future TOE support.
2260 		 */
2261 		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2262 
2263 		if ((ret = alloc_ring(sc, p->txq_size[i],
2264 			    sizeof(struct tx_desc), sz,
2265 			    &q->txq[i].phys_addr, &q->txq[i].desc,
2266 			    &q->txq[i].sdesc, &q->txq[i].desc_tag,
2267 			    &q->txq[i].desc_map,
2268 			    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2269 			printf("error %d from alloc ring tx %i\n", ret, i);
2270 			goto err;
2271 		}
2272 		mbufq_init(&q->txq[i].sendq);
2273 		q->txq[i].gen = 1;
2274 		q->txq[i].size = p->txq_size[i];
2275 		snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2276 		    device_get_unit(sc->dev), irq_vec_idx, i);
2277 		MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2278 	}
2279 
2280 	q->txq[TXQ_ETH].port = pi;
2281 
2282 	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2283 	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2284 	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
2285 	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
2286 
2287 	q->fl[0].gen = q->fl[1].gen = 1;
2288 	q->fl[0].size = p->fl_size;
2289 	q->fl[1].size = p->jumbo_size;
2290 
2291 	q->rspq.gen = 1;
2292 	q->rspq.cidx = 0;
2293 	q->rspq.size = p->rspq_size;
2294 
2295 
2296 	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
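	/*
	 * Stop the Ethernet Tx queue once fewer descriptors remain than are
	 * needed for one maximally scattered packet per port.
	 */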
2297 	q->txq[TXQ_ETH].stop_thres = nports *
2298 	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2299 
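	/*
	 * fl[0] serves standard clusters and fl[1] jumbo clusters; the first
	 * header_size bytes of each cluster are reserved for the embedded
	 * mbuf header and reference count set up by init_cluster_mbuf().
	 */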
2300 	q->fl[0].buf_size = (MCLBYTES - header_size);
2301 	q->fl[0].zone = zone_clust;
2302 	q->fl[0].type = EXT_CLUSTER;
2303 #if __FreeBSD_version > 800000
2304 	if (cxgb_use_16k_clusters) {
2305 		q->fl[1].buf_size = MJUM16BYTES - header_size;
2306 		q->fl[1].zone = zone_jumbo16;
2307 		q->fl[1].type = EXT_JUMBO16;
2308 	} else {
2309 		q->fl[1].buf_size = MJUM9BYTES - header_size;
2310 		q->fl[1].zone = zone_jumbo9;
2311 		q->fl[1].type = EXT_JUMBO9;
2312 	}
2313 #else
2314 	q->fl[1].buf_size = MJUMPAGESIZE - header_size;
2315 	q->fl[1].zone = zone_jumbop;
2316 	q->fl[1].type = EXT_JUMBOP;
2317 #endif
2318 	q->lro.enabled = lro_default;
2319 
2320 	mtx_lock(&sc->sge.reg_lock);
2321 	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2322 				   q->rspq.phys_addr, q->rspq.size,
2323 				   q->fl[0].buf_size, 1, 0);
2324 	if (ret) {
2325 		printf("error %d from t3_sge_init_rspcntxt\n", ret);
2326 		goto err_unlock;
2327 	}
2328 
2329 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2330 		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2331 					  q->fl[i].phys_addr, q->fl[i].size,
2332 					  q->fl[i].buf_size, p->cong_thres, 1,
2333 					  0);
2334 		if (ret) {
2335 			printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2336 			goto err_unlock;
2337 		}
2338 	}
2339 
2340 	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2341 				 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2342 				 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2343 				 1, 0);
2344 	if (ret) {
2345 		printf("error %d from t3_sge_init_ecntxt\n", ret);
2346 		goto err_unlock;
2347 	}
2348 
2349 	if (ntxq > 1) {
2350 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2351 					 USE_GTS, SGE_CNTXT_OFLD, id,
2352 					 q->txq[TXQ_OFLD].phys_addr,
2353 					 q->txq[TXQ_OFLD].size, 0, 1, 0);
2354 		if (ret) {
2355 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2356 			goto err_unlock;
2357 		}
2358 	}
2359 
2360 	if (ntxq > 2) {
2361 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2362 					 SGE_CNTXT_CTRL, id,
2363 					 q->txq[TXQ_CTRL].phys_addr,
2364 					 q->txq[TXQ_CTRL].size,
2365 					 q->txq[TXQ_CTRL].token, 1, 0);
2366 		if (ret) {
2367 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2368 			goto err_unlock;
2369 		}
2370 	}
2371 
2372 	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2373 	    device_get_unit(sc->dev), irq_vec_idx);
2374 	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2375 
2376 	mtx_unlock(&sc->sge.reg_lock);
2377 	t3_update_qset_coalesce(q, p);
2378 	q->port = pi;
2379 
2380 	refill_fl(sc, &q->fl[0], q->fl[0].size);
2381 	refill_fl(sc, &q->fl[1], q->fl[1].size);
2382 	refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2383 
2384 	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2385 		     V_NEWTIMER(q->rspq.holdoff_tmr));
2386 
2387 	return (0);
2388 
2389 err_unlock:
2390 	mtx_unlock(&sc->sge.reg_lock);
2391 err:
2392 	t3_free_qset(sc, q);
2393 
2394 	return (ret);
2395 }
2396 
2397 void
2398 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2399 {
2400 	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2401 	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2402 	struct ifnet *ifp = pi->ifp;
2403 
2404 	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2405 
2406 	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2407 	    cpl->csum_valid && cpl->csum == 0xffff) {
2408 		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2409 		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2411 		m->m_pkthdr.csum_data = 0xffff;
2412 	}
2413 	/*
2414 	 * XXX need to add VLAN support for 6.x
2415 	 */
2416 #ifdef VLAN_SUPPORTED
2417 	if (__predict_false(cpl->vlan_valid)) {
2418 		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2419 		m->m_flags |= M_VLANTAG;
2420 	}
2421 #endif
2422 
2423 	m->m_pkthdr.rcvif = ifp;
2424 	m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2425 #ifndef DISABLE_MBUF_IOVEC
2426 	m_explode(m);
2427 #endif
2428 	/*
2429 	 * adjust after conversion to mbuf chain
2430 	 */
2431 	m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2432 	m->m_len -= (sizeof(*cpl) + ethpad);
2433 	m->m_data += (sizeof(*cpl) + ethpad);
2434 
2435 	(*ifp->if_input)(ifp, m);
2436 }
2437 
2438 static void
2439 ext_free_handler(void *cl, void * arg)
2440 {
2441 	uintptr_t type = (uintptr_t)arg;
2442 	uma_zone_t zone;
2443 	struct mbuf *m;
2444 
2445 	m = cl;
2446 	zone = m_getzonefromtype(type);
2447 	m->m_ext.ext_type = (int)type;
2448 	cxgb_ext_freed++;
2449 	cxgb_cache_put(zone, cl);
2450 }
2451 
2452 static void
2453 init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone)
2454 {
2455 	struct mbuf *m;
2456 	int header_size;
2457 
2458 	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) +
2459 	    sizeof(struct m_ext_) + sizeof(uint32_t);
2460 
2461 	bzero(cl, header_size);
2462 	m = (struct mbuf *)cl;
2463 
2464 	SLIST_INIT(&m->m_pkthdr.tags);
2465 	m->m_type = MT_DATA;
2466 	m->m_flags = flags | M_NOFREE | M_EXT;
2467 	m->m_data = cl + header_size;
2468 	m->m_ext.ext_buf = cl;
2469 	m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t));
2470 	m->m_ext.ext_size = m_getsizefromtype(type);
2471 	m->m_ext.ext_free = ext_free_handler;
2472 	m->m_ext.ext_args = (void *)(uintptr_t)type;
2473 	m->m_ext.ext_type = EXT_EXTREF;
2474 	*(m->m_ext.ref_cnt) = 1;
2475 	DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt);
2476 }
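/*
 * Resulting cluster layout (header_size bytes reserved at the front):
 *
 *	cl                                      cl + header_size
 *	|                                       |
 *	+---------------------------+-----------+----------------------+
 *	| embedded mbuf (m_hdr,     | ref count | packet data          |
 *	| pkthdr, m_ext_)           | uint32_t  | (m_data points here) |
 *	+---------------------------+-----------+----------------------+
 */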
2477 
2478 
2479 /**
2480  *	get_packet - return the next ingress packet buffer from a free list
2481  *	@adap: the adapter that received the packet
2482  *	@drop_thres: # of remaining buffers before we start dropping packets
2483  *	@qs: the qset that the SGE free list holding the packet belongs to
2484  *      @mh: the mbuf header, containing pointers to the head and tail of the mbuf chain
2485  *      @r: response descriptor
2486  *
2487  *	Get the next packet from a free list and complete setup of the
2488  *	mbuf.  If the packet is small we make a copy and recycle the
2489  *	original buffer, otherwise we use the original buffer itself.  If a
2490  *	positive drop threshold is supplied packets are dropped and their
2491  *	buffers recycled if (a) the number of remaining buffers is under the
2492  *	threshold and the packet is too big to copy, or (b) the packet should
2493  *	be copied but there is no memory for the copy.
2494  */
2495 #ifdef DISABLE_MBUF_IOVEC
2496 
2497 static int
2498 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2499     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2500 {
2501 
2502 	unsigned int len_cq =  ntohl(r->len_cq);
2503 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2504 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2505 	uint32_t len = G_RSPD_LEN(len_cq);
2506 	uint32_t flags = ntohl(r->flags);
2507 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2508 	struct mbuf *m, *m0;
2509 	void *cl;
2510 	int ret = 0;
2511 
2512 	prefetch(sd->rxsd_cl);
2513 
2514 	fl->credits--;
2515 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2516 
2517 	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2518 		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2519 			goto skip_recycle;
2520 		cl = mtod(m0, void *);
2521 		memcpy(cl, sd->data, len);
2522 		recycle_rx_buf(adap, fl, fl->cidx);
2523 		m = m0;
2524 	} else {
2525 		int pkt_flags;
2526 	skip_recycle:
2527 		pkt_flags = 0;
2528 		bus_dmamap_unload(fl->entry_tag, sd->map);
2529 		cl = sd->rxsd_cl;
2530 		m = m0 = (struct mbuf *)cl;
2531 		m0->m_len = len;
2532 		if ((sopeop == RSPQ_SOP_EOP) ||
2533 		    (sopeop == RSPQ_SOP))
2534 			pkt_flags = M_PKTHDR;
2535 		init_cluster_mbuf(cl, pkt_flags, fl->type, fl->zone);
2536 	}
2537 
2538 	switch(sopeop) {
2539 	case RSPQ_SOP_EOP:
2540 		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2541 		mh->mh_head = mh->mh_tail = m;
2542 		m->m_pkthdr.len = len;
2543 		ret = 1;
2544 		break;
2545 	case RSPQ_NSOP_NEOP:
2546 		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2547 		if (mh->mh_tail == NULL) {
2548 			printf("discarding intermediate descriptor entry\n");
2549 			m_freem(m);
2550 			break;
2551 		}
2552 		mh->mh_tail->m_next = m;
2553 		mh->mh_tail = m;
2554 		mh->mh_head->m_pkthdr.len += len;
2555 		ret = 0;
2556 		break;
2557 	case RSPQ_SOP:
2558 		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2559 		m->m_pkthdr.len = len;
2560 		mh->mh_head = mh->mh_tail = m;
2561 		ret = 0;
2562 		break;
2563 	case RSPQ_EOP:
2564 		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2565 		mh->mh_head->m_pkthdr.len += len;
2566 		mh->mh_tail->m_next = m;
2567 		mh->mh_tail = m;
2568 		ret = 1;
2569 		break;
2570 	}
2571 	if (++fl->cidx == fl->size)
2572 		fl->cidx = 0;
2573 
2574 	return (ret);
2575 }
2576 
2577 #else
2578 
2579 static int
2580 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2581     struct mbuf **m, struct rsp_desc *r)
2582 {
2583 
2584 	unsigned int len_cq =  ntohl(r->len_cq);
2585 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2586 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2587 	uint32_t len = G_RSPD_LEN(len_cq);
2588 	uint32_t flags = ntohl(r->flags);
2589 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2590 	void *cl;
2591 	int ret = 0;
2592 	struct mbuf *m0;
2593 #if 0
2594 	if ((sd + 1 )->rxsd_cl)
2595 		prefetch((sd + 1)->rxsd_cl);
2596 	if ((sd + 2)->rxsd_cl)
2597 		prefetch((sd + 2)->rxsd_cl);
2598 #endif
2599 	DPRINTF("rx cpu=%d\n", curcpu);
2600 	fl->credits--;
2601 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2602 
2603 	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2604 		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2605 			goto skip_recycle;
2606 		cl = mtod(m0, void *);
2607 		memcpy(cl, sd->data, len);
2608 		recycle_rx_buf(adap, fl, fl->cidx);
2609 		*m = m0;
2610 	} else {
2611 	skip_recycle:
2612 		bus_dmamap_unload(fl->entry_tag, sd->map);
2613 		cl = sd->rxsd_cl;
2614 		*m = m0 = (struct mbuf *)cl;
2615 	}
2616 
2617 	switch(sopeop) {
2618 	case RSPQ_SOP_EOP:
2619 		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2620 		if (cl == sd->rxsd_cl)
2621 			init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone);
2622 		m0->m_len = m0->m_pkthdr.len = len;
2623 		ret = 1;
2624 		goto done;
2625 		break;
2626 	case RSPQ_NSOP_NEOP:
2627 		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2628 		panic("chaining unsupported");
2629 		ret = 0;
2630 		break;
2631 	case RSPQ_SOP:
2632 		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2633 		panic("chaining unsupported");
2634 		m_iovinit(m0);
2635 		ret = 0;
2636 		break;
2637 	case RSPQ_EOP:
2638 		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2639 		panic("chaining unsupported");
2640 		ret = 1;
2641 		break;
2642 	}
2643 	panic("append not supported");
2644 #if 0
2645 	m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref);
2646 #endif
2647 done:
2648 	if (++fl->cidx == fl->size)
2649 		fl->cidx = 0;
2650 
2651 	return (ret);
2652 }
2653 #endif
2654 /**
2655  *	handle_rsp_cntrl_info - handles control information in a response
2656  *	@qs: the queue set corresponding to the response
2657  *	@flags: the response control flags
2658  *
2659  *	Handles the control information of an SGE response, such as GTS
2660  *	indications and completion credits for the queue set's Tx queues.
2661  *	HW coalesces credits, we don't do any extra SW coalescing.
2662  */
2663 static __inline void
2664 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2665 {
2666 	unsigned int credits;
2667 
2668 #if USE_GTS
2669 	if (flags & F_RSPD_TXQ0_GTS)
2670 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2671 #endif
2672 	credits = G_RSPD_TXQ0_CR(flags);
2673 	if (credits)
2674 		qs->txq[TXQ_ETH].processed += credits;
2675 
2676 	credits = G_RSPD_TXQ2_CR(flags);
2677 	if (credits)
2678 		qs->txq[TXQ_CTRL].processed += credits;
2679 
2680 # if USE_GTS
2681 	if (flags & F_RSPD_TXQ1_GTS)
2682 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2683 # endif
2684 	credits = G_RSPD_TXQ1_CR(flags);
2685 	if (credits)
2686 		qs->txq[TXQ_OFLD].processed += credits;
2687 
2688 }
2689 
2690 static void
2691 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2692     unsigned int sleeping)
2693 {
2694 	;
2695 }
2696 
2697 /**
2698  *	process_responses - process responses from an SGE response queue
2699  *	@adap: the adapter
2700  *	@qs: the queue set to which the response queue belongs
2701  *	@budget: how many responses can be processed in this round
2702  *
2703  *	Process responses from an SGE response queue up to the supplied budget.
2704  *	Responses include received packets as well as credits and other events
2705  *	for the queues that belong to the response queue's queue set.
2706  *	A negative budget is effectively unlimited.
2707  *
2708  *	Additionally choose the interrupt holdoff time for the next interrupt
2709  *	on this queue.  If the system is under memory shortage use a fairly
2710  *	long delay to help recovery.
2711  */
2712 int
2713 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2714 {
2715 	struct sge_rspq *rspq = &qs->rspq;
2716 	struct rsp_desc *r = &rspq->desc[rspq->cidx];
2717 	int budget_left = budget;
2718 	unsigned int sleeping = 0;
2719 	int lro = qs->lro.enabled;
2720 	struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2721 	int ngathered = 0;
2722 #ifdef DEBUG
2723 	static int last_holdoff = 0;
2724 	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2725 		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2726 		last_holdoff = rspq->holdoff_tmr;
2727 	}
2728 #endif
2729 	rspq->next_holdoff = rspq->holdoff_tmr;
2730 
2731 	while (__predict_true(budget_left && is_new_response(r, rspq))) {
2732 		int eth, eop = 0, ethpad = 0;
2733 		uint32_t flags = ntohl(r->flags);
2734 		uint32_t rss_csum = *(const uint32_t *)r;
2735 		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2736 
2737 		eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2738 
2739 		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2740 			/* XXX */
2741 			printf("async notification\n");
2742 
2743 		} else if  (flags & F_RSPD_IMM_DATA_VALID) {
2744 			struct mbuf *m = NULL;
2745 
2746 #ifdef DISABLE_MBUF_IOVEC
2747 			DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
2748 			    r->rss_hdr.opcode, rspq->cidx);
2749 
2750 			m = rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2751 			if (m == NULL) {
2752 				rspq->next_holdoff = NOMEM_INTR_DELAY;
2753 				budget_left--;
2754 				break;
2755 			}
2756 
2757 			get_imm_packet(adap, r, &rspq->rspq_mh);
2758 			eop = 1;
2759 #else
2760 			DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
2761 			    r->rss_hdr.opcode, rspq->cidx);
2762 			if (rspq->rspq_mbuf == NULL)
2763 				rspq->rspq_mbuf = m_gethdr(M_DONTWAIT, MT_DATA);
2764 			else
2765 				m = m_gethdr(M_DONTWAIT, MT_DATA);
2766 
2767 			/*
2768 			 * XXX revisit me
2769 			 */
2770 			if (rspq->rspq_mbuf == NULL &&  m == NULL) {
2771 				rspq->next_holdoff = NOMEM_INTR_DELAY;
2772 				budget_left--;
2773 				break;
2774 			}
2775 			get_imm_packet(adap, r, rspq->rspq_mbuf, m, flags);
2776 
2777 			eop = 1;
2778 			rspq->imm_data++;
2779 #endif
2780 		} else if (r->len_cq) {
2781 			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2782 
2783 #ifdef DISABLE_MBUF_IOVEC
2784 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
2785 #else
2786 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r);
2787 #ifdef IFNET_MULTIQUEUE
2788 			rspq->rspq_mbuf->m_pkthdr.rss_hash = rss_hash;
2789 #endif
2790 #endif
2791 			ethpad = 2;
2792 		} else {
2793 			DPRINTF("pure response\n");
2794 			rspq->pure_rsps++;
2795 		}
2796 
2797 		if (flags & RSPD_CTRL_MASK) {
2798 			sleeping |= flags & RSPD_GTS_MASK;
2799 			handle_rsp_cntrl_info(qs, flags);
2800 		}
2801 
2802 		r++;
2803 		if (__predict_false(++rspq->cidx == rspq->size)) {
2804 			rspq->cidx = 0;
2805 			rspq->gen ^= 1;
2806 			r = rspq->desc;
2807 		}
2808 		prefetch(r);
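		/*
		 * Return freed response queue credits to the hardware a
		 * quarter of the ring at a time.
		 */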
2809 		if (++rspq->credits >= (rspq->size / 4)) {
2810 			refill_rspq(adap, rspq, rspq->credits);
2811 			rspq->credits = 0;
2812 		}
2813 		DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags);
2814 
2815 		if (!eth && eop) {
2816 			rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
2817 			/*
2818 			 * XXX size mismatch
2819 			 */
2820 			m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
2821 
2822 			ngathered = rx_offload(&adap->tdev, rspq,
2823 			    rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
2824 			rspq->rspq_mh.mh_head = NULL;
2825 			DPRINTF("received offload packet\n");
2826 
2827 		} else if (eth && eop) {
2828 			prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *));
2829 			prefetch(mtod(rspq->rspq_mh.mh_head, uint8_t *) + L1_CACHE_BYTES);
2830 
2831 			t3_rx_eth_lro(adap, rspq, rspq->rspq_mh.mh_head, ethpad,
2832 			    rss_hash, rss_csum, lro);
2833 			DPRINTF("received tunnel packet\n");
2834 			rspq->rspq_mh.mh_head = NULL;
2835 
2836 		}
2837 		__refill_fl_lt(adap, &qs->fl[0], 32);
2838 		__refill_fl_lt(adap, &qs->fl[1], 32);
2839 		--budget_left;
2840 	}
2841 
2842 	deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
2843 	t3_lro_flush(adap, qs, &qs->lro);
2844 
2845 	if (sleeping)
2846 		check_ring_db(adap, qs, sleeping);
2847 
2848 	smp_mb();  /* commit Tx queue processed updates */
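	/*
	 * txq_stopped is a bitmask indexed by TXQ_*; a value greater than 1
	 * means the offload and/or control queue is stopped, and those are
	 * the queues restart_tx() knows how to resume.
	 */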
2849 	if (__predict_false(qs->txq_stopped > 1)) {
2850 		printf("restarting tx on %p\n", qs);
2851 
2852 		restart_tx(qs);
2853 	}
2854 
2855 	__refill_fl_lt(adap, &qs->fl[0], 512);
2856 	__refill_fl_lt(adap, &qs->fl[1], 512);
2857 	budget -= budget_left;
2858 	return (budget);
2859 }
2860 
2861 /*
2862  * A helper function that processes responses and issues GTS.
2863  */
2864 static __inline int
2865 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
2866 {
2867 	int work;
2868 	static int last_holdoff = 0;
2869 
2870 	work = process_responses(adap, rspq_to_qset(rq), -1);
2871 
2872 	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
2873 		printf("next_holdoff=%d\n", rq->next_holdoff);
2874 		last_holdoff = rq->next_holdoff;
2875 	}
2876 	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
2877 	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
2878 
2879 	return (work);
2880 }
2881 
2882 
2883 /*
2884  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
2885  * Handles data events from SGE response queues as well as error and other
2886  * async events as they all use the same interrupt pin.  We use one SGE
2887  * response queue per port in this mode and protect all response queues with
2888  * queue 0's lock.
2889  */
2890 void
2891 t3b_intr(void *data)
2892 {
2893 	uint32_t i, map;
2894 	adapter_t *adap = data;
2895 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2896 
2897 	t3_write_reg(adap, A_PL_CLI, 0);
2898 	map = t3_read_reg(adap, A_SG_DATA_INTR);
2899 
2900 	if (!map)
2901 		return;
2902 
2903 	if (__predict_false(map & F_ERRINTR))
2904 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2905 
2906 	mtx_lock(&q0->lock);
2907 	for_each_port(adap, i)
2908 	    if (map & (1 << i))
2909 			process_responses_gts(adap, &adap->sge.qs[i].rspq);
2910 	mtx_unlock(&q0->lock);
2911 }
2912 
2913 /*
2914  * The MSI interrupt handler.  This needs to handle data events from SGE
2915  * response queues as well as error and other async events as they all use
2916  * the same MSI vector.  We use one SGE response queue per port in this mode
2917  * and protect all response queues with queue 0's lock.
2918  */
2919 void
2920 t3_intr_msi(void *data)
2921 {
2922 	adapter_t *adap = data;
2923 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
2924 	int i, new_packets = 0;
2925 
2926 	mtx_lock(&q0->lock);
2927 
2928 	for_each_port(adap, i)
2929 	    if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
2930 		    new_packets = 1;
2931 	mtx_unlock(&q0->lock);
2932 	if (new_packets == 0)
2933 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
2934 }
2935 
2936 void
2937 t3_intr_msix(void *data)
2938 {
2939 	struct sge_qset *qs = data;
2940 	adapter_t *adap = qs->port->adapter;
2941 	struct sge_rspq *rspq = &qs->rspq;
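	/*
	 * With IFNET_MULTIQUEUE the response queue lock is only trylocked;
	 * if it is already held the responses will be (or are being)
	 * processed by the lock holder, so this vector simply returns.
	 */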
2942 #ifndef IFNET_MULTIQUEUE
2943 	mtx_lock(&rspq->lock);
2944 #else
2945 	if (mtx_trylock(&rspq->lock))
2946 #endif
2947 	{
2948 
2949 		if (process_responses_gts(adap, rspq) == 0)
2950 			rspq->unhandled_irqs++;
2951 		mtx_unlock(&rspq->lock);
2952 	}
2953 }
2954 
2955 #define QDUMP_SBUF_SIZE		(32 * 400)
2956 static int
2957 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
2958 {
2959 	struct sge_rspq *rspq;
2960 	struct sge_qset *qs;
2961 	int i, err, dump_end, idx;
2962 	static int multiplier = 1;
2963 	struct sbuf *sb;
2964 	struct rsp_desc *rspd;
2965 	uint32_t data[4];
2966 
2967 	rspq = arg1;
2968 	qs = rspq_to_qset(rspq);
2969 	if (rspq->rspq_dump_count == 0)
2970 		return (0);
2971 	if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
2972 		log(LOG_WARNING,
2973 		    "dump count is too large %d\n", rspq->rspq_dump_count);
2974 		rspq->rspq_dump_count = 0;
2975 		return (EINVAL);
2976 	}
2977 	if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
2978 		log(LOG_WARNING,
2979 		    "dump start of %d is greater than queue size\n",
2980 		    rspq->rspq_dump_start);
2981 		rspq->rspq_dump_start = 0;
2982 		return (EINVAL);
2983 	}
2984 	err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
2985 	if (err)
2986 		return (err);
2987 retry_sbufops:
2988 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
2989 
2990 	sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
2991 	    (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
2992 	    ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
2993 	sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
2994 	    ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
2995 
2996 	sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
2997 	    (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
2998 
2999 	dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3000 	for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3001 		idx = i & (RSPQ_Q_SIZE-1);
3002 
3003 		rspd = &rspq->desc[idx];
3004 		sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3005 		    idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3006 		    rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3007 		sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3008 		    rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3009 		    be32toh(rspd->len_cq), rspd->intr_gen);
3010 	}
3011 	if (sbuf_overflowed(sb)) {
3012 		sbuf_delete(sb);
3013 		multiplier++;
3014 		goto retry_sbufops;
3015 	}
3016 	sbuf_finish(sb);
3017 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3018 	sbuf_delete(sb);
3019 	return (err);
3020 }
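/*
 * Usage sketch (hypothetical, assuming the controller attached as cxgbc0 and
 * using the sysctl nodes created in t3_add_configured_sysctls() below):
 *
 *	# sysctl dev.cxgbc.0.port0.qs0.rspq.dump_start=0
 *	# sysctl dev.cxgbc.0.port0.qs0.rspq.dump_count=32
 *	# sysctl dev.cxgbc.0.port0.qs0.rspq.qdump
 *
 * dumps the first 32 response descriptors of port 0's first queue set.
 */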
3021 
3022 
3023 /*
3024  * broken by recent mbuf changes
3025  */
3026 static int
3027 t3_dump_txq(SYSCTL_HANDLER_ARGS)
3028 {
3029 	struct sge_txq *txq;
3030 	struct sge_qset *qs;
3031 	int i, j, err, dump_end;
3032 	static int multiplier = 1;
3033 	struct sbuf *sb;
3034 	struct tx_desc *txd;
3035 	uint32_t *WR, wr_hi, wr_lo, gen;
3036 	uint32_t data[4];
3037 
3038 	txq = arg1;
3039 	qs = txq_to_qset(txq, TXQ_ETH);
3040 	if (txq->txq_dump_count == 0) {
3041 		return (0);
3042 	}
3043 	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3044 		log(LOG_WARNING,
3045 		    "dump count is too large %d\n", txq->txq_dump_count);
3046 		txq->txq_dump_count = 1;
3047 		return (EINVAL);
3048 	}
3049 	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3050 		log(LOG_WARNING,
3051 		    "dump start of %d is greater than queue size\n",
3052 		    txq->txq_dump_start);
3053 		txq->txq_dump_start = 0;
3054 		return (EINVAL);
3055 	}
3056 	err = t3_sge_read_ecntxt(qs->port->adapter, txq->cntxt_id, data);
3057 	if (err)
3058 		return (err);
3059 
3060 
3061 retry_sbufops:
3062 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3063 
3064 	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3065 	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3066 	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3067 	sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
3068 	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3069 	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3070 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3071 	    txq->txq_dump_start,
3072 	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3073 
3074 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3075 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3076 		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3077 		WR = (uint32_t *)txd->flit;
3078 		wr_hi = ntohl(WR[0]);
3079 		wr_lo = ntohl(WR[1]);
3080 		gen = G_WR_GEN(wr_lo);
3081 
3082 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3083 		    wr_hi, wr_lo, gen);
3084 		for (j = 2; j < 30; j += 4)
3085 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3086 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3087 
3088 	}
3089 	if (sbuf_overflowed(sb)) {
3090 		sbuf_delete(sb);
3091 		multiplier++;
3092 		goto retry_sbufops;
3093 	}
3094 	sbuf_finish(sb);
3095 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3096 	sbuf_delete(sb);
3097 	return (err);
3098 }
3099 
3100 
3101 static int
3102 t3_lro_enable(SYSCTL_HANDLER_ARGS)
3103 {
3104 	adapter_t *sc;
3105 	int i, j, enabled, err, nqsets = 0;
3106 
3107 #ifndef LRO_WORKING
3108 	return (0);
3109 #endif
3110 	sc = arg1;
3111 	enabled = sc->sge.qs[0].lro.enabled;
3112 	err = sysctl_handle_int(oidp, &enabled, arg2, req);
3113 
3114 	if (err != 0)
3115 		return (err);
3116 	if (enabled == sc->sge.qs[0].lro.enabled)
3117 		return (0);
3118 
3119 	for (i = 0; i < sc->params.nports; i++)
3120 		for (j = 0; j < sc->port[i].nqsets; j++)
3121 			nqsets++;
3122 
3123 	for (i = 0; i < nqsets; i++)
3124 		sc->sge.qs[i].lro.enabled = enabled;
3125 
3126 	return (0);
3127 }
3128 
3129 static int
3130 t3_set_coalesce_nsecs(SYSCTL_HANDLER_ARGS)
3131 {
3132 	adapter_t *sc = arg1;
3133 	struct qset_params *qsp = &sc->params.sge.qset[0];
3134 	int coalesce_nsecs;
3135 	struct sge_qset *qs;
3136 	int i, j, err, nqsets = 0;
3137 	struct mtx *lock;
3138 
3139 	if ((sc->flags & FULL_INIT_DONE) == 0)
3140 		return (ENXIO);
3141 
3142 	coalesce_nsecs = qsp->coalesce_nsecs;
3143 	err = sysctl_handle_int(oidp, &coalesce_nsecs, arg2, req);
3144 
3145 	if (err != 0) {
3146 		return (err);
3147 	}
3148 	if (coalesce_nsecs == qsp->coalesce_nsecs)
3149 		return (0);
3150 
3151 	for (i = 0; i < sc->params.nports; i++)
3152 		for (j = 0; j < sc->port[i].nqsets; j++)
3153 			nqsets++;
3154 
3155 	coalesce_nsecs = max(100, coalesce_nsecs);
3156 
3157 	for (i = 0; i < nqsets; i++) {
3158 		qs = &sc->sge.qs[i];
3159 		qsp = &sc->params.sge.qset[i];
3160 		qsp->coalesce_nsecs = coalesce_nsecs;
3161 
3162 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3163 			    &sc->sge.qs[0].rspq.lock;
3164 
3165 		mtx_lock(lock);
3166 		t3_update_qset_coalesce(qs, qsp);
3167 		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3168 		    V_NEWTIMER(qs->rspq.holdoff_tmr));
3169 		mtx_unlock(lock);
3170 	}
3171 
3172 	return (0);
3173 }
3174 
3175 
3176 void
3177 t3_add_attach_sysctls(adapter_t *sc)
3178 {
3179 	struct sysctl_ctx_list *ctx;
3180 	struct sysctl_oid_list *children;
3181 
3182 	ctx = device_get_sysctl_ctx(sc->dev);
3183 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3184 
3185 	/* random information */
3186 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3187 	    "firmware_version",
3188 	    CTLFLAG_RD, &sc->fw_version,
3189 	    0, "firmware version");
3190 
3191 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3192 	    "enable_lro",
3193 	    CTLTYPE_INT|CTLFLAG_RW, sc,
3194 	    0, t3_lro_enable,
3195 	    "I", "enable large receive offload");
3196 
3197 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3198 	    "enable_debug",
3199 	    CTLFLAG_RW, &cxgb_debug,
3200 	    0, "enable verbose debugging output");
3201 	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce",
3202 	    CTLFLAG_RD, &sc->tunq_coalesce,
3203 	    "#tunneled packets freed");
3204 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3205 	    "txq_overrun",
3206 	    CTLFLAG_RD, &txq_fills,
3207 	    0, "#times txq overrun");
3208 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3209 	    "bogus_imm",
3210 	    CTLFLAG_RD, &bogus_imm,
3211 	    0, "#times a bogus immediate response was seen");
3212 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3213 	    "cache_alloc",
3214 	    CTLFLAG_RD, &cxgb_cached_allocations,
3215 	    0, "#times a cluster was allocated from cache");
3216 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3217 	    "cached",
3218 	    CTLFLAG_RD, &cxgb_cached,
3219 	    0, "#times a cluster was cached");
3220 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3221 	    "ext_freed",
3222 	    CTLFLAG_RD, &cxgb_ext_freed,
3223 	    0, "#times a cluster was freed through ext_free");
3224 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3225 	    "mbufs_outstanding",
3226 	    CTLFLAG_RD, &mbufs_outstanding,
3227 	    0, "#mbufs in flight in the driver");
3228 }
3229 
3230 
3231 static const char *rspq_name = "rspq";
3232 static const char *txq_names[] =
3233 {
3234 	"txq_eth",
3235 	"txq_ofld",
3236 	"txq_ctrl"
3237 };
3238 
3239 void
3240 t3_add_configured_sysctls(adapter_t *sc)
3241 {
3242 	struct sysctl_ctx_list *ctx;
3243 	struct sysctl_oid_list *children;
3244 	int i, j;
3245 
3246 	ctx = device_get_sysctl_ctx(sc->dev);
3247 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3248 
3249 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3250 	    "intr_coal",
3251 	    CTLTYPE_INT|CTLFLAG_RW, sc,
3252 	    0, t3_set_coalesce_nsecs,
3253 	    "I", "interrupt coalescing timer (ns)");
3254 
3255 	for (i = 0; i < sc->params.nports; i++) {
3256 		struct port_info *pi = &sc->port[i];
3257 		struct sysctl_oid *poid;
3258 		struct sysctl_oid_list *poidlist;
3259 
3260 		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3261 		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3262 		    pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3263 		poidlist = SYSCTL_CHILDREN(poid);
3264 		SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3265 		    "nqsets", CTLFLAG_RD, &pi->nqsets,
3266 		    0, "#queue sets");
3267 
3268 		for (j = 0; j < pi->nqsets; j++) {
3269 			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3270 			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid;
3271 			struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist;
3272 			struct sge_txq *txq = &qs->txq[TXQ_ETH];
3273 
3274 			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3275 
3276 			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3277 			    qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3278 			qspoidlist = SYSCTL_CHILDREN(qspoid);
3279 
3280 			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3281 			    rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3282 			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3283 
3284 			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3285 			    txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3286 			txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3287 
3288 
3289 
3290 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3291 			    CTLFLAG_RD, &qs->rspq.size,
3292 			    0, "#entries in response queue");
3293 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3294 			    CTLFLAG_RD, &qs->rspq.cidx,
3295 			    0, "consumer index");
3296 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3297 			    CTLFLAG_RD, &qs->rspq.credits,
3298 			    0, "#credits");
3299 			SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3300 			    CTLFLAG_RD, &qs->rspq.phys_addr,
3301 			    "physical address of the queue");
3302 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3303 			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3304 			    0, "start rspq dump entry");
3305 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3306 			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3307 			    0, "#rspq entries to dump");
3308 			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3309 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3310 			    0, t3_dump_rspq, "A", "dump of the response queue");
3311 
3312 
3313 
3314 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
3315 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
3316 			    0, "#tunneled packets dropped");
3317 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3318 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3319 			    0, "#tunneled packets waiting to be sent");
3320 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3321 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3322 			    0, "#tunneled packets queue producer index");
3323 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3324 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3325 			    0, "#tunneled packets queue consumer index");
3326 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3327 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3328 			    0, "#tunneled packets processed by the card");
3329 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3330 			    CTLFLAG_RD, &txq->cleaned,
3331 			    0, "#tunneled packets cleaned");
3332 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3333 			    CTLFLAG_RD, &txq->in_use,
3334 			    0, "#tunneled packet slots in use");
3335 			SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3336 			    CTLFLAG_RD, &txq->txq_frees,
3337 			    "#tunneled packets freed");
3338 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3339 			    CTLFLAG_RD, &txq->txq_skipped,
3340 			    0, "#tunneled packet descriptors skipped");
3341 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced",
3342 			    CTLFLAG_RD, &txq->txq_coalesced,
3343 			    0, "#tunneled packets coalesced");
3344 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3345 			    CTLFLAG_RD, &txq->txq_enqueued,
3346 			    0, "#tunneled packets enqueued to hardware");
3347 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3348 			    CTLFLAG_RD, &qs->txq_stopped,
3349 			    0, "tx queues stopped");
3350 			SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3351 			    CTLFLAG_RD, &txq->phys_addr,
3352 			    "physical address of the queue");
3353 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3354 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3355 			    0, "txq generation");
3356 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3357 			    CTLFLAG_RD, &txq->cidx,
3358 			    0, "hardware queue cidx");
3359 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3360 			    CTLFLAG_RD, &txq->pidx,
3361 			    0, "hardware queue pidx");
3362 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3363 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3364 			    0, "txq start idx for dump");
3365 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3366 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3367 			    0, "txq #entries to dump");
3368 			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3369 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3370 			    0, t3_dump_txq, "A", "dump of the transmit queue");
3371 		}
3372 	}
3373 }
3374 
3375 /**
3376  *	t3_get_desc - dump an SGE descriptor for debugging purposes
3377  *	@qs: the queue set
3378  *	@qnum: identifies the specific queue (0..2: Tx, 3:response, 4..5: Rx)
3379  *	@idx: the descriptor index in the queue
3380  *	@data: where to dump the descriptor contents
3381  *
3382  *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3383  *	size of the descriptor.
3384  */
3385 int
3386 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3387 		unsigned char *data)
3388 {
3389 	if (qnum >= 6)
3390 		return (EINVAL);
3391 
3392 	if (qnum < 3) {
3393 		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3394 			return -EINVAL;
3395 		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3396 		return sizeof(struct tx_desc);
3397 	}
3398 
3399 	if (qnum == 3) {
3400 		if (!qs->rspq.desc || idx >= qs->rspq.size)
3401 			return (EINVAL);
3402 		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3403 		return sizeof(struct rsp_desc);
3404 	}
3405 
3406 	qnum -= 4;
3407 	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3408 		return (EINVAL);
3409 	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3410 	return sizeof(struct rx_desc);
3411 }
3412