xref: /freebsd/sys/dev/cxgb/cxgb_sge.c (revision a9148abd9da5db2f1c682fb17bed791845fc41c9)
1 /**************************************************************************
2 
3 Copyright (c) 2007-2008, Chelsio Inc.
4 All rights reserved.
5 
6 Redistribution and use in source and binary forms, with or without
7 modification, are permitted provided that the following conditions are met:
8 
9  1. Redistributions of source code must retain the above copyright notice,
10     this list of conditions and the following disclaimer.
11 
12  2. Neither the name of the Chelsio Corporation nor the names of its
13     contributors may be used to endorse or promote products derived from
14     this software without specific prior written permission.
15 
16 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
17 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
20 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
21 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
22 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
23 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
24 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
25 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
26 POSSIBILITY OF SUCH DAMAGE.
27 
28 ***************************************************************************/
29 #define DEBUG_BUFRING
30 
31 
32 #include <sys/cdefs.h>
33 __FBSDID("$FreeBSD$");
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/bus.h>
40 #include <sys/conf.h>
41 #include <machine/bus.h>
42 #include <machine/resource.h>
43 #include <sys/bus_dma.h>
44 #include <sys/rman.h>
45 #include <sys/queue.h>
46 #include <sys/sysctl.h>
47 #include <sys/taskqueue.h>
48 
49 #include <sys/proc.h>
50 #include <sys/sbuf.h>
51 #include <sys/sched.h>
52 #include <sys/smp.h>
53 #include <sys/systm.h>
54 #include <sys/syslog.h>
55 
56 #include <netinet/in_systm.h>
57 #include <netinet/in.h>
58 #include <netinet/ip.h>
59 #include <netinet/tcp.h>
60 
61 #include <dev/pci/pcireg.h>
62 #include <dev/pci/pcivar.h>
63 
64 #include <vm/vm.h>
65 #include <vm/pmap.h>
66 
67 #include <cxgb_include.h>
68 #include <sys/mvec.h>
69 
70 int      txq_fills = 0;
71 /*
72  * XXX don't re-enable this until TOE stops assuming
73  * we have an m_ext
74  */
75 static int recycle_enable = 0;
76 extern int cxgb_txq_buf_ring_size;
77 int cxgb_cached_allocations;
78 int cxgb_cached;
79 int cxgb_ext_freed = 0;
80 int cxgb_ext_inited = 0;
81 int fl_q_size = 0;
82 int jumbo_q_size = 0;
83 
84 extern int cxgb_use_16k_clusters;
85 extern int cxgb_pcpu_cache_enable;
86 extern int nmbjumbo4;
87 extern int nmbjumbo9;
88 extern int nmbjumbo16;
89 
90 
91 
92 
93 #define USE_GTS 0
94 
95 #define SGE_RX_SM_BUF_SIZE	1536
96 #define SGE_RX_DROP_THRES	16
97 #define SGE_RX_COPY_THRES	128
98 
99 /*
100  * Period of the Tx buffer reclaim timer.  This timer does not need to run
101  * frequently as Tx buffers are usually reclaimed by new Tx packets.
102  */
103 #define TX_RECLAIM_PERIOD       (hz >> 1)
104 
105 /*
106  * Values for sge_txq.flags
107  */
108 enum {
109 	TXQ_RUNNING	= 1 << 0,  /* fetch engine is running */
110 	TXQ_LAST_PKT_DB = 1 << 1,  /* last packet rang the doorbell */
111 };
112 
113 struct tx_desc {
114 	uint64_t	flit[TX_DESC_FLITS];
115 } __packed;
116 
117 struct rx_desc {
118 	uint32_t	addr_lo;
119 	uint32_t	len_gen;
120 	uint32_t	gen2;
121 	uint32_t	addr_hi;
122 } __packed;
123 
124 struct rsp_desc {               /* response queue descriptor */
125 	struct rss_header	rss_hdr;
126 	uint32_t		flags;
127 	uint32_t		len_cq;
128 	uint8_t			imm_data[47];
129 	uint8_t			intr_gen;
130 } __packed;
131 
132 #define RX_SW_DESC_MAP_CREATED	(1 << 0)
133 #define TX_SW_DESC_MAP_CREATED	(1 << 1)
134 #define RX_SW_DESC_INUSE        (1 << 3)
135 #define TX_SW_DESC_MAPPED       (1 << 4)
136 
137 #define RSPQ_NSOP_NEOP           G_RSPD_SOP_EOP(0)
138 #define RSPQ_EOP                 G_RSPD_SOP_EOP(F_RSPD_EOP)
139 #define RSPQ_SOP                 G_RSPD_SOP_EOP(F_RSPD_SOP)
140 #define RSPQ_SOP_EOP             G_RSPD_SOP_EOP(F_RSPD_SOP|F_RSPD_EOP)
141 
142 struct tx_sw_desc {                /* SW state per Tx descriptor */
143 	struct mbuf_iovec mi;
144 	bus_dmamap_t	map;
145 	int		flags;
146 };
147 
148 struct rx_sw_desc {                /* SW state per Rx descriptor */
149 	caddr_t	         rxsd_cl;
150 	caddr_t	         data;
151 	bus_dmamap_t	  map;
152 	int		  flags;
153 };
154 
155 struct txq_state {
156 	unsigned int compl;
157 	unsigned int gen;
158 	unsigned int pidx;
159 };
160 
161 struct refill_fl_cb_arg {
162 	int               error;
163 	bus_dma_segment_t seg;
164 	int               nseg;
165 };
166 
167 /*
168  * Maps a number of flits to the number of Tx descriptors that can hold them.
169  * The formula is
170  *
171  * desc = 1 + (flits - 2) / (WR_FLITS - 1).
172  *
173  * HW allows up to 4 descriptors to be combined into a WR.
174  */
175 static uint8_t flit_desc_map[] = {
176 	0,
177 #if SGE_NUM_GENBITS == 1
178 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
179 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
180 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
181 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4
182 #elif SGE_NUM_GENBITS == 2
183 	1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
184 	2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
185 	3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3,
186 	4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4, 4,
187 #else
188 # error "SGE_NUM_GENBITS must be 1 or 2"
189 #endif
190 };
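/*
 * Illustrative sketch, not used by the driver: the table above is a
 * precomputed form of the formula in the comment.  Assuming WR_FLITS is the
 * number of flits a single work request may span, the same mapping could be
 * computed directly as below; the first descriptor holds up to WR_FLITS
 * flits, and each additional descriptor re-writes a one-flit WR header and
 * so contributes WR_FLITS - 1 more.
 */
#if 0
static __inline uint8_t
flits_to_desc_formula(unsigned int flits)
{
	/* desc = 1 + (flits - 2) / (WR_FLITS - 1), for flits >= 1 */
	return (flits <= 2 ? 1 : 1 + (flits - 2) / (WR_FLITS - 1));
}
#endif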
191 
192 
193 int cxgb_debug = 0;
194 
195 static void sge_timer_cb(void *arg);
196 static void sge_timer_reclaim(void *arg, int ncount);
197 static void sge_txq_reclaim_handler(void *arg, int ncount);
198 
199 /**
200  *	reclaim_completed_tx_ - reclaims completed Tx descriptors
201  *	@q: the Tx queue to reclaim completed descriptors from
202  *	@reclaim_min: do nothing unless at least this many descriptors can be reclaimed
203  *
204  *	Reclaims Tx descriptors that the SGE has indicated it has processed,
205  *	and frees the associated buffers if possible.  Called with the Tx
206  *	queue's lock held.
207  */
208 static __inline int
209 reclaim_completed_tx_(struct sge_txq *q, int reclaim_min)
210 {
211 	int reclaim = desc_reclaimable(q);
212 
213 	if (reclaim < reclaim_min)
214 		return (0);
215 
216 	mtx_assert(&q->lock, MA_OWNED);
217 	if (reclaim > 0) {
218 		t3_free_tx_desc(q, reclaim);
219 		q->cleaned += reclaim;
220 		q->in_use -= reclaim;
221 	}
222 	return (reclaim);
223 }
224 
225 /**
226  *	should_restart_tx - are there enough resources to restart a Tx queue?
227  *	@q: the Tx queue
228  *
229  *	Checks if there are enough descriptors to restart a suspended Tx queue.
230  */
231 static __inline int
232 should_restart_tx(const struct sge_txq *q)
233 {
234 	unsigned int r = q->processed - q->cleaned;
235 
236 	return q->in_use - r < (q->size >> 1);
237 }
238 
239 /**
240  *	t3_sge_init - initialize SGE
241  *	@adap: the adapter
242  *	@p: the SGE parameters
243  *
244  *	Performs SGE initialization needed every time after a chip reset.
245  *	We do not initialize any of the queue sets here, instead the driver
246  *	top-level must request those individually.  We also do not enable DMA
247  *	here, that should be done after the queues have been set up.
248  */
249 void
250 t3_sge_init(adapter_t *adap, struct sge_params *p)
251 {
252 	u_int ctrl, ups;
253 
254 	ups = 0; /* = ffs(pci_resource_len(adap->pdev, 2) >> 12); */
255 
256 	ctrl = F_DROPPKT | V_PKTSHIFT(2) | F_FLMODE | F_AVOIDCQOVFL |
257 	       F_CQCRDTCTRL | F_CONGMODE | F_TNLFLMODE | F_FATLPERREN |
258 	       V_HOSTPAGESIZE(PAGE_SHIFT - 11) | F_BIGENDIANINGRESS |
259 	       V_USERSPACESIZE(ups ? ups - 1 : 0) | F_ISCSICOALESCING;
260 #if SGE_NUM_GENBITS == 1
261 	ctrl |= F_EGRGENCTRL;
262 #endif
263 	if (adap->params.rev > 0) {
264 		if (!(adap->flags & (USING_MSIX | USING_MSI)))
265 			ctrl |= F_ONEINTMULTQ | F_OPTONEINTMULTQ;
266 	}
267 	t3_write_reg(adap, A_SG_CONTROL, ctrl);
268 	t3_write_reg(adap, A_SG_EGR_RCQ_DRB_THRSH, V_HIRCQDRBTHRSH(512) |
269 		     V_LORCQDRBTHRSH(512));
270 	t3_write_reg(adap, A_SG_TIMER_TICK, core_ticks_per_usec(adap) / 10);
271 	t3_write_reg(adap, A_SG_CMDQ_CREDIT_TH, V_THRESHOLD(32) |
272 		     V_TIMEOUT(200 * core_ticks_per_usec(adap)));
273 	t3_write_reg(adap, A_SG_HI_DRB_HI_THRSH,
274 		     adap->params.rev < T3_REV_C ? 1000 : 500);
275 	t3_write_reg(adap, A_SG_HI_DRB_LO_THRSH, 256);
276 	t3_write_reg(adap, A_SG_LO_DRB_HI_THRSH, 1000);
277 	t3_write_reg(adap, A_SG_LO_DRB_LO_THRSH, 256);
278 	t3_write_reg(adap, A_SG_OCO_BASE, V_BASE1(0xfff));
279 	t3_write_reg(adap, A_SG_DRB_PRI_THRESH, 63 * 1024);
280 }
281 
282 
283 /**
284  *	sgl_len - calculates the size of an SGL of the given capacity
285  *	@n: the number of SGL entries
286  *
287  *	Calculates the number of flits needed for a scatter/gather list that
288  *	can hold the given number of entries.
289  */
290 static __inline unsigned int
291 sgl_len(unsigned int n)
292 {
293 	return ((3 * n) / 2 + (n & 1));
294 }
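/*
 * Worked example (illustrative): each SGL entry carries an 8-byte address and
 * a 4-byte length, i.e. 1.5 flits, and entries are packed in pairs, so
 * sgl_len(3) = 9 / 2 + 1 = 5 flits and sgl_len(4) = 12 / 2 = 6 flits
 * (integer division).
 */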
295 
296 /**
297  *	get_imm_packet - return the next ingress packet buffer from a response
298  *	@resp: the response descriptor containing the packet data
299  *
300  *	Return a packet containing the immediate data of the given response.
301  */
302 static int
303 get_imm_packet(adapter_t *sc, const struct rsp_desc *resp, struct mbuf *m)
304 {
305 
306 	m->m_len = m->m_pkthdr.len = IMMED_PKT_SIZE;
307 	m->m_ext.ext_buf = NULL;
308 	m->m_ext.ext_type = 0;
309 	memcpy(mtod(m, uint8_t *), resp->imm_data, IMMED_PKT_SIZE);
310 	return (0);
311 }
312 
313 static __inline u_int
314 flits_to_desc(u_int n)
315 {
316 	return (flit_desc_map[n]);
317 }
318 
319 #define SGE_PARERR (F_CPPARITYERROR | F_OCPARITYERROR | F_RCPARITYERROR | \
320 		    F_IRPARITYERROR | V_ITPARITYERROR(M_ITPARITYERROR) | \
321 		    V_FLPARITYERROR(M_FLPARITYERROR) | F_LODRBPARITYERROR | \
322 		    F_HIDRBPARITYERROR | F_LORCQPARITYERROR | \
323 		    F_HIRCQPARITYERROR)
324 #define SGE_FRAMINGERR (F_UC_REQ_FRAMINGERROR | F_R_REQ_FRAMINGERROR)
325 #define SGE_FATALERR (SGE_PARERR | SGE_FRAMINGERR | F_RSPQCREDITOVERFOW | \
326 		      F_RSPQDISABLED)
327 
328 /**
329  *	t3_sge_err_intr_handler - SGE async event interrupt handler
330  *	@adapter: the adapter
331  *
332  *	Interrupt handler for SGE asynchronous (non-data) events.
333  */
334 void
335 t3_sge_err_intr_handler(adapter_t *adapter)
336 {
337 	unsigned int v, status;
338 
339 	status = t3_read_reg(adapter, A_SG_INT_CAUSE);
340 	if (status & SGE_PARERR)
341 		CH_ALERT(adapter, "SGE parity error (0x%x)\n",
342 			 status & SGE_PARERR);
343 	if (status & SGE_FRAMINGERR)
344 		CH_ALERT(adapter, "SGE framing error (0x%x)\n",
345 			 status & SGE_FRAMINGERR);
346 	if (status & F_RSPQCREDITOVERFOW)
347 		CH_ALERT(adapter, "SGE response queue credit overflow\n");
348 
349 	if (status & F_RSPQDISABLED) {
350 		v = t3_read_reg(adapter, A_SG_RSPQ_FL_STATUS);
351 
352 		CH_ALERT(adapter,
353 			 "packet delivered to disabled response queue (0x%x)\n",
354 			 (v >> S_RSPQ0DISABLED) & 0xff);
355 	}
356 
357 	t3_write_reg(adapter, A_SG_INT_CAUSE, status);
358 	if (status & SGE_FATALERR)
359 		t3_fatal_err(adapter);
360 }
361 
362 void
363 t3_sge_prep(adapter_t *adap, struct sge_params *p)
364 {
365 	int i, nqsets;
366 
367 	nqsets = min(SGE_QSETS, mp_ncpus*4);
368 
369 	fl_q_size = min(nmbclusters/(3*nqsets), FL_Q_SIZE);
370 
371 	while (!powerof2(fl_q_size))
372 		fl_q_size--;
373 #if __FreeBSD_version >= 700111
374 	if (cxgb_use_16k_clusters)
375 		jumbo_q_size = min(nmbjumbo16/(3*nqsets), JUMBO_Q_SIZE);
376 	else
377 		jumbo_q_size = min(nmbjumbo9/(3*nqsets), JUMBO_Q_SIZE);
378 #else
379 	jumbo_q_size = min(nmbjumbo4/(3*nqsets), JUMBO_Q_SIZE);
380 #endif
381 	while (!powerof2(jumbo_q_size))
382 		jumbo_q_size--;
383 
384 	/* XXX Does ETHER_ALIGN need to be accounted for here? */
385 	p->max_pkt_size = adap->sge.qs[0].fl[1].buf_size - sizeof(struct cpl_rx_data);
386 
387 	for (i = 0; i < SGE_QSETS; ++i) {
388 		struct qset_params *q = p->qset + i;
389 
390 		if (adap->params.nports > 2) {
391 			q->coalesce_usecs = 50;
392 		} else {
393 #ifdef INVARIANTS
394 			q->coalesce_usecs = 10;
395 #else
396 			q->coalesce_usecs = 5;
397 #endif
398 		}
399 		q->polling = 0;
400 		q->rspq_size = RSPQ_Q_SIZE;
401 		q->fl_size = fl_q_size;
402 		q->jumbo_size = jumbo_q_size;
403 		q->txq_size[TXQ_ETH] = TX_ETH_Q_SIZE;
404 		q->txq_size[TXQ_OFLD] = 1024;
405 		q->txq_size[TXQ_CTRL] = 256;
406 		q->cong_thres = 0;
407 	}
408 }
409 
410 int
411 t3_sge_alloc(adapter_t *sc)
412 {
413 
414 	/* The parent tag. */
415 	if (bus_dma_tag_create( NULL,			/* parent */
416 				1, 0,			/* algnmnt, boundary */
417 				BUS_SPACE_MAXADDR,	/* lowaddr */
418 				BUS_SPACE_MAXADDR,	/* highaddr */
419 				NULL, NULL,		/* filter, filterarg */
420 				BUS_SPACE_MAXSIZE_32BIT,/* maxsize */
421 				BUS_SPACE_UNRESTRICTED, /* nsegments */
422 				BUS_SPACE_MAXSIZE_32BIT,/* maxsegsize */
423 				0,			/* flags */
424 				NULL, NULL,		/* lock, lockarg */
425 				&sc->parent_dmat)) {
426 		device_printf(sc->dev, "Cannot allocate parent DMA tag\n");
427 		return (ENOMEM);
428 	}
429 
430 	/*
431 	 * DMA tag for normal sized RX frames
432 	 */
433 	if (bus_dma_tag_create(sc->parent_dmat, MCLBYTES, 0, BUS_SPACE_MAXADDR,
434 		BUS_SPACE_MAXADDR, NULL, NULL, MCLBYTES, 1,
435 		MCLBYTES, BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_dmat)) {
436 		device_printf(sc->dev, "Cannot allocate RX DMA tag\n");
437 		return (ENOMEM);
438 	}
439 
440 	/*
441 	 * DMA tag for jumbo sized RX frames.
442 	 */
443 	if (bus_dma_tag_create(sc->parent_dmat, MJUM16BYTES, 0, BUS_SPACE_MAXADDR,
444 		BUS_SPACE_MAXADDR, NULL, NULL, MJUM16BYTES, 1, MJUM16BYTES,
445 		BUS_DMA_ALLOCNOW, NULL, NULL, &sc->rx_jumbo_dmat)) {
446 		device_printf(sc->dev, "Cannot allocate RX jumbo DMA tag\n");
447 		return (ENOMEM);
448 	}
449 
450 	/*
451 	 * DMA tag for TX frames.
452 	 */
453 	if (bus_dma_tag_create(sc->parent_dmat, 1, 0, BUS_SPACE_MAXADDR,
454 		BUS_SPACE_MAXADDR, NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
455 		TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
456 		NULL, NULL, &sc->tx_dmat)) {
457 		device_printf(sc->dev, "Cannot allocate TX DMA tag\n");
458 		return (ENOMEM);
459 	}
460 
461 	return (0);
462 }
463 
464 int
465 t3_sge_free(struct adapter * sc)
466 {
467 
468 	if (sc->tx_dmat != NULL)
469 		bus_dma_tag_destroy(sc->tx_dmat);
470 
471 	if (sc->rx_jumbo_dmat != NULL)
472 		bus_dma_tag_destroy(sc->rx_jumbo_dmat);
473 
474 	if (sc->rx_dmat != NULL)
475 		bus_dma_tag_destroy(sc->rx_dmat);
476 
477 	if (sc->parent_dmat != NULL)
478 		bus_dma_tag_destroy(sc->parent_dmat);
479 
480 	return (0);
481 }
482 
483 void
484 t3_update_qset_coalesce(struct sge_qset *qs, const struct qset_params *p)
485 {
486 
487 	qs->rspq.holdoff_tmr = max(p->coalesce_usecs * 10, 1U);
488 	qs->rspq.polling = 0 /* p->polling */;
489 }
490 
491 #if !defined(__i386__) && !defined(__amd64__)
492 static void
493 refill_fl_cb(void *arg, bus_dma_segment_t *segs, int nseg, int error)
494 {
495 	struct refill_fl_cb_arg *cb_arg = arg;
496 
497 	cb_arg->error = error;
498 	cb_arg->seg = segs[0];
499 	cb_arg->nseg = nseg;
500 
501 }
502 #endif
503 /**
504  *	refill_fl - refill an SGE free-buffer list
505  *	@sc: the controller softc
506  *	@q: the free-list to refill
507  *	@n: the number of new buffers to allocate
508  *
509  *	(Re)populate an SGE free-buffer list with up to @n new packet buffers.
510  *	The caller must ensure that @n does not exceed the queue's capacity.
511  */
512 static void
513 refill_fl(adapter_t *sc, struct sge_fl *q, int n)
514 {
515 	struct rx_sw_desc *sd = &q->sdesc[q->pidx];
516 	struct rx_desc *d = &q->desc[q->pidx];
517 	struct refill_fl_cb_arg cb_arg;
518 	caddr_t cl;
519 	int err, count = 0;
520 	int header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
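	/*
	 * Note on header_size (based on how the cluster is used below): the
	 * first header_size bytes of each cluster are left untouched so that
	 * an mbuf header can later be constructed in place, and only
	 * cl + header_size is mapped and handed to the hardware.
	 */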
521 
522 	cb_arg.error = 0;
523 	while (n--) {
524 		/*
525 		 * We only allocate a cluster here; mbuf allocation happens after rx
526 		 */
527 		if ((cl = cxgb_cache_get(q->zone)) == NULL) {
528 			log(LOG_WARNING, "Failed to allocate cluster\n");
529 			goto done;
530 		}
531 
532 		if ((sd->flags & RX_SW_DESC_MAP_CREATED) == 0) {
533 			if ((err = bus_dmamap_create(q->entry_tag, 0, &sd->map))) {
534 				log(LOG_WARNING, "bus_dmamap_create failed %d\n", err);
535 				uma_zfree(q->zone, cl);
536 				goto done;
537 			}
538 			sd->flags |= RX_SW_DESC_MAP_CREATED;
539 		}
540 #if !defined(__i386__) && !defined(__amd64__)
541 		err = bus_dmamap_load(q->entry_tag, sd->map,
542 		    cl + header_size, q->buf_size,
543 		    refill_fl_cb, &cb_arg, 0);
544 
545 		if (err != 0 || cb_arg.error) {
546 			log(LOG_WARNING, "failure in refill_fl %d\n", cb_arg.error);
547 			/*
548 			 * XXX free cluster
549 			 */
550 			return;
551 		}
552 #else
553 		cb_arg.seg.ds_addr = pmap_kextract((vm_offset_t)(cl + header_size));
554 #endif
555 		sd->flags |= RX_SW_DESC_INUSE;
556 		sd->rxsd_cl = cl;
557 		sd->data = cl + header_size;
558 		d->addr_lo = htobe32(cb_arg.seg.ds_addr & 0xffffffff);
559 		d->addr_hi = htobe32(((uint64_t)cb_arg.seg.ds_addr >> 32) & 0xffffffff);
560 		d->len_gen = htobe32(V_FLD_GEN1(q->gen));
561 		d->gen2 = htobe32(V_FLD_GEN2(q->gen));
562 
563 		d++;
564 		sd++;
565 
566 		if (++q->pidx == q->size) {
567 			q->pidx = 0;
568 			q->gen ^= 1;
569 			sd = q->sdesc;
570 			d = q->desc;
571 		}
572 		q->credits++;
573 		count++;
574 	}
575 
576 done:
577 	if (count)
578 		t3_write_reg(sc, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
579 }
580 
581 
582 /**
583  *	free_rx_bufs - free the Rx buffers on an SGE free list
584  *	@sc: the controller softc
585  *	@q: the SGE free list to clean up
586  *
587  *	Release the buffers on an SGE free-buffer Rx queue.  HW fetching from
588  *	this queue should be stopped before calling this function.
589  */
590 static void
591 free_rx_bufs(adapter_t *sc, struct sge_fl *q)
592 {
593 	u_int cidx = q->cidx;
594 
595 	while (q->credits--) {
596 		struct rx_sw_desc *d = &q->sdesc[cidx];
597 
598 		if (d->flags & RX_SW_DESC_INUSE) {
599 			bus_dmamap_unload(q->entry_tag, d->map);
600 			bus_dmamap_destroy(q->entry_tag, d->map);
601 			uma_zfree(q->zone, d->rxsd_cl);
602 		}
603 		d->rxsd_cl = NULL;
604 		if (++cidx == q->size)
605 			cidx = 0;
606 	}
607 }
608 
609 static __inline void
610 __refill_fl(adapter_t *adap, struct sge_fl *fl)
611 {
612 	refill_fl(adap, fl, min(16U, fl->size - fl->credits));
613 }
614 
615 static __inline void
616 __refill_fl_lt(adapter_t *adap, struct sge_fl *fl, int max)
617 {
618 	if ((fl->size - fl->credits) < max)
619 		refill_fl(adap, fl, min(max, fl->size - fl->credits));
620 }
621 
622 void
623 refill_fl_service(adapter_t *adap, struct sge_fl *fl)
624 {
625 	__refill_fl_lt(adap, fl, 512);
626 }
627 
628 /**
629  *	recycle_rx_buf - recycle a receive buffer
630  *	@adapter: the adapter
631  *	@q: the SGE free list
632  *	@idx: index of buffer to recycle
633  *
634  *	Recycles the specified buffer on the given free list by adding it at
635  *	the next available slot on the list.
636  */
637 static void
638 recycle_rx_buf(adapter_t *adap, struct sge_fl *q, unsigned int idx)
639 {
640 	struct rx_desc *from = &q->desc[idx];
641 	struct rx_desc *to   = &q->desc[q->pidx];
642 
643 	q->sdesc[q->pidx] = q->sdesc[idx];
644 	to->addr_lo = from->addr_lo;        // already big endian
645 	to->addr_hi = from->addr_hi;        // likewise
646 	wmb();
647 	to->len_gen = htobe32(V_FLD_GEN1(q->gen));
648 	to->gen2 = htobe32(V_FLD_GEN2(q->gen));
649 	q->credits++;
650 
651 	if (++q->pidx == q->size) {
652 		q->pidx = 0;
653 		q->gen ^= 1;
654 	}
655 	t3_write_reg(adap, A_SG_KDOORBELL, V_EGRCNTX(q->cntxt_id));
656 }
657 
658 static void
659 alloc_ring_cb(void *arg, bus_dma_segment_t *segs, int nsegs, int error)
660 {
661 	uint32_t *addr;
662 
663 	addr = arg;
664 	*addr = segs[0].ds_addr;
665 }
666 
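/**
 *	alloc_ring - allocate resources for an SGE descriptor ring
 *	@sc: the controller softc
 *	@nelem: the number of descriptors in the ring
 *	@elem_size: the size of each descriptor
 *	@sw_size: the size of the SW state kept per ring entry, 0 for none
 *	@phys: returns the bus address of the allocated ring
 *	@desc: returns the KVA of the allocated ring
 *	@sdesc: returns the KVA of the accompanying SW state, if any
 *	@tag: returns the DMA tag created for the ring
 *	@map: returns the DMA map used to load the ring
 *	@parent_entry_tag: parent tag for the per-entry tag, or NULL to skip it
 *	@entry_tag: returns the per-entry DMA tag used to map queue buffers
 *
 *	Allocates bus-addressable memory for a descriptor ring, zeroes it, and
 *	optionally allocates the software bookkeeping that goes with it.
 */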
667 static int
668 alloc_ring(adapter_t *sc, size_t nelem, size_t elem_size, size_t sw_size,
669     bus_addr_t *phys, void *desc, void *sdesc, bus_dma_tag_t *tag,
670     bus_dmamap_t *map, bus_dma_tag_t parent_entry_tag, bus_dma_tag_t *entry_tag)
671 {
672 	size_t len = nelem * elem_size;
673 	void *s = NULL;
674 	void *p = NULL;
675 	int err;
676 
677 	if ((err = bus_dma_tag_create(sc->parent_dmat, PAGE_SIZE, 0,
678 				      BUS_SPACE_MAXADDR_32BIT,
679 				      BUS_SPACE_MAXADDR, NULL, NULL, len, 1,
680 				      len, 0, NULL, NULL, tag)) != 0) {
681 		device_printf(sc->dev, "Cannot allocate descriptor tag\n");
682 		return (ENOMEM);
683 	}
684 
685 	if ((err = bus_dmamem_alloc(*tag, (void **)&p, BUS_DMA_NOWAIT,
686 				    map)) != 0) {
687 		device_printf(sc->dev, "Cannot allocate descriptor memory\n");
688 		return (ENOMEM);
689 	}
690 
691 	bus_dmamap_load(*tag, *map, p, len, alloc_ring_cb, phys, 0);
692 	bzero(p, len);
693 	*(void **)desc = p;
694 
695 	if (sw_size) {
696 		len = nelem * sw_size;
697 		s = malloc(len, M_DEVBUF, M_WAITOK|M_ZERO);
698 		*(void **)sdesc = s;
699 	}
700 	if (parent_entry_tag == NULL)
701 		return (0);
702 
703 	if ((err = bus_dma_tag_create(parent_entry_tag, 1, 0,
704 				      BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR,
705 		                      NULL, NULL, TX_MAX_SIZE, TX_MAX_SEGS,
706 				      TX_MAX_SIZE, BUS_DMA_ALLOCNOW,
707 		                      NULL, NULL, entry_tag)) != 0) {
708 		device_printf(sc->dev, "Cannot allocate descriptor entry tag\n");
709 		return (ENOMEM);
710 	}
711 	return (0);
712 }
713 
714 static void
715 sge_slow_intr_handler(void *arg, int ncount)
716 {
717 	adapter_t *sc = arg;
718 
719 	t3_slow_intr_handler(sc);
720 }
721 
722 /**
723  *	sge_timer_cb - perform periodic maintenance of the SGE queue sets
724  *	@arg: the adapter whose queue sets to maintain
725  *
726  *	Runs periodically from a timer to perform maintenance of the adapter's
727  *	SGE queue sets.  It performs the following tasks:
728  *
729  *	a) Cleans up any completed Tx descriptors that may still be pending.
730  *	Normal descriptor cleanup happens when new packets are added to a Tx
731  *	queue so this timer is relatively infrequent and does any cleanup only
732  *	if the Tx queue has not seen any new packets in a while.  We make a
733  *	best effort attempt to reclaim descriptors, in that we don't wait
734  *	around if we cannot get a queue's lock (which most likely is because
735  *	someone else is queueing new packets and so will also handle the clean
736  *	up).  Since control queues use immediate data exclusively we don't
737  *	bother cleaning them up here.
738  *
739  *	b) Replenishes Rx queues that have run out due to memory shortage.
740  *	Normally new Rx buffers are added when existing ones are consumed but
741  *	when out of memory a queue can become empty.  We try to add only a few
742  *	buffers here, the queue will be replenished fully as these new buffers
743  *	are used up if memory shortage has subsided.
744  *
745  *	c) Returns coalesced response queue credits in case a response queue is
746  *	starved.
747  *
748  *	d) Rings doorbells for T304 tunnel queues, since we have seen doorbell
749  *	FIFO overflows and the FW doesn't implement any recovery scheme yet.
750  */
751 static void
752 sge_timer_cb(void *arg)
753 {
754 	adapter_t *sc = arg;
755 #ifndef IFNET_MULTIQUEUE
756 	struct port_info *pi;
757 	struct sge_qset *qs;
758 	struct sge_txq  *txq;
759 	int i, j;
760 	int reclaim_ofl, refill_rx;
761 
762 	for (i = 0; i < sc->params.nports; i++) {
763 		pi = &sc->port[i];
764 		for (j = 0; j < pi->nqsets; j++) {
765 			qs = &sc->sge.qs[pi->first_qset + j];
766 			txq = &qs->txq[0];
767 			reclaim_ofl = txq[TXQ_OFLD].processed - txq[TXQ_OFLD].cleaned;
768 			refill_rx = ((qs->fl[0].credits < qs->fl[0].size) ||
769 			    (qs->fl[1].credits < qs->fl[1].size));
770 			if (reclaim_ofl || refill_rx) {
771 				taskqueue_enqueue(sc->tq, &pi->timer_reclaim_task);
772 				break;
773 			}
774 		}
775 	}
776 #endif
777 	if (sc->params.nports > 2) {
778 		int i;
779 
780 		for_each_port(sc, i) {
781 			struct port_info *pi = &sc->port[i];
782 
783 			t3_write_reg(sc, A_SG_KDOORBELL,
784 				     F_SELEGRCNTX |
785 				     (FW_TUNNEL_SGEEC_START + pi->first_qset));
786 		}
787 	}
788 	if (sc->open_device_map != 0)
789 		callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
790 }
791 
792 /*
793  * This is meant to be a catch-all function to keep sge state private
794  * to sge.c
795  *
796  */
797 int
798 t3_sge_init_adapter(adapter_t *sc)
799 {
800 	callout_init(&sc->sge_timer_ch, CALLOUT_MPSAFE);
801 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
802 	TASK_INIT(&sc->slow_intr_task, 0, sge_slow_intr_handler, sc);
803 	mi_init();
804 	cxgb_cache_init();
805 	return (0);
806 }
807 
808 int
809 t3_sge_reset_adapter(adapter_t *sc)
810 {
811 	callout_reset(&sc->sge_timer_ch, TX_RECLAIM_PERIOD, sge_timer_cb, sc);
812 	return (0);
813 }
814 
815 int
816 t3_sge_init_port(struct port_info *pi)
817 {
818 	TASK_INIT(&pi->timer_reclaim_task, 0, sge_timer_reclaim, pi);
819 	return (0);
820 }
821 
822 void
823 t3_sge_deinit_sw(adapter_t *sc)
824 {
825 
826 	mi_deinit();
827 }
828 
829 /**
830  *	refill_rspq - replenish an SGE response queue
831  *	@adapter: the adapter
832  *	@q: the response queue to replenish
833  *	@credits: how many new responses to make available
834  *
835  *	Replenishes a response queue by making the supplied number of responses
836  *	available to HW.
837  */
838 static __inline void
839 refill_rspq(adapter_t *sc, const struct sge_rspq *q, u_int credits)
840 {
841 
842 	/* mbufs are allocated on demand when a rspq entry is processed. */
843 	t3_write_reg(sc, A_SG_RSPQ_CREDIT_RETURN,
844 		     V_RSPQ(q->cntxt_id) | V_CREDITS(credits));
845 }
846 
847 static __inline void
848 sge_txq_reclaim_(struct sge_txq *txq, int force)
849 {
850 
851 	if (desc_reclaimable(txq) < 16)
852 		return;
853 	if (mtx_trylock(&txq->lock) == 0)
854 		return;
855 	reclaim_completed_tx_(txq, 16);
856 	mtx_unlock(&txq->lock);
857 
858 }
859 
860 static void
861 sge_txq_reclaim_handler(void *arg, int ncount)
862 {
863 	struct sge_txq *q = arg;
864 
865 	sge_txq_reclaim_(q, TRUE);
866 }
867 
868 
869 
870 static void
871 sge_timer_reclaim(void *arg, int ncount)
872 {
873 	struct port_info *pi = arg;
874 	int i, nqsets = pi->nqsets;
875 	adapter_t *sc = pi->adapter;
876 	struct sge_qset *qs;
877 	struct sge_txq *txq;
878 	struct mtx *lock;
879 
880 #ifdef IFNET_MULTIQUEUE
881 	panic("%s should not be called with multiqueue support\n", __FUNCTION__);
882 #endif
883 	for (i = 0; i < nqsets; i++) {
884 		qs = &sc->sge.qs[pi->first_qset + i];
885 
886 		txq = &qs->txq[TXQ_OFLD];
887 		sge_txq_reclaim_(txq, FALSE);
888 
889 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
890 			    &sc->sge.qs[0].rspq.lock;
891 
892 		if (mtx_trylock(lock)) {
893 			/* XXX currently assume that we are *NOT* polling */
894 			uint32_t status = t3_read_reg(sc, A_SG_RSPQ_FL_STATUS);
895 
896 			if (qs->fl[0].credits < qs->fl[0].size - 16)
897 				__refill_fl(sc, &qs->fl[0]);
898 			if (qs->fl[1].credits < qs->fl[1].size - 16)
899 				__refill_fl(sc, &qs->fl[1]);
900 
901 			if (status & (1 << qs->rspq.cntxt_id)) {
902 				if (qs->rspq.credits) {
903 					refill_rspq(sc, &qs->rspq, 1);
904 					qs->rspq.credits--;
905 					t3_write_reg(sc, A_SG_RSPQ_FL_STATUS,
906 					    1 << qs->rspq.cntxt_id);
907 				}
908 			}
909 			mtx_unlock(lock);
910 		}
911 	}
912 }
913 
914 /**
915  *	init_qset_cntxt - initialize an SGE queue set context info
916  *	@qs: the queue set
917  *	@id: the queue set id
918  *
919  *	Initializes the TIDs and context ids for the queues of a queue set.
920  */
921 static void
922 init_qset_cntxt(struct sge_qset *qs, u_int id)
923 {
924 
925 	qs->rspq.cntxt_id = id;
926 	qs->fl[0].cntxt_id = 2 * id;
927 	qs->fl[1].cntxt_id = 2 * id + 1;
928 	qs->txq[TXQ_ETH].cntxt_id = FW_TUNNEL_SGEEC_START + id;
929 	qs->txq[TXQ_ETH].token = FW_TUNNEL_TID_START + id;
930 	qs->txq[TXQ_OFLD].cntxt_id = FW_OFLD_SGEEC_START + id;
931 	qs->txq[TXQ_CTRL].cntxt_id = FW_CTRL_SGEEC_START + id;
932 	qs->txq[TXQ_CTRL].token = FW_CTRL_TID_START + id;
933 
934 	mbufq_init(&qs->txq[TXQ_ETH].sendq);
935 	mbufq_init(&qs->txq[TXQ_OFLD].sendq);
936 	mbufq_init(&qs->txq[TXQ_CTRL].sendq);
937 }
938 
939 
940 static void
941 txq_prod(struct sge_txq *txq, unsigned int ndesc, struct txq_state *txqs)
942 {
943 	txq->in_use += ndesc;
944 	/*
945 	 * XXX we don't handle stopping of the queue;
946 	 * presumably start handles this when we bump against the end
947 	 */
948 	txqs->gen = txq->gen;
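	/*
	 * A completion is requested roughly once every 32 descriptors: bit 5
	 * of the running unacked count is shifted into the WR_COMPL position
	 * for this work request and only the low 5 bits are carried forward.
	 */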
949 	txq->unacked += ndesc;
950 	txqs->compl = (txq->unacked & 32) << (S_WR_COMPL - 5);
951 	txq->unacked &= 31;
952 	txqs->pidx = txq->pidx;
953 	txq->pidx += ndesc;
954 #ifdef INVARIANTS
955 	if (((txqs->pidx > txq->cidx) &&
956 		(txq->pidx < txqs->pidx) &&
957 		(txq->pidx >= txq->cidx)) ||
958 	    ((txqs->pidx < txq->cidx) &&
959 		(txq->pidx >= txq-> cidx)) ||
960 	    ((txqs->pidx < txq->cidx) &&
961 		(txq->cidx < txqs->pidx)))
962 		panic("txqs->pidx=%d txq->pidx=%d txq->cidx=%d",
963 		    txqs->pidx, txq->pidx, txq->cidx);
964 #endif
965 	if (txq->pidx >= txq->size) {
966 		txq->pidx -= txq->size;
967 		txq->gen ^= 1;
968 	}
969 
970 }
971 
972 /**
973  *	calc_tx_descs - calculate the number of Tx descriptors for a packet
974  *	@m: the packet mbufs
975  *      @nsegs: the number of segments
976  *
977  * 	Returns the number of Tx descriptors needed for the given Ethernet
978  * 	packet.  Ethernet packets require addition of WR and CPL headers.
979  */
980 static __inline unsigned int
981 calc_tx_descs(const struct mbuf *m, int nsegs)
982 {
983 	unsigned int flits;
984 
985 	if (m->m_pkthdr.len <= WR_LEN - sizeof(struct cpl_tx_pkt))
986 		return 1;
987 
988 	flits = sgl_len(nsegs) + 2;
989 #ifdef TSO_SUPPORTED
990 	if (m->m_pkthdr.csum_flags & CSUM_TSO)
991 		flits++;
992 #endif
993 	return flits_to_desc(flits);
994 }
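/*
 * Example (illustrative): a packet mapped to 3 DMA segments that does not fit
 * as immediate data needs sgl_len(3) + 2 = 7 flits for the WR/CPL header plus
 * the SGL (one more if it is a TSO packet), and flit_desc_map[7] then gives
 * the number of descriptors spanned.
 */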
995 
996 static unsigned int
997 busdma_map_mbufs(struct mbuf **m, struct sge_txq *txq,
998     struct tx_sw_desc *txsd, bus_dma_segment_t *segs, int *nsegs)
999 {
1000 	struct mbuf *m0;
1001 	int err, pktlen, pass = 0;
1002 
1003 retry:
1004 	err = 0;
1005 	m0 = *m;
1006 	pktlen = m0->m_pkthdr.len;
1007 #if defined(__i386__) || defined(__amd64__)
1008 	if (busdma_map_sg_collapse(m, segs, nsegs) == 0) {
1009 		goto done;
1010 	} else
1011 #endif
1012 		err = bus_dmamap_load_mbuf_sg(txq->entry_tag, txsd->map, m0, segs, nsegs, 0);
1013 
1014 	if (err == 0) {
1015 		goto done;
1016 	}
1017 	if (err == EFBIG && pass == 0) {
1018 		pass = 1;
1019 		/* Too many segments, try to defrag */
1020 		m0 = m_defrag(m0, M_DONTWAIT);
1021 		if (m0 == NULL) {
1022 			m_freem(*m);
1023 			*m = NULL;
1024 			return (ENOBUFS);
1025 		}
1026 		*m = m0;
1027 		goto retry;
1028 	} else if (err == ENOMEM) {
1029 		return (err);
1030 	} else if (err) {
1031 		if (cxgb_debug)
1032 			printf("map failure err=%d pktlen=%d\n", err, pktlen);
1033 		m_freem(m0);
1034 		*m = NULL;
1035 		return (err);
1036 	}
1037 done:
1038 #if !defined(__i386__) && !defined(__amd64__)
1039 	bus_dmamap_sync(txq->entry_tag, txsd->map, BUS_DMASYNC_PREWRITE);
1040 #endif
1041 	txsd->flags |= TX_SW_DESC_MAPPED;
1042 
1043 	return (0);
1044 }
1045 
1046 /**
1047  *	make_sgl - populate a scatter/gather list for a packet
1048  *	@sgp: the SGL to populate
1049  *	@segs: the packet dma segments
1050  *	@nsegs: the number of segments
1051  *
1052  *	Generates a scatter/gather list for the buffers that make up a packet.
1053  *	The SGL size in 8-byte words is given by sgl_len(); the caller must
1054  *	size the SGL appropriately.
1055  */
1056 static __inline void
1057 make_sgl(struct sg_ent *sgp, bus_dma_segment_t *segs, int nsegs)
1058 {
1059 	int i, idx;
1060 
1061 	for (idx = 0, i = 0; i < nsegs; i++) {
1062 		/*
1063 		 * firmware doesn't like empty segments
1064 		 */
1065 		if (segs[i].ds_len == 0)
1066 			continue;
1067 		if (i && idx == 0)
1068 			++sgp;
1069 
1070 		sgp->len[idx] = htobe32(segs[i].ds_len);
1071 		sgp->addr[idx] = htobe64(segs[i].ds_addr);
1072 		idx ^= 1;
1073 	}
1074 
1075 	if (idx) {
1076 		sgp->len[idx] = 0;
1077 		sgp->addr[idx] = 0;
1078 	}
1079 }
1080 
1081 /**
1082  *	check_ring_tx_db - check and potentially ring a Tx queue's doorbell
1083  *	@adap: the adapter
1084  *	@q: the Tx queue
1085  *
1086  *	Ring the doorbell if a Tx queue is asleep.  There is a natural race
1087  *	where the HW goes to sleep just after we check; in that case the
1088  *	interrupt handler will detect the outstanding TX packet and ring
1089  *	the doorbell for us.
1090  *
1091  *	When GTS is disabled we unconditionally ring the doorbell.
1092  */
1093 static __inline void
1094 check_ring_tx_db(adapter_t *adap, struct sge_txq *q)
1095 {
1096 #if USE_GTS
1097 	clear_bit(TXQ_LAST_PKT_DB, &q->flags);
1098 	if (test_and_set_bit(TXQ_RUNNING, &q->flags) == 0) {
1099 		set_bit(TXQ_LAST_PKT_DB, &q->flags);
1100 #ifdef T3_TRACE
1101 		T3_TRACE1(adap->tb[q->cntxt_id & 7], "doorbell Tx, cntxt %d",
1102 			  q->cntxt_id);
1103 #endif
1104 		t3_write_reg(adap, A_SG_KDOORBELL,
1105 			     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1106 	}
1107 #else
1108 	wmb();            /* write descriptors before telling HW */
1109 	t3_write_reg(adap, A_SG_KDOORBELL,
1110 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1111 #endif
1112 }
1113 
1114 static __inline void
1115 wr_gen2(struct tx_desc *d, unsigned int gen)
1116 {
1117 #if SGE_NUM_GENBITS == 2
1118 	d->flit[TX_DESC_FLITS - 1] = htobe64(gen);
1119 #endif
1120 }
1121 
1122 /**
1123  *	write_wr_hdr_sgl - write a WR header and, optionally, SGL
1124  *	@ndesc: number of Tx descriptors spanned by the SGL
1125  *	@txd: first Tx descriptor to be written
1126  *	@txqs: txq state (generation and producer index)
1127  *	@txq: the SGE Tx queue
1128  *	@sgl: the SGL
1129  *	@flits: number of flits to the start of the SGL in the first descriptor
1130  *	@sgl_flits: the SGL size in flits
1131  *	@wr_hi: top 32 bits of WR header based on WR type (big endian)
1132  *	@wr_lo: low 32 bits of WR header based on WR type (big endian)
1133  *
1134  *	Write a work request header and an associated SGL.  If the SGL is
1135  *	small enough to fit into one Tx descriptor it has already been written
1136  *	and we just need to write the WR header.  Otherwise we distribute the
1137  *	SGL across the number of descriptors it spans.
1138  */
1139 static void
1140 write_wr_hdr_sgl(unsigned int ndesc, struct tx_desc *txd, struct txq_state *txqs,
1141     const struct sge_txq *txq, const struct sg_ent *sgl, unsigned int flits,
1142     unsigned int sgl_flits, unsigned int wr_hi, unsigned int wr_lo)
1143 {
1144 
1145 	struct work_request_hdr *wrp = (struct work_request_hdr *)txd;
1146 	struct tx_sw_desc *txsd = &txq->sdesc[txqs->pidx];
1147 
1148 	if (__predict_true(ndesc == 1)) {
1149 		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1150 		    V_WR_SGLSFLT(flits)) | wr_hi;
1151 		wmb();
1152 		wrp->wr_lo = htonl(V_WR_LEN(flits + sgl_flits) |
1153 		    V_WR_GEN(txqs->gen)) | wr_lo;
1154 		/* XXX gen? */
1155 		wr_gen2(txd, txqs->gen);
1156 
1157 	} else {
1158 		unsigned int ogen = txqs->gen;
1159 		const uint64_t *fp = (const uint64_t *)sgl;
1160 		struct work_request_hdr *wp = wrp;
1161 
1162 		wrp->wr_hi = htonl(F_WR_SOP | V_WR_DATATYPE(1) |
1163 		    V_WR_SGLSFLT(flits)) | wr_hi;
1164 
1165 		while (sgl_flits) {
1166 			unsigned int avail = WR_FLITS - flits;
1167 
1168 			if (avail > sgl_flits)
1169 				avail = sgl_flits;
1170 			memcpy(&txd->flit[flits], fp, avail * sizeof(*fp));
1171 			sgl_flits -= avail;
1172 			ndesc--;
1173 			if (!sgl_flits)
1174 				break;
1175 
1176 			fp += avail;
1177 			txd++;
1178 			txsd++;
1179 			if (++txqs->pidx == txq->size) {
1180 				txqs->pidx = 0;
1181 				txqs->gen ^= 1;
1182 				txd = txq->desc;
1183 				txsd = txq->sdesc;
1184 			}
1185 
1186 			/*
1187 			 * when the head of the mbuf chain
1188 			 * is freed all clusters will be freed
1189 			 * with it
1190 			 */
1191 			KASSERT(txsd->mi.mi_base == NULL,
1192 			    ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1193 			wrp = (struct work_request_hdr *)txd;
1194 			wrp->wr_hi = htonl(V_WR_DATATYPE(1) |
1195 			    V_WR_SGLSFLT(1)) | wr_hi;
1196 			wrp->wr_lo = htonl(V_WR_LEN(min(WR_FLITS,
1197 				    sgl_flits + 1)) |
1198 			    V_WR_GEN(txqs->gen)) | wr_lo;
1199 			wr_gen2(txd, txqs->gen);
1200 			flits = 1;
1201 		}
1202 		wrp->wr_hi |= htonl(F_WR_EOP);
1203 		wmb();
1204 		wp->wr_lo = htonl(V_WR_LEN(WR_FLITS) | V_WR_GEN(ogen)) | wr_lo;
1205 		wr_gen2((struct tx_desc *)wp, ogen);
1206 	}
1207 }
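/*
 * Worked example (illustrative, assuming WR_FLITS == 15): an LSO packet uses
 * flits == 3 for the WR plus CPL_TX_PKT_LSO header, leaving room for 12 SGL
 * flits in the first descriptor; a 20-flit SGL therefore spills its remaining
 * 8 flits into a second descriptor, which begins with a fresh 1-flit WR
 * header of its own.
 */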
1208 
1209 /* sizeof(*eh) + sizeof(*vhdr) + sizeof(*ip) + sizeof(*tcp) */
1210 #define TCPPKTHDRSIZE (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + 20 + 20)
1211 
1212 #ifdef VLAN_SUPPORTED
1213 #define GET_VTAG(cntrl, m) \
1214 do { \
1215 	if ((m)->m_flags & M_VLANTAG)					            \
1216 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((m)->m_pkthdr.ether_vtag); \
1217 } while (0)
1218 
1219 #define GET_VTAG_MI(cntrl, mi) \
1220 do { \
1221 	if ((mi)->mi_flags & M_VLANTAG)					\
1222 		cntrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN((mi)->mi_ether_vtag); \
1223 } while (0)
1224 #else
1225 #define GET_VTAG(cntrl, m)
1226 #define GET_VTAG_MI(cntrl, m)
1227 #endif
1228 
1229 int
1230 t3_encap(struct sge_qset *qs, struct mbuf **m, int count)
1231 {
1232 	adapter_t *sc;
1233 	struct mbuf *m0;
1234 	struct sge_txq *txq;
1235 	struct txq_state txqs;
1236 	struct port_info *pi;
1237 	unsigned int ndesc, flits, cntrl, mlen;
1238 	int err, nsegs, tso_info = 0;
1239 
1240 	struct work_request_hdr *wrp;
1241 	struct tx_sw_desc *txsd;
1242 	struct sg_ent *sgp, *sgl;
1243 	uint32_t wr_hi, wr_lo, sgl_flits;
1244 	bus_dma_segment_t segs[TX_MAX_SEGS];
1245 
1246 	struct tx_desc *txd;
1247 	struct mbuf_vec *mv;
1248 	struct mbuf_iovec *mi;
1249 
1250 	DPRINTF("t3_encap cpu=%d ", curcpu);
1251 
1252 	mi = NULL;
1253 	pi = qs->port;
1254 	sc = pi->adapter;
1255 	txq = &qs->txq[TXQ_ETH];
1256 	txd = &txq->desc[txq->pidx];
1257 	txsd = &txq->sdesc[txq->pidx];
1258 	sgl = txq->txq_sgl;
1259 	m0 = *m;
1260 
1261 	DPRINTF("t3_encap port_id=%d qsidx=%d ", pi->port_id, pi->first_qset);
1262 	DPRINTF("mlen=%d txpkt_intf=%d tx_chan=%d\n", m[0]->m_pkthdr.len, pi->txpkt_intf, pi->tx_chan);
1263 	if (cxgb_debug)
1264 		printf("mi_base=%p cidx=%d pidx=%d\n\n", txsd->mi.mi_base, txq->cidx, txq->pidx);
1265 
1266 	mtx_assert(&txq->lock, MA_OWNED);
1267 	cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1268 /*
1269  * XXX need to add VLAN support for 6.x
1270  */
1271 #ifdef VLAN_SUPPORTED
1272 	if  (m0->m_pkthdr.csum_flags & (CSUM_TSO))
1273 		tso_info = V_LSO_MSS(m0->m_pkthdr.tso_segsz);
1274 #endif
1275 	KASSERT(txsd->mi.mi_base == NULL,
1276 	    ("overwriting valid entry mi_base==%p", txsd->mi.mi_base));
1277 	if (count > 1) {
1278 		panic("count > 1 not supported in CVS\n");
1279 		if ((err = busdma_map_sg_vec(m, &m0, segs, count)))
1280 			return (err);
1281 		nsegs = count;
1282 	} else if ((err = busdma_map_sg_collapse(&m0, segs, &nsegs))) {
1283 		if (cxgb_debug)
1284 			printf("failed ... err=%d\n", err);
1285 		return (err);
1286 	}
1287 	KASSERT(m0->m_pkthdr.len, ("empty packet nsegs=%d count=%d", nsegs, count));
1288 
1289 	if (!(m0->m_pkthdr.len <= PIO_LEN)) {
1290 		mi_collapse_mbuf(&txsd->mi, m0);
1291 		mi = &txsd->mi;
1292 	}
1293 	if (count > 1) {
1294 		struct cpl_tx_pkt_batch *cpl_batch = (struct cpl_tx_pkt_batch *)txd;
1295 		int i, fidx;
1296 		struct mbuf_iovec *batchmi;
1297 
1298 		mv = mtomv(m0);
1299 		batchmi = mv->mv_vec;
1300 
1301 		wrp = (struct work_request_hdr *)txd;
1302 
1303 		flits = count*2 + 1;
1304 		txq_prod(txq, 1, &txqs);
1305 
1306 		for (fidx = 1, i = 0; i < count; i++, batchmi++, fidx += 2) {
1307 			struct cpl_tx_pkt_batch_entry *cbe = &cpl_batch->pkt_entry[i];
1308 
1309 			cntrl = V_TXPKT_INTF(pi->txpkt_intf);
1310 			GET_VTAG_MI(cntrl, batchmi);
1311 			cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1312 			if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1313 				cntrl |= F_TXPKT_IPCSUM_DIS;
1314 			if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1315 				cntrl |= F_TXPKT_L4CSUM_DIS;
1316 			cbe->cntrl = htonl(cntrl);
1317 			cbe->len = htonl(batchmi->mi_len | 0x80000000);
1318 			cbe->addr = htobe64(segs[i].ds_addr);
1319 			txd->flit[fidx] |= htobe64(1 << 24);
1320 		}
1321 
1322 		wrp->wr_hi = htonl(F_WR_SOP | F_WR_EOP | V_WR_DATATYPE(1) |
1323 		    V_WR_SGLSFLT(flits)) | htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1324 		wmb();
1325 		wrp->wr_lo = htonl(V_WR_LEN(flits) |
1326 		    V_WR_GEN(txqs.gen)) | htonl(V_WR_TID(txq->token));
1327 		/* XXX gen? */
1328 		wr_gen2(txd, txqs.gen);
1329 		check_ring_tx_db(sc, txq);
1330 
1331 		return (0);
1332 	} else if (tso_info) {
1333 		int min_size = TCPPKTHDRSIZE, eth_type, tagged;
1334 		struct cpl_tx_pkt_lso *hdr = (struct cpl_tx_pkt_lso *)txd;
1335 		struct ip *ip;
1336 		struct tcphdr *tcp;
1337 		char *pkthdr;
1338 
1339 		txd->flit[2] = 0;
1340 		GET_VTAG(cntrl, m0);
1341 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT_LSO);
1342 		hdr->cntrl = htonl(cntrl);
1343 		mlen = m0->m_pkthdr.len;
1344 		hdr->len = htonl(mlen | 0x80000000);
1345 
1346 		DPRINTF("tso buf len=%d\n", mlen);
1347 
1348 		tagged = m0->m_flags & M_VLANTAG;
1349 		if (!tagged)
1350 			min_size -= ETHER_VLAN_ENCAP_LEN;
1351 
1352 		if (__predict_false(mlen < min_size)) {
1353 			printf("mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1354 			    m0, mlen, m0->m_pkthdr.tso_segsz,
1355 			    m0->m_pkthdr.csum_flags, m0->m_flags);
1356 			panic("tx tso packet too small");
1357 		}
1358 
1359 		/* Make sure that ether, ip, tcp headers are all in m0 */
1360 		if (__predict_false(m0->m_len < min_size)) {
1361 			m0 = m_pullup(m0, min_size);
1362 			if (__predict_false(m0 == NULL)) {
1363 				/* XXX panic probably an overreaction */
1364 				panic("couldn't fit header into mbuf");
1365 			}
1366 		}
1367 		pkthdr = m0->m_data;
1368 
1369 		if (tagged) {
1370 			eth_type = CPL_ETH_II_VLAN;
1371 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN +
1372 			    ETHER_VLAN_ENCAP_LEN);
1373 		} else {
1374 			eth_type = CPL_ETH_II;
1375 			ip = (struct ip *)(pkthdr + ETHER_HDR_LEN);
1376 		}
1377 		tcp = (struct tcphdr *)((uint8_t *)ip +
1378 		    sizeof(*ip));
1379 
1380 		tso_info |= V_LSO_ETH_TYPE(eth_type) |
1381 			    V_LSO_IPHDR_WORDS(ip->ip_hl) |
1382 			    V_LSO_TCPHDR_WORDS(tcp->th_off);
1383 		hdr->lso_info = htonl(tso_info);
1384 
1385 		if (__predict_false(mlen <= PIO_LEN)) {
1386 			/* The packet is not undersized but it still fits in
1387 			 * PIO_LEN, which indicates a TSO bug at the higher
1388 			 * levels.
1389 			 */
1390 			DPRINTF("**5592 Fix** mbuf=%p,len=%d,tso_segsz=%d,csum_flags=%#x,flags=%#x",
1391 			    m0, mlen, m0->m_pkthdr.tso_segsz, m0->m_pkthdr.csum_flags, m0->m_flags);
1392 			txq_prod(txq, 1, &txqs);
1393 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[3]);
1394 			m_freem(m0);
1395 			m0 = NULL;
1396 			flits = (mlen + 7) / 8 + 3;
1397 			hdr->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1398 					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1399 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1400 			wmb();
1401 			hdr->wr.wr_lo = htonl(V_WR_LEN(flits) |
1402 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1403 
1404 			wr_gen2(txd, txqs.gen);
1405 			check_ring_tx_db(sc, txq);
1406 			return (0);
1407 		}
1408 		flits = 3;
1409 	} else {
1410 		struct cpl_tx_pkt *cpl = (struct cpl_tx_pkt *)txd;
1411 
1412 		GET_VTAG(cntrl, m0);
1413 		cntrl |= V_TXPKT_OPCODE(CPL_TX_PKT);
1414 		if (__predict_false(!(m0->m_pkthdr.csum_flags & CSUM_IP)))
1415 			cntrl |= F_TXPKT_IPCSUM_DIS;
1416 		if (__predict_false(!(m0->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP))))
1417 			cntrl |= F_TXPKT_L4CSUM_DIS;
1418 		cpl->cntrl = htonl(cntrl);
1419 		mlen = m0->m_pkthdr.len;
1420 		cpl->len = htonl(mlen | 0x80000000);
1421 
1422 		if (mlen <= PIO_LEN) {
1423 			txq_prod(txq, 1, &txqs);
1424 			m_copydata(m0, 0, mlen, (caddr_t)&txd->flit[2]);
1425 			m_freem(m0);
1426 			m0 = NULL;
1427 			flits = (mlen + 7) / 8 + 2;
1428 			cpl->wr.wr_hi = htonl(V_WR_BCNTLFLT(mlen & 7) |
1429 					  V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) |
1430 					  F_WR_SOP | F_WR_EOP | txqs.compl);
1431 			wmb();
1432 			cpl->wr.wr_lo = htonl(V_WR_LEN(flits) |
1433 			    V_WR_GEN(txqs.gen) | V_WR_TID(txq->token));
1434 
1435 			wr_gen2(txd, txqs.gen);
1436 			check_ring_tx_db(sc, txq);
1437 			DPRINTF("pio buf\n");
1438 			return (0);
1439 		}
1440 		DPRINTF("regular buf\n");
1441 		flits = 2;
1442 	}
1443 	wrp = (struct work_request_hdr *)txd;
1444 
1445 #ifdef	nomore
1446 	/*
1447 	 * XXX need to move into one of the helper routines above
1448 	 *
1449 	 */
1450 	if ((err = busdma_map_mbufs(m, txq, txsd, segs, &nsegs)) != 0)
1451 		return (err);
1452 	m0 = *m;
1453 #endif
1454 	ndesc = calc_tx_descs(m0, nsegs);
1455 
1456 	sgp = (ndesc == 1) ? (struct sg_ent *)&txd->flit[flits] : sgl;
1457 	make_sgl(sgp, segs, nsegs);
1458 
1459 	sgl_flits = sgl_len(nsegs);
1460 
1461 	DPRINTF("make_sgl success nsegs==%d ndesc==%d\n", nsegs, ndesc);
1462 	txq_prod(txq, ndesc, &txqs);
1463 	wr_hi = htonl(V_WR_OP(FW_WROPCODE_TUNNEL_TX_PKT) | txqs.compl);
1464 	wr_lo = htonl(V_WR_TID(txq->token));
1465 	write_wr_hdr_sgl(ndesc, txd, &txqs, txq, sgl, flits, sgl_flits, wr_hi, wr_lo);
1466 	check_ring_tx_db(pi->adapter, txq);
1467 
1468 	if ((m0->m_type == MT_DATA) &&
1469 	    ((m0->m_flags & (M_EXT|M_NOFREE)) == M_EXT) &&
1470 	    (m0->m_ext.ext_type != EXT_PACKET)) {
1471 		m0->m_flags &= ~M_EXT;
1472 		cxgb_mbufs_outstanding--;
1473 		m_free(m0);
1474 	}
1475 
1476 	return (0);
1477 }
1478 
1479 
1480 /**
1481  *	write_imm - write a packet into a Tx descriptor as immediate data
1482  *	@d: the Tx descriptor to write
1483  *	@m: the packet
1484  *	@len: the length of packet data to write as immediate data
1485  *	@gen: the generation bit value to write
1486  *
1487  *	Writes a packet as immediate data into a Tx descriptor.  The packet
1488  *	contains a work request at its beginning.  We must write the packet
1489  *	carefully so the SGE doesn't accidentally read it before it's written in
1490  *	its entirety.
1491  */
1492 static __inline void
1493 write_imm(struct tx_desc *d, struct mbuf *m,
1494 	  unsigned int len, unsigned int gen)
1495 {
1496 	struct work_request_hdr *from = mtod(m, struct work_request_hdr *);
1497 	struct work_request_hdr *to = (struct work_request_hdr *)d;
1498 
1499 	if (len > WR_LEN)
1500 		panic("len too big %d\n", len);
1501 	if (len < sizeof(*from))
1502 		panic("len too small %d", len);
1503 
1504 	memcpy(&to[1], &from[1], len - sizeof(*from));
1505 	to->wr_hi = from->wr_hi | htonl(F_WR_SOP | F_WR_EOP |
1506 					V_WR_BCNTLFLT(len & 7));
1507 	wmb();
1508 	to->wr_lo = from->wr_lo | htonl(V_WR_GEN(gen) |
1509 					V_WR_LEN((len + 7) / 8));
1510 	wr_gen2(d, gen);
1511 
1512 	/*
1513 	 * This check is a hack; we should really fix the logic so
1514 	 * that this can't happen.
1515 	 */
1516 	if (m->m_type != MT_DONTFREE)
1517 		m_freem(m);
1518 
1519 }
1520 
1521 /**
1522  *	check_desc_avail - check descriptor availability on a send queue
1523  *	@adap: the adapter
1524  *	@q: the TX queue
1525  *	@m: the packet needing the descriptors
1526  *	@ndesc: the number of Tx descriptors needed
1527  *	@qid: the Tx queue number in its queue set (TXQ_OFLD or TXQ_CTRL)
1528  *
1529  *	Checks if the requested number of Tx descriptors is available on an
1530  *	SGE send queue.  If the queue is already suspended or not enough
1531  *	descriptors are available the packet is queued for later transmission.
1532  *	Must be called with the Tx queue locked.
1533  *
1534  *	Returns 0 if enough descriptors are available, 1 if there aren't
1535  *	enough descriptors and the packet has been queued, and 2 if the caller
1536  *	needs to retry because there weren't enough descriptors at the
1537  *	beginning of the call but some freed up in the mean time.
1538  */
1539 static __inline int
1540 check_desc_avail(adapter_t *adap, struct sge_txq *q,
1541 		 struct mbuf *m, unsigned int ndesc,
1542 		 unsigned int qid)
1543 {
1544 	/*
1545 	 * XXX We currently only use this for checking the control queue.
1546 	 * The control queue is only used for binding qsets, which happens
1547 	 * at init time, so we are guaranteed enough descriptors.
1548 	 */
1549 	if (__predict_false(!mbufq_empty(&q->sendq))) {
1550 addq_exit:	mbufq_tail(&q->sendq, m);
1551 		return 1;
1552 	}
1553 	if (__predict_false(q->size - q->in_use < ndesc)) {
1554 
1555 		struct sge_qset *qs = txq_to_qset(q, qid);
1556 
1557 		printf("stopping q\n");
1558 
1559 		setbit(&qs->txq_stopped, qid);
1560 		smp_mb();
1561 
1562 		if (should_restart_tx(q) &&
1563 		    test_and_clear_bit(qid, &qs->txq_stopped))
1564 			return 2;
1565 
1566 		q->stops++;
1567 		goto addq_exit;
1568 	}
1569 	return 0;
1570 }
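/*
 * Typical caller pattern (see ctrl_xmit() below): reclaim completed
 * descriptors, call check_desc_avail(), and on a return value of 2 loop back
 * and reclaim again; a return of 1 means the packet has already been queued
 * on q->sendq for later transmission.
 */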
1571 
1572 
1573 /**
1574  *	reclaim_completed_tx_imm - reclaim completed control-queue Tx descs
1575  *	@q: the SGE control Tx queue
1576  *
1577  *	This is a variant of reclaim_completed_tx() that is used for Tx queues
1578  *	that send only immediate data (presently just the control queues) and
1579  *	thus do not have any mbufs.
1580  */
1581 static __inline void
1582 reclaim_completed_tx_imm(struct sge_txq *q)
1583 {
1584 	unsigned int reclaim = q->processed - q->cleaned;
1585 
1586 	mtx_assert(&q->lock, MA_OWNED);
1587 
1588 	q->in_use -= reclaim;
1589 	q->cleaned += reclaim;
1590 }
1591 
1592 static __inline int
1593 immediate(const struct mbuf *m)
1594 {
1595 	return m->m_len <= WR_LEN && m->m_pkthdr.len <= WR_LEN;
1596 }
1597 
1598 /**
1599  *	ctrl_xmit - send a packet through an SGE control Tx queue
1600  *	@adap: the adapter
1601  *	@q: the control queue
1602  *	@m: the packet
1603  *
1604  *	Send a packet through an SGE control Tx queue.  Packets sent through
1605  *	a control queue must fit entirely as immediate data in a single Tx
1606  *	descriptor and have no page fragments.
1607  */
1608 static int
1609 ctrl_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
1610 {
1611 	int ret;
1612 	struct work_request_hdr *wrp = mtod(m, struct work_request_hdr *);
1613 
1614 	if (__predict_false(!immediate(m))) {
1615 		m_freem(m);
1616 		return 0;
1617 	}
1618 
1619 	wrp->wr_hi |= htonl(F_WR_SOP | F_WR_EOP);
1620 	wrp->wr_lo = htonl(V_WR_TID(q->token));
1621 
1622 	mtx_lock(&q->lock);
1623 again:	reclaim_completed_tx_imm(q);
1624 
1625 	ret = check_desc_avail(adap, q, m, 1, TXQ_CTRL);
1626 	if (__predict_false(ret)) {
1627 		if (ret == 1) {
1628 			mtx_unlock(&q->lock);
1629 			log(LOG_ERR, "no desc available\n");
1630 			return (ENOSPC);
1631 		}
1632 		goto again;
1633 	}
1634 	write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1635 
1636 	q->in_use++;
1637 	if (++q->pidx >= q->size) {
1638 		q->pidx = 0;
1639 		q->gen ^= 1;
1640 	}
1641 	mtx_unlock(&q->lock);
1642 	wmb();
1643 	t3_write_reg(adap, A_SG_KDOORBELL,
1644 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1645 	return (0);
1646 }
1647 
1648 
1649 /**
1650  *	restart_ctrlq - restart a suspended control queue
1651  *	@qs: the queue set containing the control queue
1652  *
1653  *	Resumes transmission on a suspended Tx control queue.
1654  */
1655 static void
1656 restart_ctrlq(void *data, int npending)
1657 {
1658 	struct mbuf *m;
1659 	struct sge_qset *qs = (struct sge_qset *)data;
1660 	struct sge_txq *q = &qs->txq[TXQ_CTRL];
1661 	adapter_t *adap = qs->port->adapter;
1662 
1663 	log(LOG_WARNING, "Restart_ctrlq in_use=%d\n", q->in_use);
1664 
1665 	mtx_lock(&q->lock);
1666 again:	reclaim_completed_tx_imm(q);
1667 
1668 	while (q->in_use < q->size &&
1669 	       (m = mbufq_dequeue(&q->sendq)) != NULL) {
1670 
1671 		write_imm(&q->desc[q->pidx], m, m->m_len, q->gen);
1672 
1673 		if (++q->pidx >= q->size) {
1674 			q->pidx = 0;
1675 			q->gen ^= 1;
1676 		}
1677 		q->in_use++;
1678 	}
1679 	if (!mbufq_empty(&q->sendq)) {
1680 		setbit(&qs->txq_stopped, TXQ_CTRL);
1681 		smp_mb();
1682 
1683 		if (should_restart_tx(q) &&
1684 		    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped))
1685 			goto again;
1686 		q->stops++;
1687 	}
1688 	mtx_unlock(&q->lock);
1689 	wmb();
1690 	t3_write_reg(adap, A_SG_KDOORBELL,
1691 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
1692 }
1693 
1694 
1695 /*
1696  * Send a management message through control queue 0
1697  */
1698 int
1699 t3_mgmt_tx(struct adapter *adap, struct mbuf *m)
1700 {
1701 	return ctrl_xmit(adap, &adap->sge.qs[0].txq[TXQ_CTRL], m);
1702 }
1703 
1704 
1705 /**
1706  *	free_qset - free the resources of an SGE queue set
1707  *	@sc: the controller owning the queue set
1708  *	@q: the queue set
1709  *
1710  *	Release the HW and SW resources associated with an SGE queue set, such
1711  *	as HW contexts, packet buffers, and descriptor rings.  Traffic to the
1712  *	queue set must be quiesced prior to calling this.
1713  */
1714 void
1715 t3_free_qset(adapter_t *sc, struct sge_qset *q)
1716 {
1717 	int i;
1718 
1719 	t3_free_tx_desc_all(&q->txq[TXQ_ETH]);
1720 
1721 	for (i = 0; i < SGE_TXQ_PER_SET; i++)
1722 		if (q->txq[i].txq_mr.br_ring != NULL) {
1723 			free(q->txq[i].txq_mr.br_ring, M_DEVBUF);
1724 			mtx_destroy(&q->txq[i].txq_mr.br_lock);
1725 		}
1726 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
1727 		if (q->fl[i].desc) {
1728 			mtx_lock_spin(&sc->sge.reg_lock);
1729 			t3_sge_disable_fl(sc, q->fl[i].cntxt_id);
1730 			mtx_unlock_spin(&sc->sge.reg_lock);
1731 			bus_dmamap_unload(q->fl[i].desc_tag, q->fl[i].desc_map);
1732 			bus_dmamem_free(q->fl[i].desc_tag, q->fl[i].desc,
1733 					q->fl[i].desc_map);
1734 			bus_dma_tag_destroy(q->fl[i].desc_tag);
1735 			bus_dma_tag_destroy(q->fl[i].entry_tag);
1736 		}
1737 		if (q->fl[i].sdesc) {
1738 			free_rx_bufs(sc, &q->fl[i]);
1739 			free(q->fl[i].sdesc, M_DEVBUF);
1740 		}
1741 	}
1742 
1743 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
1744 		if (q->txq[i].desc) {
1745 			mtx_lock_spin(&sc->sge.reg_lock);
1746 			t3_sge_enable_ecntxt(sc, q->txq[i].cntxt_id, 0);
1747 			mtx_unlock_spin(&sc->sge.reg_lock);
1748 			bus_dmamap_unload(q->txq[i].desc_tag,
1749 					q->txq[i].desc_map);
1750 			bus_dmamem_free(q->txq[i].desc_tag, q->txq[i].desc,
1751 					q->txq[i].desc_map);
1752 			bus_dma_tag_destroy(q->txq[i].desc_tag);
1753 			bus_dma_tag_destroy(q->txq[i].entry_tag);
1754 			MTX_DESTROY(&q->txq[i].lock);
1755 		}
1756 		if (q->txq[i].sdesc) {
1757 			free(q->txq[i].sdesc, M_DEVBUF);
1758 		}
1759 	}
1760 
1761 	if (q->rspq.desc) {
1762 		mtx_lock_spin(&sc->sge.reg_lock);
1763 		t3_sge_disable_rspcntxt(sc, q->rspq.cntxt_id);
1764 		mtx_unlock_spin(&sc->sge.reg_lock);
1765 
1766 		bus_dmamap_unload(q->rspq.desc_tag, q->rspq.desc_map);
1767 		bus_dmamem_free(q->rspq.desc_tag, q->rspq.desc,
1768 			        q->rspq.desc_map);
1769 		bus_dma_tag_destroy(q->rspq.desc_tag);
1770 		MTX_DESTROY(&q->rspq.lock);
1771 	}
1772 
1773 #ifdef LRO_SUPPORTED
1774 	tcp_lro_free(&q->lro.ctrl);
1775 #endif
1776 
1777 	bzero(q, sizeof(*q));
1778 }
1779 
1780 /**
1781  *	t3_free_sge_resources - free SGE resources
1782  *	@sc: the adapter softc
1783  *
1784  *	Frees resources used by the SGE queue sets.
1785  */
1786 void
1787 t3_free_sge_resources(adapter_t *sc)
1788 {
1789 	int i, nqsets;
1790 
1791 #ifdef IFNET_MULTIQUEUE
1792 	panic("%s should not be called when IFNET_MULTIQUEUE is defined", __FUNCTION__);
1793 #endif
1794 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1795 		nqsets += sc->port[i].nqsets;
1796 
1797 	for (i = 0; i < nqsets; ++i)
1798 		t3_free_qset(sc, &sc->sge.qs[i]);
1799 }
1800 
1801 /**
1802  *	t3_sge_start - enable SGE
1803  *	@sc: the controller softc
1804  *
1805  *	Enables the SGE for DMAs.  This is the last step in starting packet
1806  *	transfers.
1807  */
1808 void
1809 t3_sge_start(adapter_t *sc)
1810 {
1811 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, F_GLOBALENABLE);
1812 }
1813 
1814 /**
1815  *	t3_sge_stop - disable SGE operation
1816  *	@sc: the adapter
1817  *
1818  *	Disables the DMA engine.  This can be called in emergencies (e.g.,
1819  *	from error interrupts) or from normal process context.  In the latter
1820  *	case it also disables any pending queue restart tasklets.  Note that
1821  *	if it is called in interrupt context it cannot disable the restart
1822  *	tasklets as it cannot wait, however the tasklets will have no effect
1823  *	since the doorbells are disabled and the driver will call this again
1824  *	later from process context, at which time the tasklets will be stopped
1825  *	if they are still running.
1826  */
1827 void
1828 t3_sge_stop(adapter_t *sc)
1829 {
1830 	int i, nqsets;
1831 
1832 	t3_set_reg_field(sc, A_SG_CONTROL, F_GLOBALENABLE, 0);
1833 
1834 	if (sc->tq == NULL)
1835 		return;
1836 
1837 	for (nqsets = i = 0; i < (sc)->params.nports; i++)
1838 		nqsets += sc->port[i].nqsets;
1839 #ifdef notyet
1840 	/*
1841 	 *
1842 	 * XXX
1843 	 */
1844 	for (i = 0; i < nqsets; ++i) {
1845 		struct sge_qset *qs = &sc->sge.qs[i];
1846 
1847 		taskqueue_drain(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
1848 		taskqueue_drain(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
1849 	}
1850 #endif
1851 }
1852 
1853 /**
1854  *	t3_free_tx_desc - reclaims Tx descriptors and their buffers
1855  *	@q: the Tx queue to reclaim descriptors from
1856  *	@reclaimable: the number of descriptors to reclaim
1857  *
1858  *	Reclaims Tx descriptors from an SGE Tx queue and frees the associated
1859  *	Tx buffers.  Called with the Tx queue lock held.
1860  *
1861  *	Descriptors that have no mbuf attached are counted in txq_skipped;
1862  *	the queue's consumer index is advanced past every descriptor that
1863  *	is examined.
1864  */
1866 void
1867 t3_free_tx_desc(struct sge_txq *q, int reclaimable)
1868 {
1869 	struct tx_sw_desc *txsd;
1870 	unsigned int cidx;
1871 
1872 	cidx = q->cidx;
1873 	txsd = &q->sdesc[cidx];
1874 #ifdef T3_TRACE
1875 	T3_TRACE2(sc->tb[q->cntxt_id & 7],
1876 		  "reclaiming %u Tx descriptors at cidx %u", reclaimable, cidx);
1877 #endif
1878 	DPRINTF("reclaiming %d WR\n", reclaimable);
1879 	mtx_assert(&q->lock, MA_OWNED);
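	/*
	 * Walk the software descriptor ring from the consumer index,
	 * unloading the DMA map and freeing the mbuf iovec attached to
	 * each reclaimed descriptor.
	 */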
1880 	while (reclaimable--) {
1881 		DPRINTF("cidx=%d d=%p\n", cidx, txsd);
1882 		if (txsd->mi.mi_base != NULL) {
1883 			if (txsd->flags & TX_SW_DESC_MAPPED) {
1884 				bus_dmamap_unload(q->entry_tag, txsd->map);
1885 				txsd->flags &= ~TX_SW_DESC_MAPPED;
1886 			}
1887 			m_freem_iovec(&txsd->mi);
1888 			buf_ring_scan(&q->txq_mr, txsd->mi.mi_base, __FILE__, __LINE__);
1889 			txsd->mi.mi_base = NULL;
1890 			/*
1891 			 * XXX check for cache hit rate here
1892 			 *
1893 			 */
1894 			q->port->ifp->if_opackets++;
1895 #if defined(DIAGNOSTIC) && 0
1896 			if (m_get_priority(txsd->m[0]) != cidx)
1897 				printf("pri=%d cidx=%d\n",
1898 				    (int)m_get_priority(txsd->m[0]), cidx);
1899 #endif
1900 
1901 		} else
1902 			q->txq_skipped++;
1903 
1904 		++txsd;
1905 		if (++cidx == q->size) {
1906 			cidx = 0;
1907 			txsd = q->sdesc;
1908 		}
1909 	}
1910 	q->cidx = cidx;
1911 
1912 }
1913 
1914 void
1915 t3_free_tx_desc_all(struct sge_txq *q)
1916 {
1917 	int i;
1918 	struct tx_sw_desc *txsd;
1919 
1920 	for (i = 0; i < q->size; i++) {
1921 		txsd = &q->sdesc[i];
1922 		if (txsd->mi.mi_base != NULL) {
1923 			if (txsd->flags & TX_SW_DESC_MAPPED) {
1924 				bus_dmamap_unload(q->entry_tag, txsd->map);
1925 				txsd->flags &= ~TX_SW_DESC_MAPPED;
1926 			}
1927 			m_freem_iovec(&txsd->mi);
1928 			bzero(&txsd->mi, sizeof(txsd->mi));
1929 		}
1930 	}
1931 }
1932 
1933 /**
1934  *	is_new_response - check if a response is newly written
1935  *	@r: the response descriptor
1936  *	@q: the response queue
1937  *
1938  *	Returns true if a response descriptor contains a yet unprocessed
1939  *	response.
1940  */
1941 static __inline int
1942 is_new_response(const struct rsp_desc *r,
1943     const struct sge_rspq *q)
1944 {
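	/*
	 * The generation bit in each response toggles every time the ring
	 * wraps; a descriptor is unprocessed only while its generation
	 * matches the generation the driver expects (q->gen).
	 */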
1945 	return (r->intr_gen & F_RSPD_GEN2) == q->gen;
1946 }
1947 
1948 #define RSPD_GTS_MASK  (F_RSPD_TXQ0_GTS | F_RSPD_TXQ1_GTS)
1949 #define RSPD_CTRL_MASK (RSPD_GTS_MASK | \
1950 			V_RSPD_TXQ0_CR(M_RSPD_TXQ0_CR) | \
1951 			V_RSPD_TXQ1_CR(M_RSPD_TXQ1_CR) | \
1952 			V_RSPD_TXQ2_CR(M_RSPD_TXQ2_CR))
1953 
1954 /* How long to delay the next interrupt in case of memory shortage, in 0.1us. */
1955 #define NOMEM_INTR_DELAY 2500
1956 
1957 /**
1958  *	write_ofld_wr - write an offload work request
1959  *	@adap: the adapter
1960  *	@m: the packet to send
1961  *	@q: the Tx queue
1962  *	@pidx: index of the first Tx descriptor to write
1963  *	@gen: the generation value to use
1964  *	@ndesc: number of descriptors the packet will occupy
1965  *
1966  *	Write an offload work request to send the supplied packet.  The packet
1967  *	data already carry the work request with most fields populated.
1968  */
1969 static void
1970 write_ofld_wr(adapter_t *adap, struct mbuf *m,
1971     struct sge_txq *q, unsigned int pidx,
1972     unsigned int gen, unsigned int ndesc,
1973     bus_dma_segment_t *segs, unsigned int nsegs)
1974 {
1975 	unsigned int sgl_flits, flits;
1976 	struct work_request_hdr *from;
1977 	struct sg_ent *sgp, sgl[TX_MAX_SEGS / 2 + 1];
1978 	struct tx_desc *d = &q->desc[pidx];
1979 	struct txq_state txqs;
1980 
1981 	if (immediate(m) && nsegs == 0) {
1982 		write_imm(d, m, m->m_len, gen);
1983 		return;
1984 	}
1985 
1986 	/* Only TX_DATA builds SGLs */
1987 	from = mtod(m, struct work_request_hdr *);
1988 	memcpy(&d->flit[1], &from[1], m->m_len - sizeof(*from));
1989 
1990 	flits = m->m_len / 8;
1991 	sgp = (ndesc == 1) ? (struct sg_ent *)&d->flit[flits] : sgl;
1992 
1993 	make_sgl(sgp, segs, nsegs);
1994 	sgl_flits = sgl_len(nsegs);
1995 
1996 	txqs.gen = gen;
1997 	txqs.pidx = pidx;
1998 	txqs.compl = 0;
1999 
2000 	write_wr_hdr_sgl(ndesc, d, &txqs, q, sgl, flits, sgl_flits,
2001 	    from->wr_hi, from->wr_lo);
2002 }
2003 
2004 /**
2005  *	calc_tx_descs_ofld - calculate # of Tx descriptors for an offload packet
2006  *	@m: the packet
2007  *
2008  * 	Returns the number of Tx descriptors needed for the given offload
2009  * 	packet.  These packets are already fully constructed.
2010  */
2011 static __inline unsigned int
2012 calc_tx_descs_ofld(struct mbuf *m, unsigned int nsegs)
2013 {
2014 	unsigned int flits, cnt = 0;
2015 	int ndescs;
2016 
2017 	if (m->m_len <= WR_LEN && nsegs == 0)
2018 		return (1);                 /* packet fits as immediate data */
2019 
2020 	if (m->m_flags & M_IOVEC)
2021 		cnt = mtomv(m)->mv_count;
2022 	else
2023 		cnt = nsegs;
2024 
2025 	/* headers */
2026 	flits = m->m_len / 8;
2027 
2028 	ndescs = flits_to_desc(flits + sgl_len(cnt));
2029 
2030 	return (ndescs);
2031 }
2032 
2033 /**
2034  *	ofld_xmit - send a packet through an offload queue
2035  *	@adap: the adapter
2036  *	@q: the Tx offload queue
2037  *	@m: the packet
2038  *
2039  *	Send an offload packet through an SGE offload queue.
2040  */
2041 static int
2042 ofld_xmit(adapter_t *adap, struct sge_txq *q, struct mbuf *m)
2043 {
2044 	int ret, nsegs;
2045 	unsigned int ndesc;
2046 	unsigned int pidx, gen;
2047 	bus_dma_segment_t segs[TX_MAX_SEGS], *vsegs;
2048 	struct tx_sw_desc *stx;
2049 
2050 	nsegs = m_get_sgllen(m);
2051 	vsegs = m_get_sgl(m);
2052 	ndesc = calc_tx_descs_ofld(m, nsegs);
2053 	busdma_map_sgl(vsegs, segs, nsegs);
2054 
2055 	stx = &q->sdesc[q->pidx];
2056 	KASSERT(stx->mi.mi_base == NULL, ("mi_base set"));
2057 
2058 	mtx_lock(&q->lock);
2059 again:	reclaim_completed_tx_(q, 16);
2060 	ret = check_desc_avail(adap, q, m, ndesc, TXQ_OFLD);
2061 	if (__predict_false(ret)) {
2062 		if (ret == 1) {
2063 			printf("no ofld desc avail\n");
2064 
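			/*
			 * check_desc_avail() has queued the mbuf on
			 * q->sendq; stash ndesc in the priority field so
			 * restart_offloadq() can reuse it when the queue
			 * drains.
			 */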
2065 			m_set_priority(m, ndesc);     /* save for restart */
2066 			mtx_unlock(&q->lock);
2067 			return (EINTR);
2068 		}
2069 		goto again;
2070 	}
2071 
2072 	gen = q->gen;
2073 	q->in_use += ndesc;
2074 	pidx = q->pidx;
2075 	q->pidx += ndesc;
2076 	if (q->pidx >= q->size) {
2077 		q->pidx -= q->size;
2078 		q->gen ^= 1;
2079 	}
2080 #ifdef T3_TRACE
2081 	T3_TRACE5(adap->tb[q->cntxt_id & 7],
2082 		  "ofld_xmit: ndesc %u, pidx %u, len %u, nsegs %u, flags %x",
2083 		  ndesc, pidx, m->m_len, nsegs,
2084 		  m->m_flags);
2085 #endif
2086 	mtx_unlock(&q->lock);
2087 
2088 	write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2089 	check_ring_tx_db(adap, q);
2090 	return (0);
2091 }
2092 
2093 /**
2094  *	restart_offloadq - restart a suspended offload queue
2095  *	@qs: the queue set containing the offload queue
2096  *
2097  *	Resumes transmission on a suspended Tx offload queue.
2098  */
2099 static void
2100 restart_offloadq(void *data, int npending)
2101 {
2102 	struct mbuf *m;
2103 	struct sge_qset *qs = data;
2104 	struct sge_txq *q = &qs->txq[TXQ_OFLD];
2105 	adapter_t *adap = qs->port->adapter;
2106 	bus_dma_segment_t segs[TX_MAX_SEGS];
2107 	struct tx_sw_desc *stx = &q->sdesc[q->pidx];
2108 	int nsegs, cleaned;
2109 
2110 	mtx_lock(&q->lock);
2111 again:	cleaned = reclaim_completed_tx_(q, 16);
2112 
2113 	while ((m = mbufq_peek(&q->sendq)) != NULL) {
2114 		unsigned int gen, pidx;
2115 		unsigned int ndesc = m_get_priority(m);
2116 
2117 		if (__predict_false(q->size - q->in_use < ndesc)) {
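			/*
			 * Mark the queue stopped before re-checking for
			 * room; if descriptors have been freed in the
			 * meantime, clear the stopped bit and retry so the
			 * wakeup is not lost.
			 */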
2118 			setbit(&qs->txq_stopped, TXQ_OFLD);
2119 			smp_mb();
2120 
2121 			if (should_restart_tx(q) &&
2122 			    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped))
2123 				goto again;
2124 			q->stops++;
2125 			break;
2126 		}
2127 
2128 		gen = q->gen;
2129 		q->in_use += ndesc;
2130 		pidx = q->pidx;
2131 		q->pidx += ndesc;
2132 		if (q->pidx >= q->size) {
2133 			q->pidx -= q->size;
2134 			q->gen ^= 1;
2135 		}
2136 
2137 		(void)mbufq_dequeue(&q->sendq);
2138 		busdma_map_mbufs(&m, q, stx, segs, &nsegs);
2139 		mtx_unlock(&q->lock);
2140 		write_ofld_wr(adap, m, q, pidx, gen, ndesc, segs, nsegs);
2141 		mtx_lock(&q->lock);
2142 	}
2143 	mtx_unlock(&q->lock);
2144 
2145 #if USE_GTS
2146 	set_bit(TXQ_RUNNING, &q->flags);
2147 	set_bit(TXQ_LAST_PKT_DB, &q->flags);
2148 #endif
2149 	wmb();
2150 	t3_write_reg(adap, A_SG_KDOORBELL,
2151 		     F_SELEGRCNTX | V_EGRCNTX(q->cntxt_id));
2152 }
2153 
2154 /**
2155  *	queue_set - return the queue set a packet should use
2156  *	@m: the packet
2157  *
2158  *	Maps a packet to the SGE queue set it should use.  The desired queue
2159  *	set is carried in bits 1-3 in the packet's priority.
2160  */
2161 static __inline int
2162 queue_set(const struct mbuf *m)
2163 {
2164 	return m_get_priority(m) >> 1;
2165 }
2166 
2167 /**
2168  *	is_ctrl_pkt - return whether an offload packet is a control packet
2169  *	@m: the packet
2170  *
2171  *	Determines whether an offload packet should use an OFLD or a CTRL
2172  *	Tx queue.  This is indicated by bit 0 in the packet's priority.
2173  */
2174 static __inline int
2175 is_ctrl_pkt(const struct mbuf *m)
2176 {
2177 	return m_get_priority(m) & 1;
2178 }
2179 
2180 /**
2181  *	t3_offload_tx - send an offload packet
2182  *	@tdev: the offload device to send to
2183  *	@m: the packet
2184  *
2185  *	Sends an offload packet.  We use the packet priority to select the
2186  *	appropriate Tx queue as follows: bit 0 indicates whether the packet
2187  *	should be sent as regular or control, bits 1-3 select the queue set.
2188  */
2189 int
2190 t3_offload_tx(struct t3cdev *tdev, struct mbuf *m)
2191 {
2192 	adapter_t *adap = tdev2adap(tdev);
2193 	struct sge_qset *qs = &adap->sge.qs[queue_set(m)];
2194 
2195 	if (__predict_false(is_ctrl_pkt(m)))
2196 		return ctrl_xmit(adap, &qs->txq[TXQ_CTRL], m);
2197 
2198 	return ofld_xmit(adap, &qs->txq[TXQ_OFLD], m);
2199 }
2200 
2201 /**
2202  *	deliver_partial_bundle - deliver a (partial) bundle of Rx offload pkts
2203  *	@tdev: the offload device that will be receiving the packets
2204  *	@q: the SGE response queue that assembled the bundle
2205  *	@m: the partial bundle
2206  *	@n: the number of packets in the bundle
2207  *
2208  *	Delivers a (partial) bundle of Rx offload packets to an offload device.
2209  */
2210 static __inline void
2211 deliver_partial_bundle(struct t3cdev *tdev,
2212 			struct sge_rspq *q,
2213 			struct mbuf *mbufs[], int n)
2214 {
2215 	if (n) {
2216 		q->offload_bundles++;
2217 		cxgb_ofld_recv(tdev, mbufs, n);
2218 	}
2219 }
2220 
2221 static __inline int
2222 rx_offload(struct t3cdev *tdev, struct sge_rspq *rq,
2223     struct mbuf *m, struct mbuf *rx_gather[],
2224     unsigned int gather_idx)
2225 {
2226 
2227 	rq->offload_pkts++;
2228 	m->m_pkthdr.header = mtod(m, void *);
2229 	rx_gather[gather_idx++] = m;
2230 	if (gather_idx == RX_BUNDLE_SIZE) {
2231 		cxgb_ofld_recv(tdev, rx_gather, RX_BUNDLE_SIZE);
2232 		gather_idx = 0;
2233 		rq->offload_bundles++;
2234 	}
2235 	return (gather_idx);
2236 }
2237 
2238 static void
2239 restart_tx(struct sge_qset *qs)
2240 {
2241 	struct adapter *sc = qs->port->adapter;
2242 
2243 
2244 	if (isset(&qs->txq_stopped, TXQ_OFLD) &&
2245 	    should_restart_tx(&qs->txq[TXQ_OFLD]) &&
2246 	    test_and_clear_bit(TXQ_OFLD, &qs->txq_stopped)) {
2247 		qs->txq[TXQ_OFLD].restarts++;
2248 		DPRINTF("restarting TXQ_OFLD\n");
2249 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_OFLD].qresume_task);
2250 	}
2251 	DPRINTF("stopped=0x%x restart=%d processed=%d cleaned=%d in_use=%d\n",
2252 	    qs->txq_stopped, should_restart_tx(&qs->txq[TXQ_CTRL]),
2253 	    qs->txq[TXQ_CTRL].processed, qs->txq[TXQ_CTRL].cleaned,
2254 	    qs->txq[TXQ_CTRL].in_use);
2255 
2256 	if (isset(&qs->txq_stopped, TXQ_CTRL) &&
2257 	    should_restart_tx(&qs->txq[TXQ_CTRL]) &&
2258 	    test_and_clear_bit(TXQ_CTRL, &qs->txq_stopped)) {
2259 		qs->txq[TXQ_CTRL].restarts++;
2260 		DPRINTF("restarting TXQ_CTRL\n");
2261 		taskqueue_enqueue(sc->tq, &qs->txq[TXQ_CTRL].qresume_task);
2262 	}
2263 }
2264 
2265 /**
2266  *	t3_sge_alloc_qset - initialize an SGE queue set
2267  *	@sc: the controller softc
2268  *	@id: the queue set id
2269  *	@nports: how many Ethernet ports will be using this queue set
2270  *	@irq_vec_idx: the IRQ vector index for response queue interrupts
2271  *	@p: configuration parameters for this queue set
2272  *	@ntxq: number of Tx queues for the queue set
2273  *	@pi: port info for queue set
2274  *
2275  *	Allocate resources and initialize an SGE queue set.  A queue set
2276  *	comprises a response queue, two Rx free-buffer queues, and up to 3
2277  *	Tx queues.  The Tx queues are assigned roles in the order Ethernet
2278  *	queue, offload queue, and control queue.
2279  */
2280 int
2281 t3_sge_alloc_qset(adapter_t *sc, u_int id, int nports, int irq_vec_idx,
2282 		  const struct qset_params *p, int ntxq, struct port_info *pi)
2283 {
2284 	struct sge_qset *q = &sc->sge.qs[id];
2285 	int i, header_size, ret = 0;
2286 
2287 	for (i = 0; i < SGE_TXQ_PER_SET; i++) {
2288 		if ((q->txq[i].txq_mr.br_ring = malloc(cxgb_txq_buf_ring_size*sizeof(struct mbuf *),
2289 			    M_DEVBUF, M_WAITOK|M_ZERO)) == NULL) {
2290 			device_printf(sc->dev, "failed to allocate mbuf ring\n");
2291 			goto err;
2292 		}
2293 		q->txq[i].txq_mr.br_prod = q->txq[i].txq_mr.br_cons = 0;
2294 		q->txq[i].txq_mr.br_size = cxgb_txq_buf_ring_size;
2295 		mtx_init(&q->txq[i].txq_mr.br_lock, "txq mbuf ring", NULL, MTX_DEF);
2296 	}
2297 
2298 	init_qset_cntxt(q, id);
2299 	q->idx = id;
2300 
2301 	if ((ret = alloc_ring(sc, p->fl_size, sizeof(struct rx_desc),
2302 		    sizeof(struct rx_sw_desc), &q->fl[0].phys_addr,
2303 		    &q->fl[0].desc, &q->fl[0].sdesc,
2304 		    &q->fl[0].desc_tag, &q->fl[0].desc_map,
2305 		    sc->rx_dmat, &q->fl[0].entry_tag)) != 0) {
2306 		printf("error %d from alloc ring fl0\n", ret);
2307 		goto err;
2308 	}
2309 
2310 	if ((ret = alloc_ring(sc, p->jumbo_size, sizeof(struct rx_desc),
2311 		    sizeof(struct rx_sw_desc), &q->fl[1].phys_addr,
2312 		    &q->fl[1].desc, &q->fl[1].sdesc,
2313 		    &q->fl[1].desc_tag, &q->fl[1].desc_map,
2314 		    sc->rx_jumbo_dmat, &q->fl[1].entry_tag)) != 0) {
2315 		printf("error %d from alloc ring fl1\n", ret);
2316 		goto err;
2317 	}
2318 
2319 	if ((ret = alloc_ring(sc, p->rspq_size, sizeof(struct rsp_desc), 0,
2320 		    &q->rspq.phys_addr, &q->rspq.desc, NULL,
2321 		    &q->rspq.desc_tag, &q->rspq.desc_map,
2322 		    NULL, NULL)) != 0) {
2323 		printf("error %d from alloc ring rspq\n", ret);
2324 		goto err;
2325 	}
2326 
2327 	for (i = 0; i < ntxq; ++i) {
2328 		/*
2329 		 * The control queue always uses immediate data so does not
2330 		 * need to keep track of any mbufs.
2331 		 * XXX Placeholder for future TOE support.
2332 		 */
2333 		size_t sz = i == TXQ_CTRL ? 0 : sizeof(struct tx_sw_desc);
2334 
2335 		if ((ret = alloc_ring(sc, p->txq_size[i],
2336 			    sizeof(struct tx_desc), sz,
2337 			    &q->txq[i].phys_addr, &q->txq[i].desc,
2338 			    &q->txq[i].sdesc, &q->txq[i].desc_tag,
2339 			    &q->txq[i].desc_map,
2340 			    sc->tx_dmat, &q->txq[i].entry_tag)) != 0) {
2341 			printf("error %d from alloc ring tx %i\n", ret, i);
2342 			goto err;
2343 		}
2344 		mbufq_init(&q->txq[i].sendq);
2345 		q->txq[i].gen = 1;
2346 		q->txq[i].size = p->txq_size[i];
2347 		snprintf(q->txq[i].lockbuf, TXQ_NAME_LEN, "t3 txq lock %d:%d:%d",
2348 		    device_get_unit(sc->dev), irq_vec_idx, i);
2349 		MTX_INIT(&q->txq[i].lock, q->txq[i].lockbuf, NULL, MTX_DEF);
2350 	}
2351 
2352 	q->txq[TXQ_ETH].port = pi;
2353 
2354 	TASK_INIT(&q->txq[TXQ_OFLD].qresume_task, 0, restart_offloadq, q);
2355 	TASK_INIT(&q->txq[TXQ_CTRL].qresume_task, 0, restart_ctrlq, q);
2356 	TASK_INIT(&q->txq[TXQ_ETH].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_ETH]);
2357 	TASK_INIT(&q->txq[TXQ_OFLD].qreclaim_task, 0, sge_txq_reclaim_handler, &q->txq[TXQ_OFLD]);
2358 
2359 	q->fl[0].gen = q->fl[1].gen = 1;
2360 	q->fl[0].size = p->fl_size;
2361 	q->fl[1].size = p->jumbo_size;
2362 
2363 	q->rspq.gen = 1;
2364 	q->rspq.cidx = 0;
2365 	q->rspq.size = p->rspq_size;
2366 
2367 
2368 	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) + sizeof(struct m_ext_) + sizeof(uint32_t);
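	/*
	 * Stop the Ethernet Tx queue while there may not be enough
	 * descriptors left for a maximally fragmented packet from each
	 * port sharing this queue set.
	 */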
2369 	q->txq[TXQ_ETH].stop_thres = nports *
2370 	    flits_to_desc(sgl_len(TX_MAX_SEGS + 1) + 3);
2371 
2372 	q->fl[0].buf_size = (MCLBYTES - header_size);
2373 	q->fl[0].zone = zone_clust;
2374 	q->fl[0].type = EXT_CLUSTER;
2375 #if __FreeBSD_version > 800000
2376 	if (cxgb_use_16k_clusters) {
2377 		q->fl[1].buf_size = MJUM16BYTES - header_size;
2378 		q->fl[1].zone = zone_jumbo16;
2379 		q->fl[1].type = EXT_JUMBO16;
2380 	} else {
2381 		q->fl[1].buf_size = MJUM9BYTES - header_size;
2382 		q->fl[1].zone = zone_jumbo9;
2383 		q->fl[1].type = EXT_JUMBO9;
2384 	}
2385 #else
2386 	q->fl[1].buf_size = MJUMPAGESIZE - header_size;
2387 	q->fl[1].zone = zone_jumbop;
2388 	q->fl[1].type = EXT_JUMBOP;
2389 #endif
2390 
2391 #ifdef LRO_SUPPORTED
2392 	/* Allocate and setup the lro_ctrl structure */
2393 	q->lro.enabled = !!(pi->ifp->if_capenable & IFCAP_LRO);
2394 	ret = tcp_lro_init(&q->lro.ctrl);
2395 	if (ret) {
2396 		printf("error %d from tcp_lro_init\n", ret);
2397 		goto err;
2398 	}
2399 	q->lro.ctrl.ifp = pi->ifp;
2400 #endif
2401 
2402 	mtx_lock_spin(&sc->sge.reg_lock);
2403 	ret = -t3_sge_init_rspcntxt(sc, q->rspq.cntxt_id, irq_vec_idx,
2404 				   q->rspq.phys_addr, q->rspq.size,
2405 				   q->fl[0].buf_size, 1, 0);
2406 	if (ret) {
2407 		printf("error %d from t3_sge_init_rspcntxt\n", ret);
2408 		goto err_unlock;
2409 	}
2410 
2411 	for (i = 0; i < SGE_RXQ_PER_SET; ++i) {
2412 		ret = -t3_sge_init_flcntxt(sc, q->fl[i].cntxt_id, 0,
2413 					  q->fl[i].phys_addr, q->fl[i].size,
2414 					  q->fl[i].buf_size, p->cong_thres, 1,
2415 					  0);
2416 		if (ret) {
2417 			printf("error %d from t3_sge_init_flcntxt for index i=%d\n", ret, i);
2418 			goto err_unlock;
2419 		}
2420 	}
2421 
2422 	ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_ETH].cntxt_id, USE_GTS,
2423 				 SGE_CNTXT_ETH, id, q->txq[TXQ_ETH].phys_addr,
2424 				 q->txq[TXQ_ETH].size, q->txq[TXQ_ETH].token,
2425 				 1, 0);
2426 	if (ret) {
2427 		printf("error %d from t3_sge_init_ecntxt\n", ret);
2428 		goto err_unlock;
2429 	}
2430 
2431 	if (ntxq > 1) {
2432 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_OFLD].cntxt_id,
2433 					 USE_GTS, SGE_CNTXT_OFLD, id,
2434 					 q->txq[TXQ_OFLD].phys_addr,
2435 					 q->txq[TXQ_OFLD].size, 0, 1, 0);
2436 		if (ret) {
2437 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2438 			goto err_unlock;
2439 		}
2440 	}
2441 
2442 	if (ntxq > 2) {
2443 		ret = -t3_sge_init_ecntxt(sc, q->txq[TXQ_CTRL].cntxt_id, 0,
2444 					 SGE_CNTXT_CTRL, id,
2445 					 q->txq[TXQ_CTRL].phys_addr,
2446 					 q->txq[TXQ_CTRL].size,
2447 					 q->txq[TXQ_CTRL].token, 1, 0);
2448 		if (ret) {
2449 			printf("error %d from t3_sge_init_ecntxt\n", ret);
2450 			goto err_unlock;
2451 		}
2452 	}
2453 
2454 	snprintf(q->rspq.lockbuf, RSPQ_NAME_LEN, "t3 rspq lock %d:%d",
2455 	    device_get_unit(sc->dev), irq_vec_idx);
2456 	MTX_INIT(&q->rspq.lock, q->rspq.lockbuf, NULL, MTX_DEF);
2457 
2458 	mtx_unlock_spin(&sc->sge.reg_lock);
2459 	t3_update_qset_coalesce(q, p);
2460 	q->port = pi;
2461 
2462 	refill_fl(sc, &q->fl[0], q->fl[0].size);
2463 	refill_fl(sc, &q->fl[1], q->fl[1].size);
2464 	refill_rspq(sc, &q->rspq, q->rspq.size - 1);
2465 
2466 	t3_write_reg(sc, A_SG_GTS, V_RSPQ(q->rspq.cntxt_id) |
2467 		     V_NEWTIMER(q->rspq.holdoff_tmr));
2468 
2469 	return (0);
2470 
2471 err_unlock:
2472 	mtx_unlock_spin(&sc->sge.reg_lock);
2473 err:
2474 	t3_free_qset(sc, q);
2475 
2476 	return (ret);
2477 }
2478 
2479 /*
2480  * Remove CPL_RX_PKT headers from the mbuf and reduce it to a regular mbuf with
2481  * ethernet data.  Hardware assistance with various checksums and any vlan tag
2482  * will also be taken into account here.
2483  */
2484 void
2485 t3_rx_eth(struct adapter *adap, struct sge_rspq *rq, struct mbuf *m, int ethpad)
2486 {
2487 	struct cpl_rx_pkt *cpl = (struct cpl_rx_pkt *)(mtod(m, uint8_t *) + ethpad);
2488 	struct port_info *pi = &adap->port[adap->rxpkt_map[cpl->iff]];
2489 	struct ifnet *ifp = pi->ifp;
2490 
2491 	DPRINTF("rx_eth m=%p m->m_data=%p p->iff=%d\n", m, mtod(m, uint8_t *), cpl->iff);
2492 
2493 	if ((ifp->if_capenable & IFCAP_RXCSUM) && !cpl->fragment &&
2494 	    cpl->csum_valid && cpl->csum == 0xffff) {
2495 		m->m_pkthdr.csum_flags = (CSUM_IP_CHECKED|CSUM_IP_VALID|
2496 		    CSUM_DATA_VALID|CSUM_PSEUDO_HDR);
2497 		m->m_pkthdr.csum_data = 0xffff;
2498 		rspq_to_qset(rq)->port_stats[SGE_PSTAT_RX_CSUM_GOOD]++;
2499 	}
2500 	/*
2501 	 * XXX need to add VLAN support for 6.x
2502 	 */
2503 #ifdef VLAN_SUPPORTED
2504 	if (__predict_false(cpl->vlan_valid)) {
2505 		m->m_pkthdr.ether_vtag = ntohs(cpl->vlan);
2506 		m->m_flags |= M_VLANTAG;
2507 	}
2508 #endif
2509 
2510 	m->m_pkthdr.rcvif = ifp;
2511 	m->m_pkthdr.header = mtod(m, uint8_t *) + sizeof(*cpl) + ethpad;
2512 	ifp->if_ipackets++;
2513 #ifndef DISABLE_MBUF_IOVEC
2514 	m_explode(m);
2515 #endif
2516 	/*
2517 	 * adjust after conversion to mbuf chain
2518 	 */
2519 	m->m_pkthdr.len -= (sizeof(*cpl) + ethpad);
2520 	m->m_len -= (sizeof(*cpl) + ethpad);
2521 	m->m_data += (sizeof(*cpl) + ethpad);
2522 }
2523 
2524 static void
2525 ext_free_handler(void *arg1, void * arg2)
2526 {
2527 	uintptr_t type = (uintptr_t)arg2;
2528 	uma_zone_t zone;
2529 	struct mbuf *m;
2530 
2531 	m = arg1;
2532 	zone = m_getzonefromtype(type);
2533 	m->m_ext.ext_type = (int)type;
2534 	cxgb_ext_freed++;
2535 	cxgb_cache_put(zone, m);
2536 }
2537 
2538 static void
2539 init_cluster_mbuf(caddr_t cl, int flags, int type, uma_zone_t zone)
2540 {
2541 	struct mbuf *m;
2542 	int header_size;
2543 
2544 	header_size = sizeof(struct m_hdr) + sizeof(struct pkthdr) +
2545 	    sizeof(struct m_ext_) + sizeof(uint32_t);
2546 
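	/*
	 * The cluster doubles as its own mbuf: a fake mbuf header is laid
	 * out at the start of the cluster, the external reference count
	 * word sits just below cl + header_size, and packet data starts at
	 * cl + header_size.
	 */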
2547 	bzero(cl, header_size);
2548 	m = (struct mbuf *)cl;
2549 
2550 	cxgb_ext_inited++;
2551 	SLIST_INIT(&m->m_pkthdr.tags);
2552 	m->m_type = MT_DATA;
2553 	m->m_flags = flags | M_NOFREE | M_EXT;
2554 	m->m_data = cl + header_size;
2555 	m->m_ext.ext_buf = cl;
2556 	m->m_ext.ref_cnt = (uint32_t *)(cl + header_size - sizeof(uint32_t));
2557 	m->m_ext.ext_size = m_getsizefromtype(type);
2558 	m->m_ext.ext_free = ext_free_handler;
2559 #if __FreeBSD_version >= 800016
2560 	m->m_ext.ext_arg1 = cl;
2561 	m->m_ext.ext_arg2 = (void *)(uintptr_t)type;
2562 #else
2563 	m->m_ext.ext_args = (void *)(uintptr_t)type;
2564 #endif
2565 	m->m_ext.ext_type = EXT_EXTREF;
2566 	*(m->m_ext.ref_cnt) = 1;
2567 	DPRINTF("data=%p ref_cnt=%p\n", m->m_data, m->m_ext.ref_cnt);
2568 }
2569 
2570 
2571 /**
2572  *	get_packet - return the next ingress packet buffer from a free list
2573  *	@adap: the adapter that received the packet
2574  *	@drop_thres: # of remaining buffers before we start dropping packets
2575  *	@qs: the qset that the SGE free list holding the packet belongs to
2576  *      @mh: the mbuf header; contains pointers to the head and tail of the mbuf chain
2577  *      @r: response descriptor
2578  *
2579  *	Get the next packet from a free list and complete setup of the
2580  *	mbuf.  If the packet is small we make a copy and recycle the
2581  *	original buffer, otherwise we use the original buffer itself.  If a
2582  *	positive drop threshold is supplied packets are dropped and their
2583  *	buffers recycled if (a) the number of remaining buffers is under the
2584  *	threshold and the packet is too big to copy, or (b) the packet should
2585  *	be copied but there is no memory for the copy.
2586  */
2587 #ifdef DISABLE_MBUF_IOVEC
2588 
2589 static int
2590 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2591     struct t3_mbuf_hdr *mh, struct rsp_desc *r)
2592 {
2593 
2594 	unsigned int len_cq =  ntohl(r->len_cq);
2595 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2596 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2597 	uint32_t len = G_RSPD_LEN(len_cq);
2598 	uint32_t flags = ntohl(r->flags);
2599 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2600 	caddr_t cl;
2601 	struct mbuf *m, *m0;
2602 	int ret = 0;
2603 
2604 	prefetch(sd->rxsd_cl);
2605 
2606 	fl->credits--;
2607 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2608 
2609 	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2610 		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2611 			goto skip_recycle;
2612 		cl = mtod(m0, void *);
2613 		memcpy(cl, sd->data, len);
2614 		recycle_rx_buf(adap, fl, fl->cidx);
2615 		m = m0;
2616 		m0->m_len = len;
2617 	} else {
2618 	skip_recycle:
2619 
2620 		bus_dmamap_unload(fl->entry_tag, sd->map);
2621 		cl = sd->rxsd_cl;
2622 		m = m0 = (struct mbuf *)cl;
2623 
2624 		if ((sopeop == RSPQ_SOP_EOP) ||
2625 		    (sopeop == RSPQ_SOP))
2626 			flags = M_PKTHDR;
2627 		init_cluster_mbuf(cl, flags, fl->type, fl->zone);
2628 		m0->m_len = len;
2629 	}
2630 	switch(sopeop) {
2631 	case RSPQ_SOP_EOP:
2632 		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2633 		mh->mh_head = mh->mh_tail = m;
2634 		m->m_pkthdr.len = len;
2635 		ret = 1;
2636 		break;
2637 	case RSPQ_NSOP_NEOP:
2638 		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2639 		if (mh->mh_tail == NULL) {
2640 			log(LOG_ERR, "discarding intermediate descriptor entry\n");
2641 			m_freem(m);
2642 			break;
2643 		}
2644 		mh->mh_tail->m_next = m;
2645 		mh->mh_tail = m;
2646 		mh->mh_head->m_pkthdr.len += len;
2647 		ret = 0;
2648 		break;
2649 	case RSPQ_SOP:
2650 		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2651 		m->m_pkthdr.len = len;
2652 		mh->mh_head = mh->mh_tail = m;
2653 		ret = 0;
2654 		break;
2655 	case RSPQ_EOP:
2656 		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2657 		mh->mh_head->m_pkthdr.len += len;
2658 		mh->mh_tail->m_next = m;
2659 		mh->mh_tail = m;
2660 		ret = 1;
2661 		break;
2662 	}
2663 	if (++fl->cidx == fl->size)
2664 		fl->cidx = 0;
2665 
2666 	return (ret);
2667 }
2668 
2669 #else
2670 
2671 static int
2672 get_packet(adapter_t *adap, unsigned int drop_thres, struct sge_qset *qs,
2673     struct mbuf **m, struct rsp_desc *r)
2674 {
2675 
2676 	unsigned int len_cq =  ntohl(r->len_cq);
2677 	struct sge_fl *fl = (len_cq & F_RSPD_FLQ) ? &qs->fl[1] : &qs->fl[0];
2678 	struct rx_sw_desc *sd = &fl->sdesc[fl->cidx];
2679 	uint32_t len = G_RSPD_LEN(len_cq);
2680 	uint32_t flags = ntohl(r->flags);
2681 	uint8_t sopeop = G_RSPD_SOP_EOP(flags);
2682 	void *cl;
2683 	int ret = 0;
2684 	struct mbuf *m0;
2685 #if 0
2686 	if ((sd + 1 )->rxsd_cl)
2687 		prefetch((sd + 1)->rxsd_cl);
2688 	if ((sd + 2)->rxsd_cl)
2689 		prefetch((sd + 2)->rxsd_cl);
2690 #endif
2691 	DPRINTF("rx cpu=%d\n", curcpu);
2692 	fl->credits--;
2693 	bus_dmamap_sync(fl->entry_tag, sd->map, BUS_DMASYNC_POSTREAD);
2694 
2695 	if (recycle_enable && len <= SGE_RX_COPY_THRES && sopeop == RSPQ_SOP_EOP) {
2696 		if ((m0 = m_gethdr(M_DONTWAIT, MT_DATA)) == NULL)
2697 			goto skip_recycle;
2698 		cl = mtod(m0, void *);
2699 		memcpy(cl, sd->data, len);
2700 		recycle_rx_buf(adap, fl, fl->cidx);
2701 		*m = m0;
2702 	} else {
2703 	skip_recycle:
2704 		bus_dmamap_unload(fl->entry_tag, sd->map);
2705 		cl = sd->rxsd_cl;
2706 		*m = m0 = (struct mbuf *)cl;
2707 	}
2708 
2709 	switch(sopeop) {
2710 	case RSPQ_SOP_EOP:
2711 		DBG(DBG_RX, ("get_packet: SOP-EOP m %p\n", m));
2712 		if (cl == sd->rxsd_cl)
2713 			init_cluster_mbuf(cl, M_PKTHDR, fl->type, fl->zone);
2714 		m0->m_len = m0->m_pkthdr.len = len;
2715 		ret = 1;
2716 		goto done;
2717 		break;
2718 	case RSPQ_NSOP_NEOP:
2719 		DBG(DBG_RX, ("get_packet: NO_SOP-NO_EOP m %p\n", m));
2720 		panic("chaining unsupported");
2721 		ret = 0;
2722 		break;
2723 	case RSPQ_SOP:
2724 		DBG(DBG_RX, ("get_packet: SOP m %p\n", m));
2725 		panic("chaining unsupported");
2726 		m_iovinit(m0);
2727 		ret = 0;
2728 		break;
2729 	case RSPQ_EOP:
2730 		DBG(DBG_RX, ("get_packet: EOP m %p\n", m));
2731 		panic("chaining unsupported");
2732 		ret = 1;
2733 		break;
2734 	}
2735 	panic("append not supported");
2736 #if 0
2737 	m_iovappend(m0, cl, fl->buf_size, len, sizeof(uint32_t), sd->rxsd_ref);
2738 #endif
2739 done:
2740 	if (++fl->cidx == fl->size)
2741 		fl->cidx = 0;
2742 
2743 	return (ret);
2744 }
2745 #endif
2746 /**
2747  *	handle_rsp_cntrl_info - handles control information in a response
2748  *	@qs: the queue set corresponding to the response
2749  *	@flags: the response control flags
2750  *
2751  *	Handles the control information of an SGE response, such as GTS
2752  *	indications and completion credits for the queue set's Tx queues.
2753  *	HW coalesces credits; we don't do any extra SW coalescing.
2754  */
2755 static __inline void
2756 handle_rsp_cntrl_info(struct sge_qset *qs, uint32_t flags)
2757 {
2758 	unsigned int credits;
2759 
2760 #if USE_GTS
2761 	if (flags & F_RSPD_TXQ0_GTS)
2762 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_ETH].flags);
2763 #endif
2764 	credits = G_RSPD_TXQ0_CR(flags);
2765 	if (credits)
2766 		qs->txq[TXQ_ETH].processed += credits;
2767 
2768 	credits = G_RSPD_TXQ2_CR(flags);
2769 	if (credits)
2770 		qs->txq[TXQ_CTRL].processed += credits;
2771 
2772 # if USE_GTS
2773 	if (flags & F_RSPD_TXQ1_GTS)
2774 		clear_bit(TXQ_RUNNING, &qs->txq[TXQ_OFLD].flags);
2775 # endif
2776 	credits = G_RSPD_TXQ1_CR(flags);
2777 	if (credits)
2778 		qs->txq[TXQ_OFLD].processed += credits;
2779 
2780 }
2781 
2782 static void
2783 check_ring_db(adapter_t *adap, struct sge_qset *qs,
2784     unsigned int sleeping)
2785 {
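	/* Nothing to do: GTS-based doorbell ringing is not used in this path. */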
2786 	;
2787 }
2788 
2789 /**
2790  *	process_responses - process responses from an SGE response queue
2791  *	@adap: the adapter
2792  *	@qs: the queue set to which the response queue belongs
2793  *	@budget: how many responses can be processed in this round
2794  *
2795  *	Process responses from an SGE response queue up to the supplied budget.
2796  *	Responses include received packets as well as credits and other events
2797  *	for the queues that belong to the response queue's queue set.
2798  *	A negative budget is effectively unlimited.
2799  *
2800  *	Additionally choose the interrupt holdoff time for the next interrupt
2801  *	on this queue.  If the system is under memory shortage use a fairly
2802  *	long delay to help recovery.
2803  */
2804 int
2805 process_responses(adapter_t *adap, struct sge_qset *qs, int budget)
2806 {
2807 	struct sge_rspq *rspq = &qs->rspq;
2808 	struct rsp_desc *r = &rspq->desc[rspq->cidx];
2809 	int budget_left = budget;
2810 	unsigned int sleeping = 0;
2811 #ifdef LRO_SUPPORTED
2812 	int lro_enabled = qs->lro.enabled;
2813 	int skip_lro;
2814 	struct lro_ctrl *lro_ctrl = &qs->lro.ctrl;
2815 #endif
2816 	struct mbuf *offload_mbufs[RX_BUNDLE_SIZE];
2817 	int ngathered = 0;
2818 #ifdef DEBUG
2819 	static int last_holdoff = 0;
2820 	if (cxgb_debug && rspq->holdoff_tmr != last_holdoff) {
2821 		printf("next_holdoff=%d\n", rspq->holdoff_tmr);
2822 		last_holdoff = rspq->holdoff_tmr;
2823 	}
2824 #endif
2825 	rspq->next_holdoff = rspq->holdoff_tmr;
2826 
2827 	while (__predict_true(budget_left && is_new_response(r, rspq))) {
2828 		int eth, eop = 0, ethpad = 0;
2829 		uint32_t flags = ntohl(r->flags);
2830 		uint32_t rss_csum = *(const uint32_t *)r;
2831 		uint32_t rss_hash = be32toh(r->rss_hdr.rss_hash_val);
2832 
2833 		eth = (r->rss_hdr.opcode == CPL_RX_PKT);
2834 
2835 		if (__predict_false(flags & F_RSPD_ASYNC_NOTIF)) {
2836 			struct mbuf *m;
2837 
2838 			if (cxgb_debug)
2839 				printf("async notification\n");
2840 
2841 			if (rspq->rspq_mh.mh_head == NULL) {
2842 				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2843 				m = rspq->rspq_mh.mh_head;
2844 			} else {
2845 				m = m_gethdr(M_DONTWAIT, MT_DATA);
2846 			}
2847 
2848 			/* XXX m is lost here if rspq->rspq_mbuf is not NULL */
2849 
2850 			if (m == NULL)
2851 				goto no_mem;
2852 
2853 			memcpy(mtod(m, char *), r, AN_PKT_SIZE);
2854 			m->m_len = m->m_pkthdr.len = AN_PKT_SIZE;
2855 			*mtod(m, char *) = CPL_ASYNC_NOTIF;
2856 			rss_csum = htonl(CPL_ASYNC_NOTIF << 24);
2857 			eop = 1;
2858 			rspq->async_notif++;
2859 			goto skip;
2860 		} else if  (flags & F_RSPD_IMM_DATA_VALID) {
2861 			struct mbuf *m = NULL;
2862 
2863 			DPRINTF("IMM DATA VALID opcode=0x%x rspq->cidx=%d\n",
2864 			    r->rss_hdr.opcode, rspq->cidx);
2865 			if (rspq->rspq_mh.mh_head == NULL)
2866 				rspq->rspq_mh.mh_head = m_gethdr(M_DONTWAIT, MT_DATA);
2867 			else
2868 				m = m_gethdr(M_DONTWAIT, MT_DATA);
2869 
2870 			if (rspq->rspq_mh.mh_head == NULL &&  m == NULL) {
2871 		no_mem:
2872 				rspq->next_holdoff = NOMEM_INTR_DELAY;
2873 				budget_left--;
2874 				break;
2875 			}
2876 			get_imm_packet(adap, r, rspq->rspq_mh.mh_head);
2877 			eop = 1;
2878 			rspq->imm_data++;
2879 		} else if (r->len_cq) {
2880 			int drop_thresh = eth ? SGE_RX_DROP_THRES : 0;
2881 
2882 #ifdef DISABLE_MBUF_IOVEC
2883 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mh, r);
2884 #else
2885 			eop = get_packet(adap, drop_thresh, qs, &rspq->rspq_mbuf, r);
2886 #endif
2887 #ifdef IFNET_MULTIQUEUE
2888 			rspq->rspq_mh.mh_head->m_pkthdr.rss_hash = rss_hash;
2889 #endif
2890 			ethpad = 2;
2891 		} else {
2892 			DPRINTF("pure response\n");
2893 			rspq->pure_rsps++;
2894 		}
2895 	skip:
2896 		if (flags & RSPD_CTRL_MASK) {
2897 			sleeping |= flags & RSPD_GTS_MASK;
2898 			handle_rsp_cntrl_info(qs, flags);
2899 		}
2900 
2901 		r++;
2902 		if (__predict_false(++rspq->cidx == rspq->size)) {
2903 			rspq->cidx = 0;
2904 			rspq->gen ^= 1;
2905 			r = rspq->desc;
2906 		}
2907 		prefetch(r);
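		/*
		 * Return accumulated credits to the response queue once a
		 * quarter of the ring has been consumed.
		 */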
2908 		if (++rspq->credits >= (rspq->size / 4)) {
2909 			refill_rspq(adap, rspq, rspq->credits);
2910 			rspq->credits = 0;
2911 		}
2912 		DPRINTF("eth=%d eop=%d flags=0x%x\n", eth, eop, flags);
2913 
2914 		if (!eth && eop) {
2915 			rspq->rspq_mh.mh_head->m_pkthdr.csum_data = rss_csum;
2916 			/*
2917 			 * XXX size mismatch
2918 			 */
2919 			m_set_priority(rspq->rspq_mh.mh_head, rss_hash);
2920 
2921 
2922 			ngathered = rx_offload(&adap->tdev, rspq,
2923 			    rspq->rspq_mh.mh_head, offload_mbufs, ngathered);
2924 			rspq->rspq_mh.mh_head = NULL;
2925 			DPRINTF("received offload packet\n");
2926 
2927 		} else if (eth && eop) {
2928 			struct mbuf *m = rspq->rspq_mh.mh_head;
2929 			prefetch(mtod(m, uint8_t *));
2930 			prefetch(mtod(m, uint8_t *) + L1_CACHE_BYTES);
2931 
2932 			t3_rx_eth(adap, rspq, m, ethpad);
2933 
2934 #ifdef LRO_SUPPORTED
2935 			/*
2936 			 * The T304 sends incoming packets on any qset.  If LRO
2937 			 * is also enabled, we could end up sending a packet up
2938 			 * lro_ctrl->ifp's input.  That is incorrect.
2939 			 *
2940 			 * The mbuf's rcvif was derived from the cpl header and
2941 			 * is accurate.  Skip LRO and just use that.
2942 			 */
2943 			skip_lro = __predict_false(qs->port->ifp != m->m_pkthdr.rcvif);
2944 
2945 			if (lro_enabled && lro_ctrl->lro_cnt && !skip_lro &&
2946 			    (tcp_lro_rx(lro_ctrl, m, 0) == 0)) {
2947 				/* successfully queued for LRO */
2948 			} else
2949 #endif
2950 			{
2951 				/*
2952 				 * LRO not enabled, packet unsuitable for LRO,
2953 				 * or unable to queue.  Pass it up right now in
2954 				 * either case.
2955 				 */
2956 				struct ifnet *ifp = m->m_pkthdr.rcvif;
2957 				(*ifp->if_input)(ifp, m);
2958 			}
2959 			DPRINTF("received tunnel packet\n");
2960 			rspq->rspq_mh.mh_head = NULL;
2961 
2962 		}
2963 		__refill_fl_lt(adap, &qs->fl[0], 32);
2964 		__refill_fl_lt(adap, &qs->fl[1], 32);
2965 		--budget_left;
2966 	}
2967 
2968 	deliver_partial_bundle(&adap->tdev, rspq, offload_mbufs, ngathered);
2969 
2970 #ifdef LRO_SUPPORTED
2971 	/* Flush LRO */
2972 	while (!SLIST_EMPTY(&lro_ctrl->lro_active)) {
2973 		struct lro_entry *queued = SLIST_FIRST(&lro_ctrl->lro_active);
2974 		SLIST_REMOVE_HEAD(&lro_ctrl->lro_active, next);
2975 		tcp_lro_flush(lro_ctrl, queued);
2976 	}
2977 #endif
2978 
2979 	if (sleeping)
2980 		check_ring_db(adap, qs, sleeping);
2981 
2982 	smp_mb();  /* commit Tx queue processed updates */
2983 	if (__predict_false(qs->txq_stopped > 1)) {
2984 		printf("restarting tx on %p\n", qs);
2985 
2986 		restart_tx(qs);
2987 	}
2988 
2989 	__refill_fl_lt(adap, &qs->fl[0], 512);
2990 	__refill_fl_lt(adap, &qs->fl[1], 512);
2991 	budget -= budget_left;
2992 	return (budget);
2993 }
2994 
2995 /*
2996  * A helper function that processes responses and issues GTS.
2997  */
2998 static __inline int
2999 process_responses_gts(adapter_t *adap, struct sge_rspq *rq)
3000 {
3001 	int work;
3002 	static int last_holdoff = 0;
3003 
3004 	work = process_responses(adap, rspq_to_qset(rq), -1);
3005 
3006 	if (cxgb_debug && (rq->next_holdoff != last_holdoff)) {
3007 		printf("next_holdoff=%d\n", rq->next_holdoff);
3008 		last_holdoff = rq->next_holdoff;
3009 	}
3010 	t3_write_reg(adap, A_SG_GTS, V_RSPQ(rq->cntxt_id) |
3011 	    V_NEWTIMER(rq->next_holdoff) | V_NEWINDEX(rq->cidx));
3012 
3013 	return (work);
3014 }
3015 
3016 
3017 /*
3018  * Interrupt handler for legacy INTx interrupts for T3B-based cards.
3019  * Handles data events from SGE response queues as well as error and other
3020  * async events as they all use the same interrupt pin.  We use one SGE
3021  * response queue per port in this mode and protect all response queues with
3022  * queue 0's lock.
3023  */
3024 void
3025 t3b_intr(void *data)
3026 {
3027 	uint32_t i, map;
3028 	adapter_t *adap = data;
3029 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3030 
3031 	t3_write_reg(adap, A_PL_CLI, 0);
3032 	map = t3_read_reg(adap, A_SG_DATA_INTR);
3033 
3034 	if (!map)
3035 		return;
3036 
3037 	if (__predict_false(map & F_ERRINTR))
3038 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3039 
3040 	mtx_lock(&q0->lock);
3041 	for_each_port(adap, i)
3042 	    if (map & (1 << i))
3043 			process_responses_gts(adap, &adap->sge.qs[i].rspq);
3044 	mtx_unlock(&q0->lock);
3045 }
3046 
3047 /*
3048  * The MSI interrupt handler.  This needs to handle data events from SGE
3049  * response queues as well as error and other async events as they all use
3050  * the same MSI vector.  We use one SGE response queue per port in this mode
3051  * and protect all response queues with queue 0's lock.
3052  */
3053 void
3054 t3_intr_msi(void *data)
3055 {
3056 	adapter_t *adap = data;
3057 	struct sge_rspq *q0 = &adap->sge.qs[0].rspq;
3058 	int i, new_packets = 0;
3059 
3060 	mtx_lock(&q0->lock);
3061 
3062 	for_each_port(adap, i)
3063 	    if (process_responses_gts(adap, &adap->sge.qs[i].rspq))
3064 		    new_packets = 1;
3065 	mtx_unlock(&q0->lock);
3066 	if (new_packets == 0)
3067 		taskqueue_enqueue(adap->tq, &adap->slow_intr_task);
3068 }
3069 
3070 void
3071 t3_intr_msix(void *data)
3072 {
3073 	struct sge_qset *qs = data;
3074 	adapter_t *adap = qs->port->adapter;
3075 	struct sge_rspq *rspq = &qs->rspq;
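	/*
	 * With IFNET_MULTIQUEUE the handler only trylocks the response
	 * queue and skips processing if another context already holds the
	 * lock.
	 */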
3076 #ifndef IFNET_MULTIQUEUE
3077 	mtx_lock(&rspq->lock);
3078 #else
3079 	if (mtx_trylock(&rspq->lock))
3080 #endif
3081 	{
3082 
3083 		if (process_responses_gts(adap, rspq) == 0)
3084 			rspq->unhandled_irqs++;
3085 		mtx_unlock(&rspq->lock);
3086 	}
3087 }
3088 
3089 #define QDUMP_SBUF_SIZE		(32 * 400)
3090 static int
3091 t3_dump_rspq(SYSCTL_HANDLER_ARGS)
3092 {
3093 	struct sge_rspq *rspq;
3094 	struct sge_qset *qs;
3095 	int i, err, dump_end, idx;
3096 	static int multiplier = 1;
3097 	struct sbuf *sb;
3098 	struct rsp_desc *rspd;
3099 	uint32_t data[4];
3100 
3101 	rspq = arg1;
3102 	qs = rspq_to_qset(rspq);
3103 	if (rspq->rspq_dump_count == 0)
3104 		return (0);
3105 	if (rspq->rspq_dump_count > RSPQ_Q_SIZE) {
3106 		log(LOG_WARNING,
3107 		    "dump count is too large %d\n", rspq->rspq_dump_count);
3108 		rspq->rspq_dump_count = 0;
3109 		return (EINVAL);
3110 	}
3111 	if (rspq->rspq_dump_start > (RSPQ_Q_SIZE-1)) {
3112 		log(LOG_WARNING,
3113 		    "dump start of %d is greater than queue size\n",
3114 		    rspq->rspq_dump_start);
3115 		rspq->rspq_dump_start = 0;
3116 		return (EINVAL);
3117 	}
3118 	err = t3_sge_read_rspq(qs->port->adapter, rspq->cntxt_id, data);
3119 	if (err)
3120 		return (err);
3121 retry_sbufops:
3122 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3123 
3124 	sbuf_printf(sb, " \n index=%u size=%u MSI-X/RspQ=%u intr enable=%u intr armed=%u\n",
3125 	    (data[0] & 0xffff), data[0] >> 16, ((data[2] >> 20) & 0x3f),
3126 	    ((data[2] >> 26) & 1), ((data[2] >> 27) & 1));
3127 	sbuf_printf(sb, " generation=%u CQ mode=%u FL threshold=%u\n",
3128 	    ((data[2] >> 28) & 1), ((data[2] >> 31) & 1), data[3]);
3129 
3130 	sbuf_printf(sb, " start=%d -> end=%d\n", rspq->rspq_dump_start,
3131 	    (rspq->rspq_dump_start + rspq->rspq_dump_count) & (RSPQ_Q_SIZE-1));
3132 
3133 	dump_end = rspq->rspq_dump_start + rspq->rspq_dump_count;
3134 	for (i = rspq->rspq_dump_start; i < dump_end; i++) {
3135 		idx = i & (RSPQ_Q_SIZE-1);
3136 
3137 		rspd = &rspq->desc[idx];
3138 		sbuf_printf(sb, "\tidx=%04d opcode=%02x cpu_idx=%x hash_type=%x cq_idx=%x\n",
3139 		    idx, rspd->rss_hdr.opcode, rspd->rss_hdr.cpu_idx,
3140 		    rspd->rss_hdr.hash_type, be16toh(rspd->rss_hdr.cq_idx));
3141 		sbuf_printf(sb, "\trss_hash_val=%x flags=%08x len_cq=%x intr_gen=%x\n",
3142 		    rspd->rss_hdr.rss_hash_val, be32toh(rspd->flags),
3143 		    be32toh(rspd->len_cq), rspd->intr_gen);
3144 	}
3145 	if (sbuf_overflowed(sb)) {
3146 		sbuf_delete(sb);
3147 		multiplier++;
3148 		goto retry_sbufops;
3149 	}
3150 	sbuf_finish(sb);
3151 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3152 	sbuf_delete(sb);
3153 	return (err);
3154 }
3155 
3156 static int
3157 t3_dump_txq_eth(SYSCTL_HANDLER_ARGS)
3158 {
3159 	struct sge_txq *txq;
3160 	struct sge_qset *qs;
3161 	int i, j, err, dump_end;
3162 	static int multiplier = 1;
3163 	struct sbuf *sb;
3164 	struct tx_desc *txd;
3165 	uint32_t *WR, wr_hi, wr_lo, gen;
3166 	uint32_t data[4];
3167 
3168 	txq = arg1;
3169 	qs = txq_to_qset(txq, TXQ_ETH);
3170 	if (txq->txq_dump_count == 0) {
3171 		return (0);
3172 	}
3173 	if (txq->txq_dump_count > TX_ETH_Q_SIZE) {
3174 		log(LOG_WARNING,
3175 		    "dump count is too large %d\n", txq->txq_dump_count);
3176 		txq->txq_dump_count = 1;
3177 		return (EINVAL);
3178 	}
3179 	if (txq->txq_dump_start > (TX_ETH_Q_SIZE-1)) {
3180 		log(LOG_WARNING,
3181 		    "dump start of %d is greater than queue size\n",
3182 		    txq->txq_dump_start);
3183 		txq->txq_dump_start = 0;
3184 		return (EINVAL);
3185 	}
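	/*
	 * XXX t3_sge_read_ecntxt() is passed the response queue's context
	 * id (qs->rspq.cntxt_id) rather than the Tx queue's.
	 */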
3186 	err = t3_sge_read_ecntxt(qs->port->adapter, qs->rspq.cntxt_id, data);
3187 	if (err)
3188 		return (err);
3189 
3190 
3191 retry_sbufops:
3192 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3193 
3194 	sbuf_printf(sb, " \n credits=%u GTS=%u index=%u size=%u rspq#=%u cmdq#=%u\n",
3195 	    (data[0] & 0x7fff), ((data[0] >> 15) & 1), (data[0] >> 16),
3196 	    (data[1] & 0xffff), ((data[3] >> 4) & 7), ((data[3] >> 7) & 1));
3197 	sbuf_printf(sb, " TUN=%u TOE=%u generation=%u uP token=%u valid=%u\n",
3198 	    ((data[3] >> 8) & 1), ((data[3] >> 9) & 1), ((data[3] >> 10) & 1),
3199 	    ((data[3] >> 11) & 0xfffff), ((data[3] >> 31) & 1));
3200 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3201 	    txq->txq_dump_start,
3202 	    (txq->txq_dump_start + txq->txq_dump_count) & (TX_ETH_Q_SIZE-1));
3203 
3204 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3205 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3206 		txd = &txq->desc[i & (TX_ETH_Q_SIZE-1)];
3207 		WR = (uint32_t *)txd->flit;
3208 		wr_hi = ntohl(WR[0]);
3209 		wr_lo = ntohl(WR[1]);
3210 		gen = G_WR_GEN(wr_lo);
3211 
3212 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3213 		    wr_hi, wr_lo, gen);
3214 		for (j = 2; j < 30; j += 4)
3215 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3216 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3217 
3218 	}
3219 	if (sbuf_overflowed(sb)) {
3220 		sbuf_delete(sb);
3221 		multiplier++;
3222 		goto retry_sbufops;
3223 	}
3224 	sbuf_finish(sb);
3225 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3226 	sbuf_delete(sb);
3227 	return (err);
3228 }
3229 
3230 static int
3231 t3_dump_txq_ctrl(SYSCTL_HANDLER_ARGS)
3232 {
3233 	struct sge_txq *txq;
3234 	struct sge_qset *qs;
3235 	int i, j, err, dump_end;
3236 	static int multiplier = 1;
3237 	struct sbuf *sb;
3238 	struct tx_desc *txd;
3239 	uint32_t *WR, wr_hi, wr_lo, gen;
3240 
3241 	txq = arg1;
3242 	qs = txq_to_qset(txq, TXQ_CTRL);
3243 	if (txq->txq_dump_count == 0) {
3244 		return (0);
3245 	}
3246 	if (txq->txq_dump_count > 256) {
3247 		log(LOG_WARNING,
3248 		    "dump count is too large %d\n", txq->txq_dump_count);
3249 		txq->txq_dump_count = 1;
3250 		return (EINVAL);
3251 	}
3252 	if (txq->txq_dump_start > 255) {
3253 		log(LOG_WARNING,
3254 		    "dump start of %d is greater than queue size\n",
3255 		    txq->txq_dump_start);
3256 		txq->txq_dump_start = 0;
3257 		return (EINVAL);
3258 	}
3259 
3260 retry_sbufops:
3261 	sb = sbuf_new(NULL, NULL, QDUMP_SBUF_SIZE*multiplier, SBUF_FIXEDLEN);
3262 	sbuf_printf(sb, " qid=%d start=%d -> end=%d\n", qs->idx,
3263 	    txq->txq_dump_start,
3264 	    (txq->txq_dump_start + txq->txq_dump_count) & 255);
3265 
3266 	dump_end = txq->txq_dump_start + txq->txq_dump_count;
3267 	for (i = txq->txq_dump_start; i < dump_end; i++) {
3268 		txd = &txq->desc[i & (255)];
3269 		WR = (uint32_t *)txd->flit;
3270 		wr_hi = ntohl(WR[0]);
3271 		wr_lo = ntohl(WR[1]);
3272 		gen = G_WR_GEN(wr_lo);
3273 
3274 		sbuf_printf(sb," wr_hi %08x wr_lo %08x gen %d\n",
3275 		    wr_hi, wr_lo, gen);
3276 		for (j = 2; j < 30; j += 4)
3277 			sbuf_printf(sb, "\t%08x %08x %08x %08x \n",
3278 			    WR[j], WR[j + 1], WR[j + 2], WR[j + 3]);
3279 
3280 	}
3281 	if (sbuf_overflowed(sb)) {
3282 		sbuf_delete(sb);
3283 		multiplier++;
3284 		goto retry_sbufops;
3285 	}
3286 	sbuf_finish(sb);
3287 	err = SYSCTL_OUT(req, sbuf_data(sb), sbuf_len(sb) + 1);
3288 	sbuf_delete(sb);
3289 	return (err);
3290 }
3291 
3292 static int
3293 t3_set_coalesce_usecs(SYSCTL_HANDLER_ARGS)
3294 {
3295 	adapter_t *sc = arg1;
3296 	struct qset_params *qsp = &sc->params.sge.qset[0];
3297 	int coalesce_usecs;
3298 	struct sge_qset *qs;
3299 	int i, j, err, nqsets = 0;
3300 	struct mtx *lock;
3301 
3302 	if ((sc->flags & FULL_INIT_DONE) == 0)
3303 		return (ENXIO);
3304 
3305 	coalesce_usecs = qsp->coalesce_usecs;
3306 	err = sysctl_handle_int(oidp, &coalesce_usecs, arg2, req);
3307 
3308 	if (err != 0) {
3309 		return (err);
3310 	}
3311 	if (coalesce_usecs == qsp->coalesce_usecs)
3312 		return (0);
3313 
3314 	for (i = 0; i < sc->params.nports; i++)
3315 		for (j = 0; j < sc->port[i].nqsets; j++)
3316 			nqsets++;
3317 
3318 	coalesce_usecs = max(1, coalesce_usecs);
3319 
3320 	for (i = 0; i < nqsets; i++) {
3321 		qs = &sc->sge.qs[i];
3322 		qsp = &sc->params.sge.qset[i];
3323 		qsp->coalesce_usecs = coalesce_usecs;
3324 
3325 		lock = (sc->flags & USING_MSIX) ? &qs->rspq.lock :
3326 			    &sc->sge.qs[0].rspq.lock;
3327 
3328 		mtx_lock(lock);
3329 		t3_update_qset_coalesce(qs, qsp);
3330 		t3_write_reg(sc, A_SG_GTS, V_RSPQ(qs->rspq.cntxt_id) |
3331 		    V_NEWTIMER(qs->rspq.holdoff_tmr));
3332 		mtx_unlock(lock);
3333 	}
3334 
3335 	return (0);
3336 }
3337 
3338 
3339 void
3340 t3_add_attach_sysctls(adapter_t *sc)
3341 {
3342 	struct sysctl_ctx_list *ctx;
3343 	struct sysctl_oid_list *children;
3344 
3345 	ctx = device_get_sysctl_ctx(sc->dev);
3346 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3347 
3348 	/* random information */
3349 	SYSCTL_ADD_STRING(ctx, children, OID_AUTO,
3350 	    "firmware_version",
3351 	    CTLFLAG_RD, &sc->fw_version,
3352 	    0, "firmware version");
3353 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3354 	    "hw_revision",
3355 	    CTLFLAG_RD, &sc->params.rev,
3356 	    0, "chip hardware revision");
3357 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3358 	    "enable_debug",
3359 	    CTLFLAG_RW, &cxgb_debug,
3360 	    0, "enable verbose debugging output");
3361 	SYSCTL_ADD_ULONG(ctx, children, OID_AUTO, "tunq_coalesce",
3362 	    CTLFLAG_RD, &sc->tunq_coalesce,
3363 	    "#tunneled packets freed");
3364 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3365 	    "txq_overrun",
3366 	    CTLFLAG_RD, &txq_fills,
3367 	    0, "#times txq overrun");
3368 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3369 	    "pcpu_cache_enable",
3370 	    CTLFLAG_RW, &cxgb_pcpu_cache_enable,
3371 	    0, "enable driver local pcpu caches");
3372 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3373 	    "cache_alloc",
3374 	    CTLFLAG_RD, &cxgb_cached_allocations,
3375 	    0, "#times a cluster was allocated from cache");
3376 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3377 	    "cached",
3378 	    CTLFLAG_RD, &cxgb_cached,
3379 	    0, "#times a cluster was cached");
3380 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3381 	    "ext_freed",
3382 	    CTLFLAG_RD, &cxgb_ext_freed,
3383 	    0, "#times a cluster was freed through ext_free");
3384 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3385 	    "ext_inited",
3386 	    CTLFLAG_RD, &cxgb_ext_inited,
3387 	    0, "#times a cluster was initialized for ext_free");
3388 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3389 	    "mbufs_outstanding",
3390 	    CTLFLAG_RD, &cxgb_mbufs_outstanding,
3391 	    0, "#mbufs in flight in the driver");
3392 	SYSCTL_ADD_INT(ctx, children, OID_AUTO,
3393 	    "pack_outstanding",
3394 	    CTLFLAG_RD, &cxgb_pack_outstanding,
3395 	    0, "#packets in flight in the driver");
3396 }
3397 
3398 
3399 static const char *rspq_name = "rspq";
3400 static const char *txq_names[] =
3401 {
3402 	"txq_eth",
3403 	"txq_ofld",
3404 	"txq_ctrl"
3405 };
3406 
3407 static int
3408 sysctl_handle_macstat(SYSCTL_HANDLER_ARGS)
3409 {
3410 	struct port_info *p = arg1;
3411 	uint64_t *parg;
3412 
3413 	if (!p)
3414 		return (EINVAL);
3415 
3416 	parg = (uint64_t *) ((uint8_t *)&p->mac.stats + arg2);
3417 
3418 	PORT_LOCK(p);
3419 	t3_mac_update_stats(&p->mac);
3420 	PORT_UNLOCK(p);
3421 
3422 	return (sysctl_handle_quad(oidp, parg, 0, req));
3423 }
3424 
3425 void
3426 t3_add_configured_sysctls(adapter_t *sc)
3427 {
3428 	struct sysctl_ctx_list *ctx;
3429 	struct sysctl_oid_list *children;
3430 	int i, j;
3431 
3432 	ctx = device_get_sysctl_ctx(sc->dev);
3433 	children = SYSCTL_CHILDREN(device_get_sysctl_tree(sc->dev));
3434 
3435 	SYSCTL_ADD_PROC(ctx, children, OID_AUTO,
3436 	    "intr_coal",
3437 	    CTLTYPE_INT|CTLFLAG_RW, sc,
3438 	    0, t3_set_coalesce_usecs,
3439 	    "I", "interrupt coalescing timer (us)");
3440 
3441 	for (i = 0; i < sc->params.nports; i++) {
3442 		struct port_info *pi = &sc->port[i];
3443 		struct sysctl_oid *poid;
3444 		struct sysctl_oid_list *poidlist;
3445 		struct mac_stats *mstats = &pi->mac.stats;
3446 
3447 		snprintf(pi->namebuf, PORT_NAME_LEN, "port%d", i);
3448 		poid = SYSCTL_ADD_NODE(ctx, children, OID_AUTO,
3449 		    pi->namebuf, CTLFLAG_RD, NULL, "port statistics");
3450 		poidlist = SYSCTL_CHILDREN(poid);
3451 		SYSCTL_ADD_INT(ctx, poidlist, OID_AUTO,
3452 		    "nqsets", CTLFLAG_RD, &pi->nqsets,
3453 		    0, "#queue sets");
3454 
3455 		for (j = 0; j < pi->nqsets; j++) {
3456 			struct sge_qset *qs = &sc->sge.qs[pi->first_qset + j];
3457 			struct sysctl_oid *qspoid, *rspqpoid, *txqpoid, *ctrlqpoid, *lropoid;
3458 			struct sysctl_oid_list *qspoidlist, *rspqpoidlist, *txqpoidlist, *ctrlqpoidlist, *lropoidlist;
3459 			struct sge_txq *txq = &qs->txq[TXQ_ETH];
3460 
3461 			snprintf(qs->namebuf, QS_NAME_LEN, "qs%d", j);
3462 
3463 			qspoid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO,
3464 			    qs->namebuf, CTLFLAG_RD, NULL, "qset statistics");
3465 			qspoidlist = SYSCTL_CHILDREN(qspoid);
3466 
3467 			rspqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3468 			    rspq_name, CTLFLAG_RD, NULL, "rspq statistics");
3469 			rspqpoidlist = SYSCTL_CHILDREN(rspqpoid);
3470 
3471 			txqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3472 			    txq_names[0], CTLFLAG_RD, NULL, "txq statistics");
3473 			txqpoidlist = SYSCTL_CHILDREN(txqpoid);
3474 
3475 			ctrlqpoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3476 			    txq_names[2], CTLFLAG_RD, NULL, "ctrlq statistics");
3477 			ctrlqpoidlist = SYSCTL_CHILDREN(ctrlqpoid);
3478 
3479 			lropoid = SYSCTL_ADD_NODE(ctx, qspoidlist, OID_AUTO,
3480 			    "lro_stats", CTLFLAG_RD, NULL, "LRO statistics");
3481 			lropoidlist = SYSCTL_CHILDREN(lropoid);
3482 
3483 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "size",
3484 			    CTLFLAG_RD, &qs->rspq.size,
3485 			    0, "#entries in response queue");
3486 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "cidx",
3487 			    CTLFLAG_RD, &qs->rspq.cidx,
3488 			    0, "consumer index");
3489 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "credits",
3490 			    CTLFLAG_RD, &qs->rspq.credits,
3491 			    0, "#credits");
3492 			SYSCTL_ADD_XLONG(ctx, rspqpoidlist, OID_AUTO, "phys_addr",
3493 			    CTLFLAG_RD, &qs->rspq.phys_addr,
3494 	    "physical address of the queue");
3495 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_start",
3496 			    CTLFLAG_RW, &qs->rspq.rspq_dump_start,
3497 			    0, "start rspq dump entry");
3498 			SYSCTL_ADD_UINT(ctx, rspqpoidlist, OID_AUTO, "dump_count",
3499 			    CTLFLAG_RW, &qs->rspq.rspq_dump_count,
3500 			    0, "#rspq entries to dump");
3501 			SYSCTL_ADD_PROC(ctx, rspqpoidlist, OID_AUTO, "qdump",
3502 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->rspq,
3503 			    0, t3_dump_rspq, "A", "dump of the response queue");
3504 
3505 
3506 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "dropped",
3507 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].txq_drops,
3508 			    0, "#tunneled packets dropped");
3509 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "sendqlen",
3510 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].sendq.qlen,
3511 			    0, "#tunneled packets waiting to be sent");
3512 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_pidx",
3513 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_prod,
3514 			    0, "#tunneled packets queue producer index");
3515 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "queue_cidx",
3516 			    CTLFLAG_RD, (uint32_t *)(uintptr_t)&qs->txq[TXQ_ETH].txq_mr.br_cons,
3517 			    0, "#tunneled packets queue consumer index");
3518 			SYSCTL_ADD_INT(ctx, txqpoidlist, OID_AUTO, "processed",
3519 			    CTLFLAG_RD, &qs->txq[TXQ_ETH].processed,
3520 			    0, "#tunneled packets processed by the card");
3521 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "cleaned",
3522 			    CTLFLAG_RD, &txq->cleaned,
3523 			    0, "#tunneled packets cleaned");
3524 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "in_use",
3525 			    CTLFLAG_RD, &txq->in_use,
3526 			    0, "#tunneled packet slots in use");
3527 			SYSCTL_ADD_ULONG(ctx, txqpoidlist, OID_AUTO, "frees",
3528 			    CTLFLAG_RD, &txq->txq_frees,
3529 			    "#tunneled packets freed");
3530 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "skipped",
3531 			    CTLFLAG_RD, &txq->txq_skipped,
3532 			    0, "#tunneled packet descriptors skipped");
3533 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "coalesced",
3534 			    CTLFLAG_RD, &txq->txq_coalesced,
3535 			    0, "#tunneled packets coalesced");
3536 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "enqueued",
3537 			    CTLFLAG_RD, &txq->txq_enqueued,
3538 			    0, "#tunneled packets enqueued to hardware");
3539 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "stopped_flags",
3540 			    CTLFLAG_RD, &qs->txq_stopped,
3541 			    0, "tx queues stopped");
3542 			SYSCTL_ADD_XLONG(ctx, txqpoidlist, OID_AUTO, "phys_addr",
3543 			    CTLFLAG_RD, &txq->phys_addr,
3544 	    "physical address of the queue");
3545 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "qgen",
3546 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].gen,
3547 			    0, "txq generation");
3548 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_cidx",
3549 			    CTLFLAG_RD, &txq->cidx,
3550 			    0, "hardware queue cidx");
3551 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "hw_pidx",
3552 			    CTLFLAG_RD, &txq->pidx,
3553 			    0, "hardware queue pidx");
3554 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_start",
3555 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_start,
3556 			    0, "txq start idx for dump");
3557 			SYSCTL_ADD_UINT(ctx, txqpoidlist, OID_AUTO, "dump_count",
3558 			    CTLFLAG_RW, &qs->txq[TXQ_ETH].txq_dump_count,
3559 			    0, "txq #entries to dump");
3560 			SYSCTL_ADD_PROC(ctx, txqpoidlist, OID_AUTO, "qdump",
3561 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_ETH],
3562 			    0, t3_dump_txq_eth, "A", "dump of the transmit queue");
3563 
3564 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_start",
3565 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_start,
3566 			    0, "ctrlq start idx for dump");
3567 			SYSCTL_ADD_UINT(ctx, ctrlqpoidlist, OID_AUTO, "dump_count",
3568 			    CTLFLAG_RW, &qs->txq[TXQ_CTRL].txq_dump_count,
3569 			    0, "ctrl #entries to dump");
3570 			SYSCTL_ADD_PROC(ctx, ctrlqpoidlist, OID_AUTO, "qdump",
3571 			    CTLTYPE_STRING | CTLFLAG_RD, &qs->txq[TXQ_CTRL],
3572 			    0, t3_dump_txq_ctrl, "A", "dump of the control queue");
3573 
3574 #ifdef LRO_SUPPORTED
3575 			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_queued",
3576 			    CTLFLAG_RD, &qs->lro.ctrl.lro_queued, 0, NULL);
3577 			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_flushed",
3578 			    CTLFLAG_RD, &qs->lro.ctrl.lro_flushed, 0, NULL);
3579 			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_bad_csum",
3580 			    CTLFLAG_RD, &qs->lro.ctrl.lro_bad_csum, 0, NULL);
3581 			SYSCTL_ADD_INT(ctx, lropoidlist, OID_AUTO, "lro_cnt",
3582 			    CTLFLAG_RD, &qs->lro.ctrl.lro_cnt, 0, NULL);
3583 #endif
3584 		}
3585 
3586 		/* Now add a node for mac stats. */
3587 		poid = SYSCTL_ADD_NODE(ctx, poidlist, OID_AUTO, "mac_stats",
3588 		    CTLFLAG_RD, NULL, "MAC statistics");
3589 		poidlist = SYSCTL_CHILDREN(poid);
3590 
3591 		/*
3592 		 * We (ab)use the length argument (arg2) to pass the offset of
3593 		 * the counter we are interested in.  This is only needed for
3594 		 * the quad counters that are updated from the hardware, so that
3595 		 * we always return the latest value.  sysctl_handle_macstat
3596 		 * first updates *all* the counters from the hardware and then
3597 		 * returns the latest value of the requested counter.  It would
3598 		 * be better to update only the requested counter, but
3599 		 * t3_mac_update_stats() hides the register details and we don't
3600 		 * want to dive into all that here.  (An illustrative sketch of
3601 		 * this arg2-as-offset scheme follows the counter list below.)
3602 		 */
3603 #define CXGB_SYSCTL_ADD_QUAD(a)	SYSCTL_ADD_OID(ctx, poidlist, OID_AUTO, #a, \
3604     (CTLTYPE_QUAD | CTLFLAG_RD), pi, offsetof(struct mac_stats, a), \
3605     sysctl_handle_macstat, "QU", 0)
3606 		CXGB_SYSCTL_ADD_QUAD(tx_octets);
3607 		CXGB_SYSCTL_ADD_QUAD(tx_octets_bad);
3608 		CXGB_SYSCTL_ADD_QUAD(tx_frames);
3609 		CXGB_SYSCTL_ADD_QUAD(tx_mcast_frames);
3610 		CXGB_SYSCTL_ADD_QUAD(tx_bcast_frames);
3611 		CXGB_SYSCTL_ADD_QUAD(tx_pause);
3612 		CXGB_SYSCTL_ADD_QUAD(tx_deferred);
3613 		CXGB_SYSCTL_ADD_QUAD(tx_late_collisions);
3614 		CXGB_SYSCTL_ADD_QUAD(tx_total_collisions);
3615 		CXGB_SYSCTL_ADD_QUAD(tx_excess_collisions);
3616 		CXGB_SYSCTL_ADD_QUAD(tx_underrun);
3617 		CXGB_SYSCTL_ADD_QUAD(tx_len_errs);
3618 		CXGB_SYSCTL_ADD_QUAD(tx_mac_internal_errs);
3619 		CXGB_SYSCTL_ADD_QUAD(tx_excess_deferral);
3620 		CXGB_SYSCTL_ADD_QUAD(tx_fcs_errs);
3621 		CXGB_SYSCTL_ADD_QUAD(tx_frames_64);
3622 		CXGB_SYSCTL_ADD_QUAD(tx_frames_65_127);
3623 		CXGB_SYSCTL_ADD_QUAD(tx_frames_128_255);
3624 		CXGB_SYSCTL_ADD_QUAD(tx_frames_256_511);
3625 		CXGB_SYSCTL_ADD_QUAD(tx_frames_512_1023);
3626 		CXGB_SYSCTL_ADD_QUAD(tx_frames_1024_1518);
3627 		CXGB_SYSCTL_ADD_QUAD(tx_frames_1519_max);
3628 		CXGB_SYSCTL_ADD_QUAD(rx_octets);
3629 		CXGB_SYSCTL_ADD_QUAD(rx_octets_bad);
3630 		CXGB_SYSCTL_ADD_QUAD(rx_frames);
3631 		CXGB_SYSCTL_ADD_QUAD(rx_mcast_frames);
3632 		CXGB_SYSCTL_ADD_QUAD(rx_bcast_frames);
3633 		CXGB_SYSCTL_ADD_QUAD(rx_pause);
3634 		CXGB_SYSCTL_ADD_QUAD(rx_align_errs);
3635 		CXGB_SYSCTL_ADD_QUAD(rx_symbol_errs);
3636 		CXGB_SYSCTL_ADD_QUAD(rx_data_errs);
3637 		CXGB_SYSCTL_ADD_QUAD(rx_sequence_errs);
3638 		CXGB_SYSCTL_ADD_QUAD(rx_runt);
3639 		CXGB_SYSCTL_ADD_QUAD(rx_jabber);
3640 		CXGB_SYSCTL_ADD_QUAD(rx_short);
3641 		CXGB_SYSCTL_ADD_QUAD(rx_too_long);
3642 		CXGB_SYSCTL_ADD_QUAD(rx_mac_internal_errs);
3643 		CXGB_SYSCTL_ADD_QUAD(rx_cong_drops);
3644 		CXGB_SYSCTL_ADD_QUAD(rx_frames_64);
3645 		CXGB_SYSCTL_ADD_QUAD(rx_frames_65_127);
3646 		CXGB_SYSCTL_ADD_QUAD(rx_frames_128_255);
3647 		CXGB_SYSCTL_ADD_QUAD(rx_frames_256_511);
3648 		CXGB_SYSCTL_ADD_QUAD(rx_frames_512_1023);
3649 		CXGB_SYSCTL_ADD_QUAD(rx_frames_1024_1518);
3650 		CXGB_SYSCTL_ADD_QUAD(rx_frames_1519_max);
3651 #undef CXGB_SYSCTL_ADD_QUAD
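
/*
 * Illustrative sketch only (kept under #if 0, never compiled): what a
 * handler following the arg2-as-offset scheme described above could look
 * like.  This is NOT the actual sysctl_handle_macstat defined earlier in
 * this file -- that handler takes the port_info as arg1 and refreshes every
 * counter via t3_mac_update_stats() before replying.  The function name and
 * the direct mac_stats pointer below are simplifying assumptions.
 */
#if 0
static int
sysctl_handle_macstat_sketch(SYSCTL_HANDLER_ARGS)
{
	struct mac_stats *s = arg1;	/* assumed: arg1 points at the stats */
	uint64_t val;

	/*
	 * arg2 carries offsetof(struct mac_stats, <counter>), so the
	 * requested counter is reached by plain pointer arithmetic.
	 */
	val = *(uint64_t *)((uintptr_t)s + arg2);

	/* The real handler refreshes the counters from the MAC first. */
	return (SYSCTL_OUT(req, &val, sizeof(val)));
}
#endif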
3652 
3653 #define CXGB_SYSCTL_ADD_ULONG(a) SYSCTL_ADD_ULONG(ctx, poidlist, OID_AUTO, #a, \
3654     CTLFLAG_RD, &mstats->a, 0)
3655 		CXGB_SYSCTL_ADD_ULONG(tx_fifo_parity_err);
3656 		CXGB_SYSCTL_ADD_ULONG(rx_fifo_parity_err);
3657 		CXGB_SYSCTL_ADD_ULONG(tx_fifo_urun);
3658 		CXGB_SYSCTL_ADD_ULONG(rx_fifo_ovfl);
3659 		CXGB_SYSCTL_ADD_ULONG(serdes_signal_loss);
3660 		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_ctc_err);
3661 		CXGB_SYSCTL_ADD_ULONG(xaui_pcs_align_change);
3662 		CXGB_SYSCTL_ADD_ULONG(num_toggled);
3663 		CXGB_SYSCTL_ADD_ULONG(num_resets);
3664 #undef CXGB_SYSCTL_ADD_ULONG
3665 	}
3666 }
3667 
3668 /**
3669  *	t3_get_desc - dump an SGE descriptor for debugging purposes
3670  *	@qs: the queue set
3671  *	@qnum: identifies the specific queue (0..2: Tx, 3: response, 4..5: Rx)
3672  *	@idx: the descriptor index in the queue
3673  *	@data: where to dump the descriptor contents
3674  *
3675  *	Dumps the contents of a HW descriptor of an SGE queue.  Returns the
3676  *	size of the descriptor.
3677  */
3678 int
3679 t3_get_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx,
3680 		unsigned char *data)
3681 {
3682 	if (qnum >= 6)
3683 		return (EINVAL);
3684 
3685 	if (qnum < 3) {
3686 		if (!qs->txq[qnum].desc || idx >= qs->txq[qnum].size)
3687 			return (EINVAL);
3688 		memcpy(data, &qs->txq[qnum].desc[idx], sizeof(struct tx_desc));
3689 		return (sizeof(struct tx_desc));
3690 	}
3691 
3692 	if (qnum == 3) {
3693 		if (!qs->rspq.desc || idx >= qs->rspq.size)
3694 			return (EINVAL);
3695 		memcpy(data, &qs->rspq.desc[idx], sizeof(struct rsp_desc));
3696 		return (sizeof(struct rsp_desc));
3697 	}
3698 
3699 	qnum -= 4;
3700 	if (!qs->fl[qnum].desc || idx >= qs->fl[qnum].size)
3701 		return (EINVAL);
3702 	memcpy(data, &qs->fl[qnum].desc[idx], sizeof(struct rx_desc));
3703 		return (sizeof(struct rx_desc));
3704 }
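
/*
 * Usage sketch (illustrative only, not part of the driver): one way a debug
 * path might call t3_get_desc() to hex-dump a single hardware descriptor.
 * The name "dump_one_desc" and the printf-based output are hypothetical;
 * the union merely guarantees the buffer is large enough for whichever of
 * the three descriptor types gets copied out.
 */
#if 0
static void
dump_one_desc(const struct sge_qset *qs, unsigned int qnum, unsigned int idx)
{
	union {
		struct tx_desc txd;
		struct rsp_desc rspd;
		struct rx_desc rxd;
	} buf;
	unsigned char *p = (unsigned char *)&buf;
	int i, len;

	len = t3_get_desc(qs, qnum, idx, p);
	if (len == EINVAL) {
		printf("no descriptor for queue %u index %u\n", qnum, idx);
		return;
	}
	/* Print the descriptor as rows of 16 hex bytes. */
	for (i = 0; i < len; i++)
		printf("%02x%c", p[i], ((i & 15) == 15) ? '\n' : ' ');
	if (len & 15)
		printf("\n");
}
#endif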
3705