xref: /freebsd/sys/dev/cxgbe/t4_sge.c (revision bb15ca603fa442c72dde3f3cb8b46db6970e3950)
1 /*-
2  * Copyright (c) 2011 Chelsio Communications, Inc.
3  * All rights reserved.
4  * Written by: Navdeep Parhar <np@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include "opt_inet.h"
32 
33 #include <sys/types.h>
34 #include <sys/mbuf.h>
35 #include <sys/socket.h>
36 #include <sys/kernel.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/taskqueue.h>
40 #include <sys/sysctl.h>
41 #include <net/bpf.h>
42 #include <net/ethernet.h>
43 #include <net/if.h>
44 #include <net/if_vlan_var.h>
45 #include <netinet/in.h>
46 #include <netinet/ip.h>
47 #include <netinet/tcp.h>
48 
49 #include "common/common.h"
50 #include "common/t4_regs.h"
51 #include "common/t4_regs_values.h"
52 #include "common/t4_msg.h"
53 #include "common/t4fw_interface.h"
54 
55 struct fl_buf_info {
56 	int size;
57 	int type;
58 	uma_zone_t zone;
59 };
60 
61 /* Filled up by t4_sge_modload */
62 static struct fl_buf_info fl_buf_info[FL_BUF_SIZES];
63 
64 #define FL_BUF_SIZE(x)	(fl_buf_info[x].size)
65 #define FL_BUF_TYPE(x)	(fl_buf_info[x].type)
66 #define FL_BUF_ZONE(x)	(fl_buf_info[x].zone)
67 
68 enum {
69 	FL_PKTSHIFT = 2
70 };
71 
72 #define FL_ALIGN	min(CACHE_LINE_SIZE, 32)
73 #if CACHE_LINE_SIZE > 64
74 #define SPG_LEN		128
75 #else
76 #define SPG_LEN		64
77 #endif
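/*
 * Notes on the constants above: FL_PKTSHIFT is the number of padding bytes
 * the SGE inserts in front of every received frame (t4_eth_rx strips them
 * again); with a 14-byte Ethernet header this leaves the IP header 4-byte
 * aligned.  FL_ALIGN is programmed into INGPADBOUNDARY in t4_sge_init, and
 * SPG_LEN is the size of the status page that the hardware appends to each
 * egress queue (EGRSTATUSPAGESIZE selects between 64 and 128 bytes).
 */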
78 
79 /* Used to track coalesced tx work request */
80 struct txpkts {
81 	uint64_t *flitp;	/* ptr to flit where next pkt should start */
82 	uint8_t npkt;		/* # of packets in this work request */
83 	uint8_t nflits;		/* # of flits used by this work request */
84 	uint16_t plen;		/* total payload (sum of all packets) */
85 };
86 
87 /* A packet's SGL.  This + m_pkthdr has all info needed for tx */
88 struct sgl {
89 	int nsegs;		/* # of segments in the SGL, 0 means imm. tx */
90 	int nflits;		/* # of flits needed for the SGL */
91 	bus_dma_segment_t seg[TX_SGL_SEGS];
92 };
93 
94 static void t4_evt_rx(void *);
95 static void t4_eth_rx(void *);
96 static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
97     int, iq_intr_handler_t *, char *);
98 static inline void init_fl(struct sge_fl *, int, char *);
99 static inline void init_eq(struct sge_eq *, int, char *);
100 static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
101     bus_addr_t *, void **);
102 static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
103     void *);
104 static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
105     int, int);
106 static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
107 static int alloc_intrq(struct adapter *, int, int, int);
108 static int free_intrq(struct sge_iq *);
109 static int alloc_fwq(struct adapter *, int);
110 static int free_fwq(struct sge_iq *);
111 static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int);
112 static int free_rxq(struct port_info *, struct sge_rxq *);
113 static int alloc_ctrlq(struct adapter *, struct sge_ctrlq *, int);
114 static int free_ctrlq(struct adapter *, struct sge_ctrlq *);
115 static int alloc_txq(struct port_info *, struct sge_txq *, int);
116 static int free_txq(struct port_info *, struct sge_txq *);
117 static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
118 static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **);
119 static inline void iq_next(struct sge_iq *);
120 static inline void ring_fl_db(struct adapter *, struct sge_fl *);
121 static void refill_fl(struct adapter *, struct sge_fl *, int, int);
122 static int alloc_fl_sdesc(struct sge_fl *);
123 static void free_fl_sdesc(struct sge_fl *);
124 static int alloc_tx_maps(struct sge_txq *);
125 static void free_tx_maps(struct sge_txq *);
126 static void set_fl_tag_idx(struct sge_fl *, int);
127 
128 static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
129 static int free_pkt_sgl(struct sge_txq *, struct sgl *);
130 static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *,
131     struct sgl *);
132 static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *,
133     struct mbuf *, struct sgl *);
134 static void write_txpkts_wr(struct sge_txq *, struct txpkts *);
135 static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *,
136     struct txpkts *, struct mbuf *, struct sgl *);
137 static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *);
138 static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
139 static inline void ring_eq_db(struct adapter *, struct sge_eq *);
140 static inline int reclaimable(struct sge_eq *);
141 static int reclaim_tx_descs(struct sge_txq *, int, int);
142 static void write_eqflush_wr(struct sge_eq *);
143 static __be64 get_flit(bus_dma_segment_t *, int, int);
144 static int handle_sge_egr_update(struct adapter *,
145     const struct cpl_sge_egr_update *);
146 static void handle_cpl(struct adapter *, struct sge_iq *);
147 
148 static int ctrl_tx(struct adapter *, struct sge_ctrlq *, struct mbuf *);
149 static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
150 
151 extern void filter_rpl(struct adapter *, const struct cpl_set_tcb_rpl *);
152 
153 /*
154  * Called on MOD_LOAD and fills up fl_buf_info[].
155  */
156 void
157 t4_sge_modload(void)
158 {
159 	int i;
160 	int bufsize[FL_BUF_SIZES] = {
161 		MCLBYTES,
162 #if MJUMPAGESIZE != MCLBYTES
163 		MJUMPAGESIZE,
164 #endif
165 		MJUM9BYTES,
166 		MJUM16BYTES
167 	};
168 
169 	for (i = 0; i < FL_BUF_SIZES; i++) {
170 		FL_BUF_SIZE(i) = bufsize[i];
171 		FL_BUF_TYPE(i) = m_gettype(bufsize[i]);
172 		FL_BUF_ZONE(i) = m_getzone(bufsize[i]);
173 	}
174 }
175 
176 /**
177  *	t4_sge_init - initialize SGE
178  *	@sc: the adapter
179  *
180  *	Performs SGE initialization needed every time after a chip reset.
181  *	We do not initialize any of the queues here, instead the driver
182  *	We do not initialize any of the queues here; instead, the driver's
183  *	top level must request them individually.
184 void
185 t4_sge_init(struct adapter *sc)
186 {
187 	struct sge *s = &sc->sge;
188 	int i;
189 
190 	t4_set_reg_field(sc, A_SGE_CONTROL, V_PKTSHIFT(M_PKTSHIFT) |
191 			 V_INGPADBOUNDARY(M_INGPADBOUNDARY) |
192 			 F_EGRSTATUSPAGESIZE,
193 			 V_INGPADBOUNDARY(ilog2(FL_ALIGN) - 5) |
194 			 V_PKTSHIFT(FL_PKTSHIFT) |
195 			 F_RXPKTCPLMODE |
196 			 V_EGRSTATUSPAGESIZE(SPG_LEN == 128));
197 	t4_set_reg_field(sc, A_SGE_HOST_PAGE_SIZE,
198 			 V_HOSTPAGESIZEPF0(M_HOSTPAGESIZEPF0),
199 			 V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10));
200 
201 	for (i = 0; i < FL_BUF_SIZES; i++) {
202 		t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
203 		    FL_BUF_SIZE(i));
204 	}
205 
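	/*
	 * The freelist starve threshold is derived from the chip's egress
	 * congestion threshold (SGE_CONM_CTRL).  A newly created freelist is
	 * primed with at least this many buffers, rounded up to a multiple of
	 * 8 in alloc_iq_fl, so that it does not start out starving.
	 */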
206 	i = t4_read_reg(sc, A_SGE_CONM_CTRL);
207 	s->fl_starve_threshold = G_EGRTHRESHOLD(i) * 2 + 1;
208 
209 	t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD,
210 		     V_THRESHOLD_0(s->counter_val[0]) |
211 		     V_THRESHOLD_1(s->counter_val[1]) |
212 		     V_THRESHOLD_2(s->counter_val[2]) |
213 		     V_THRESHOLD_3(s->counter_val[3]));
214 
215 	t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1,
216 		     V_TIMERVALUE0(us_to_core_ticks(sc, s->timer_val[0])) |
217 		     V_TIMERVALUE1(us_to_core_ticks(sc, s->timer_val[1])));
218 	t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3,
219 		     V_TIMERVALUE2(us_to_core_ticks(sc, s->timer_val[2])) |
220 		     V_TIMERVALUE3(us_to_core_ticks(sc, s->timer_val[3])));
221 	t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5,
222 		     V_TIMERVALUE4(us_to_core_ticks(sc, s->timer_val[4])) |
223 		     V_TIMERVALUE5(us_to_core_ticks(sc, s->timer_val[5])));
224 }
225 
226 int
227 t4_create_dma_tag(struct adapter *sc)
228 {
229 	int rc;
230 
231 	rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
232 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
233 	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
234 	    NULL, &sc->dmat);
235 	if (rc != 0) {
236 		device_printf(sc->dev,
237 		    "failed to create main DMA tag: %d\n", rc);
238 	}
239 
240 	return (rc);
241 }
242 
243 int
244 t4_destroy_dma_tag(struct adapter *sc)
245 {
246 	if (sc->dmat)
247 		bus_dma_tag_destroy(sc->dmat);
248 
249 	return (0);
250 }
251 
252 /*
253  * Allocate and initialize the firmware event queue, control queues, and the
254  * interrupt queues.  The adapter owns all of these queues.
255  *
256  * Returns errno on failure.  Resources allocated up to that point may still be
257  * allocated.  Caller is responsible for cleanup in case this function fails.
258  */
259 int
260 t4_setup_adapter_queues(struct adapter *sc)
261 {
262 	int i, j, rc, intr_idx, qsize;
263 	struct sge_iq *iq;
264 	struct sge_ctrlq *ctrlq;
265 	iq_intr_handler_t *handler;
266 	char name[16];
267 
268 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
269 
270 	if (sysctl_ctx_init(&sc->ctx) == 0) {
271 		struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
272 		struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
273 
274 		sc->oid_fwq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO,
275 		    "fwq", CTLFLAG_RD, NULL, "firmware event queue");
276 		sc->oid_ctrlq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO,
277 		    "ctrlq", CTLFLAG_RD, NULL, "ctrl queues");
278 		sc->oid_intrq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO,
279 		    "intrq", CTLFLAG_RD, NULL, "interrupt queues");
280 	}
281 
282 	/*
283 	 * Interrupt queues
284 	 */
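	/*
	 * With INTR_SHARED there are only NINTRQ(sc) interrupt queues for the
	 * whole adapter; they are sized for the adapter-wide rxq count and
	 * assigned to the ports round-robin.  Otherwise every rx queue of
	 * every port gets a dedicated interrupt queue.
	 */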
285 	intr_idx = sc->intr_count - NINTRQ(sc);
286 	if (sc->flags & INTR_SHARED) {
287 		qsize = max((sc->sge.nrxq + 1) * 2, INTR_IQ_QSIZE);
288 		for (i = 0; i < NINTRQ(sc); i++, intr_idx++) {
289 			snprintf(name, sizeof(name), "%s intrq%d",
290 			    device_get_nameunit(sc->dev), i);
291 
292 			iq = &sc->sge.intrq[i];
293 			init_iq(iq, sc, 0, 0, qsize, INTR_IQ_ESIZE, NULL, name);
294 			rc = alloc_intrq(sc, i % sc->params.nports, i,
295 			    intr_idx);
296 
297 			if (rc != 0) {
298 				device_printf(sc->dev,
299 				    "failed to create %s: %d\n", name, rc);
300 				return (rc);
301 			}
302 		}
303 	} else {
304 		int qidx = 0;
305 		struct port_info *pi;
306 
307 		for (i = 0; i < sc->params.nports; i++) {
308 			pi = sc->port[i];
309 			qsize = max((pi->nrxq + 1) * 2, INTR_IQ_QSIZE);
310 			for (j = 0; j < pi->nrxq; j++, qidx++, intr_idx++) {
311 				snprintf(name, sizeof(name), "%s intrq%d",
312 				    device_get_nameunit(pi->dev), j);
313 
314 				iq = &sc->sge.intrq[qidx];
315 				init_iq(iq, sc, 0, 0, qsize, INTR_IQ_ESIZE,
316 				    NULL, name);
317 				rc = alloc_intrq(sc, i, qidx, intr_idx);
318 
319 				if (rc != 0) {
320 					device_printf(sc->dev,
321 					    "failed to create %s: %d\n",
322 					    name, rc);
323 					return (rc);
324 				}
325 			}
326 		}
327 	}
328 
329 	/*
330 	 * Firmware event queue
331 	 */
332 	snprintf(name, sizeof(name), "%s fwq", device_get_nameunit(sc->dev));
333 	if (sc->intr_count > T4_EXTRA_INTR) {
334 		handler = NULL;
335 		intr_idx = 1;
336 	} else {
337 		handler = t4_evt_rx;
338 		intr_idx = 0;
339 	}
340 
341 	iq = &sc->sge.fwq;
342 	init_iq(iq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, handler, name);
343 	rc = alloc_fwq(sc, intr_idx);
344 	if (rc != 0) {
345 		device_printf(sc->dev,
346 		    "failed to create firmware event queue: %d\n", rc);
347 
348 		return (rc);
349 	}
350 
351 	/*
352 	 * Control queues - one per port.
353 	 */
354 	ctrlq = &sc->sge.ctrlq[0];
355 	for (i = 0; i < sc->params.nports; i++, ctrlq++) {
356 		snprintf(name, sizeof(name), "%s ctrlq%d",
357 		    device_get_nameunit(sc->dev), i);
358 		init_eq(&ctrlq->eq, CTRL_EQ_QSIZE, name);
359 
360 		rc = alloc_ctrlq(sc, ctrlq, i);
361 		if (rc != 0) {
362 			device_printf(sc->dev,
363 			    "failed to create control queue %d: %d\n", i, rc);
364 			return (rc);
365 		}
366 	}
367 
368 	return (rc);
369 }
370 
371 /*
372  * Idempotent
373  */
374 int
375 t4_teardown_adapter_queues(struct adapter *sc)
376 {
377 	int i;
378 	struct sge_iq *iq;
379 
380 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
381 
382 	/* Do this before freeing the queues */
383 	if (sc->oid_fwq || sc->oid_ctrlq || sc->oid_intrq) {
384 		sysctl_ctx_free(&sc->ctx);
385 		sc->oid_fwq = NULL;
386 		sc->oid_ctrlq = NULL;
387 		sc->oid_intrq = NULL;
388 	}
389 
390 	for (i = 0; i < sc->params.nports; i++)
391 		free_ctrlq(sc, &sc->sge.ctrlq[i]);
392 
393 	iq = &sc->sge.fwq;
394 	free_fwq(iq);
395 
396 	for (i = 0; i < NINTRQ(sc); i++) {
397 		iq = &sc->sge.intrq[i];
398 		free_intrq(iq);
399 	}
400 
401 	return (0);
402 }
403 
404 int
405 t4_setup_eth_queues(struct port_info *pi)
406 {
407 	int rc = 0, i, intr_idx;
408 	struct sge_rxq *rxq;
409 	struct sge_txq *txq;
410 	char name[16];
411 	struct adapter *sc = pi->adapter;
412 
413 	if (sysctl_ctx_init(&pi->ctx) == 0) {
414 		struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
415 		struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
416 
417 		pi->oid_rxq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO,
418 		    "rxq", CTLFLAG_RD, NULL, "rx queues");
419 		pi->oid_txq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO,
420 		    "txq", CTLFLAG_RD, NULL, "tx queues");
421 	}
422 
423 	for_each_rxq(pi, i, rxq) {
424 
425 		snprintf(name, sizeof(name), "%s rxq%d-iq",
426 		    device_get_nameunit(pi->dev), i);
427 		init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
428 		    pi->qsize_rxq, RX_IQ_ESIZE, t4_eth_rx, name);
429 
430 		snprintf(name, sizeof(name), "%s rxq%d-fl",
431 		    device_get_nameunit(pi->dev), i);
432 		init_fl(&rxq->fl, pi->qsize_rxq / 8, name);
433 
434 		intr_idx = pi->first_rxq + i;
435 		if (sc->flags & INTR_SHARED)
436 			intr_idx %= NINTRQ(sc);
437 
438 		rc = alloc_rxq(pi, rxq, intr_idx, i);
439 		if (rc != 0)
440 			goto done;
441 	}
442 
443 	for_each_txq(pi, i, txq) {
444 
445 		snprintf(name, sizeof(name), "%s txq%d",
446 		    device_get_nameunit(pi->dev), i);
447 		init_eq(&txq->eq, pi->qsize_txq, name);
448 
449 		rc = alloc_txq(pi, txq, i);
450 		if (rc != 0)
451 			goto done;
452 	}
453 
454 done:
455 	if (rc)
456 		t4_teardown_eth_queues(pi);
457 
458 	return (rc);
459 }
460 
461 /*
462  * Idempotent
463  */
464 int
465 t4_teardown_eth_queues(struct port_info *pi)
466 {
467 	int i;
468 	struct sge_rxq *rxq;
469 	struct sge_txq *txq;
470 
471 	/* Do this before freeing the queues */
472 	if (pi->oid_txq || pi->oid_rxq) {
473 		sysctl_ctx_free(&pi->ctx);
474 		pi->oid_txq = pi->oid_rxq = NULL;
475 	}
476 
477 	for_each_txq(pi, i, txq) {
478 		free_txq(pi, txq);
479 	}
480 
481 	for_each_rxq(pi, i, rxq) {
482 		free_rxq(pi, rxq);
483 	}
484 
485 	return (0);
486 }
487 
488 /* Deals with errors and the first (and only) interrupt queue */
489 void
490 t4_intr_all(void *arg)
491 {
492 	struct adapter *sc = arg;
493 
494 	t4_intr_err(arg);
495 	t4_intr(&sc->sge.intrq[0]);
496 }
497 
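/*
 * An entry on an interrupt queue either carries a CPL meant for the adapter
 * itself or identifies another ingress queue whose interrupt was forwarded
 * here.  In the latter case that queue's handler is run inline, guarded by
 * its IQS_IDLE/IQS_BUSY state so it is never serviced by two threads at once.
 * The GTS register is updated every qsize/4 entries to return credits, and
 * once more at the end to re-arm the interrupt.
 */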
498 /* Deals with interrupts, and a few CPLs, on the given interrupt queue */
499 void
500 t4_intr(void *arg)
501 {
502 	struct sge_iq *iq = arg, *q;
503 	struct adapter *sc = iq->adapter;
504 	struct rsp_ctrl *ctrl;
505 	const struct rss_header *rss;
506 	int ndesc_pending = 0, ndesc_total = 0;
507 	int qid, rsp_type;
508 
509 	if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY))
510 		return;
511 
512 	while (is_new_response(iq, &ctrl)) {
513 
514 		rmb();
515 
516 		rss = (const void *)iq->cdesc;
517 		rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
518 
519 		if (__predict_false(rsp_type == X_RSPD_TYPE_CPL)) {
520 			handle_cpl(sc, iq);
521 			goto nextdesc;
522 		}
523 
524 		qid = ntohl(ctrl->pldbuflen_qid) - sc->sge.iq_start;
525 		q = sc->sge.iqmap[qid];
526 
527 		if (atomic_cmpset_32(&q->state, IQS_IDLE, IQS_BUSY)) {
528 			q->handler(q);
529 			atomic_cmpset_32(&q->state, IQS_BUSY, IQS_IDLE);
530 		}
531 
532 nextdesc:	ndesc_total++;
533 		if (++ndesc_pending >= iq->qsize / 4) {
534 			t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
535 			    V_CIDXINC(ndesc_pending) |
536 			    V_INGRESSQID(iq->cntxt_id) |
537 			    V_SEINTARM(
538 				V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
539 			ndesc_pending = 0;
540 		}
541 
542 		iq_next(iq);
543 	}
544 
545 	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) |
546 	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));
547 
548 	atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE);
549 }
550 
551 /* Deals with error interrupts */
552 void
553 t4_intr_err(void *arg)
554 {
555 	struct adapter *sc = arg;
556 
557 	t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
558 	t4_slow_intr_handler(sc);
559 }
560 
561 /* Deals with the firmware event queue */
562 void
563 t4_intr_evt(void *arg)
564 {
565 	struct sge_iq *iq = arg;
566 
567 	if (atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY)) {
568 		t4_evt_rx(arg);
569 		atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE);
570 	}
571 }
572 
573 static void
574 t4_evt_rx(void *arg)
575 {
576 	struct sge_iq *iq = arg;
577 	struct adapter *sc = iq->adapter;
578 	struct rsp_ctrl *ctrl;
579 	int ndesc_pending = 0, ndesc_total = 0;
580 
581 	KASSERT(iq == &sc->sge.fwq, ("%s: unexpected ingress queue", __func__));
582 
583 	while (is_new_response(iq, &ctrl)) {
584 		int rsp_type;
585 
586 		rmb();
587 
588 		rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
589 		if (__predict_false(rsp_type != X_RSPD_TYPE_CPL))
590 			panic("%s: unexpected rsp_type %d", __func__, rsp_type);
591 
592 		handle_cpl(sc, iq);
593 
594 		ndesc_total++;
595 		if (++ndesc_pending >= iq->qsize / 4) {
596 			t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
597 			    V_CIDXINC(ndesc_pending) |
598 			    V_INGRESSQID(iq->cntxt_id) |
599 			    V_SEINTARM(
600 				V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
601 			ndesc_pending = 0;
602 		}
603 
604 		iq_next(iq);
605 	}
606 
607 	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) |
608 	    V_INGRESSQID(iq->cntxt_id) | V_SEINTARM(iq->intr_params));
609 }
610 
611 #ifdef T4_PKT_TIMESTAMP
612 #define RX_COPY_THRESHOLD (MINCLSIZE - 8)
613 #else
614 #define RX_COPY_THRESHOLD MINCLSIZE
615 #endif
616 
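/*
 * NIC rx.  Each response descriptor describes a frame that occupies one or
 * more freelist buffers.  Frames shorter than RX_COPY_THRESHOLD are copied
 * into the mbuf and the cluster is left behind for reuse; larger frames get
 * the cluster attached to the mbuf instead.  Checksum and VLAN results from
 * the CPL are translated into mbuf metadata, LRO is attempted when enabled,
 * and the freelist and GTS register are replenished/updated as the loop runs.
 * (The copy threshold shrinks by 8 bytes when a timestamp is prepended.)
 */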
617 static void
618 t4_eth_rx(void *arg)
619 {
620 	struct sge_rxq *rxq = arg;
621 	struct sge_iq *iq = arg;
622 	struct adapter *sc = iq->adapter;
623 	struct rsp_ctrl *ctrl;
624 	struct ifnet *ifp = rxq->ifp;
625 	struct sge_fl *fl = &rxq->fl;
626 	struct fl_sdesc *sd = &fl->sdesc[fl->cidx], *sd_next;
627 	const struct rss_header *rss;
628 	const struct cpl_rx_pkt *cpl;
629 	uint32_t len;
630 	int ndescs = 0, i;
631 	struct mbuf *m0, *m;
632 #ifdef INET
633 	struct lro_ctrl *lro = &rxq->lro;
634 	struct lro_entry *l;
635 #endif
636 
637 	prefetch(sd->m);
638 	prefetch(sd->cl);
639 
640 	iq->intr_next = iq->intr_params;
641 	while (is_new_response(iq, &ctrl)) {
642 
643 		rmb();
644 
645 		rss = (const void *)iq->cdesc;
646 		i = G_RSPD_TYPE(ctrl->u.type_gen);
647 
648 		KASSERT(i == X_RSPD_TYPE_FLBUF && rss->opcode == CPL_RX_PKT,
649 		    ("%s: unexpected type %d CPL opcode 0x%x",
650 		    __func__, i, rss->opcode));
651 
652 		sd_next = sd + 1;
653 		if (__predict_false(fl->cidx + 1 == fl->cap))
654 			sd_next = fl->sdesc;
655 		prefetch(sd_next->m);
656 		prefetch(sd_next->cl);
657 
658 		cpl = (const void *)(rss + 1);
659 
660 		m0 = sd->m;
661 		sd->m = NULL;	/* consumed */
662 
663 		len = be32toh(ctrl->pldbuflen_qid);
664 		if (__predict_false((len & F_RSPD_NEWBUF) == 0))
665 			panic("%s: cannot handle packed frames", __func__);
666 		len = G_RSPD_LEN(len);
667 
668 		bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
669 		    BUS_DMASYNC_POSTREAD);
670 
671 		m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR);
672 
673 #ifdef T4_PKT_TIMESTAMP
674 		*mtod(m0, uint64_t *) =
675 		    be64toh(ctrl->u.last_flit & 0xfffffffffffffff);
676 		m0->m_data += 8;
677 
678 		/*
679 		 * 60 bit timestamp value is *(uint64_t *)m0->m_pktdat.  Note
680 		 * that it is in the leading free-space (see M_LEADINGSPACE) in
681 		 * the mbuf.  The kernel can clobber it during a pullup,
682 		 * m_copymdata, etc.  You need to make sure that the mbuf
683 		 * reaches you unmolested if you care about the timestamp.
684 		 */
685 #endif
686 
687 		if (len < RX_COPY_THRESHOLD) {
688 			/* copy data to mbuf, buffer will be recycled */
689 			bcopy(sd->cl, mtod(m0, caddr_t), len);
690 			m0->m_len = len;
691 		} else {
692 			bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
693 			m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx));
694 			sd->cl = NULL;	/* consumed */
695 			m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
696 		}
697 
698 		len -= FL_PKTSHIFT;
699 		m0->m_len -= FL_PKTSHIFT;
700 		m0->m_data += FL_PKTSHIFT;
701 
702 		m0->m_pkthdr.len = len;
703 		m0->m_pkthdr.rcvif = ifp;
704 		m0->m_flags |= M_FLOWID;
705 		m0->m_pkthdr.flowid = rss->hash_val;
706 
707 		if (cpl->csum_calc && !cpl->err_vec &&
708 		    ifp->if_capenable & IFCAP_RXCSUM) {
709 			m0->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED |
710 			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
711 			if (cpl->ip_frag)
712 				m0->m_pkthdr.csum_data = be16toh(cpl->csum);
713 			else
714 				m0->m_pkthdr.csum_data = 0xffff;
715 			rxq->rxcsum++;
716 		}
717 
718 		if (cpl->vlan_ex) {
719 			m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
720 			m0->m_flags |= M_VLANTAG;
721 			rxq->vlan_extraction++;
722 		}
723 
724 		i = 1;	/* # of fl sdesc used */
725 		sd = sd_next;
726 		if (__predict_false(++fl->cidx == fl->cap))
727 			fl->cidx = 0;
728 
729 		len -= m0->m_len;
730 		m = m0;
731 		while (len) {
732 			i++;
733 
734 			sd_next = sd + 1;
735 			if (__predict_false(fl->cidx + 1 == fl->cap))
736 				sd_next = fl->sdesc;
737 			prefetch(sd_next->m);
738 			prefetch(sd_next->cl);
739 
740 			m->m_next = sd->m;
741 			sd->m = NULL;	/* consumed */
742 			m = m->m_next;
743 
744 			bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
745 			    BUS_DMASYNC_POSTREAD);
746 
747 			m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
748 			if (len <= MLEN) {
749 				bcopy(sd->cl, mtod(m, caddr_t), len);
750 				m->m_len = len;
751 			} else {
752 				bus_dmamap_unload(fl->tag[sd->tag_idx],
753 				    sd->map);
754 				m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx));
755 				sd->cl = NULL;	/* consumed */
756 				m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
757 			}
758 
760 			sd = sd_next;
761 			if (__predict_false(++fl->cidx == fl->cap))
762 				fl->cidx = 0;
763 
764 			len -= m->m_len;
765 		}
766 
767 #ifdef INET
768 		if (cpl->l2info & htobe32(F_RXF_LRO) &&
769 		    rxq->flags & RXQ_LRO_ENABLED &&
770 		    tcp_lro_rx(lro, m0, 0) == 0) {
771 			/* queued for LRO */
772 		} else
773 #endif
774 		ifp->if_input(ifp, m0);
775 
776 		FL_LOCK(fl);
777 		fl->needed += i;
778 		if (fl->needed >= 32)
779 			refill_fl(sc, fl, 64, 32);
780 		FL_UNLOCK(fl);
781 
782 		if (++ndescs > 32) {
783 			t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
784 			    V_CIDXINC(ndescs) |
785 			    V_INGRESSQID((u32)iq->cntxt_id) |
786 			    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
787 			ndescs = 0;
788 		}
789 
790 		iq_next(iq);
791 	}
792 
793 #ifdef INET
794 	while (!SLIST_EMPTY(&lro->lro_active)) {
795 		l = SLIST_FIRST(&lro->lro_active);
796 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
797 		tcp_lro_flush(lro, l);
798 	}
799 #endif
800 
801 	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
802 	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_next));
803 
804 	FL_LOCK(fl);
805 	if (fl->needed >= 32)
806 		refill_fl(sc, fl, 128, 8);
807 	FL_UNLOCK(fl);
808 }
809 
810 int
811 t4_mgmt_tx(struct adapter *sc, struct mbuf *m)
812 {
813 	return ctrl_tx(sc, &sc->sge.ctrlq[0], m);
814 }
815 
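/*
 * The header sizes below are expressed in flits, the SGE's 8-byte (64-bit)
 * unit of work request and descriptor space.  They are used when sizing the
 * txpkt and txpkts work requests built further down.
 */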
816 /* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
817 #define TXPKTS_PKT_HDR ((\
818     sizeof(struct ulp_txpkt) + \
819     sizeof(struct ulptx_idata) + \
820     sizeof(struct cpl_tx_pkt_core) \
821     ) / 8)
822 
823 /* Header of a coalesced tx WR, before SGL of first packet (in flits) */
824 #define TXPKTS_WR_HDR (\
825     sizeof(struct fw_eth_tx_pkts_wr) / 8 + \
826     TXPKTS_PKT_HDR)
827 
828 /* Header of a tx WR, before SGL of first packet (in flits) */
829 #define TXPKT_WR_HDR ((\
830     sizeof(struct fw_eth_tx_pkt_wr) + \
831     sizeof(struct cpl_tx_pkt_core) \
832     ) / 8 )
833 
834 /* Header of a tx LSO WR, before SGL of first packet (in flits) */
835 #define TXPKT_LSO_WR_HDR ((\
836     sizeof(struct fw_eth_tx_pkt_wr) + \
837     sizeof(struct cpl_tx_pkt_lso) + \
838     sizeof(struct cpl_tx_pkt_core) \
839     ) / 8 )
840 
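/*
 * NIC tx main loop.  Called with the txq lock held; transmits the given frame
 * and then keeps draining the txq's buf_ring.  When more frames are waiting
 * it tries to coalesce them into a single txpkts work request, otherwise each
 * frame goes out in its own txpkt work request.  The doorbell is rung less
 * and less often as the ring fills up, and completed descriptors are
 * reclaimed opportunistically along the way.
 */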
841 int
842 t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
843 {
844 	struct port_info *pi = (void *)ifp->if_softc;
845 	struct adapter *sc = pi->adapter;
846 	struct sge_eq *eq = &txq->eq;
847 	struct buf_ring *br = txq->br;
848 	struct mbuf *next;
849 	int rc, coalescing, can_reclaim;
850 	struct txpkts txpkts;
851 	struct sgl sgl;
852 
853 	TXQ_LOCK_ASSERT_OWNED(txq);
854 	KASSERT(m, ("%s: called with nothing to do.", __func__));
855 
856 	prefetch(&eq->desc[eq->pidx]);
857 	prefetch(&txq->sdesc[eq->pidx]);
858 
859 	txpkts.npkt = 0;/* indicates there's nothing in txpkts */
860 	coalescing = 0;
861 
862 	if (eq->avail < 8)
863 		reclaim_tx_descs(txq, 0, 8);
864 
865 	for (; m; m = next ? next : drbr_dequeue(ifp, br)) {
866 
867 		if (eq->avail < 8)
868 			break;
869 
870 		next = m->m_nextpkt;
871 		m->m_nextpkt = NULL;
872 
873 		if (next || buf_ring_peek(br))
874 			coalescing = 1;
875 
876 		rc = get_pkt_sgl(txq, &m, &sgl, coalescing);
877 		if (rc != 0) {
878 			if (rc == ENOMEM) {
879 
880 				/* Short of resources, suspend tx */
881 
882 				m->m_nextpkt = next;
883 				break;
884 			}
885 
886 			/*
887 			 * Unrecoverable error for this packet, throw it away
888 			 * and move on to the next.  get_pkt_sgl may already
889 			 * have freed m (it will be NULL in that case and the
890 			 * m_freem here is still safe).
891 			 */
892 
893 			m_freem(m);
894 			continue;
895 		}
896 
897 		if (coalescing &&
898 		    add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) {
899 
900 			/* Successfully absorbed into txpkts */
901 
902 			write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl);
903 			goto doorbell;
904 		}
905 
906 		/*
907 		 * We weren't coalescing to begin with, or current frame could
908 		 * not be coalesced (add_to_txpkts flushes txpkts if a frame
909 		 * given to it can't be coalesced).  Either way there should be
910 		 * nothing in txpkts.
911 		 */
912 		KASSERT(txpkts.npkt == 0,
913 		    ("%s: txpkts not empty: %d", __func__, txpkts.npkt));
914 
915 		/* We're sending out individual packets now */
916 		coalescing = 0;
917 
918 		if (eq->avail < 8)
919 			reclaim_tx_descs(txq, 0, 8);
920 		rc = write_txpkt_wr(pi, txq, m, &sgl);
921 		if (rc != 0) {
922 
923 			/* Short of hardware descriptors, suspend tx */
924 
925 			/*
926 			 * This is an unlikely but expensive failure.  We've
927 			 * done all the hard work (DMA mappings etc.) and now we
928 			 * can't send out the packet.  What's worse, we have to
929 			 * spend even more time freeing up everything in sgl.
930 			 */
931 			txq->no_desc++;
932 			free_pkt_sgl(txq, &sgl);
933 
934 			m->m_nextpkt = next;
935 			break;
936 		}
937 
938 		ETHER_BPF_MTAP(ifp, m);
939 		if (sgl.nsegs == 0)
940 			m_freem(m);
941 
942 doorbell:
943 		/* Fewer and fewer doorbells as the queue fills up */
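		/*
		 * 1 << (fls(in use) / 2) is roughly the square root of the
		 * number of descriptors in use: with 64 outstanding the
		 * doorbell is rung once 8 or more writes are pending, with
		 * ~1K outstanding only once every 32.
		 */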
944 		if (eq->pending >= (1 << (fls(eq->qsize - eq->avail) / 2)))
945 		    ring_eq_db(sc, eq);
946 
947 		can_reclaim = reclaimable(eq);
948 		if (can_reclaim >= 32)
949 			reclaim_tx_descs(txq, can_reclaim, 32);
950 	}
951 
952 	if (txpkts.npkt > 0)
953 		write_txpkts_wr(txq, &txpkts);
954 
955 	/*
956 	 * m not NULL means there was an error but we haven't thrown it away.
957 	 * This can happen when we're short of tx descriptors (no_desc) or maybe
958 	 * even DMA maps (no_dmamap).  Either way, a credit flush and reclaim
959 	 * will get things going again.
960 	 *
961 	 * If eq->avail is already 0 we know a credit flush was requested in the
962 	 * WR that reduced it to 0 so we don't need another flush (we don't have
963 	 * any descriptor for a flush WR anyway, duh).
964 	 */
965 	if (m && eq->avail > 0 && !(eq->flags & EQ_CRFLUSHED)) {
966 		struct tx_sdesc *txsd = &txq->sdesc[eq->pidx];
967 
968 		txsd->desc_used = 1;
969 		txsd->credits = 0;
970 		write_eqflush_wr(eq);
971 	}
972 	txq->m = m;
973 
974 	if (eq->pending)
975 		ring_eq_db(sc, eq);
976 
977 	can_reclaim = reclaimable(eq);
978 	if (can_reclaim >= 32)
979 		reclaim_tx_descs(txq, can_reclaim, 128);
980 
981 	return (0);
982 }
983 
984 void
985 t4_update_fl_bufsize(struct ifnet *ifp)
986 {
987 	struct port_info *pi = ifp->if_softc;
988 	struct sge_rxq *rxq;
989 	struct sge_fl *fl;
990 	int i;
991 
992 	for_each_rxq(pi, i, rxq) {
993 		fl = &rxq->fl;
994 
995 		FL_LOCK(fl);
996 		set_fl_tag_idx(fl, ifp->if_mtu);
997 		FL_UNLOCK(fl);
998 	}
999 }
1000 
1001 /*
1002  * A non-NULL handler indicates this iq will not receive direct interrupts;
1003  * its handler will be invoked by an interrupt queue instead.
1004  */
1005 static inline void
1006 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
1007     int qsize, int esize, iq_intr_handler_t *handler, char *name)
1008 {
1009 	KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
1010 	    ("%s: bad tmr_idx %d", __func__, tmr_idx));
1011 	KASSERT(pktc_idx < SGE_NCOUNTERS,	/* -ve is ok, means don't use */
1012 	    ("%s: bad pktc_idx %d", __func__, pktc_idx));
1013 
1014 	iq->flags = 0;
1015 	iq->adapter = sc;
1016 	iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx) |
1017 	    V_QINTR_CNT_EN(pktc_idx >= 0);
1018 	iq->intr_pktc_idx = pktc_idx;
1019 	iq->qsize = roundup(qsize, 16);		/* See FW_IQ_CMD/iqsize */
1020 	iq->esize = max(esize, 16);		/* See FW_IQ_CMD/iqesize */
1021 	iq->handler = handler;
1022 	strlcpy(iq->lockname, name, sizeof(iq->lockname));
1023 }
1024 
1025 static inline void
1026 init_fl(struct sge_fl *fl, int qsize, char *name)
1027 {
1028 	fl->qsize = qsize;
1029 	strlcpy(fl->lockname, name, sizeof(fl->lockname));
1030 }
1031 
1032 static inline void
1033 init_eq(struct sge_eq *eq, int qsize, char *name)
1034 {
1035 	eq->qsize = qsize;
1036 	strlcpy(eq->lockname, name, sizeof(eq->lockname));
1037 }
1038 
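/*
 * Allocate a DMA-coherent, zeroed descriptor ring (512 byte aligned) and hand
 * back its tag, map, bus address, and kernel virtual address.  free_ring can
 * be called on a partially constructed ring.
 */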
1039 static int
1040 alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag,
1041     bus_dmamap_t *map, bus_addr_t *pa, void **va)
1042 {
1043 	int rc;
1044 
1045 	rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR,
1046 	    BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag);
1047 	if (rc != 0) {
1048 		device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc);
1049 		goto done;
1050 	}
1051 
1052 	rc = bus_dmamem_alloc(*tag, va,
1053 	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map);
1054 	if (rc != 0) {
1055 		device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc);
1056 		goto done;
1057 	}
1058 
1059 	rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0);
1060 	if (rc != 0) {
1061 		device_printf(sc->dev, "cannot load DMA map: %d\n", rc);
1062 		goto done;
1063 	}
1064 done:
1065 	if (rc)
1066 		free_ring(sc, *tag, *map, *pa, *va);
1067 
1068 	return (rc);
1069 }
1070 
1071 static int
1072 free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
1073     bus_addr_t pa, void *va)
1074 {
1075 	if (pa)
1076 		bus_dmamap_unload(tag, map);
1077 	if (va)
1078 		bus_dmamem_free(tag, va, map);
1079 	if (tag)
1080 		bus_dma_tag_destroy(tag);
1081 
1082 	return (0);
1083 }
1084 
1085 /*
1086  * Allocates the ring for an ingress queue and an optional freelist.  If the
1087  * freelist is specified it will be allocated and then associated with the
1088  * ingress queue.
1089  *
1090  * Returns errno on failure.  Resources allocated up to that point may still be
1091  * allocated.  Caller is responsible for cleanup in case this function fails.
1092  *
1093  * If the ingress queue will take interrupts directly (iq->handler == NULL) then
1094  * the intr_idx specifies the vector, starting from 0.  Otherwise it specifies
1095  * the index of the interrupt queue to which its interrupts will be forwarded.
1096  */
1097 static int
1098 alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
1099     int intr_idx, int cong)
1100 {
1101 	int rc, i, cntxt_id;
1102 	size_t len;
1103 	struct fw_iq_cmd c;
1104 	struct adapter *sc = iq->adapter;
1105 	__be32 v = 0;
1106 
1107 	len = iq->qsize * iq->esize;
1108 	rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
1109 	    (void **)&iq->desc);
1110 	if (rc != 0)
1111 		return (rc);
1112 
1113 	bzero(&c, sizeof(c));
1114 	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
1115 	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
1116 	    V_FW_IQ_CMD_VFN(0));
1117 
1118 	c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
1119 	    FW_LEN16(c));
1120 
1121 	/* Special handling for firmware event queue */
1122 	if (iq == &sc->sge.fwq)
1123 		v |= F_FW_IQ_CMD_IQASYNCH;
1124 
1125 	if (iq->handler) {
1126 		KASSERT(intr_idx < NINTRQ(sc),
1127 		    ("%s: invalid indirect intr_idx %d", __func__, intr_idx));
1128 		v |= F_FW_IQ_CMD_IQANDST;
1129 		v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.intrq[intr_idx].abs_id);
1130 	} else {
1131 		KASSERT(intr_idx < sc->intr_count,
1132 		    ("%s: invalid direct intr_idx %d", __func__, intr_idx));
1133 		v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
1134 	}
1135 
1136 	c.type_to_iqandstindex = htobe32(v |
1137 	    V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
1138 	    V_FW_IQ_CMD_VIID(pi->viid) |
1139 	    V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
1140 	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
1141 	    F_FW_IQ_CMD_IQGTSMODE |
1142 	    V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
1143 	    V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
1144 	c.iqsize = htobe16(iq->qsize);
1145 	c.iqaddr = htobe64(iq->ba);
1146 	if (cong >= 0)
1147 		c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN);
1148 
1149 	if (fl) {
1150 		mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);
1151 
1152 		for (i = 0; i < FL_BUF_SIZES; i++) {
1153 
1154 			/*
1155 			 * A freelist buffer must be 16 byte aligned as the SGE
1156 			 * uses the low 4 bits of the bus addr to figure out the
1157 			 * buffer size.
1158 			 */
1159 			rc = bus_dma_tag_create(sc->dmat, 16, 0,
1160 			    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1161 			    FL_BUF_SIZE(i), 1, FL_BUF_SIZE(i), BUS_DMA_ALLOCNOW,
1162 			    NULL, NULL, &fl->tag[i]);
1163 			if (rc != 0) {
1164 				device_printf(sc->dev,
1165 				    "failed to create fl DMA tag[%d]: %d\n",
1166 				    i, rc);
1167 				return (rc);
1168 			}
1169 		}
1170 		len = fl->qsize * RX_FL_ESIZE;
1171 		rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
1172 		    &fl->ba, (void **)&fl->desc);
1173 		if (rc)
1174 			return (rc);
1175 
1176 		/* Allocate space for one software descriptor per buffer. */
1177 		fl->cap = (fl->qsize - SPG_LEN / RX_FL_ESIZE) * 8;
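		/*
		 * Each hardware freelist descriptor (RX_FL_ESIZE bytes) holds
		 * 8 buffer addresses, hence the multiply by 8 here and the
		 * divide by 8 when the freelist doorbell is rung; the status
		 * page at the end of the ring takes SPG_LEN / RX_FL_ESIZE
		 * descriptors out of the total.
		 */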
1178 		FL_LOCK(fl);
1179 		set_fl_tag_idx(fl, pi->ifp->if_mtu);
1180 		rc = alloc_fl_sdesc(fl);
1181 		FL_UNLOCK(fl);
1182 		if (rc != 0) {
1183 			device_printf(sc->dev,
1184 			    "failed to setup fl software descriptors: %d\n",
1185 			    rc);
1186 			return (rc);
1187 		}
1188 		fl->needed = fl->cap;
1189 
1190 		c.iqns_to_fl0congen |=
1191 		    htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
1192 			F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
1193 			F_FW_IQ_CMD_FL0PADEN);
1194 		if (cong >= 0) {
1195 			c.iqns_to_fl0congen |=
1196 				htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
1197 				    F_FW_IQ_CMD_FL0CONGCIF |
1198 				    F_FW_IQ_CMD_FL0CONGEN);
1199 		}
1200 		c.fl0dcaen_to_fl0cidxfthresh =
1201 		    htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
1202 			V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
1203 		c.fl0size = htobe16(fl->qsize);
1204 		c.fl0addr = htobe64(fl->ba);
1205 	}
1206 
1207 	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
1208 	if (rc != 0) {
1209 		device_printf(sc->dev,
1210 		    "failed to create ingress queue: %d\n", rc);
1211 		return (rc);
1212 	}
1213 
1214 	iq->cdesc = iq->desc;
1215 	iq->cidx = 0;
1216 	iq->gen = 1;
1217 	iq->intr_next = iq->intr_params;
1218 	iq->cntxt_id = be16toh(c.iqid);
1219 	iq->abs_id = be16toh(c.physiqid);
1220 	iq->flags |= (IQ_ALLOCATED | IQ_STARTED);
1221 
1222 	cntxt_id = iq->cntxt_id - sc->sge.iq_start;
1223 	KASSERT(cntxt_id < sc->sge.niq,
1224 	    ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
1225 	    cntxt_id, sc->sge.niq - 1));
1226 	sc->sge.iqmap[cntxt_id] = iq;
1227 
1228 	if (fl) {
1229 		fl->cntxt_id = be16toh(c.fl0id);
1230 		fl->pidx = fl->cidx = 0;
1231 
1232 		cntxt_id = fl->cntxt_id - sc->sge.eq_start;
1233 		KASSERT(cntxt_id < sc->sge.neq,
1234 		    ("%s: fl->cntxt_id (%d) more than the max (%d)", __func__,
1235 		    cntxt_id, sc->sge.neq - 1));
1236 		sc->sge.eqmap[cntxt_id] = (void *)fl;
1237 
1238 		FL_LOCK(fl);
1239 		/* Just enough to make sure it doesn't starve right away. */
1240 		refill_fl(sc, fl, roundup(sc->sge.fl_starve_threshold, 8), 8);
1241 		FL_UNLOCK(fl);
1242 	}
1243 
1244 	/* Enable IQ interrupts */
1245 	atomic_store_rel_32(&iq->state, IQS_IDLE);
1246 	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
1247 	    V_INGRESSQID(iq->cntxt_id));
1248 
1249 	return (0);
1250 }
1251 
1252 /*
1253  * This can be called with the iq/fl in any state - fully allocated and
1254  * functional, partially allocated, even all-zeroed out.
1255  */
1256 static int
1257 free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
1258 {
1259 	int i, rc;
1260 	struct adapter *sc = iq->adapter;
1261 	device_t dev;
1262 
1263 	if (sc == NULL)
1264 		return (0);	/* nothing to do */
1265 
1266 	dev = pi ? pi->dev : sc->dev;
1267 
1268 	if (iq->flags & IQ_STARTED) {
1269 		rc = -t4_iq_start_stop(sc, sc->mbox, 0, sc->pf, 0,
1270 		    iq->cntxt_id, fl ? fl->cntxt_id : 0xffff, 0xffff);
1271 		if (rc != 0) {
1272 			device_printf(dev,
1273 			    "failed to stop queue %p: %d\n", iq, rc);
1274 			return (rc);
1275 		}
1276 		iq->flags &= ~IQ_STARTED;
1277 
1278 		/* Synchronize with the interrupt handler */
1279 		while (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_DISABLED))
1280 			pause("iqfree", hz / 1000);
1281 	}
1282 
1283 	if (iq->flags & IQ_ALLOCATED) {
1284 
1285 		rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
1286 		    FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
1287 		    fl ? fl->cntxt_id : 0xffff, 0xffff);
1288 		if (rc != 0) {
1289 			device_printf(dev,
1290 			    "failed to free queue %p: %d\n", iq, rc);
1291 			return (rc);
1292 		}
1293 		iq->flags &= ~IQ_ALLOCATED;
1294 	}
1295 
1296 	free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc);
1297 
1298 	bzero(iq, sizeof(*iq));
1299 
1300 	if (fl) {
1301 		free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba,
1302 		    fl->desc);
1303 
1304 		if (fl->sdesc) {
1305 			FL_LOCK(fl);
1306 			free_fl_sdesc(fl);
1307 			FL_UNLOCK(fl);
1308 		}
1309 
1310 		if (mtx_initialized(&fl->fl_lock))
1311 			mtx_destroy(&fl->fl_lock);
1312 
1313 		for (i = 0; i < FL_BUF_SIZES; i++) {
1314 			if (fl->tag[i])
1315 				bus_dma_tag_destroy(fl->tag[i]);
1316 		}
1317 
1318 		bzero(fl, sizeof(*fl));
1319 	}
1320 
1321 	return (0);
1322 }
1323 
1324 static int
1325 alloc_intrq(struct adapter *sc, int port_idx, int intrq_idx, int intr_idx)
1326 {
1327 	int rc;
1328 	struct sysctl_oid *oid;
1329 	struct sysctl_oid_list *children;
1330 	char name[16];
1331 	struct sge_iq *intrq = &sc->sge.intrq[intrq_idx];
1332 
1333 	rc = alloc_iq_fl(sc->port[port_idx], intrq, NULL, intr_idx, -1);
1334 	if (rc != 0)
1335 		return (rc);
1336 
1337 	children = SYSCTL_CHILDREN(sc->oid_intrq);
1338 
1339 	snprintf(name, sizeof(name), "%d", intrq_idx);
1340 	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD,
1341 	    NULL, "interrupt queue");
1342 	children = SYSCTL_CHILDREN(oid);
1343 
1344 	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
1345 	    CTLTYPE_INT | CTLFLAG_RD, &intrq->cidx, 0, sysctl_uint16, "I",
1346 	    "consumer index");
1347 
1348 	return (rc);
1349 }
1350 
1351 static int
1352 free_intrq(struct sge_iq *iq)
1353 {
1354 	return free_iq_fl(NULL, iq, NULL);
1355 
1356 }
1357 
1358 static int
1359 alloc_fwq(struct adapter *sc, int intr_idx)
1360 {
1361 	int rc;
1362 	struct sysctl_oid_list *children;
1363 	struct sge_iq *fwq = &sc->sge.fwq;
1364 
1365 	rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
1366 	if (rc != 0)
1367 		return (rc);
1368 
1369 	children = SYSCTL_CHILDREN(sc->oid_fwq);
1370 
1371 	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "abs_id",
1372 	    CTLTYPE_INT | CTLFLAG_RD, &fwq->abs_id, 0, sysctl_uint16, "I",
1373 	    "absolute id of the queue");
1374 	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cntxt_id",
1375 	    CTLTYPE_INT | CTLFLAG_RD, &fwq->cntxt_id, 0, sysctl_uint16, "I",
1376 	    "SGE context id of the queue");
1377 	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
1378 	    CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I",
1379 	    "consumer index");
1380 
1381 	return (rc);
1382 }
1383 
1384 static int
1385 free_fwq(struct sge_iq *iq)
1386 {
1387 	return free_iq_fl(NULL, iq, NULL);
1388 }
1389 
1390 static int
1391 alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx)
1392 {
1393 	int rc;
1394 	struct sysctl_oid *oid;
1395 	struct sysctl_oid_list *children;
1396 	char name[16];
1397 
1398 	rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, 1 << pi->tx_chan);
1399 	if (rc != 0)
1400 		return (rc);
1401 
1402 	FL_LOCK(&rxq->fl);
1403 	refill_fl(pi->adapter, &rxq->fl, rxq->fl.needed / 8, 8);
1404 	FL_UNLOCK(&rxq->fl);
1405 
1406 #ifdef INET
1407 	rc = tcp_lro_init(&rxq->lro);
1408 	if (rc != 0)
1409 		return (rc);
1410 	rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */
1411 
1412 	if (pi->ifp->if_capenable & IFCAP_LRO)
1413 		rxq->flags |= RXQ_LRO_ENABLED;
1414 #endif
1415 	rxq->ifp = pi->ifp;
1416 
1417 	children = SYSCTL_CHILDREN(pi->oid_rxq);
1418 
1419 	snprintf(name, sizeof(name), "%d", idx);
1420 	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
1421 	    NULL, "rx queue");
1422 	children = SYSCTL_CHILDREN(oid);
1423 
1424 	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
1425 	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I",
1426 	    "absolute id of the queue");
1427 	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
1428 	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cntxt_id, 0, sysctl_uint16, "I",
1429 	    "SGE context id of the queue");
1430 	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
1431 	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.cidx, 0, sysctl_uint16, "I",
1432 	    "consumer index");
1433 #ifdef INET
1434 	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD,
1435 	    &rxq->lro.lro_queued, 0, NULL);
1436 	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD,
1437 	    &rxq->lro.lro_flushed, 0, NULL);
1438 #endif
1439 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD,
1440 	    &rxq->rxcsum, "# of times hardware assisted with checksum");
1441 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction",
1442 	    CTLFLAG_RD, &rxq->vlan_extraction,
1443 	    "# of times hardware extracted 802.1Q tag");
1444 
1445 	children = SYSCTL_CHILDREN(oid);
1446 	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, "fl", CTLFLAG_RD,
1447 	    NULL, "freelist");
1448 	children = SYSCTL_CHILDREN(oid);
1449 
1450 	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cntxt_id",
1451 	    CTLTYPE_INT | CTLFLAG_RD, &rxq->fl.cntxt_id, 0, sysctl_uint16, "I",
1452 	    "SGE context id of the queue");
1453 	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cidx", CTLFLAG_RD,
1454 	    &rxq->fl.cidx, 0, "consumer index");
1455 	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "pidx", CTLFLAG_RD,
1456 	    &rxq->fl.pidx, 0, "producer index");
1457 
1458 	return (rc);
1459 }
1460 
1461 static int
1462 free_rxq(struct port_info *pi, struct sge_rxq *rxq)
1463 {
1464 	int rc;
1465 
1466 #ifdef INET
1467 	if (rxq->lro.ifp) {
1468 		tcp_lro_free(&rxq->lro);
1469 		rxq->lro.ifp = NULL;
1470 	}
1471 #endif
1472 
1473 	rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
1474 	if (rc == 0)
1475 		bzero(rxq, sizeof(*rxq));
1476 
1477 	return (rc);
1478 }
1479 
1480 static int
1481 alloc_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq, int idx)
1482 {
1483 	int rc, cntxt_id;
1484 	size_t len;
1485 	struct fw_eq_ctrl_cmd c;
1486 	struct sge_eq *eq = &ctrlq->eq;
1487 	char name[16];
1488 	struct sysctl_oid *oid;
1489 	struct sysctl_oid_list *children;
1490 
1491 	mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
1492 
1493 	len = eq->qsize * CTRL_EQ_ESIZE;
1494 	rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
1495 	    &eq->ba, (void **)&eq->desc);
1496 	if (rc)
1497 		return (rc);
1498 
1499 	eq->cap = eq->qsize - SPG_LEN / CTRL_EQ_ESIZE;
1500 	eq->spg = (void *)&eq->desc[eq->cap];
1501 	eq->avail = eq->cap - 1;	/* one less to avoid cidx = pidx */
1502 	if (sc->flags & INTR_SHARED)
1503 		eq->iqid = sc->sge.intrq[idx % NINTRQ(sc)].cntxt_id;
1504 	else
1505 		eq->iqid = sc->sge.intrq[sc->port[idx]->first_rxq].cntxt_id;
1506 
1507 	bzero(&c, sizeof(c));
1508 
1509 	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
1510 	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
1511 	    V_FW_EQ_CTRL_CMD_VFN(0));
1512 	c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
1513 	    F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
1514 	c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */
1515 	c.physeqid_pkd = htobe32(0);
1516 	c.fetchszm_to_iqid =
1517 	    htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
1518 		V_FW_EQ_CTRL_CMD_PCIECHN(sc->port[idx]->tx_chan) |
1519 		F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
1520 	c.dcaen_to_eqsize =
1521 	    htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
1522 		V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
1523 		V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
1524 		V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize));
1525 	c.eqaddr = htobe64(eq->ba);
1526 
1527 	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
1528 	if (rc != 0) {
1529 		device_printf(sc->dev,
1530 		    "failed to create control queue %d: %d\n", idx, rc);
1531 		return (rc);
1532 	}
1533 
1534 	eq->pidx = eq->cidx = 0;
1535 	eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
1536 	eq->flags |= (EQ_ALLOCATED | EQ_STARTED);
1537 
1538 	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
1539 	KASSERT(cntxt_id < sc->sge.neq,
1540 	    ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
1541 	    cntxt_id, sc->sge.neq - 1));
1542 	sc->sge.eqmap[cntxt_id] = eq;
1543 
1544 	children = SYSCTL_CHILDREN(sc->oid_ctrlq);
1545 
1546 	snprintf(name, sizeof(name), "%d", idx);
1547 	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD,
1548 	    NULL, "ctrl queue");
1549 	children = SYSCTL_CHILDREN(oid);
1550 
1551 	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "pidx",
1552 	    CTLTYPE_INT | CTLFLAG_RD, &ctrlq->eq.pidx, 0, sysctl_uint16, "I",
1553 	    "producer index");
1554 	SYSCTL_ADD_UINT(&sc->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
1555 	    &ctrlq->no_desc, 0,
1556 	    "# of times ctrlq ran out of hardware descriptors");
1557 
1558 	return (rc);
1559 }
1560 
1561 static int
1562 free_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq)
1563 {
1564 	int rc;
1565 	struct sge_eq *eq = &ctrlq->eq;
1566 
1567 	if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) {
1568 		rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
1569 		if (rc != 0) {
1570 			device_printf(sc->dev,
1571 			    "failed to free ctrl queue %p: %d\n", eq, rc);
1572 			return (rc);
1573 		}
1574 		eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED);
1575 	}
1576 
1577 	free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
1578 
1579 	if (mtx_initialized(&eq->eq_lock))
1580 		mtx_destroy(&eq->eq_lock);
1581 
1582 	bzero(ctrlq, sizeof(*ctrlq));
1583 	return (0);
1584 }
1585 
1586 static int
1587 alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx)
1588 {
1589 	int rc, cntxt_id;
1590 	size_t len;
1591 	struct adapter *sc = pi->adapter;
1592 	struct fw_eq_eth_cmd c;
1593 	struct sge_eq *eq = &txq->eq;
1594 	char name[16];
1595 	struct sysctl_oid *oid;
1596 	struct sysctl_oid_list *children;
1597 	struct sge_iq *intrq;
1598 
1599 	txq->ifp = pi->ifp;
1600 	TASK_INIT(&txq->resume_tx, 0, cxgbe_txq_start, txq);
1601 
1602 	mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
1603 
1604 	len = eq->qsize * TX_EQ_ESIZE;
1605 	rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
1606 	    &eq->ba, (void **)&eq->desc);
1607 	if (rc)
1608 		return (rc);
1609 
1610 	eq->cap = eq->qsize - SPG_LEN / TX_EQ_ESIZE;
1611 	eq->spg = (void *)&eq->desc[eq->cap];
1612 	eq->avail = eq->cap - 1;	/* one less to avoid cidx = pidx */
1613 	txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE,
1614 	    M_ZERO | M_WAITOK);
1615 	txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock);
1616 
1617 	intrq = &sc->sge.intrq[0];
1618 	if (sc->flags & INTR_SHARED)
1619 		eq->iqid = intrq[(pi->first_txq + idx) % NINTRQ(sc)].cntxt_id;
1620 	else
1621 		eq->iqid = intrq[pi->first_rxq + (idx % pi->nrxq)].cntxt_id;
1622 
1623 	rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR,
1624 	    BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS,
1625 	    BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag);
1626 	if (rc != 0) {
1627 		device_printf(sc->dev,
1628 		    "failed to create tx DMA tag: %d\n", rc);
1629 		return (rc);
1630 	}
1631 
1632 	rc = alloc_tx_maps(txq);
1633 	if (rc != 0) {
1634 		device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc);
1635 		return (rc);
1636 	}
1637 
1638 	bzero(&c, sizeof(c));
1639 
1640 	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
1641 	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
1642 	    V_FW_EQ_ETH_CMD_VFN(0));
1643 	c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
1644 	    F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
1645 	c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
1646 	c.fetchszm_to_iqid =
1647 	    htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
1648 		V_FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
1649 		V_FW_EQ_ETH_CMD_IQID(eq->iqid));
1650 	c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
1651 		      V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
1652 		      V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
1653 		      V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
1654 	c.eqaddr = htobe64(eq->ba);
1655 
1656 	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
1657 	if (rc != 0) {
1658 		device_printf(pi->dev,
1659 		    "failed to create egress queue: %d\n", rc);
1660 		return (rc);
1661 	}
1662 
1663 	eq->pidx = eq->cidx = 0;
1664 	eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
1665 	eq->flags |= (EQ_ALLOCATED | EQ_STARTED);
1666 
1667 	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
1668 	KASSERT(cntxt_id < sc->sge.neq,
1669 	    ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
1670 	    cntxt_id, sc->sge.neq - 1));
1671 	sc->sge.eqmap[cntxt_id] = eq;
1672 
1673 	children = SYSCTL_CHILDREN(pi->oid_txq);
1674 
1675 	snprintf(name, sizeof(name), "%d", idx);
1676 	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
1677 	    NULL, "tx queue");
1678 	children = SYSCTL_CHILDREN(oid);
1679 
1680 	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "cntxt_id", CTLFLAG_RD,
1681 	    &eq->cntxt_id, 0, "SGE context id of the queue");
1682 	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "cidx",
1683 	    CTLTYPE_INT | CTLFLAG_RD, &eq->cidx, 0, sysctl_uint16, "I",
1684 	    "consumer index");
1685 	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "pidx",
1686 	    CTLTYPE_INT | CTLFLAG_RD, &eq->pidx, 0, sysctl_uint16, "I",
1687 	    "producer index");
1688 
1689 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD,
1690 	    &txq->txcsum, "# of times hardware assisted with checksum");
1691 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion",
1692 	    CTLFLAG_RD, &txq->vlan_insertion,
1693 	    "# of times hardware inserted 802.1Q tag");
1694 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD,
1695 	    &txq->tso_wrs, "# of IPv4 TSO work requests");
1696 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD,
1697 	    &txq->imm_wrs, "# of work requests with immediate data");
1698 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD,
1699 	    &txq->sgl_wrs, "# of work requests with direct SGL");
1700 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
1701 	    &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
1702 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD,
1703 	    &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)");
1704 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD,
1705 	    &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests");
1706 
1707 	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD,
1708 	    &txq->no_dmamap, 0, "# of times txq ran out of DMA maps");
1709 	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
1710 	    &txq->no_desc, 0, "# of times txq ran out of hardware descriptors");
1711 	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD,
1712 	    &txq->egr_update, 0, "egress update notifications from the SGE");
1713 
1714 	return (rc);
1715 }
1716 
1717 static int
1718 free_txq(struct port_info *pi, struct sge_txq *txq)
1719 {
1720 	int rc;
1721 	struct adapter *sc = pi->adapter;
1722 	struct sge_eq *eq = &txq->eq;
1723 
1724 	if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) {
1725 
1726 		/*
1727 		 * Wait for the response to a credit flush if there's one
1728 		 * pending.  Clearing the flag tells handle_sge_egr_update or
1729 		 * cxgbe_txq_start (depending on how far the response has made
1730 		 * it) that they should ignore the response and wake up free_txq
1731 		 * instead.
1732 		 *
1733 		 * The interface has been marked down by the time we get here
1734 		 * (both IFF_UP and IFF_DRV_RUNNING cleared).  qflush has
1735 		 * emptied the tx buf_rings and we know nothing new is being
1736 		 * queued for tx so we don't have to worry about a new credit
1737 		 * flush request.
1738 		 */
1739 		TXQ_LOCK(txq);
1740 		if (eq->flags & EQ_CRFLUSHED) {
1741 			eq->flags &= ~EQ_CRFLUSHED;
1742 			msleep(txq, &eq->eq_lock, 0, "crflush", 0);
1743 		}
1744 		TXQ_UNLOCK(txq);
1745 
1746 		rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
1747 		if (rc != 0) {
1748 			device_printf(pi->dev,
1749 			    "failed to free egress queue %p: %d\n", eq, rc);
1750 			return (rc);
1751 		}
1752 		eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED);
1753 	}
1754 
1755 	free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
1756 
1757 	free(txq->sdesc, M_CXGBE);
1758 
1759 	if (txq->maps)
1760 		free_tx_maps(txq);
1761 
1762 	buf_ring_free(txq->br, M_CXGBE);
1763 
1764 	if (txq->tx_tag)
1765 		bus_dma_tag_destroy(txq->tx_tag);
1766 
1767 	if (mtx_initialized(&eq->eq_lock))
1768 		mtx_destroy(&eq->eq_lock);
1769 
1770 	bzero(txq, sizeof(*txq));
1771 	return (0);
1772 }
1773 
1774 static void
1775 oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
1776 {
1777 	bus_addr_t *ba = arg;
1778 
1779 	KASSERT(nseg == 1,
1780 	    ("%s meant for single segment mappings only.", __func__));
1781 
1782 	*ba = error ? 0 : segs->ds_addr;
1783 }
1784 
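/*
 * A response is "new" if the gen bit in the rsp_ctrl at the tail of the
 * descriptor matches the queue's current gen bit.  iq_next flips iq->gen
 * each time the consumer index wraps, so entries left over from the previous
 * pass around the ring are never mistaken for new responses.
 */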
1785 static inline bool
1786 is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
1787 {
1788 	*ctrl = (void *)((uintptr_t)iq->cdesc +
1789 	    (iq->esize - sizeof(struct rsp_ctrl)));
1790 
1791 	return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen);
1792 }
1793 
1794 static inline void
1795 iq_next(struct sge_iq *iq)
1796 {
1797 	iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize);
1798 	if (__predict_false(++iq->cidx == iq->qsize - 1)) {
1799 		iq->cidx = 0;
1800 		iq->gen ^= 1;
1801 		iq->cdesc = iq->desc;
1802 	}
1803 }
1804 
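/*
 * Tell the hardware about the freelist buffers posted since the last
 * doorbell.  The doorbell is rung in units of 8 descriptors, which is why
 * fl->pending is divided by 8 and any partial group (plus one held-back
 * group when pidx and cidx share the same group of 8) stays pending.
 */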
1805 #define FL_HW_IDX(x) ((x) >> 3)
1806 static inline void
1807 ring_fl_db(struct adapter *sc, struct sge_fl *fl)
1808 {
1809 	int ndesc = fl->pending / 8;
1810 
1811 	if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx))
1812 		ndesc--;	/* hold back one credit */
1813 
1814 	if (ndesc <= 0)
1815 		return;		/* nothing to do */
1816 
1817 	wmb();
1818 
1819 	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), F_DBPRIO |
1820 	    V_QID(fl->cntxt_id) | V_PIDX(ndesc));
1821 	fl->pending -= ndesc * 8;
1822 }
1823 
1824 /*
1825  * Fill up the freelist with up to nbufs buffers and ring its doorbell if the
1826  * number of buffers ready to be handed to the hardware is >= dbthresh.
1827  */
1828 static void
1829 refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs, int dbthresh)
1830 {
1831 	__be64 *d = &fl->desc[fl->pidx];
1832 	struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
1833 	bus_dma_tag_t tag;
1834 	bus_addr_t pa;
1835 	caddr_t cl;
1836 	int rc;
1837 
1838 	FL_LOCK_ASSERT_OWNED(fl);
1839 
1840 	if (nbufs < 0 || nbufs > fl->needed)
1841 		nbufs = fl->needed;
1842 
1843 	while (nbufs--) {
1844 
1845 		if (sd->cl != NULL) {
1846 
1847 			/*
1848 			 * This happens when a frame small enough to fit
1849 			 * entirely in an mbuf was received in cl last time.
1850 			 * We'd held on to cl and can reuse it now.  Note that
1851 			 * we reuse a cluster of the old size if fl->tag_idx is
1852 			 * no longer the same as sd->tag_idx.
1853 			 */
1854 
1855 			KASSERT(*d == sd->ba_tag,
1856 			    ("%s: recycling problem at pidx %d",
1857 			    __func__, fl->pidx));
1858 
1859 			d++;
1860 			goto recycled;
1861 		}
1862 
1864 		if (fl->tag_idx != sd->tag_idx) {
1865 			bus_dmamap_t map;
1866 			bus_dma_tag_t newtag = fl->tag[fl->tag_idx];
1867 			bus_dma_tag_t oldtag = fl->tag[sd->tag_idx];
1868 
1869 			/*
1870 			 * An MTU change can get us here.  Discard the old map
1871 			 * which was created with the old tag, but only if
1872 			 * we're able to get a new one.
1873 			 */
1874 			rc = bus_dmamap_create(newtag, 0, &map);
1875 			if (rc == 0) {
1876 				bus_dmamap_destroy(oldtag, sd->map);
1877 				sd->map = map;
1878 				sd->tag_idx = fl->tag_idx;
1879 			}
1880 		}
1881 
1882 		tag = fl->tag[sd->tag_idx];
1883 
1884 		cl = m_cljget(NULL, M_NOWAIT, FL_BUF_SIZE(sd->tag_idx));
1885 		if (cl == NULL)
1886 			break;
1887 
1888 		rc = bus_dmamap_load(tag, sd->map, cl, FL_BUF_SIZE(sd->tag_idx),
1889 		    oneseg_dma_callback, &pa, 0);
1890 		if (rc != 0 || pa == 0) {
1891 			fl->dmamap_failed++;
1892 			uma_zfree(FL_BUF_ZONE(sd->tag_idx), cl);
1893 			break;
1894 		}
1895 
1896 		sd->cl = cl;
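		/*
		 * The buffer size index is carried in the low bits of the
		 * descriptor, alongside the bus address of the cluster.
		 */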
1897 		*d++ = htobe64(pa | sd->tag_idx);
1898 
1899 #ifdef INVARIANTS
1900 		sd->ba_tag = htobe64(pa | sd->tag_idx);
1901 #endif
1902 
1903 recycled:
1904 		/* sd->m is never recycled, should always be NULL */
1905 		KASSERT(sd->m == NULL, ("%s: stray mbuf", __func__));
1906 
1907 		sd->m = m_gethdr(M_NOWAIT, MT_NOINIT);
1908 		if (sd->m == NULL)
1909 			break;
1910 
1911 		fl->pending++;
1912 		fl->needed--;
1913 		sd++;
1914 		if (++fl->pidx == fl->cap) {
1915 			fl->pidx = 0;
1916 			sd = fl->sdesc;
1917 			d = fl->desc;
1918 		}
1919 	}
1920 
1921 	if (fl->pending >= dbthresh)
1922 		ring_fl_db(sc, fl);
1923 }
1924 
1925 static int
1926 alloc_fl_sdesc(struct sge_fl *fl)
1927 {
1928 	struct fl_sdesc *sd;
1929 	bus_dma_tag_t tag;
1930 	int i, rc;
1931 
1932 	FL_LOCK_ASSERT_OWNED(fl);
1933 
1934 	fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE,
1935 	    M_ZERO | M_WAITOK);
1936 
1937 	tag = fl->tag[fl->tag_idx];
1938 	sd = fl->sdesc;
1939 	for (i = 0; i < fl->cap; i++, sd++) {
1940 
1941 		sd->tag_idx = fl->tag_idx;
1942 		rc = bus_dmamap_create(tag, 0, &sd->map);
1943 		if (rc != 0)
1944 			goto failed;
1945 	}
1946 
1947 	return (0);
1948 failed:
1949 	while (--i >= 0) {
1950 		sd--;
1951 		bus_dmamap_destroy(tag, sd->map);
1952 		if (sd->m) {
1953 			m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
1954 			m_free(sd->m);
1955 			sd->m = NULL;
1956 		}
1957 	}
1958 	KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__));
1959 
1960 	free(fl->sdesc, M_CXGBE);
1961 	fl->sdesc = NULL;
1962 
1963 	return (rc);
1964 }
1965 
1966 static void
1967 free_fl_sdesc(struct sge_fl *fl)
1968 {
1969 	struct fl_sdesc *sd;
1970 	int i;
1971 
1972 	FL_LOCK_ASSERT_OWNED(fl);
1973 
1974 	sd = fl->sdesc;
1975 	for (i = 0; i < fl->cap; i++, sd++) {
1976 
1977 		if (sd->m) {
1978 			m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
1979 			m_free(sd->m);
1980 			sd->m = NULL;
1981 		}
1982 
1983 		if (sd->cl) {
1984 			bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
1985 			uma_zfree(FL_BUF_ZONE(sd->tag_idx), sd->cl);
1986 			sd->cl = NULL;
1987 		}
1988 
1989 		bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map);
1990 	}
1991 
1992 	free(fl->sdesc, M_CXGBE);
1993 	fl->sdesc = NULL;
1994 }
1995 
1996 static int
1997 alloc_tx_maps(struct sge_txq *txq)
1998 {
1999 	struct tx_map *txm;
2000 	int i, rc, count;
2001 
2002 	/*
2003 	 * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE
2004 	 * limit for any WR).  txq->no_dmamap events shouldn't occur if maps is
2005 	 * sized for the worst case.
2006 	 */
2007 	count = txq->eq.qsize * 10 / 8;
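	/* e.g., a 1024 entry eq gets 1024 * 10 / 8 = 1280 DMA maps */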
2008 	txq->map_total = txq->map_avail = count;
2009 	txq->map_cidx = txq->map_pidx = 0;
2010 
2011 	txq->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
2012 	    M_ZERO | M_WAITOK);
2013 
2014 	txm = txq->maps;
2015 	for (i = 0; i < count; i++, txm++) {
2016 		rc = bus_dmamap_create(txq->tx_tag, 0, &txm->map);
2017 		if (rc != 0)
2018 			goto failed;
2019 	}
2020 
2021 	return (0);
2022 failed:
2023 	while (--i >= 0) {
2024 		txm--;
2025 		bus_dmamap_destroy(txq->tx_tag, txm->map);
2026 	}
2027 	KASSERT(txm == txq->maps, ("%s: EDOOFUS", __func__));
2028 
2029 	free(txq->maps, M_CXGBE);
2030 	txq->maps = NULL;
2031 
2032 	return (rc);
2033 }
2034 
2035 static void
2036 free_tx_maps(struct sge_txq *txq)
2037 {
2038 	struct tx_map *txm;
2039 	int i;
2040 
2041 	txm = txq->maps;
2042 	for (i = 0; i < txq->map_total; i++, txm++) {
2043 
2044 		if (txm->m) {
2045 			bus_dmamap_unload(txq->tx_tag, txm->map);
2046 			m_freem(txm->m);
2047 			txm->m = NULL;
2048 		}
2049 
2050 		bus_dmamap_destroy(txq->tx_tag, txm->map);
2051 	}
2052 
2053 	free(txq->maps, M_CXGBE);
2054 	txq->maps = NULL;
2055 }
2056 
2057 /*
2058  * We'll do immediate data tx for non-TSO, but only when not coalescing.  We're
2059  * willing to use up to 2 hardware descriptors, which means a maximum of 96 bytes
2060  * of immediate data.
2061  */
2062 #define IMM_LEN ( \
2063       2 * TX_EQ_ESIZE \
2064     - sizeof(struct fw_eth_tx_pkt_wr) \
2065     - sizeof(struct cpl_tx_pkt_core))
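/*
 * For reference: with 64 byte tx descriptors and the 16 byte fw_eth_tx_pkt_wr
 * and cpl_tx_pkt_core headers this works out to 2 * 64 - 16 - 16 = 96 bytes.
 */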
2066 
2067 /*
2068  * Returns non-zero on failure, no need to cleanup anything in that case.
2069  *
2070  * Note 1: We always try to defrag the mbuf if required and return EFBIG only
2071  * if the resulting chain still won't fit in a tx descriptor.
2072  *
2073  * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf
2074  * does not have the TCP header in it.
2075  */
2076 static int
2077 get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
2078     int sgl_only)
2079 {
2080 	struct mbuf *m = *fp;
2081 	struct tx_map *txm;
2082 	int rc, defragged = 0, n;
2083 
2084 	TXQ_LOCK_ASSERT_OWNED(txq);
2085 
2086 	if (m->m_pkthdr.tso_segsz)
2087 		sgl_only = 1;	/* Do not allow immediate data with LSO */
2088 
2089 start:	sgl->nsegs = 0;
2090 
2091 	if (m->m_pkthdr.len <= IMM_LEN && !sgl_only)
2092 		return (0);	/* nsegs = 0 tells caller to use imm. tx */
2093 
2094 	if (txq->map_avail == 0) {
2095 		txq->no_dmamap++;
2096 		return (ENOMEM);
2097 	}
2098 	txm = &txq->maps[txq->map_pidx];
2099 
2100 	if (m->m_pkthdr.tso_segsz && m->m_len < 50) {
2101 		*fp = m_pullup(m, 50);
2102 		m = *fp;
2103 		if (m == NULL)
2104 			return (ENOBUFS);
2105 	}
2106 
2107 	rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg,
2108 	    &sgl->nsegs, BUS_DMA_NOWAIT);
2109 	if (rc == EFBIG && defragged == 0) {
2110 		m = m_defrag(m, M_DONTWAIT);
2111 		if (m == NULL)
2112 			return (EFBIG);
2113 
2114 		defragged = 1;
2115 		*fp = m;
2116 		goto start;
2117 	}
2118 	if (rc != 0)
2119 		return (rc);
2120 
2121 	txm->m = m;
2122 	txq->map_avail--;
2123 	if (++txq->map_pidx == txq->map_total)
2124 		txq->map_pidx = 0;
2125 
2126 	KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
2127 	    ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));
2128 
2129 	/*
2130 	 * Store the # of flits required to hold this frame's SGL in nflits.  An
2131 	 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by
2132 	 * (len1 + len2, addr1, addr2) tuples.  If the second address of a pair
2133 	 * is unused, its length must be set to 0.
2134 	 */
2135 	n = sgl->nsegs - 1;
2136 	sgl->nflits = (3 * n) / 2 + (n & 1) + 2;
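	/*
	 * e.g., nsegs = 5 leaves n = 4 segments after the first one: 2 flits
	 * for the header + (len0, addr0) plus 3 flits for each of the 2
	 * remaining (len, addr, addr) pairs, i.e. 8 flits in all.
	 */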
2137 
2138 	return (0);
2139 }
2140 
2142 /*
2143  * Releases all the txq resources used up in the specified sgl.
2144  */
2145 static int
2146 free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
2147 {
2148 	struct tx_map *txm;
2149 
2150 	TXQ_LOCK_ASSERT_OWNED(txq);
2151 
2152 	if (sgl->nsegs == 0)
2153 		return (0);	/* didn't use any map */
2154 
2155 	/* 1 pkt uses exactly 1 map, back it out */
2156 
2157 	txq->map_avail++;
2158 	if (txq->map_pidx > 0)
2159 		txq->map_pidx--;
2160 	else
2161 		txq->map_pidx = txq->map_total - 1;
2162 
2163 	txm = &txq->maps[txq->map_pidx];
2164 	bus_dmamap_unload(txq->tx_tag, txm->map);
2165 	txm->m = NULL;
2166 
2167 	return (0);
2168 }
2169 
2170 static int
2171 write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
2172     struct sgl *sgl)
2173 {
2174 	struct sge_eq *eq = &txq->eq;
2175 	struct fw_eth_tx_pkt_wr *wr;
2176 	struct cpl_tx_pkt_core *cpl;
2177 	uint32_t ctrl;	/* used in many unrelated places */
2178 	uint64_t ctrl1;
2179 	int nflits, ndesc, pktlen;
2180 	struct tx_sdesc *txsd;
2181 	caddr_t dst;
2182 
2183 	TXQ_LOCK_ASSERT_OWNED(txq);
2184 
2185 	pktlen = m->m_pkthdr.len;
2186 
2187 	/*
2188 	 * Do we have enough flits to send this frame out?
2189 	 */
2190 	ctrl = sizeof(struct cpl_tx_pkt_core);
2191 	if (m->m_pkthdr.tso_segsz) {
2192 		nflits = TXPKT_LSO_WR_HDR;
2193 		ctrl += sizeof(struct cpl_tx_pkt_lso);
2194 	} else
2195 		nflits = TXPKT_WR_HDR;
2196 	if (sgl->nsegs > 0)
2197 		nflits += sgl->nflits;
2198 	else {
2199 		nflits += howmany(pktlen, 8);
2200 		ctrl += pktlen;
2201 	}
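	/* Each 64 byte hardware tx descriptor holds 8 flits of 8 bytes. */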
2202 	ndesc = howmany(nflits, 8);
2203 	if (ndesc > eq->avail)
2204 		return (ENOMEM);
2205 
2206 	/* Firmware work request header */
2207 	wr = (void *)&eq->desc[eq->pidx];
2208 	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
2209 	    V_FW_WR_IMMDLEN(ctrl));
2210 	ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
2211 	if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) {
2212 		ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
2213 		eq->flags |= EQ_CRFLUSHED;
2214 	}
2215 
2216 	wr->equiq_to_len16 = htobe32(ctrl);
2217 	wr->r3 = 0;
2218 
2219 	if (m->m_pkthdr.tso_segsz) {
2220 		struct cpl_tx_pkt_lso *lso = (void *)(wr + 1);
2221 		struct ether_header *eh;
2222 		struct ip *ip;
2223 		struct tcphdr *tcp;
2224 
2225 		ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
2226 		    F_LSO_LAST_SLICE;
2227 
2228 		eh = mtod(m, struct ether_header *);
2229 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2230 			ctrl |= V_LSO_ETHHDR_LEN(1);
2231 			ip = (void *)((struct ether_vlan_header *)eh + 1);
2232 		} else
2233 			ip = (void *)(eh + 1);
2234 
2235 		tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4);
2236 		ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) |
2237 		    V_LSO_TCPHDR_LEN(tcp->th_off);
2238 
2239 		lso->lso_ctrl = htobe32(ctrl);
2240 		lso->ipid_ofst = htobe16(0);
2241 		lso->mss = htobe16(m->m_pkthdr.tso_segsz);
2242 		lso->seqno_offset = htobe32(0);
2243 		lso->len = htobe32(pktlen);
2244 
2245 		cpl = (void *)(lso + 1);
2246 
2247 		txq->tso_wrs++;
2248 	} else
2249 		cpl = (void *)(wr + 1);
2250 
2251 	/* Checksum offload */
2252 	ctrl1 = 0;
2253 	if (!(m->m_pkthdr.csum_flags & CSUM_IP))
2254 		ctrl1 |= F_TXPKT_IPCSUM_DIS;
2255 	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))
2256 		ctrl1 |= F_TXPKT_L4CSUM_DIS;
2257 	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
2258 		txq->txcsum++;	/* some hardware assistance provided */
2259 
2260 	/* VLAN tag insertion */
2261 	if (m->m_flags & M_VLANTAG) {
2262 		ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
2263 		txq->vlan_insertion++;
2264 	}
2265 
2266 	/* CPL header */
2267 	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
2268 	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
2269 	cpl->pack = 0;
2270 	cpl->len = htobe16(pktlen);
2271 	cpl->ctrl1 = htobe64(ctrl1);
2272 
2273 	/* Software descriptor */
2274 	txsd = &txq->sdesc[eq->pidx];
2275 	txsd->desc_used = ndesc;
2276 
2277 	eq->pending += ndesc;
2278 	eq->avail -= ndesc;
2279 	eq->pidx += ndesc;
2280 	if (eq->pidx >= eq->cap)
2281 		eq->pidx -= eq->cap;
2282 
2283 	/* SGL */
2284 	dst = (void *)(cpl + 1);
2285 	if (sgl->nsegs > 0) {
2286 		txsd->credits = 1;
2287 		txq->sgl_wrs++;
2288 		write_sgl_to_txd(eq, sgl, &dst);
2289 	} else {
2290 		txsd->credits = 0;
2291 		txq->imm_wrs++;
2292 		for (; m; m = m->m_next) {
2293 			copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
2294 #ifdef INVARIANTS
2295 			pktlen -= m->m_len;
2296 #endif
2297 		}
2298 #ifdef INVARIANTS
2299 		KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
2300 #endif
2301 
2302 	}
2303 
2304 	txq->txpkt_wrs++;
2305 	return (0);
2306 }
2307 
2308 /*
2309  * Returns 0 to indicate that m has been accepted into a coalesced tx work
2310  * request.  It has either been folded into txpkts or txpkts was flushed and m
2311  * has started a new coalesced work request (as the first frame in a fresh
2312  * txpkts).
2313  *
2314  * Returns non-zero to indicate a failure - the caller is responsible for
2315  * transmitting m.  If there was anything in txpkts it has already been flushed.
2316  */
2317 static int
2318 add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts,
2319     struct mbuf *m, struct sgl *sgl)
2320 {
2321 	struct sge_eq *eq = &txq->eq;
2322 	int can_coalesce;
2323 	struct tx_sdesc *txsd;
2324 	int flits;
2325 
2326 	TXQ_LOCK_ASSERT_OWNED(txq);
2327 
2328 	if (txpkts->npkt > 0) {
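		/*
		 * m can be folded into the work request in progress only if
		 * it is not a TSO frame, the combined WR stays within the
		 * per-WR flit limit and the descriptors currently available,
		 * and the total payload remains below 64K (plen is 16 bits).
		 */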
2329 		flits = TXPKTS_PKT_HDR + sgl->nflits;
2330 		can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
2331 		    txpkts->nflits + flits <= TX_WR_FLITS &&
2332 		    txpkts->nflits + flits <= eq->avail * 8 &&
2333 		    txpkts->plen + m->m_pkthdr.len < 65536;
2334 
2335 		if (can_coalesce) {
2336 			txpkts->npkt++;
2337 			txpkts->nflits += flits;
2338 			txpkts->plen += m->m_pkthdr.len;
2339 
2340 			txsd = &txq->sdesc[eq->pidx];
2341 			txsd->credits++;
2342 
2343 			return (0);
2344 		}
2345 
2346 		/*
2347 		 * Couldn't coalesce m into txpkts.  The first order of business
2348 		 * is to send txpkts on its way.  Then we'll revisit m.
2349 		 */
2350 		write_txpkts_wr(txq, txpkts);
2351 	}
2352 
2353 	/*
2354 	 * Check if we can start a new coalesced tx work request with m as
2355 	 * the first packet in it.
2356 	 */
2357 
2358 	KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__));
2359 
2360 	flits = TXPKTS_WR_HDR + sgl->nflits;
2361 	can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
2362 	    flits <= eq->avail * 8 && flits <= TX_WR_FLITS;
2363 
2364 	if (can_coalesce == 0)
2365 		return (EINVAL);
2366 
2367 	/*
2368 	 * Start a fresh coalesced tx WR with m as the first frame in it.
2369 	 */
2370 	txpkts->npkt = 1;
2371 	txpkts->nflits = flits;
2372 	txpkts->flitp = &eq->desc[eq->pidx].flit[2];
2373 	txpkts->plen = m->m_pkthdr.len;
2374 
2375 	txsd = &txq->sdesc[eq->pidx];
2376 	txsd->credits = 1;
2377 
2378 	return (0);
2379 }
2380 
2381 /*
2382  * Note that write_txpkts_wr can never run out of hardware descriptors (but
2383  * write_txpkt_wr can).  add_to_txpkts ensures that a frame is accepted for
2384  * coalescing only if sufficient hardware descriptors are available.
2385  */
2386 static void
2387 write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
2388 {
2389 	struct sge_eq *eq = &txq->eq;
2390 	struct fw_eth_tx_pkts_wr *wr;
2391 	struct tx_sdesc *txsd;
2392 	uint32_t ctrl;
2393 	int ndesc;
2394 
2395 	TXQ_LOCK_ASSERT_OWNED(txq);
2396 
2397 	ndesc = howmany(txpkts->nflits, 8);
2398 
2399 	wr = (void *)&eq->desc[eq->pidx];
2400 	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR) |
2401 	    V_FW_WR_IMMDLEN(0)); /* immdlen does not matter in this WR */
2402 	ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
2403 	if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) {
2404 		ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
2405 		eq->flags |= EQ_CRFLUSHED;
2406 	}
2407 	wr->equiq_to_len16 = htobe32(ctrl);
2408 	wr->plen = htobe16(txpkts->plen);
2409 	wr->npkt = txpkts->npkt;
2410 	wr->r3 = wr->type = 0;
2411 
2412 	/* Everything else already written */
2413 
2414 	txsd = &txq->sdesc[eq->pidx];
2415 	txsd->desc_used = ndesc;
2416 
2417 	KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__));
2418 
2419 	eq->pending += ndesc;
2420 	eq->avail -= ndesc;
2421 	eq->pidx += ndesc;
2422 	if (eq->pidx >= eq->cap)
2423 		eq->pidx -= eq->cap;
2424 
2425 	txq->txpkts_pkts += txpkts->npkt;
2426 	txq->txpkts_wrs++;
2427 	txpkts->npkt = 0;	/* emptied */
2428 }
2429 
2430 static inline void
2431 write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
2432     struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl)
2433 {
2434 	struct ulp_txpkt *ulpmc;
2435 	struct ulptx_idata *ulpsc;
2436 	struct cpl_tx_pkt_core *cpl;
2437 	struct sge_eq *eq = &txq->eq;
2438 	uintptr_t flitp, start, end;
2439 	uint64_t ctrl;
2440 	caddr_t dst;
2441 
2442 	KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__));
2443 
2444 	start = (uintptr_t)eq->desc;
2445 	end = (uintptr_t)eq->spg;
2446 
2447 	/* Checksum offload */
2448 	ctrl = 0;
2449 	if (!(m->m_pkthdr.csum_flags & CSUM_IP))
2450 		ctrl |= F_TXPKT_IPCSUM_DIS;
2451 	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))
2452 		ctrl |= F_TXPKT_L4CSUM_DIS;
2453 	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
2454 		txq->txcsum++;	/* some hardware assistance provided */
2455 
2456 	/* VLAN tag insertion */
2457 	if (m->m_flags & M_VLANTAG) {
2458 		ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
2459 		txq->vlan_insertion++;
2460 	}
2461 
2462 	/*
2463 	 * The previous packet's SGL must have ended at a 16 byte boundary (this
2464 	 * is required by the firmware/hardware).  It follows that flitp cannot
2465 	 * wrap around between the ULPTX master command and ULPTX subcommand (8
2466  * bytes each), and that it cannot wrap around in the middle of the
2467 	 * cpl_tx_pkt_core either.
2468 	 */
2469 	flitp = (uintptr_t)txpkts->flitp;
2470 	KASSERT((flitp & 0xf) == 0,
2471 	    ("%s: last SGL did not end at 16 byte boundary: %p",
2472 	    __func__, txpkts->flitp));
2473 
2474 	/* ULP master command */
2475 	ulpmc = (void *)flitp;
2476 	ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) |
2477 	    V_ULP_TXPKT_FID(eq->iqid));
2478 	ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) +
2479 	    sizeof(*cpl) + 8 * sgl->nflits, 16));
2480 
2481 	/* ULP subcommand */
2482 	ulpsc = (void *)(ulpmc + 1);
2483 	ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
2484 	    F_ULP_TX_SC_MORE);
2485 	ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));
2486 
2487 	flitp += sizeof(*ulpmc) + sizeof(*ulpsc);
2488 	if (flitp == end)
2489 		flitp = start;
2490 
2491 	/* CPL_TX_PKT */
2492 	cpl = (void *)flitp;
2493 	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
2494 	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
2495 	cpl->pack = 0;
2496 	cpl->len = htobe16(m->m_pkthdr.len);
2497 	cpl->ctrl1 = htobe64(ctrl);
2498 
2499 	flitp += sizeof(*cpl);
2500 	if (flitp == end)
2501 		flitp = start;
2502 
2503 	/* SGL for this frame */
2504 	dst = (caddr_t)flitp;
2505 	txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst);
2506 	txpkts->flitp = (void *)dst;
2507 
2508 	KASSERT(((uintptr_t)dst & 0xf) == 0,
2509 	    ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst));
2510 }
2511 
2512 /*
2513  * If the SGL ends at an address that is not 16 byte aligned, this function
2514  * adds a zero-filled pad flit at the end and returns 1; otherwise it returns 0.
2515  */
2516 static int
2517 write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
2518 {
2519 	__be64 *flitp, *end;
2520 	struct ulptx_sgl *usgl;
2521 	bus_dma_segment_t *seg;
2522 	int i, padded;
2523 
2524 	KASSERT(sgl->nsegs > 0 && sgl->nflits > 0,
2525 	    ("%s: bad SGL - nsegs=%d, nflits=%d",
2526 	    __func__, sgl->nsegs, sgl->nflits));
2527 
2528 	KASSERT(((uintptr_t)(*to) & 0xf) == 0,
2529 	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));
2530 
2531 	flitp = (__be64 *)(*to);
2532 	end = flitp + sgl->nflits;
2533 	seg = &sgl->seg[0];
2534 	usgl = (void *)flitp;
2535 
2536 	/*
2537 	 * We start at a 16 byte boundary somewhere inside the tx descriptor
2538 	 * ring, so we're at least 16 bytes away from the status page.  There is
2539 	 * no chance of a wrap around in the middle of usgl (which is 16 bytes).
2540 	 */
2541 
2542 	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
2543 	    V_ULPTX_NSGE(sgl->nsegs));
2544 	usgl->len0 = htobe32(seg->ds_len);
2545 	usgl->addr0 = htobe64(seg->ds_addr);
2546 	seg++;
2547 
2548 	if ((uintptr_t)end <= (uintptr_t)eq->spg) {
2549 
2550 		/* Won't wrap around at all */
2551 
2552 		for (i = 0; i < sgl->nsegs - 1; i++, seg++) {
2553 			usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len);
2554 			usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr);
2555 		}
2556 		if (i & 1)
2557 			usgl->sge[i / 2].len[1] = htobe32(0);
2558 	} else {
2559 
2560 		/* Will wrap somewhere in the rest of the SGL */
2561 
2562 		/* 2 flits already written, write the rest flit by flit */
2563 		flitp = (void *)(usgl + 1);
2564 		for (i = 0; i < sgl->nflits - 2; i++) {
2565 			if ((uintptr_t)flitp == (uintptr_t)eq->spg)
2566 				flitp = (void *)eq->desc;
2567 			*flitp++ = get_flit(seg, sgl->nsegs - 1, i);
2568 		}
2569 		end = flitp;
2570 	}
2571 
2572 	if ((uintptr_t)end & 0xf) {
2573 		*(uint64_t *)end = 0;
2574 		end++;
2575 		padded = 1;
2576 	} else
2577 		padded = 0;
2578 
2579 	if ((uintptr_t)end == (uintptr_t)eq->spg)
2580 		*to = (void *)eq->desc;
2581 	else
2582 		*to = (void *)end;
2583 
2584 	return (padded);
2585 }
2586 
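/*
 * Copies len bytes from "from" into the descriptor ring at *to, wrapping past
 * the status page back to the start of the ring if needed, and leaves *to
 * pointing just past the last byte written.
 */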
2587 static inline void
2588 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
2589 {
2590 	if ((uintptr_t)(*to) + len <= (uintptr_t)eq->spg) {
2591 		bcopy(from, *to, len);
2592 		(*to) += len;
2593 	} else {
2594 		int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);
2595 
2596 		bcopy(from, *to, portion);
2597 		from += portion;
2598 		portion = len - portion;	/* remaining */
2599 		bcopy(from, (void *)eq->desc, portion);
2600 		(*to) = (caddr_t)eq->desc + portion;
2601 	}
2602 }
2603 
2604 static inline void
2605 ring_eq_db(struct adapter *sc, struct sge_eq *eq)
2606 {
2607 	wmb();
2608 	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
2609 	    V_QID(eq->cntxt_id) | V_PIDX(eq->pending));
2610 	eq->pending = 0;
2611 }
2612 
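/*
 * Returns the number of tx descriptors that the hardware is done with, based
 * on the consumer index the SGE maintains in the queue's status page.
 */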
2613 static inline int
2614 reclaimable(struct sge_eq *eq)
2615 {
2616 	unsigned int cidx;
2617 
2618 	cidx = eq->spg->cidx;	/* stable snapshot */
2619 	cidx = be16_to_cpu(cidx);
2620 
2621 	if (cidx >= eq->cidx)
2622 		return (cidx - eq->cidx);
2623 	else
2624 		return (cidx + eq->cap - eq->cidx);
2625 }
2626 
2627 /*
2628  * There are "can_reclaim" tx descriptors ready to be reclaimed.  Reclaim as
2629  * many as possible but stop once roughly "n" mbufs have been freed.
2630  *
2631  * The number of tx descriptors actually reclaimed is returned.
2632  */
2633 static int
2634 reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
2635 {
2636 	struct tx_sdesc *txsd;
2637 	struct tx_map *txm;
2638 	unsigned int reclaimed, maps;
2639 	struct sge_eq *eq = &txq->eq;
2640 
2641 	EQ_LOCK_ASSERT_OWNED(eq);
2642 
2643 	if (can_reclaim == 0)
2644 		can_reclaim = reclaimable(eq);
2645 
2646 	maps = reclaimed = 0;
2647 	while (can_reclaim && maps < n) {
2648 		int ndesc;
2649 
2650 		txsd = &txq->sdesc[eq->cidx];
2651 		ndesc = txsd->desc_used;
2652 
2653 		/* Firmware doesn't return "partial" credits. */
2654 		KASSERT(can_reclaim >= ndesc,
2655 		    ("%s: unexpected number of credits: %d, %d",
2656 		    __func__, can_reclaim, ndesc));
2657 
2658 		maps += txsd->credits;
2659 
2660 		reclaimed += ndesc;
2661 		can_reclaim -= ndesc;
2662 
2663 		eq->cidx += ndesc;
2664 		if (__predict_false(eq->cidx >= eq->cap))
2665 			eq->cidx -= eq->cap;
2666 	}
2667 
2668 	txm = &txq->maps[txq->map_cidx];
2669 	if (maps)
2670 		prefetch(txm->m);
2671 
2672 	eq->avail += reclaimed;
2673 	KASSERT(eq->avail < eq->cap,	/* avail tops out at (cap - 1) */
2674 	    ("%s: too many descriptors available", __func__));
2675 
2676 	txq->map_avail += maps;
2677 	KASSERT(txq->map_avail <= txq->map_total,
2678 	    ("%s: too many maps available", __func__));
2679 
2680 	while (maps--) {
2681 		struct tx_map *next;
2682 
2683 		next = txm + 1;
2684 		if (__predict_false(txq->map_cidx + 1 == txq->map_total))
2685 			next = txq->maps;
2686 		prefetch(next->m);
2687 
2688 		bus_dmamap_unload(txq->tx_tag, txm->map);
2689 		m_freem(txm->m);
2690 		txm->m = NULL;
2691 
2692 		txm = next;
2693 		if (__predict_false(++txq->map_cidx == txq->map_total))
2694 			txq->map_cidx = 0;
2695 	}
2696 
2697 	return (reclaimed);
2698 }
2699 
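/*
 * Posts a flush work request on the egress queue.  EQUEQ/EQUIQ ask the SGE
 * for an egress update once the queue catches up, and EQ_CRFLUSHED records
 * that such an update is expected (see handle_sge_egr_update and free_txq).
 */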
2700 static void
2701 write_eqflush_wr(struct sge_eq *eq)
2702 {
2703 	struct fw_eq_flush_wr *wr;
2704 
2705 	EQ_LOCK_ASSERT_OWNED(eq);
2706 	KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));
2707 
2708 	wr = (void *)&eq->desc[eq->pidx];
2709 	bzero(wr, sizeof(*wr));
2710 	wr->opcode = FW_EQ_FLUSH_WR;
2711 	wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
2712 	    F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);
2713 
2714 	eq->flags |= EQ_CRFLUSHED;
2715 	eq->pending++;
2716 	eq->avail--;
2717 	if (++eq->pidx == eq->cap)
2718 		eq->pidx = 0;
2719 }
2720 
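/*
 * Returns flit "idx" of the portion of an SGL that follows the initial
 * (len0, addr0) pair.  The remaining segments are laid out as repeating
 * 3-flit groups: one flit holding two lengths followed by the two
 * corresponding addresses, hence the arithmetic modulo 3.
 */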
2721 static __be64
2722 get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
2723 {
2724 	int i = (idx / 3) * 2;
2725 
2726 	switch (idx % 3) {
2727 	case 0: {
2728 		__be64 rc;
2729 
2730 		rc = htobe32(sgl[i].ds_len);
2731 		if (i + 1 < nsegs)
2732 			rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32;
2733 
2734 		return (rc);
2735 	}
2736 	case 1:
2737 		return htobe64(sgl[i].ds_addr);
2738 	case 2:
2739 		return htobe64(sgl[i + 1].ds_addr);
2740 	}
2741 
2742 	return (0);
2743 }
2744 
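/*
 * Picks the smallest freelist buffer size that can hold an MTU-sized frame
 * plus FL_PKTSHIFT bytes of padding, falling back to the largest available
 * size if none is big enough.
 */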
2745 static void
2746 set_fl_tag_idx(struct sge_fl *fl, int mtu)
2747 {
2748 	int i;
2749 
2750 	FL_LOCK_ASSERT_OWNED(fl);
2751 
2752 	for (i = 0; i < FL_BUF_SIZES - 1; i++) {
2753 		if (FL_BUF_SIZE(i) >= (mtu + FL_PKTSHIFT))
2754 			break;
2755 	}
2756 
2757 	fl->tag_idx = i;
2758 }
2759 
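/*
 * An egress update from the SGE indicates that a previously requested credit
 * flush has completed.  If EQ_CRFLUSHED is still set the queue is live and tx
 * is resumed via the port's taskqueue; otherwise the queue is being torn down
 * and free_txq is waiting to be woken up.
 */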
2760 static int
2761 handle_sge_egr_update(struct adapter *sc, const struct cpl_sge_egr_update *cpl)
2762 {
2763 	unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
2764 	struct sge *s = &sc->sge;
2765 	struct sge_txq *txq;
2766 	struct port_info *pi;
2767 
2768 	txq = (void *)s->eqmap[qid - s->eq_start];
2769 	TXQ_LOCK(txq);
2770 	if (txq->eq.flags & EQ_CRFLUSHED) {
2771 		pi = txq->ifp->if_softc;
2772 		taskqueue_enqueue(pi->tq, &txq->resume_tx);
2773 		txq->egr_update++;
2774 	} else
2775 		wakeup_one(txq);	/* txq is going away, wakeup free_txq */
2776 	TXQ_UNLOCK(txq);
2777 
2778 	return (0);
2779 }
2780 
2781 static void
2782 handle_cpl(struct adapter *sc, struct sge_iq *iq)
2783 {
2784 	const struct rss_header *rss = (const void *)iq->cdesc;
2785 	const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);
2786 
2787 	switch (rss->opcode) {
2788 	case CPL_FW4_MSG:
2789 	case CPL_FW6_MSG:
2790 		if (cpl->type == FW6_TYPE_CMD_RPL)
2791 			t4_handle_fw_rpl(sc, cpl->data);
2792 		break;
2793 
2794 	case CPL_SGE_EGR_UPDATE:
2795 		handle_sge_egr_update(sc, (const void *)cpl);
2796 		break;
2797 
2798 	case CPL_SET_TCB_RPL:
2799 		filter_rpl(sc, (const void *)cpl);
2800 		break;
2801 
2802 	default:
2803 		panic("%s: unexpected CPL opcode 0x%x", __func__, rss->opcode);
2804 	}
2805 }
2806 
2807 /*
2808  * m0 is freed on successful transmission.
2809  */
2810 static int
2811 ctrl_tx(struct adapter *sc, struct sge_ctrlq *ctrlq, struct mbuf *m0)
2812 {
2813 	struct sge_eq *eq = &ctrlq->eq;
2814 	int rc = 0, ndesc;
2815 	int can_reclaim;
2816 	caddr_t dst;
2817 	struct mbuf *m;
2818 
2819 	M_ASSERTPKTHDR(m0);
2820 
2821 	if (m0->m_pkthdr.len > SGE_MAX_WR_LEN) {
2822 		log(LOG_ERR, "%s: %s work request too long (%d)",
2823 		    device_get_nameunit(sc->dev), __func__, m0->m_pkthdr.len);
2824 		return (EMSGSIZE);
2825 	}
2826 	ndesc = howmany(m0->m_pkthdr.len, CTRL_EQ_ESIZE);
2827 
2828 	EQ_LOCK(eq);
2829 
2830 	can_reclaim = reclaimable(eq);
2831 	eq->cidx += can_reclaim;
2832 	eq->avail += can_reclaim;
2833 	if (__predict_false(eq->cidx >= eq->cap))
2834 		eq->cidx -= eq->cap;
2835 
2836 	if (eq->avail < ndesc) {
2837 		rc = EAGAIN;
2838 		ctrlq->no_desc++;
2839 		goto failed;
2840 	}
2841 
2842 	dst = (void *)&eq->desc[eq->pidx];
2843 	for (m = m0; m; m = m->m_next)
2844 		copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
2845 
2846 	eq->pidx += ndesc;
2847 	if (__predict_false(eq->pidx >= eq->cap))
2848 		eq->pidx -= eq->cap;
2849 
2850 	eq->pending += ndesc;
2851 	ring_eq_db(sc, eq);
2852 failed:
2853 	EQ_UNLOCK(eq);
2854 	if (rc == 0)
2855 		m_freem(m0);
2856 
2857 	return (rc);
2858 }
2859 
2860 static int
2861 sysctl_uint16(SYSCTL_HANDLER_ARGS)
2862 {
2863 	uint16_t *id = arg1;
2864 	int i = *id;
2865 
2866 	return sysctl_handle_int(oidp, &i, 0, req);
2867 }
2868