xref: /freebsd/sys/dev/cxgbe/t4_sge.c (revision 884a2a699669ec61e2366e3e358342dbc94be24a)
1 /*-
2  * Copyright (c) 2011 Chelsio Communications, Inc.
3  * All rights reserved.
4  * Written by: Navdeep Parhar <np@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include "opt_inet.h"
32 
33 #include <sys/types.h>
34 #include <sys/mbuf.h>
35 #include <sys/socket.h>
36 #include <sys/kernel.h>
37 #include <sys/malloc.h>
38 #include <sys/queue.h>
39 #include <sys/taskqueue.h>
40 #include <sys/sysctl.h>
41 #include <net/bpf.h>
42 #include <net/ethernet.h>
43 #include <net/if.h>
44 #include <net/if_vlan_var.h>
45 #include <netinet/in.h>
46 #include <netinet/ip.h>
47 #include <netinet/tcp.h>
48 
49 #include "common/common.h"
50 #include "common/t4_regs.h"
51 #include "common/t4_regs_values.h"
52 #include "common/t4_msg.h"
53 #include "common/t4fw_interface.h"
54 
55 struct fl_buf_info {
56 	int size;
57 	int type;
58 	uma_zone_t zone;
59 };
60 
61 /* Filled up by t4_sge_modload */
62 static struct fl_buf_info fl_buf_info[FL_BUF_SIZES];
63 
64 #define FL_BUF_SIZE(x)	(fl_buf_info[x].size)
65 #define FL_BUF_TYPE(x)	(fl_buf_info[x].type)
66 #define FL_BUF_ZONE(x)	(fl_buf_info[x].zone)
67 
68 enum {
69 	FL_PKTSHIFT = 2
70 };
71 
72 #define FL_ALIGN	min(CACHE_LINE_SIZE, 32)
73 #if CACHE_LINE_SIZE > 64
74 #define SPG_LEN		128
75 #else
76 #define SPG_LEN		64
77 #endif
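
/*
 * Notes on the constants above, derived from how they are used below:
 *
 * FL_PKTSHIFT: the chip writes each received frame this many bytes into its
 * buffer; a 2 byte shift leaves the IP header of a plain Ethernet frame
 * 16-byte aligned behind the 14 byte Ethernet header.
 *
 * FL_ALIGN: ingress padding boundary programmed into the SGE (t4_sge_init).
 *
 * SPG_LEN: size of an egress queue's status page, sized to cover at least
 * one cache line (64 or 128 bytes).
 */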
78 
79 /* Used to track a coalesced tx work request */
80 struct txpkts {
81 	uint64_t *flitp;	/* ptr to flit where next pkt should start */
82 	uint8_t npkt;		/* # of packets in this work request */
83 	uint8_t nflits;		/* # of flits used by this work request */
84 	uint16_t plen;		/* total payload (sum of all packets) */
85 };
86 
87 /* A packet's SGL.  This + m_pkthdr has all info needed for tx */
88 struct sgl {
89 	int nsegs;		/* # of segments in the SGL, 0 means imm. tx */
90 	int nflits;		/* # of flits needed for the SGL */
91 	bus_dma_segment_t seg[TX_SGL_SEGS];
92 };
93 
94 static void t4_evt_rx(void *);
95 static void t4_eth_rx(void *);
96 static inline void init_iq(struct sge_iq *, struct adapter *, int, int, int,
97     int, iq_intr_handler_t *, char *);
98 static inline void init_fl(struct sge_fl *, int, char *);
99 static inline void init_eq(struct sge_eq *, int, char *);
100 static int alloc_ring(struct adapter *, size_t, bus_dma_tag_t *, bus_dmamap_t *,
101     bus_addr_t *, void **);
102 static int free_ring(struct adapter *, bus_dma_tag_t, bus_dmamap_t, bus_addr_t,
103     void *);
104 static int alloc_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *,
105     int, int);
106 static int free_iq_fl(struct port_info *, struct sge_iq *, struct sge_fl *);
107 static int alloc_intrq(struct adapter *, int, int, int);
108 static int free_intrq(struct sge_iq *);
109 static int alloc_fwq(struct adapter *, int);
110 static int free_fwq(struct sge_iq *);
111 static int alloc_rxq(struct port_info *, struct sge_rxq *, int, int);
112 static int free_rxq(struct port_info *, struct sge_rxq *);
113 static int alloc_ctrlq(struct adapter *, struct sge_ctrlq *, int);
114 static int free_ctrlq(struct adapter *, struct sge_ctrlq *);
115 static int alloc_txq(struct port_info *, struct sge_txq *, int);
116 static int free_txq(struct port_info *, struct sge_txq *);
117 static void oneseg_dma_callback(void *, bus_dma_segment_t *, int, int);
118 static inline bool is_new_response(const struct sge_iq *, struct rsp_ctrl **);
119 static inline void iq_next(struct sge_iq *);
120 static inline void ring_fl_db(struct adapter *, struct sge_fl *);
121 static void refill_fl(struct adapter *, struct sge_fl *, int, int);
122 static int alloc_fl_sdesc(struct sge_fl *);
123 static void free_fl_sdesc(struct sge_fl *);
124 static int alloc_tx_maps(struct sge_txq *);
125 static void free_tx_maps(struct sge_txq *);
126 static void set_fl_tag_idx(struct sge_fl *, int);
127 
128 static int get_pkt_sgl(struct sge_txq *, struct mbuf **, struct sgl *, int);
129 static int free_pkt_sgl(struct sge_txq *, struct sgl *);
130 static int write_txpkt_wr(struct port_info *, struct sge_txq *, struct mbuf *,
131     struct sgl *);
132 static int add_to_txpkts(struct port_info *, struct sge_txq *, struct txpkts *,
133     struct mbuf *, struct sgl *);
134 static void write_txpkts_wr(struct sge_txq *, struct txpkts *);
135 static inline void write_ulp_cpl_sgl(struct port_info *, struct sge_txq *,
136     struct txpkts *, struct mbuf *, struct sgl *);
137 static int write_sgl_to_txd(struct sge_eq *, struct sgl *, caddr_t *);
138 static inline void copy_to_txd(struct sge_eq *, caddr_t, caddr_t *, int);
139 static inline void ring_eq_db(struct adapter *, struct sge_eq *);
140 static inline int reclaimable(struct sge_eq *);
141 static int reclaim_tx_descs(struct sge_txq *, int, int);
142 static void write_eqflush_wr(struct sge_eq *);
143 static __be64 get_flit(bus_dma_segment_t *, int, int);
144 static int handle_sge_egr_update(struct adapter *,
145     const struct cpl_sge_egr_update *);
146 static void handle_cpl(struct adapter *, struct sge_iq *);
147 
148 static int ctrl_tx(struct adapter *, struct sge_ctrlq *, struct mbuf *);
149 static int sysctl_uint16(SYSCTL_HANDLER_ARGS);
150 
151 extern void filter_rpl(struct adapter *, const struct cpl_set_tcb_rpl *);
152 
153 /*
154  * Called on MOD_LOAD and fills up fl_buf_info[].
155  */
156 void
157 t4_sge_modload(void)
158 {
159 	int i;
160 	int bufsize[FL_BUF_SIZES] = {
161 		MCLBYTES,
162 #if MJUMPAGESIZE != MCLBYTES
163 		MJUMPAGESIZE,
164 #endif
165 		MJUM9BYTES,
166 		MJUM16BYTES
167 	};
168 
169 	for (i = 0; i < FL_BUF_SIZES; i++) {
170 		FL_BUF_SIZE(i) = bufsize[i];
171 		FL_BUF_TYPE(i) = m_gettype(bufsize[i]);
172 		FL_BUF_ZONE(i) = m_getzone(bufsize[i]);
173 	}
174 }
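
/*
 * For illustration: assuming stock mbuf cluster sizes and 4KB pages (typical
 * amd64 values, not something this file requires), the table built above
 * ends up as:
 *
 *	fl_buf_info[0] = { 2048  (MCLBYTES),     EXT_CLUSTER, zone_clust   }
 *	fl_buf_info[1] = { 4096  (MJUMPAGESIZE), EXT_JUMBOP,  zone_jumbop  }
 *	fl_buf_info[2] = { 9216  (MJUM9BYTES),   EXT_JUMBO9,  zone_jumbo9  }
 *	fl_buf_info[3] = { 16384 (MJUM16BYTES),  EXT_JUMBO16, zone_jumbo16 }
 *
 * set_fl_tag_idx() selects an index into this table based on the MTU.
 */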
175 
176 /**
177  *	t4_sge_init - initialize SGE
178  *	@sc: the adapter
179  *
180  *	Performs SGE initialization needed every time after a chip reset.
181  *	We do not initialize any of the queues here; the top-level driver
182  *	must request them individually.
183  */
184 void
185 t4_sge_init(struct adapter *sc)
186 {
187 	struct sge *s = &sc->sge;
188 	int i;
189 
190 	t4_set_reg_field(sc, A_SGE_CONTROL, V_PKTSHIFT(M_PKTSHIFT) |
191 			 V_INGPADBOUNDARY(M_INGPADBOUNDARY) |
192 			 F_EGRSTATUSPAGESIZE,
193 			 V_INGPADBOUNDARY(ilog2(FL_ALIGN) - 5) |
194 			 V_PKTSHIFT(FL_PKTSHIFT) |
195 			 F_RXPKTCPLMODE |
196 			 V_EGRSTATUSPAGESIZE(SPG_LEN == 128));
197 	t4_set_reg_field(sc, A_SGE_HOST_PAGE_SIZE,
198 			 V_HOSTPAGESIZEPF0(M_HOSTPAGESIZEPF0),
199 			 V_HOSTPAGESIZEPF0(PAGE_SHIFT - 10));
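
	/*
	 * Note, derived from the expressions above: INGPADBOUNDARY is written
	 * as log2(boundary) - 5 (so FL_ALIGN = 32 becomes 0), and
	 * HOSTPAGESIZEPF0 as log2(page size) - 10 (2 for 4KB pages).
	 */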
200 
201 	for (i = 0; i < FL_BUF_SIZES; i++) {
202 		t4_write_reg(sc, A_SGE_FL_BUFFER_SIZE0 + (4 * i),
203 		    FL_BUF_SIZE(i));
204 	}
205 
206 	t4_write_reg(sc, A_SGE_INGRESS_RX_THRESHOLD,
207 		     V_THRESHOLD_0(s->counter_val[0]) |
208 		     V_THRESHOLD_1(s->counter_val[1]) |
209 		     V_THRESHOLD_2(s->counter_val[2]) |
210 		     V_THRESHOLD_3(s->counter_val[3]));
211 
212 	t4_write_reg(sc, A_SGE_TIMER_VALUE_0_AND_1,
213 		     V_TIMERVALUE0(us_to_core_ticks(sc, s->timer_val[0])) |
214 		     V_TIMERVALUE1(us_to_core_ticks(sc, s->timer_val[1])));
215 	t4_write_reg(sc, A_SGE_TIMER_VALUE_2_AND_3,
216 		     V_TIMERVALUE2(us_to_core_ticks(sc, s->timer_val[2])) |
217 		     V_TIMERVALUE3(us_to_core_ticks(sc, s->timer_val[3])));
218 	t4_write_reg(sc, A_SGE_TIMER_VALUE_4_AND_5,
219 		     V_TIMERVALUE4(us_to_core_ticks(sc, s->timer_val[4])) |
220 		     V_TIMERVALUE5(us_to_core_ticks(sc, s->timer_val[5])));
221 }
222 
223 int
224 t4_create_dma_tag(struct adapter *sc)
225 {
226 	int rc;
227 
228 	rc = bus_dma_tag_create(bus_get_dma_tag(sc->dev), 1, 0,
229 	    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL, BUS_SPACE_MAXSIZE,
230 	    BUS_SPACE_UNRESTRICTED, BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL,
231 	    NULL, &sc->dmat);
232 	if (rc != 0) {
233 		device_printf(sc->dev,
234 		    "failed to create main DMA tag: %d\n", rc);
235 	}
236 
237 	return (rc);
238 }
239 
240 int
241 t4_destroy_dma_tag(struct adapter *sc)
242 {
243 	if (sc->dmat)
244 		bus_dma_tag_destroy(sc->dmat);
245 
246 	return (0);
247 }
248 
249 /*
250  * Allocate and initialize the firmware event queue, control queues, and the
251  * interrupt queues.  The adapter owns all of these queues.
252  *
253  * Returns errno on failure.  Resources allocated up to that point may still be
254  * allocated.  Caller is responsible for cleanup in case this function fails.
255  */
256 int
257 t4_setup_adapter_queues(struct adapter *sc)
258 {
259 	int i, j, rc, intr_idx, qsize;
260 	struct sge_iq *iq;
261 	struct sge_ctrlq *ctrlq;
262 	iq_intr_handler_t *handler;
263 	char name[16];
264 
265 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
266 
267 	if (sysctl_ctx_init(&sc->ctx) == 0) {
268 		struct sysctl_oid *oid = device_get_sysctl_tree(sc->dev);
269 		struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
270 
271 		sc->oid_fwq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO,
272 		    "fwq", CTLFLAG_RD, NULL, "firmware event queue");
273 		sc->oid_ctrlq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO,
274 		    "ctrlq", CTLFLAG_RD, NULL, "ctrl queues");
275 		sc->oid_intrq = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO,
276 		    "intrq", CTLFLAG_RD, NULL, "interrupt queues");
277 	}
278 
279 	/*
280 	 * Interrupt queues
281 	 */
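	/*
	 * The interrupt queues are serviced by the last NINTRQ(sc) vectors;
	 * intr_idx starts at the first of those and advances by one per queue
	 * created below.  With INTR_SHARED one set of queues is spread across
	 * the ports round-robin, otherwise each port gets one interrupt queue
	 * per rx queue.
	 */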
282 	intr_idx = sc->intr_count - NINTRQ(sc);
283 	if (sc->flags & INTR_SHARED) {
284 		qsize = max((sc->sge.nrxq + 1) * 2, INTR_IQ_QSIZE);
285 		for (i = 0; i < NINTRQ(sc); i++, intr_idx++) {
286 			snprintf(name, sizeof(name), "%s intrq%d",
287 			    device_get_nameunit(sc->dev), i);
288 
289 			iq = &sc->sge.intrq[i];
290 			init_iq(iq, sc, 0, 0, qsize, INTR_IQ_ESIZE, NULL, name);
291 			rc = alloc_intrq(sc, i % sc->params.nports, i,
292 			    intr_idx);
293 
294 			if (rc != 0) {
295 				device_printf(sc->dev,
296 				    "failed to create %s: %d\n", name, rc);
297 				return (rc);
298 			}
299 		}
300 	} else {
301 		int qidx = 0;
302 		struct port_info *pi;
303 
304 		for (i = 0; i < sc->params.nports; i++) {
305 			pi = sc->port[i];
306 			qsize = max((pi->nrxq + 1) * 2, INTR_IQ_QSIZE);
307 			for (j = 0; j < pi->nrxq; j++, qidx++, intr_idx++) {
308 				snprintf(name, sizeof(name), "%s intrq%d",
309 				    device_get_nameunit(pi->dev), j);
310 
311 				iq = &sc->sge.intrq[qidx];
312 				init_iq(iq, sc, 0, 0, qsize, INTR_IQ_ESIZE,
313 				    NULL, name);
314 				rc = alloc_intrq(sc, i, qidx, intr_idx);
315 
316 				if (rc != 0) {
317 					device_printf(sc->dev,
318 					    "failed to create %s: %d\n",
319 					    name, rc);
320 					return (rc);
321 				}
322 			}
323 		}
324 	}
325 
326 	/*
327 	 * Firmware event queue
328 	 */
329 	snprintf(name, sizeof(name), "%s fwq", device_get_nameunit(sc->dev));
330 	if (sc->intr_count > T4_EXTRA_INTR) {
331 		handler = NULL;
332 		intr_idx = 1;
333 	} else {
334 		handler = t4_evt_rx;
335 		intr_idx = 0;
336 	}
337 
338 	iq = &sc->sge.fwq;
339 	init_iq(iq, sc, 0, 0, FW_IQ_QSIZE, FW_IQ_ESIZE, handler, name);
340 	rc = alloc_fwq(sc, intr_idx);
341 	if (rc != 0) {
342 		device_printf(sc->dev,
343 		    "failed to create firmware event queue: %d\n", rc);
344 
345 		return (rc);
346 	}
347 
348 	/*
349 	 * Control queues - one per port.
350 	 */
351 	ctrlq = &sc->sge.ctrlq[0];
352 	for (i = 0; i < sc->params.nports; i++, ctrlq++) {
353 		snprintf(name, sizeof(name), "%s ctrlq%d",
354 		    device_get_nameunit(sc->dev), i);
355 		init_eq(&ctrlq->eq, CTRL_EQ_QSIZE, name);
356 
357 		rc = alloc_ctrlq(sc, ctrlq, i);
358 		if (rc != 0) {
359 			device_printf(sc->dev,
360 			    "failed to create control queue %d: %d\n", i, rc);
361 			return (rc);
362 		}
363 	}
364 
365 	return (rc);
366 }
367 
368 /*
369  * Idempotent
370  */
371 int
372 t4_teardown_adapter_queues(struct adapter *sc)
373 {
374 	int i;
375 	struct sge_iq *iq;
376 
377 	ADAPTER_LOCK_ASSERT_NOTOWNED(sc);
378 
379 	/* Do this before freeing the queues */
380 	if (sc->oid_fwq || sc->oid_ctrlq || sc->oid_intrq) {
381 		sysctl_ctx_free(&sc->ctx);
382 		sc->oid_fwq = NULL;
383 		sc->oid_ctrlq = NULL;
384 		sc->oid_intrq = NULL;
385 	}
386 
387 	for (i = 0; i < sc->params.nports; i++)
388 		free_ctrlq(sc, &sc->sge.ctrlq[i]);
389 
390 	iq = &sc->sge.fwq;
391 	free_fwq(iq);
392 
393 	for (i = 0; i < NINTRQ(sc); i++) {
394 		iq = &sc->sge.intrq[i];
395 		free_intrq(iq);
396 	}
397 
398 	return (0);
399 }
400 
401 int
402 t4_setup_eth_queues(struct port_info *pi)
403 {
404 	int rc = 0, i, intr_idx;
405 	struct sge_rxq *rxq;
406 	struct sge_txq *txq;
407 	char name[16];
408 	struct adapter *sc = pi->adapter;
409 
410 	if (sysctl_ctx_init(&pi->ctx) == 0) {
411 		struct sysctl_oid *oid = device_get_sysctl_tree(pi->dev);
412 		struct sysctl_oid_list *children = SYSCTL_CHILDREN(oid);
413 
414 		pi->oid_rxq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO,
415 		    "rxq", CTLFLAG_RD, NULL, "rx queues");
416 		pi->oid_txq = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO,
417 		    "txq", CTLFLAG_RD, NULL, "tx queues");
418 	}
419 
420 	for_each_rxq(pi, i, rxq) {
421 
422 		snprintf(name, sizeof(name), "%s rxq%d-iq",
423 		    device_get_nameunit(pi->dev), i);
424 		init_iq(&rxq->iq, sc, pi->tmr_idx, pi->pktc_idx,
425 		    pi->qsize_rxq, RX_IQ_ESIZE, t4_eth_rx, name);
426 
427 		snprintf(name, sizeof(name), "%s rxq%d-fl",
428 		    device_get_nameunit(pi->dev), i);
429 		init_fl(&rxq->fl, pi->qsize_rxq / 8, name);
430 
431 		intr_idx = pi->first_rxq + i;
432 		if (sc->flags & INTR_SHARED)
433 			intr_idx %= NINTRQ(sc);
434 
435 		rc = alloc_rxq(pi, rxq, intr_idx, i);
436 		if (rc != 0)
437 			goto done;
438 	}
439 
440 	for_each_txq(pi, i, txq) {
441 
442 		snprintf(name, sizeof(name), "%s txq%d",
443 		    device_get_nameunit(pi->dev), i);
444 		init_eq(&txq->eq, pi->qsize_txq, name);
445 
446 		rc = alloc_txq(pi, txq, i);
447 		if (rc != 0)
448 			goto done;
449 	}
450 
451 done:
452 	if (rc)
453 		t4_teardown_eth_queues(pi);
454 
455 	return (rc);
456 }
457 
458 /*
459  * Idempotent
460  */
461 int
462 t4_teardown_eth_queues(struct port_info *pi)
463 {
464 	int i;
465 	struct sge_rxq *rxq;
466 	struct sge_txq *txq;
467 
468 	/* Do this before freeing the queues */
469 	if (pi->oid_txq || pi->oid_rxq) {
470 		sysctl_ctx_free(&pi->ctx);
471 		pi->oid_txq = pi->oid_rxq = NULL;
472 	}
473 
474 	for_each_txq(pi, i, txq) {
475 		free_txq(pi, txq);
476 	}
477 
478 	for_each_rxq(pi, i, rxq) {
479 		free_rxq(pi, rxq);
480 	}
481 
482 	return (0);
483 }
484 
485 /* Deals with errors and the first (and only) interrupt queue */
486 void
487 t4_intr_all(void *arg)
488 {
489 	struct adapter *sc = arg;
490 
491 	t4_intr_err(arg);
492 	t4_intr(&sc->sge.intrq[0]);
493 }
494 
495 /* Deals with interrupts, and a few CPLs, on the given interrupt queue */
496 void
497 t4_intr(void *arg)
498 {
499 	struct sge_iq *iq = arg, *q;
500 	struct adapter *sc = iq->adapter;
501 	struct rsp_ctrl *ctrl;
502 	const struct rss_header *rss;
503 	int ndesc_pending = 0, ndesc_total = 0;
504 	int qid, rsp_type;
505 
506 	if (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY))
507 		return;
508 
509 	while (is_new_response(iq, &ctrl)) {
510 
511 		rmb();
512 
513 		rss = (const void *)iq->cdesc;
514 		rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
515 
516 		if (__predict_false(rsp_type == X_RSPD_TYPE_CPL)) {
517 			handle_cpl(sc, iq);
518 			goto nextdesc;
519 		}
520 
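		/*
		 * Any other entry on an interrupt queue is indirect: its qid
		 * names another ingress queue that has work pending.  Look it
		 * up and run its handler unless it is already being serviced.
		 */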
521 		qid = ntohl(ctrl->pldbuflen_qid) - sc->sge.iq_start;
522 		q = sc->sge.iqmap[qid];
523 
524 		if (atomic_cmpset_32(&q->state, IQS_IDLE, IQS_BUSY)) {
525 			q->handler(q);
526 			atomic_cmpset_32(&q->state, IQS_BUSY, IQS_IDLE);
527 		}
528 
529 nextdesc:	ndesc_total++;
530 		if (++ndesc_pending >= iq->qsize / 4) {
531 			t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
532 			    V_CIDXINC(ndesc_pending) |
533 			    V_INGRESSQID(iq->cntxt_id) |
534 			    V_SEINTARM(
535 				V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
536 			ndesc_pending = 0;
537 		}
538 
539 		iq_next(iq);
540 	}
541 
542 	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) |
543 	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_params));
544 
545 	atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE);
546 }
547 
548 /* Deals with error interrupts */
549 void
550 t4_intr_err(void *arg)
551 {
552 	struct adapter *sc = arg;
553 
554 	t4_write_reg(sc, MYPF_REG(A_PCIE_PF_CLI), 0);
555 	t4_slow_intr_handler(sc);
556 }
557 
558 /* Deals with the firmware event queue */
559 void
560 t4_intr_evt(void *arg)
561 {
562 	struct sge_iq *iq = arg;
563 
564 	if (atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_BUSY)) {
565 		t4_evt_rx(arg);
566 		atomic_cmpset_32(&iq->state, IQS_BUSY, IQS_IDLE);
567 	}
568 }
569 
570 static void
571 t4_evt_rx(void *arg)
572 {
573 	struct sge_iq *iq = arg;
574 	struct adapter *sc = iq->adapter;
575 	struct rsp_ctrl *ctrl;
576 	int ndesc_pending = 0, ndesc_total = 0;
577 
578 	KASSERT(iq == &sc->sge.fwq, ("%s: unexpected ingress queue", __func__));
579 
580 	while (is_new_response(iq, &ctrl)) {
581 		int rsp_type;
582 
583 		rmb();
584 
585 		rsp_type = G_RSPD_TYPE(ctrl->u.type_gen);
586 		if (__predict_false(rsp_type != X_RSPD_TYPE_CPL))
587 			panic("%s: unexpected rsp_type %d", __func__, rsp_type);
588 
589 		handle_cpl(sc, iq);
590 
591 		ndesc_total++;
592 		if (++ndesc_pending >= iq->qsize / 4) {
593 			t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
594 			    V_CIDXINC(ndesc_pending) |
595 			    V_INGRESSQID(iq->cntxt_id) |
596 			    V_SEINTARM(
597 				V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
598 			ndesc_pending = 0;
599 		}
600 
601 		iq_next(iq);
602 	}
603 
604 	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndesc_pending) |
605 	    V_INGRESSQID(iq->cntxt_id) | V_SEINTARM(iq->intr_params));
606 }
607 
608 #ifdef T4_PKT_TIMESTAMP
609 #define RX_COPY_THRESHOLD (MINCLSIZE - 8)
610 #else
611 #define RX_COPY_THRESHOLD MINCLSIZE
612 #endif
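
/*
 * Frames shorter than RX_COPY_THRESHOLD are copied into the mbuf so that the
 * cluster can be recycled in place (see refill_fl); longer frames hand the
 * cluster itself up the stack.  The timestamp build reserves 8 bytes at the
 * front of the mbuf, hence the smaller threshold there.
 */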
613 
614 static void
615 t4_eth_rx(void *arg)
616 {
617 	struct sge_rxq *rxq = arg;
618 	struct sge_iq *iq = arg;
619 	struct adapter *sc = iq->adapter;
620 	struct rsp_ctrl *ctrl;
621 	struct ifnet *ifp = rxq->ifp;
622 	struct sge_fl *fl = &rxq->fl;
623 	struct fl_sdesc *sd = &fl->sdesc[fl->cidx], *sd_next;
624 	const struct rss_header *rss;
625 	const struct cpl_rx_pkt *cpl;
626 	uint32_t len;
627 	int ndescs = 0, i;
628 	struct mbuf *m0, *m;
629 #ifdef INET
630 	struct lro_ctrl *lro = &rxq->lro;
631 	struct lro_entry *l;
632 #endif
633 
634 	prefetch(sd->m);
635 	prefetch(sd->cl);
636 
637 	iq->intr_next = iq->intr_params;
638 	while (is_new_response(iq, &ctrl)) {
639 
640 		rmb();
641 
642 		rss = (const void *)iq->cdesc;
643 		i = G_RSPD_TYPE(ctrl->u.type_gen);
644 
645 		KASSERT(i == X_RSPD_TYPE_FLBUF && rss->opcode == CPL_RX_PKT,
646 		    ("%s: unexpected type %d CPL opcode 0x%x",
647 		    __func__, i, rss->opcode));
648 
649 		sd_next = sd + 1;
650 		if (__predict_false(fl->cidx + 1 == fl->cap))
651 			sd_next = fl->sdesc;
652 		prefetch(sd_next->m);
653 		prefetch(sd_next->cl);
654 
655 		cpl = (const void *)(rss + 1);
656 
657 		m0 = sd->m;
658 		sd->m = NULL;	/* consumed */
659 
660 		len = be32toh(ctrl->pldbuflen_qid);
661 		if (__predict_false((len & F_RSPD_NEWBUF) == 0))
662 			panic("%s: cannot handle packed frames", __func__);
663 		len = G_RSPD_LEN(len);
664 
665 		bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
666 		    BUS_DMASYNC_POSTREAD);
667 
668 		m_init(m0, NULL, 0, M_NOWAIT, MT_DATA, M_PKTHDR);
669 
670 #ifdef T4_PKT_TIMESTAMP
671 		*mtod(m0, uint64_t *) =
672 		    be64toh(ctrl->u.last_flit & 0xfffffffffffffff);
673 		m0->m_data += 8;
674 
675 		/*
676 		 * 60 bit timestamp value is *(uint64_t *)m0->m_pktdat.  Note
677 		 * that it is in the leading free-space (see M_LEADINGSPACE) in
678 		 * the mbuf.  The kernel can clobber it during a pullup,
679 		 * m_copymdata, etc.  You need to make sure that the mbuf
680 		 * reaches you unmolested if you care about the timestamp.
681 		 */
682 #endif
683 
684 		if (len < RX_COPY_THRESHOLD) {
685 			/* copy data to mbuf, buffer will be recycled */
686 			bcopy(sd->cl, mtod(m0, caddr_t), len);
687 			m0->m_len = len;
688 		} else {
689 			bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
690 			m_cljset(m0, sd->cl, FL_BUF_TYPE(sd->tag_idx));
691 			sd->cl = NULL;	/* consumed */
692 			m0->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
693 		}
694 
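		/*
		 * Strip the padding that the chip inserted at the start of
		 * the frame (see FL_PKTSHIFT).
		 */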
695 		len -= FL_PKTSHIFT;
696 		m0->m_len -= FL_PKTSHIFT;
697 		m0->m_data += FL_PKTSHIFT;
698 
699 		m0->m_pkthdr.len = len;
700 		m0->m_pkthdr.rcvif = ifp;
701 		m0->m_flags |= M_FLOWID;
702 		m0->m_pkthdr.flowid = rss->hash_val;
703 
704 		if (cpl->csum_calc && !cpl->err_vec &&
705 		    ifp->if_capenable & IFCAP_RXCSUM) {
706 			m0->m_pkthdr.csum_flags |= (CSUM_IP_CHECKED |
707 			    CSUM_IP_VALID | CSUM_DATA_VALID | CSUM_PSEUDO_HDR);
708 			if (cpl->ip_frag)
709 				m0->m_pkthdr.csum_data = be16toh(cpl->csum);
710 			else
711 				m0->m_pkthdr.csum_data = 0xffff;
712 			rxq->rxcsum++;
713 		}
714 
715 		if (cpl->vlan_ex) {
716 			m0->m_pkthdr.ether_vtag = be16toh(cpl->vlan);
717 			m0->m_flags |= M_VLANTAG;
718 			rxq->vlan_extraction++;
719 		}
720 
721 		i = 1;	/* # of fl sdesc used */
722 		sd = sd_next;
723 		if (__predict_false(++fl->cidx == fl->cap))
724 			fl->cidx = 0;
725 
726 		len -= m0->m_len;
727 		m = m0;
728 		while (len) {
729 			i++;
730 
731 			sd_next = sd + 1;
732 			if (__predict_false(fl->cidx + 1 == fl->cap))
733 				sd_next = fl->sdesc;
734 			prefetch(sd_next->m);
735 			prefetch(sd_next->cl);
736 
737 			m->m_next = sd->m;
738 			sd->m = NULL;	/* consumed */
739 			m = m->m_next;
740 
741 			bus_dmamap_sync(fl->tag[sd->tag_idx], sd->map,
742 			    BUS_DMASYNC_POSTREAD);
743 
744 			m_init(m, NULL, 0, M_NOWAIT, MT_DATA, 0);
745 			if (len <= MLEN) {
746 				bcopy(sd->cl, mtod(m, caddr_t), len);
747 				m->m_len = len;
748 			} else {
749 				bus_dmamap_unload(fl->tag[sd->tag_idx],
750 				    sd->map);
751 				m_cljset(m, sd->cl, FL_BUF_TYPE(sd->tag_idx));
752 				sd->cl = NULL;	/* consumed */
753 				m->m_len = min(len, FL_BUF_SIZE(sd->tag_idx));
754 			}
755 
756 			i++;
757 			sd = sd_next;
758 			if (__predict_false(++fl->cidx == fl->cap))
759 				fl->cidx = 0;
760 
761 			len -= m->m_len;
762 		}
763 
764 #ifdef INET
765 		if (cpl->l2info & htobe32(F_RXF_LRO) &&
766 		    rxq->flags & RXQ_LRO_ENABLED &&
767 		    tcp_lro_rx(lro, m0, 0) == 0) {
768 			/* queued for LRO */
769 		} else
770 #endif
771 		ifp->if_input(ifp, m0);
772 
773 		FL_LOCK(fl);
774 		fl->needed += i;
775 		if (fl->needed >= 32)
776 			refill_fl(sc, fl, 64, 32);
777 		FL_UNLOCK(fl);
778 
779 		if (++ndescs > 32) {
780 			t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
781 			    V_CIDXINC(ndescs) |
782 			    V_INGRESSQID((u32)iq->cntxt_id) |
783 			    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
784 			ndescs = 0;
785 		}
786 
787 		iq_next(iq);
788 	}
789 
790 #ifdef INET
791 	while (!SLIST_EMPTY(&lro->lro_active)) {
792 		l = SLIST_FIRST(&lro->lro_active);
793 		SLIST_REMOVE_HEAD(&lro->lro_active, next);
794 		tcp_lro_flush(lro, l);
795 	}
796 #endif
797 
798 	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(ndescs) |
799 	    V_INGRESSQID((u32)iq->cntxt_id) | V_SEINTARM(iq->intr_next));
800 
801 	FL_LOCK(fl);
802 	if (fl->needed >= 32)
803 		refill_fl(sc, fl, 128, 8);
804 	FL_UNLOCK(fl);
805 }
806 
807 int
808 t4_mgmt_tx(struct adapter *sc, struct mbuf *m)
809 {
810 	return ctrl_tx(sc, &sc->sge.ctrlq[0], m);
811 }
812 
813 /* Per-packet header in a coalesced tx WR, before the SGL starts (in flits) */
814 #define TXPKTS_PKT_HDR ((\
815     sizeof(struct ulp_txpkt) + \
816     sizeof(struct ulptx_idata) + \
817     sizeof(struct cpl_tx_pkt_core) \
818     ) / 8)
819 
820 /* Header of a coalesced tx WR, before SGL of first packet (in flits) */
821 #define TXPKTS_WR_HDR (\
822     sizeof(struct fw_eth_tx_pkts_wr) / 8 + \
823     TXPKTS_PKT_HDR)
824 
825 /* Header of a tx WR, before SGL of first packet (in flits) */
826 #define TXPKT_WR_HDR ((\
827     sizeof(struct fw_eth_tx_pkt_wr) + \
828     sizeof(struct cpl_tx_pkt_core) \
829     ) / 8 )
830 
831 /* Header of a tx LSO WR, before SGL of first packet (in flits) */
832 #define TXPKT_LSO_WR_HDR ((\
833     sizeof(struct fw_eth_tx_pkt_wr) + \
834     sizeof(struct cpl_tx_pkt_lso) + \
835     sizeof(struct cpl_tx_pkt_core) \
836     ) / 8 )
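
/*
 * A flit is a 64-bit (8 byte) unit; the SGE measures work request headers and
 * SGLs in flits, which is why the byte counts above are divided by 8.
 */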
837 
838 int
839 t4_eth_tx(struct ifnet *ifp, struct sge_txq *txq, struct mbuf *m)
840 {
841 	struct port_info *pi = (void *)ifp->if_softc;
842 	struct adapter *sc = pi->adapter;
843 	struct sge_eq *eq = &txq->eq;
844 	struct buf_ring *br = txq->br;
845 	struct mbuf *next;
846 	int rc, coalescing, can_reclaim;
847 	struct txpkts txpkts;
848 	struct sgl sgl;
849 
850 	TXQ_LOCK_ASSERT_OWNED(txq);
851 	KASSERT(m, ("%s: called with nothing to do.", __func__));
852 
853 	prefetch(&eq->desc[eq->pidx]);
854 	prefetch(&txq->sdesc[eq->pidx]);
855 
856 	txpkts.npkt = 0;	/* indicates there's nothing in txpkts */
857 	coalescing = 0;
858 
859 	if (eq->avail < 8)
860 		reclaim_tx_descs(txq, 0, 8);
861 
862 	for (; m; m = next ? next : drbr_dequeue(ifp, br)) {
863 
864 		if (eq->avail < 8)
865 			break;
866 
867 		next = m->m_nextpkt;
868 		m->m_nextpkt = NULL;
869 
870 		if (next || buf_ring_peek(br))
871 			coalescing = 1;
872 
873 		rc = get_pkt_sgl(txq, &m, &sgl, coalescing);
874 		if (rc != 0) {
875 			if (rc == ENOMEM) {
876 
877 				/* Short of resources, suspend tx */
878 
879 				m->m_nextpkt = next;
880 				break;
881 			}
882 
883 			/*
884 			 * Unrecoverable error for this packet, throw it away
885 			 * and move on to the next.  get_pkt_sgl may already
886 			 * have freed m (it will be NULL in that case and the
887 			 * m_freem here is still safe).
888 			 */
889 
890 			m_freem(m);
891 			continue;
892 		}
893 
894 		if (coalescing &&
895 		    add_to_txpkts(pi, txq, &txpkts, m, &sgl) == 0) {
896 
897 			/* Successfully absorbed into txpkts */
898 
899 			write_ulp_cpl_sgl(pi, txq, &txpkts, m, &sgl);
900 			goto doorbell;
901 		}
902 
903 		/*
904 		 * We weren't coalescing to begin with, or current frame could
905 		 * not be coalesced (add_to_txpkts flushes txpkts if a frame
906 		 * given to it can't be coalesced).  Either way there should be
907 		 * nothing in txpkts.
908 		 */
909 		KASSERT(txpkts.npkt == 0,
910 		    ("%s: txpkts not empty: %d", __func__, txpkts.npkt));
911 
912 		/* We're sending out individual packets now */
913 		coalescing = 0;
914 
915 		if (eq->avail < 8)
916 			reclaim_tx_descs(txq, 0, 8);
917 		rc = write_txpkt_wr(pi, txq, m, &sgl);
918 		if (rc != 0) {
919 
920 			/* Short of hardware descriptors, suspend tx */
921 
922 			/*
923 			 * This is an unlikely but expensive failure.  We've
924 			 * done all the hard work (DMA mappings etc.) and now we
925 			 * can't send out the packet.  What's worse, we have to
926 			 * spend even more time freeing up everything in sgl.
927 			 */
928 			txq->no_desc++;
929 			free_pkt_sgl(txq, &sgl);
930 
931 			m->m_nextpkt = next;
932 			break;
933 		}
934 
935 		ETHER_BPF_MTAP(ifp, m);
936 		if (sgl.nsegs == 0)
937 			m_freem(m);
938 
939 doorbell:
940 		/* Fewer and fewer doorbells as the queue fills up */
941 		if (eq->pending >= (1 << (fls(eq->qsize - eq->avail) / 2)))
942 		    ring_eq_db(sc, eq);
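		/*
		 * The threshold above grows roughly as the square root of the
		 * number of descriptors in flight, so an idle queue rings the
		 * doorbell almost immediately while a busy one batches more
		 * work per doorbell.
		 */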
943 
944 		can_reclaim = reclaimable(eq);
945 		if (can_reclaim >= 32)
946 			reclaim_tx_descs(txq, can_reclaim, 32);
947 	}
948 
949 	if (txpkts.npkt > 0)
950 		write_txpkts_wr(txq, &txpkts);
951 
952 	/*
953 	 * m not NULL means there was an error but we haven't thrown it away.
954 	 * This can happen when we're short of tx descriptors (no_desc) or maybe
955 	 * even DMA maps (no_dmamap).  Either way, a credit flush and reclaim
956 	 * will get things going again.
957 	 *
958 	 * If eq->avail is already 0 we know a credit flush was requested in the
959 	 * WR that reduced it to 0 so we don't need another flush (we don't have
960 	 * any descriptor for a flush WR anyway, duh).
961 	 */
962 	if (m && eq->avail > 0 && !(eq->flags & EQ_CRFLUSHED)) {
963 		struct tx_sdesc *txsd = &txq->sdesc[eq->pidx];
964 
965 		txsd->desc_used = 1;
966 		txsd->credits = 0;
967 		write_eqflush_wr(eq);
968 	}
969 	txq->m = m;
970 
971 	if (eq->pending)
972 		ring_eq_db(sc, eq);
973 
974 	can_reclaim = reclaimable(eq);
975 	if (can_reclaim >= 32)
976 		reclaim_tx_descs(txq, can_reclaim, 128);
977 
978 	return (0);
979 }
980 
981 void
982 t4_update_fl_bufsize(struct ifnet *ifp)
983 {
984 	struct port_info *pi = ifp->if_softc;
985 	struct sge_rxq *rxq;
986 	struct sge_fl *fl;
987 	int i;
988 
989 	for_each_rxq(pi, i, rxq) {
990 		fl = &rxq->fl;
991 
992 		FL_LOCK(fl);
993 		set_fl_tag_idx(fl, ifp->if_mtu);
994 		FL_UNLOCK(fl);
995 	}
996 }
997 
998 /*
999  * A non-NULL handler indicates this iq will not receive direct interrupts; the
1000  * handler will be invoked by an interrupt queue instead.
1001  */
1002 static inline void
1003 init_iq(struct sge_iq *iq, struct adapter *sc, int tmr_idx, int pktc_idx,
1004     int qsize, int esize, iq_intr_handler_t *handler, char *name)
1005 {
1006 	KASSERT(tmr_idx >= 0 && tmr_idx < SGE_NTIMERS,
1007 	    ("%s: bad tmr_idx %d", __func__, tmr_idx));
1008 	KASSERT(pktc_idx < SGE_NCOUNTERS,	/* -ve is ok, means don't use */
1009 	    ("%s: bad pktc_idx %d", __func__, pktc_idx));
1010 
1011 	iq->flags = 0;
1012 	iq->adapter = sc;
1013 	iq->intr_params = V_QINTR_TIMER_IDX(tmr_idx) |
1014 	    V_QINTR_CNT_EN(pktc_idx >= 0);
1015 	iq->intr_pktc_idx = pktc_idx;
1016 	iq->qsize = roundup(qsize, 16);		/* See FW_IQ_CMD/iqsize */
1017 	iq->esize = max(esize, 16);		/* See FW_IQ_CMD/iqesize */
1018 	iq->handler = handler;
1019 	strlcpy(iq->lockname, name, sizeof(iq->lockname));
1020 }
1021 
1022 static inline void
1023 init_fl(struct sge_fl *fl, int qsize, char *name)
1024 {
1025 	fl->qsize = qsize;
1026 	strlcpy(fl->lockname, name, sizeof(fl->lockname));
1027 }
1028 
1029 static inline void
1030 init_eq(struct sge_eq *eq, int qsize, char *name)
1031 {
1032 	eq->qsize = qsize;
1033 	strlcpy(eq->lockname, name, sizeof(eq->lockname));
1034 }
1035 
1036 static int
1037 alloc_ring(struct adapter *sc, size_t len, bus_dma_tag_t *tag,
1038     bus_dmamap_t *map, bus_addr_t *pa, void **va)
1039 {
1040 	int rc;
1041 
1042 	rc = bus_dma_tag_create(sc->dmat, 512, 0, BUS_SPACE_MAXADDR,
1043 	    BUS_SPACE_MAXADDR, NULL, NULL, len, 1, len, 0, NULL, NULL, tag);
1044 	if (rc != 0) {
1045 		device_printf(sc->dev, "cannot allocate DMA tag: %d\n", rc);
1046 		goto done;
1047 	}
1048 
1049 	rc = bus_dmamem_alloc(*tag, va,
1050 	    BUS_DMA_WAITOK | BUS_DMA_COHERENT | BUS_DMA_ZERO, map);
1051 	if (rc != 0) {
1052 		device_printf(sc->dev, "cannot allocate DMA memory: %d\n", rc);
1053 		goto done;
1054 	}
1055 
1056 	rc = bus_dmamap_load(*tag, *map, *va, len, oneseg_dma_callback, pa, 0);
1057 	if (rc != 0) {
1058 		device_printf(sc->dev, "cannot load DMA map: %d\n", rc);
1059 		goto done;
1060 	}
1061 done:
1062 	if (rc)
1063 		free_ring(sc, *tag, *map, *pa, *va);
1064 
1065 	return (rc);
1066 }
1067 
1068 static int
1069 free_ring(struct adapter *sc, bus_dma_tag_t tag, bus_dmamap_t map,
1070     bus_addr_t pa, void *va)
1071 {
1072 	if (pa)
1073 		bus_dmamap_unload(tag, map);
1074 	if (va)
1075 		bus_dmamem_free(tag, va, map);
1076 	if (tag)
1077 		bus_dma_tag_destroy(tag);
1078 
1079 	return (0);
1080 }
1081 
1082 /*
1083  * Allocates the ring for an ingress queue and an optional freelist.  If the
1084  * freelist is specified it will be allocated and then associated with the
1085  * ingress queue.
1086  *
1087  * Returns errno on failure.  Resources allocated up to that point may still be
1088  * allocated.  Caller is responsible for cleanup in case this function fails.
1089  *
1090  * If the ingress queue will take interrupts directly (iq->handler == NULL) then
1091  * the intr_idx specifies the vector, starting from 0.  Otherwise it specifies
1092  * the index of the interrupt queue to which its interrupts will be forwarded.
1093  */
1094 static int
1095 alloc_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl,
1096     int intr_idx, int cong)
1097 {
1098 	int rc, i, cntxt_id;
1099 	size_t len;
1100 	struct fw_iq_cmd c;
1101 	struct adapter *sc = iq->adapter;
1102 	__be32 v = 0;
1103 
1104 	len = iq->qsize * iq->esize;
1105 	rc = alloc_ring(sc, len, &iq->desc_tag, &iq->desc_map, &iq->ba,
1106 	    (void **)&iq->desc);
1107 	if (rc != 0)
1108 		return (rc);
1109 
1110 	bzero(&c, sizeof(c));
1111 	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
1112 	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
1113 	    V_FW_IQ_CMD_VFN(0));
1114 
1115 	c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
1116 	    FW_LEN16(c));
1117 
1118 	/* Special handling for firmware event queue */
1119 	if (iq == &sc->sge.fwq)
1120 		v |= F_FW_IQ_CMD_IQASYNCH;
1121 
1122 	if (iq->handler) {
1123 		KASSERT(intr_idx < NINTRQ(sc),
1124 		    ("%s: invalid indirect intr_idx %d", __func__, intr_idx));
1125 		v |= F_FW_IQ_CMD_IQANDST;
1126 		v |= V_FW_IQ_CMD_IQANDSTINDEX(sc->sge.intrq[intr_idx].abs_id);
1127 	} else {
1128 		KASSERT(intr_idx < sc->intr_count,
1129 		    ("%s: invalid direct intr_idx %d", __func__, intr_idx));
1130 		v |= V_FW_IQ_CMD_IQANDSTINDEX(intr_idx);
1131 	}
1132 
1133 	c.type_to_iqandstindex = htobe32(v |
1134 	    V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
1135 	    V_FW_IQ_CMD_VIID(pi->viid) |
1136 	    V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
1137 	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(pi->tx_chan) |
1138 	    F_FW_IQ_CMD_IQGTSMODE |
1139 	    V_FW_IQ_CMD_IQINTCNTTHRESH(iq->intr_pktc_idx) |
1140 	    V_FW_IQ_CMD_IQESIZE(ilog2(iq->esize) - 4));
1141 	c.iqsize = htobe16(iq->qsize);
1142 	c.iqaddr = htobe64(iq->ba);
1143 	if (cong >= 0)
1144 		c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN);
1145 
1146 	if (fl) {
1147 		mtx_init(&fl->fl_lock, fl->lockname, NULL, MTX_DEF);
1148 
1149 		for (i = 0; i < FL_BUF_SIZES; i++) {
1150 
1151 			/*
1152 			 * A freelist buffer must be 16 byte aligned as the SGE
1153 			 * uses the low 4 bits of the bus addr to figure out the
1154 			 * buffer size.
1155 			 */
1156 			rc = bus_dma_tag_create(sc->dmat, 16, 0,
1157 			    BUS_SPACE_MAXADDR, BUS_SPACE_MAXADDR, NULL, NULL,
1158 			    FL_BUF_SIZE(i), 1, FL_BUF_SIZE(i), BUS_DMA_ALLOCNOW,
1159 			    NULL, NULL, &fl->tag[i]);
1160 			if (rc != 0) {
1161 				device_printf(sc->dev,
1162 				    "failed to create fl DMA tag[%d]: %d\n",
1163 				    i, rc);
1164 				return (rc);
1165 			}
1166 		}
1167 		len = fl->qsize * RX_FL_ESIZE;
1168 		rc = alloc_ring(sc, len, &fl->desc_tag, &fl->desc_map,
1169 		    &fl->ba, (void **)&fl->desc);
1170 		if (rc)
1171 			return (rc);
1172 
1173 		/* Allocate space for one software descriptor per buffer. */
1174 		fl->cap = (fl->qsize - SPG_LEN / RX_FL_ESIZE) * 8;
1175 		FL_LOCK(fl);
1176 		set_fl_tag_idx(fl, pi->ifp->if_mtu);
1177 		rc = alloc_fl_sdesc(fl);
1178 		FL_UNLOCK(fl);
1179 		if (rc != 0) {
1180 			device_printf(sc->dev,
1181 			    "failed to setup fl software descriptors: %d\n",
1182 			    rc);
1183 			return (rc);
1184 		}
1185 		fl->needed = fl->cap;
1186 
1187 		c.iqns_to_fl0congen =
1188 		    htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
1189 			F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
1190 			F_FW_IQ_CMD_FL0PADEN);
1191 		if (cong >= 0) {
1192 			c.iqns_to_fl0congen |=
1193 				htobe32(V_FW_IQ_CMD_FL0CNGCHMAP(cong) |
1194 				    F_FW_IQ_CMD_FL0CONGCIF |
1195 				    F_FW_IQ_CMD_FL0CONGEN);
1196 		}
1197 		c.fl0dcaen_to_fl0cidxfthresh =
1198 		    htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_64B) |
1199 			V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
1200 		c.fl0size = htobe16(fl->qsize);
1201 		c.fl0addr = htobe64(fl->ba);
1202 	}
1203 
1204 	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
1205 	if (rc != 0) {
1206 		device_printf(sc->dev,
1207 		    "failed to create ingress queue: %d\n", rc);
1208 		return (rc);
1209 	}
1210 
1211 	iq->cdesc = iq->desc;
1212 	iq->cidx = 0;
1213 	iq->gen = 1;
1214 	iq->intr_next = iq->intr_params;
1215 	iq->cntxt_id = be16toh(c.iqid);
1216 	iq->abs_id = be16toh(c.physiqid);
1217 	iq->flags |= (IQ_ALLOCATED | IQ_STARTED);
1218 
1219 	cntxt_id = iq->cntxt_id - sc->sge.iq_start;
1220 	KASSERT(cntxt_id < sc->sge.niq,
1221 	    ("%s: iq->cntxt_id (%d) more than the max (%d)", __func__,
1222 	    cntxt_id, sc->sge.niq - 1));
1223 	sc->sge.iqmap[cntxt_id] = iq;
1224 
1225 	if (fl) {
1226 		fl->cntxt_id = be16toh(c.fl0id);
1227 		fl->pidx = fl->cidx = 0;
1228 
1229 		cntxt_id = fl->cntxt_id - sc->sge.eq_start;
1230 		KASSERT(cntxt_id < sc->sge.neq,
1231 		    ("%s: fl->cntxt_id (%d) more than the max (%d)", __func__,
1232 		    cntxt_id, sc->sge.neq - 1));
1233 		sc->sge.eqmap[cntxt_id] = (void *)fl;
1234 
1235 		FL_LOCK(fl);
1236 		refill_fl(sc, fl, -1, 8);
1237 		FL_UNLOCK(fl);
1238 	}
1239 
1240 	/* Enable IQ interrupts */
1241 	atomic_store_rel_32(&iq->state, IQS_IDLE);
1242 	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_SEINTARM(iq->intr_params) |
1243 	    V_INGRESSQID(iq->cntxt_id));
1244 
1245 	return (0);
1246 }
1247 
1248 /*
1249  * This can be called with the iq/fl in any state - fully allocated and
1250  * functional, partially allocated, even all-zeroed out.
1251  */
1252 static int
1253 free_iq_fl(struct port_info *pi, struct sge_iq *iq, struct sge_fl *fl)
1254 {
1255 	int i, rc;
1256 	struct adapter *sc = iq->adapter;
1257 	device_t dev;
1258 
1259 	if (sc == NULL)
1260 		return (0);	/* nothing to do */
1261 
1262 	dev = pi ? pi->dev : sc->dev;
1263 
1264 	if (iq->flags & IQ_STARTED) {
1265 		rc = -t4_iq_start_stop(sc, sc->mbox, 0, sc->pf, 0,
1266 		    iq->cntxt_id, fl ? fl->cntxt_id : 0xffff, 0xffff);
1267 		if (rc != 0) {
1268 			device_printf(dev,
1269 			    "failed to stop queue %p: %d\n", iq, rc);
1270 			return (rc);
1271 		}
1272 		iq->flags &= ~IQ_STARTED;
1273 
1274 		/* Synchronize with the interrupt handler */
1275 		while (!atomic_cmpset_32(&iq->state, IQS_IDLE, IQS_DISABLED))
1276 			pause("iqfree", hz / 1000);
1277 	}
1278 
1279 	if (iq->flags & IQ_ALLOCATED) {
1280 
1281 		rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0,
1282 		    FW_IQ_TYPE_FL_INT_CAP, iq->cntxt_id,
1283 		    fl ? fl->cntxt_id : 0xffff, 0xffff);
1284 		if (rc != 0) {
1285 			device_printf(dev,
1286 			    "failed to free queue %p: %d\n", iq, rc);
1287 			return (rc);
1288 		}
1289 		iq->flags &= ~IQ_ALLOCATED;
1290 	}
1291 
1292 	free_ring(sc, iq->desc_tag, iq->desc_map, iq->ba, iq->desc);
1293 
1294 	bzero(iq, sizeof(*iq));
1295 
1296 	if (fl) {
1297 		free_ring(sc, fl->desc_tag, fl->desc_map, fl->ba,
1298 		    fl->desc);
1299 
1300 		if (fl->sdesc) {
1301 			FL_LOCK(fl);
1302 			free_fl_sdesc(fl);
1303 			FL_UNLOCK(fl);
1304 		}
1305 
1306 		if (mtx_initialized(&fl->fl_lock))
1307 			mtx_destroy(&fl->fl_lock);
1308 
1309 		for (i = 0; i < FL_BUF_SIZES; i++) {
1310 			if (fl->tag[i])
1311 				bus_dma_tag_destroy(fl->tag[i]);
1312 		}
1313 
1314 		bzero(fl, sizeof(*fl));
1315 	}
1316 
1317 	return (0);
1318 }
1319 
1320 static int
1321 alloc_intrq(struct adapter *sc, int port_idx, int intrq_idx, int intr_idx)
1322 {
1323 	int rc;
1324 	struct sysctl_oid *oid;
1325 	struct sysctl_oid_list *children;
1326 	char name[16];
1327 	struct sge_iq *intrq = &sc->sge.intrq[intrq_idx];
1328 
1329 	rc = alloc_iq_fl(sc->port[port_idx], intrq, NULL, intr_idx, -1);
1330 	if (rc != 0)
1331 		return (rc);
1332 
1333 	children = SYSCTL_CHILDREN(sc->oid_intrq);
1334 
1335 	snprintf(name, sizeof(name), "%d", intrq_idx);
1336 	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD,
1337 	    NULL, "interrupt queue");
1338 	children = SYSCTL_CHILDREN(oid);
1339 
1340 	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
1341 	    CTLTYPE_INT | CTLFLAG_RD, &intrq->cidx, 0, sysctl_uint16, "I",
1342 	    "consumer index");
1343 
1344 	return (rc);
1345 }
1346 
1347 static int
1348 free_intrq(struct sge_iq *iq)
1349 {
1350 	return free_iq_fl(NULL, iq, NULL);
1351 
1352 }
1353 
1354 static int
1355 alloc_fwq(struct adapter *sc, int intr_idx)
1356 {
1357 	int rc;
1358 	struct sysctl_oid_list *children;
1359 	struct sge_iq *fwq = &sc->sge.fwq;
1360 
1361 	rc = alloc_iq_fl(sc->port[0], fwq, NULL, intr_idx, -1);
1362 	if (rc != 0)
1363 		return (rc);
1364 
1365 	children = SYSCTL_CHILDREN(sc->oid_fwq);
1366 
1367 	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "cidx",
1368 	    CTLTYPE_INT | CTLFLAG_RD, &fwq->cidx, 0, sysctl_uint16, "I",
1369 	    "consumer index");
1370 
1371 	return (rc);
1372 }
1373 
1374 static int
1375 free_fwq(struct sge_iq *iq)
1376 {
1377 	return free_iq_fl(NULL, iq, NULL);
1378 }
1379 
1380 static int
1381 alloc_rxq(struct port_info *pi, struct sge_rxq *rxq, int intr_idx, int idx)
1382 {
1383 	int rc;
1384 	struct sysctl_oid *oid;
1385 	struct sysctl_oid_list *children;
1386 	char name[16];
1387 
1388 	rc = alloc_iq_fl(pi, &rxq->iq, &rxq->fl, intr_idx, 1 << pi->tx_chan);
1389 	if (rc != 0)
1390 		return (rc);
1391 
1392 #ifdef INET
1393 	rc = tcp_lro_init(&rxq->lro);
1394 	if (rc != 0)
1395 		return (rc);
1396 	rxq->lro.ifp = pi->ifp; /* also indicates LRO init'ed */
1397 
1398 	if (pi->ifp->if_capenable & IFCAP_LRO)
1399 		rxq->flags |= RXQ_LRO_ENABLED;
1400 #endif
1401 	rxq->ifp = pi->ifp;
1402 
1403 	children = SYSCTL_CHILDREN(pi->oid_rxq);
1404 
1405 	snprintf(name, sizeof(name), "%d", idx);
1406 	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
1407 	    NULL, "rx queue");
1408 	children = SYSCTL_CHILDREN(oid);
1409 
1410 	SYSCTL_ADD_PROC(&pi->ctx, children, OID_AUTO, "abs_id",
1411 	    CTLTYPE_INT | CTLFLAG_RD, &rxq->iq.abs_id, 0, sysctl_uint16, "I",
1412 	    "absolute id of the queue");
1413 #ifdef INET
1414 	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_queued", CTLFLAG_RD,
1415 	    &rxq->lro.lro_queued, 0, NULL);
1416 	SYSCTL_ADD_INT(&pi->ctx, children, OID_AUTO, "lro_flushed", CTLFLAG_RD,
1417 	    &rxq->lro.lro_flushed, 0, NULL);
1418 #endif
1419 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "rxcsum", CTLFLAG_RD,
1420 	    &rxq->rxcsum, "# of times hardware assisted with checksum");
1421 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_extraction",
1422 	    CTLFLAG_RD, &rxq->vlan_extraction,
1423 	    "# of times hardware extracted 802.1Q tag");
1424 
1425 	return (rc);
1426 }
1427 
1428 static int
1429 free_rxq(struct port_info *pi, struct sge_rxq *rxq)
1430 {
1431 	int rc;
1432 
1433 #ifdef INET
1434 	if (rxq->lro.ifp) {
1435 		tcp_lro_free(&rxq->lro);
1436 		rxq->lro.ifp = NULL;
1437 	}
1438 #endif
1439 
1440 	rc = free_iq_fl(pi, &rxq->iq, &rxq->fl);
1441 	if (rc == 0)
1442 		bzero(rxq, sizeof(*rxq));
1443 
1444 	return (rc);
1445 }
1446 
1447 static int
1448 alloc_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq, int idx)
1449 {
1450 	int rc, cntxt_id;
1451 	size_t len;
1452 	struct fw_eq_ctrl_cmd c;
1453 	struct sge_eq *eq = &ctrlq->eq;
1454 	char name[16];
1455 	struct sysctl_oid *oid;
1456 	struct sysctl_oid_list *children;
1457 
1458 	mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
1459 
1460 	len = eq->qsize * CTRL_EQ_ESIZE;
1461 	rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
1462 	    &eq->ba, (void **)&eq->desc);
1463 	if (rc)
1464 		return (rc);
1465 
1466 	eq->cap = eq->qsize - SPG_LEN / CTRL_EQ_ESIZE;
1467 	eq->spg = (void *)&eq->desc[eq->cap];
1468 	eq->avail = eq->cap - 1;	/* one less to avoid cidx = pidx */
1469 	if (sc->flags & INTR_SHARED)
1470 		eq->iqid = sc->sge.intrq[idx % NINTRQ(sc)].cntxt_id;
1471 	else
1472 		eq->iqid = sc->sge.intrq[sc->port[idx]->first_rxq].cntxt_id;
1473 
1474 	bzero(&c, sizeof(c));
1475 
1476 	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_CTRL_CMD) | F_FW_CMD_REQUEST |
1477 	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_CTRL_CMD_PFN(sc->pf) |
1478 	    V_FW_EQ_CTRL_CMD_VFN(0));
1479 	c.alloc_to_len16 = htobe32(F_FW_EQ_CTRL_CMD_ALLOC |
1480 	    F_FW_EQ_CTRL_CMD_EQSTART | FW_LEN16(c));
1481 	c.cmpliqid_eqid = htonl(V_FW_EQ_CTRL_CMD_CMPLIQID(eq->iqid)); /* XXX */
1482 	c.physeqid_pkd = htobe32(0);
1483 	c.fetchszm_to_iqid =
1484 	    htobe32(V_FW_EQ_CTRL_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
1485 		V_FW_EQ_CTRL_CMD_PCIECHN(sc->port[idx]->tx_chan) |
1486 		F_FW_EQ_CTRL_CMD_FETCHRO | V_FW_EQ_CTRL_CMD_IQID(eq->iqid));
1487 	c.dcaen_to_eqsize =
1488 	    htobe32(V_FW_EQ_CTRL_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
1489 		V_FW_EQ_CTRL_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
1490 		V_FW_EQ_CTRL_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
1491 		V_FW_EQ_CTRL_CMD_EQSIZE(eq->qsize));
1492 	c.eqaddr = htobe64(eq->ba);
1493 
1494 	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
1495 	if (rc != 0) {
1496 		device_printf(sc->dev,
1497 		    "failed to create control queue %d: %d\n", idx, rc);
1498 		return (rc);
1499 	}
1500 
1501 	eq->pidx = eq->cidx = 0;
1502 	eq->cntxt_id = G_FW_EQ_CTRL_CMD_EQID(be32toh(c.cmpliqid_eqid));
1503 	eq->flags |= (EQ_ALLOCATED | EQ_STARTED);
1504 
1505 	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
1506 	KASSERT(cntxt_id < sc->sge.neq,
1507 	    ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
1508 	    cntxt_id, sc->sge.neq - 1));
1509 	sc->sge.eqmap[cntxt_id] = eq;
1510 
1511 	children = SYSCTL_CHILDREN(sc->oid_ctrlq);
1512 
1513 	snprintf(name, sizeof(name), "%d", idx);
1514 	oid = SYSCTL_ADD_NODE(&sc->ctx, children, OID_AUTO, name, CTLFLAG_RD,
1515 	    NULL, "ctrl queue");
1516 	children = SYSCTL_CHILDREN(oid);
1517 
1518 	SYSCTL_ADD_PROC(&sc->ctx, children, OID_AUTO, "pidx",
1519 	    CTLTYPE_INT | CTLFLAG_RD, &ctrlq->eq.pidx, 0, sysctl_uint16, "I",
1520 	    "producer index");
1521 	SYSCTL_ADD_UINT(&sc->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
1522 	    &ctrlq->no_desc, 0,
1523 	    "# of times ctrlq ran out of hardware descriptors");
1524 
1525 	return (rc);
1526 }
1527 
1528 static int
1529 free_ctrlq(struct adapter *sc, struct sge_ctrlq *ctrlq)
1530 {
1531 	int rc;
1532 	struct sge_eq *eq = &ctrlq->eq;
1533 
1534 	if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) {
1535 		rc = -t4_ctrl_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
1536 		if (rc != 0) {
1537 			device_printf(sc->dev,
1538 			    "failed to free ctrl queue %p: %d\n", eq, rc);
1539 			return (rc);
1540 		}
1541 		eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED);
1542 	}
1543 
1544 	free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
1545 
1546 	if (mtx_initialized(&eq->eq_lock))
1547 		mtx_destroy(&eq->eq_lock);
1548 
1549 	bzero(ctrlq, sizeof(*ctrlq));
1550 	return (0);
1551 }
1552 
1553 static int
1554 alloc_txq(struct port_info *pi, struct sge_txq *txq, int idx)
1555 {
1556 	int rc, cntxt_id;
1557 	size_t len;
1558 	struct adapter *sc = pi->adapter;
1559 	struct fw_eq_eth_cmd c;
1560 	struct sge_eq *eq = &txq->eq;
1561 	char name[16];
1562 	struct sysctl_oid *oid;
1563 	struct sysctl_oid_list *children;
1564 	struct sge_iq *intrq;
1565 
1566 	txq->ifp = pi->ifp;
1567 	TASK_INIT(&txq->resume_tx, 0, cxgbe_txq_start, txq);
1568 
1569 	mtx_init(&eq->eq_lock, eq->lockname, NULL, MTX_DEF);
1570 
1571 	len = eq->qsize * TX_EQ_ESIZE;
1572 	rc = alloc_ring(sc, len, &eq->desc_tag, &eq->desc_map,
1573 	    &eq->ba, (void **)&eq->desc);
1574 	if (rc)
1575 		return (rc);
1576 
1577 	eq->cap = eq->qsize - SPG_LEN / TX_EQ_ESIZE;
1578 	eq->spg = (void *)&eq->desc[eq->cap];
1579 	eq->avail = eq->cap - 1;	/* one less to avoid cidx = pidx */
1580 	txq->sdesc = malloc(eq->cap * sizeof(struct tx_sdesc), M_CXGBE,
1581 	    M_ZERO | M_WAITOK);
1582 	txq->br = buf_ring_alloc(eq->qsize, M_CXGBE, M_WAITOK, &eq->eq_lock);
1583 
1584 	intrq = &sc->sge.intrq[0];
1585 	if (sc->flags & INTR_SHARED)
1586 		eq->iqid = intrq[(pi->first_txq + idx) % NINTRQ(sc)].cntxt_id;
1587 	else
1588 		eq->iqid = intrq[pi->first_rxq + (idx % pi->nrxq)].cntxt_id;
1589 
1590 	rc = bus_dma_tag_create(sc->dmat, 1, 0, BUS_SPACE_MAXADDR,
1591 	    BUS_SPACE_MAXADDR, NULL, NULL, 64 * 1024, TX_SGL_SEGS,
1592 	    BUS_SPACE_MAXSIZE, BUS_DMA_ALLOCNOW, NULL, NULL, &txq->tx_tag);
1593 	if (rc != 0) {
1594 		device_printf(sc->dev,
1595 		    "failed to create tx DMA tag: %d\n", rc);
1596 		return (rc);
1597 	}
1598 
1599 	rc = alloc_tx_maps(txq);
1600 	if (rc != 0) {
1601 		device_printf(sc->dev, "failed to setup tx DMA maps: %d\n", rc);
1602 		return (rc);
1603 	}
1604 
1605 	bzero(&c, sizeof(c));
1606 
1607 	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
1608 	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
1609 	    V_FW_EQ_ETH_CMD_VFN(0));
1610 	c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
1611 	    F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
1612 	c.viid_pkd = htobe32(V_FW_EQ_ETH_CMD_VIID(pi->viid));
1613 	c.fetchszm_to_iqid =
1614 	    htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_STATUS_PAGE) |
1615 		V_FW_EQ_ETH_CMD_PCIECHN(pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
1616 		V_FW_EQ_ETH_CMD_IQID(eq->iqid));
1617 	c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
1618 		      V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
1619 		      V_FW_EQ_ETH_CMD_CIDXFTHRESH(X_CIDXFLUSHTHRESH_32) |
1620 		      V_FW_EQ_ETH_CMD_EQSIZE(eq->qsize));
1621 	c.eqaddr = htobe64(eq->ba);
1622 
1623 	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
1624 	if (rc != 0) {
1625 		device_printf(pi->dev,
1626 		    "failed to create egress queue: %d\n", rc);
1627 		return (rc);
1628 	}
1629 
1630 	eq->pidx = eq->cidx = 0;
1631 	eq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
1632 	eq->flags |= (EQ_ALLOCATED | EQ_STARTED);
1633 
1634 	cntxt_id = eq->cntxt_id - sc->sge.eq_start;
1635 	KASSERT(cntxt_id < sc->sge.neq,
1636 	    ("%s: eq->cntxt_id (%d) more than the max (%d)", __func__,
1637 	    cntxt_id, sc->sge.neq - 1));
1638 	sc->sge.eqmap[cntxt_id] = eq;
1639 
1640 	children = SYSCTL_CHILDREN(pi->oid_txq);
1641 
1642 	snprintf(name, sizeof(name), "%d", idx);
1643 	oid = SYSCTL_ADD_NODE(&pi->ctx, children, OID_AUTO, name, CTLFLAG_RD,
1644 	    NULL, "tx queue");
1645 	children = SYSCTL_CHILDREN(oid);
1646 
1647 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txcsum", CTLFLAG_RD,
1648 	    &txq->txcsum, "# of times hardware assisted with checksum");
1649 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "vlan_insertion",
1650 	    CTLFLAG_RD, &txq->vlan_insertion,
1651 	    "# of times hardware inserted 802.1Q tag");
1652 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "tso_wrs", CTLFLAG_RD,
1653 	    &txq->tso_wrs, "# of IPv4 TSO work requests");
1654 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "imm_wrs", CTLFLAG_RD,
1655 	    &txq->imm_wrs, "# of work requests with immediate data");
1656 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "sgl_wrs", CTLFLAG_RD,
1657 	    &txq->sgl_wrs, "# of work requests with direct SGL");
1658 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkt_wrs", CTLFLAG_RD,
1659 	    &txq->txpkt_wrs, "# of txpkt work requests (one pkt/WR)");
1660 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_wrs", CTLFLAG_RD,
1661 	    &txq->txpkts_wrs, "# of txpkts work requests (multiple pkts/WR)");
1662 	SYSCTL_ADD_UQUAD(&pi->ctx, children, OID_AUTO, "txpkts_pkts", CTLFLAG_RD,
1663 	    &txq->txpkts_pkts, "# of frames tx'd using txpkts work requests");
1664 
1665 	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_dmamap", CTLFLAG_RD,
1666 	    &txq->no_dmamap, 0, "# of times txq ran out of DMA maps");
1667 	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "no_desc", CTLFLAG_RD,
1668 	    &txq->no_desc, 0, "# of times txq ran out of hardware descriptors");
1669 	SYSCTL_ADD_UINT(&pi->ctx, children, OID_AUTO, "egr_update", CTLFLAG_RD,
1670 	    &txq->egr_update, 0, "egress update notifications from the SGE");
1671 
1672 	return (rc);
1673 }
1674 
1675 static int
1676 free_txq(struct port_info *pi, struct sge_txq *txq)
1677 {
1678 	int rc;
1679 	struct adapter *sc = pi->adapter;
1680 	struct sge_eq *eq = &txq->eq;
1681 
1682 	if (eq->flags & (EQ_ALLOCATED | EQ_STARTED)) {
1683 
1684 		/*
1685 		 * Wait for the response to a credit flush if there's one
1686 		 * pending.  Clearing the flag tells handle_sge_egr_update or
1687 		 * cxgbe_txq_start (depending on how far the response has made
1688 		 * it) that they should ignore the response and wake up free_txq
1689 		 * instead.
1690 		 *
1691 		 * The interface has been marked down by the time we get here
1692 		 * (both IFF_UP and IFF_DRV_RUNNING cleared).  qflush has
1693 		 * emptied the tx buf_rings and we know nothing new is being
1694 		 * queued for tx so we don't have to worry about a new credit
1695 		 * flush request.
1696 		 */
1697 		TXQ_LOCK(txq);
1698 		if (eq->flags & EQ_CRFLUSHED) {
1699 			eq->flags &= ~EQ_CRFLUSHED;
1700 			msleep(txq, &eq->eq_lock, 0, "crflush", 0);
1701 		}
1702 		TXQ_UNLOCK(txq);
1703 
1704 		rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, eq->cntxt_id);
1705 		if (rc != 0) {
1706 			device_printf(pi->dev,
1707 			    "failed to free egress queue %p: %d\n", eq, rc);
1708 			return (rc);
1709 		}
1710 		eq->flags &= ~(EQ_ALLOCATED | EQ_STARTED);
1711 	}
1712 
1713 	free_ring(sc, eq->desc_tag, eq->desc_map, eq->ba, eq->desc);
1714 
1715 	free(txq->sdesc, M_CXGBE);
1716 
1717 	if (txq->maps)
1718 		free_tx_maps(txq);
1719 
1720 	buf_ring_free(txq->br, M_CXGBE);
1721 
1722 	if (txq->tx_tag)
1723 		bus_dma_tag_destroy(txq->tx_tag);
1724 
1725 	if (mtx_initialized(&eq->eq_lock))
1726 		mtx_destroy(&eq->eq_lock);
1727 
1728 	bzero(txq, sizeof(*txq));
1729 	return (0);
1730 }
1731 
1732 static void
1733 oneseg_dma_callback(void *arg, bus_dma_segment_t *segs, int nseg, int error)
1734 {
1735 	bus_addr_t *ba = arg;
1736 
1737 	KASSERT(nseg == 1,
1738 	    ("%s meant for single segment mappings only.", __func__));
1739 
1740 	*ba = error ? 0 : segs->ds_addr;
1741 }
1742 
1743 static inline bool
1744 is_new_response(const struct sge_iq *iq, struct rsp_ctrl **ctrl)
1745 {
1746 	*ctrl = (void *)((uintptr_t)iq->cdesc +
1747 	    (iq->esize - sizeof(struct rsp_ctrl)));
1748 
1749 	return (((*ctrl)->u.type_gen >> S_RSPD_GEN) == iq->gen);
1750 }
1751 
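/*
 * iq->gen is the generation value the driver expects in the next valid entry;
 * it flips each time the consumer index wraps (see iq_next below), which lets
 * is_new_response() distinguish freshly written entries from stale ones.
 */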
1752 static inline void
1753 iq_next(struct sge_iq *iq)
1754 {
1755 	iq->cdesc = (void *) ((uintptr_t)iq->cdesc + iq->esize);
1756 	if (__predict_false(++iq->cidx == iq->qsize - 1)) {
1757 		iq->cidx = 0;
1758 		iq->gen ^= 1;
1759 		iq->cdesc = iq->desc;
1760 	}
1761 }
1762 
1763 #define FL_HW_IDX(x) ((x) >> 3)
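/*
 * The SGE consumes freelist descriptors in groups of 8, which is why the
 * doorbell below is rung in units of fl->pending / 8 and FL_HW_IDX shifts by
 * 3.  One credit is held back while the producer and consumer share a group,
 * presumably so that group is never handed to the hardware while it is still
 * being filled.
 */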
1764 static inline void
1765 ring_fl_db(struct adapter *sc, struct sge_fl *fl)
1766 {
1767 	int ndesc = fl->pending / 8;
1768 
1769 	if (FL_HW_IDX(fl->pidx) == FL_HW_IDX(fl->cidx))
1770 		ndesc--;	/* hold back one credit */
1771 
1772 	if (ndesc <= 0)
1773 		return;		/* nothing to do */
1774 
1775 	wmb();
1776 
1777 	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL), F_DBPRIO |
1778 	    V_QID(fl->cntxt_id) | V_PIDX(ndesc));
1779 	fl->pending -= ndesc * 8;
1780 }
1781 
1782 /*
1783  * Fill up the freelist by up to nbufs and ring its doorbell if the number of
1784  * buffers ready to be handed to the hardware >= dbthresh.
1785  */
1786 static void
1787 refill_fl(struct adapter *sc, struct sge_fl *fl, int nbufs, int dbthresh)
1788 {
1789 	__be64 *d = &fl->desc[fl->pidx];
1790 	struct fl_sdesc *sd = &fl->sdesc[fl->pidx];
1791 	bus_dma_tag_t tag;
1792 	bus_addr_t pa;
1793 	caddr_t cl;
1794 	int rc;
1795 
1796 	FL_LOCK_ASSERT_OWNED(fl);
1797 
1798 	if (nbufs < 0 || nbufs > fl->needed)
1799 		nbufs = fl->needed;
1800 
1801 	while (nbufs--) {
1802 
1803 		if (sd->cl != NULL) {
1804 
1805 			/*
1806 			 * This happens when a frame small enough to fit
1807 			 * entirely in an mbuf was received in cl last time.
1808 			 * We'd held on to cl and can reuse it now.  Note that
1809 			 * we reuse a cluster of the old size if fl->tag_idx is
1810 			 * no longer the same as sd->tag_idx.
1811 			 */
1812 
1813 			KASSERT(*d == sd->ba_tag,
1814 			    ("%s: recyling problem at pidx %d",
1815 			    __func__, fl->pidx));
1816 
1817 			d++;
1818 			goto recycled;
1819 		}
1820 
1821 
1823 			bus_dmamap_t map;
1824 			bus_dma_tag_t newtag = fl->tag[fl->tag_idx];
1825 			bus_dma_tag_t oldtag = fl->tag[sd->tag_idx];
1826 
1827 			/*
1828 			 * An MTU change can get us here.  Discard the old map
1829 			 * which was created with the old tag, but only if
1830 			 * we're able to get a new one.
1831 			 */
1832 			rc = bus_dmamap_create(newtag, 0, &map);
1833 			if (rc == 0) {
1834 				bus_dmamap_destroy(oldtag, sd->map);
1835 				sd->map = map;
1836 				sd->tag_idx = fl->tag_idx;
1837 			}
1838 		}
1839 
1840 		tag = fl->tag[sd->tag_idx];
1841 
1842 		cl = m_cljget(NULL, M_NOWAIT, FL_BUF_SIZE(sd->tag_idx));
1843 		if (cl == NULL)
1844 			break;
1845 
1846 		rc = bus_dmamap_load(tag, sd->map, cl, FL_BUF_SIZE(sd->tag_idx),
1847 		    oneseg_dma_callback, &pa, 0);
1848 		if (rc != 0 || pa == 0) {
1849 			fl->dmamap_failed++;
1850 			uma_zfree(FL_BUF_ZONE(sd->tag_idx), cl);
1851 			break;
1852 		}
1853 
1854 		sd->cl = cl;
1855 		*d++ = htobe64(pa | sd->tag_idx);
1856 
1857 #ifdef INVARIANTS
1858 		sd->ba_tag = htobe64(pa | sd->tag_idx);
1859 #endif
1860 
1861 recycled:
1862 		/* sd->m is never recycled; it should always be NULL */
1863 		KASSERT(sd->m == NULL, ("%s: stray mbuf", __func__));
1864 
1865 		sd->m = m_gethdr(M_NOWAIT, MT_NOINIT);
1866 		if (sd->m == NULL)
1867 			break;
1868 
1869 		fl->pending++;
1870 		fl->needed--;
1871 		sd++;
1872 		if (++fl->pidx == fl->cap) {
1873 			fl->pidx = 0;
1874 			sd = fl->sdesc;
1875 			d = fl->desc;
1876 		}
1877 	}
1878 
1879 	if (fl->pending >= dbthresh)
1880 		ring_fl_db(sc, fl);
1881 }
1882 
1883 static int
1884 alloc_fl_sdesc(struct sge_fl *fl)
1885 {
1886 	struct fl_sdesc *sd;
1887 	bus_dma_tag_t tag;
1888 	int i, rc;
1889 
1890 	FL_LOCK_ASSERT_OWNED(fl);
1891 
1892 	fl->sdesc = malloc(fl->cap * sizeof(struct fl_sdesc), M_CXGBE,
1893 	    M_ZERO | M_WAITOK);
1894 
1895 	tag = fl->tag[fl->tag_idx];
1896 	sd = fl->sdesc;
1897 	for (i = 0; i < fl->cap; i++, sd++) {
1898 
1899 		sd->tag_idx = fl->tag_idx;
1900 		rc = bus_dmamap_create(tag, 0, &sd->map);
1901 		if (rc != 0)
1902 			goto failed;
1903 	}
1904 
1905 	return (0);
1906 failed:
1907 	while (--i >= 0) {
1908 		sd--;
1909 		bus_dmamap_destroy(tag, sd->map);
1910 		if (sd->m) {
1911 			m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
1912 			m_free(sd->m);
1913 			sd->m = NULL;
1914 		}
1915 	}
1916 	KASSERT(sd == fl->sdesc, ("%s: EDOOFUS", __func__));
1917 
1918 	free(fl->sdesc, M_CXGBE);
1919 	fl->sdesc = NULL;
1920 
1921 	return (rc);
1922 }
1923 
1924 static void
1925 free_fl_sdesc(struct sge_fl *fl)
1926 {
1927 	struct fl_sdesc *sd;
1928 	int i;
1929 
1930 	FL_LOCK_ASSERT_OWNED(fl);
1931 
1932 	sd = fl->sdesc;
1933 	for (i = 0; i < fl->cap; i++, sd++) {
1934 
1935 		if (sd->m) {
1936 			m_init(sd->m, NULL, 0, M_NOWAIT, MT_DATA, 0);
1937 			m_free(sd->m);
1938 			sd->m = NULL;
1939 		}
1940 
1941 		if (sd->cl) {
1942 			bus_dmamap_unload(fl->tag[sd->tag_idx], sd->map);
1943 			uma_zfree(FL_BUF_ZONE(sd->tag_idx), sd->cl);
1944 			sd->cl = NULL;
1945 		}
1946 
1947 		bus_dmamap_destroy(fl->tag[sd->tag_idx], sd->map);
1948 	}
1949 
1950 	free(fl->sdesc, M_CXGBE);
1951 	fl->sdesc = NULL;
1952 }
1953 
1954 static int
1955 alloc_tx_maps(struct sge_txq *txq)
1956 {
1957 	struct tx_map *txm;
1958 	int i, rc, count;
1959 
1960 	/*
1961 	 * We can stuff ~10 frames in an 8-descriptor txpkts WR (8 is the SGE
1962 	 * limit for any WR).  txq->no_dmamap events shouldn't occur if maps is
1963 	 * sized for the worst case.
1964 	 */
1965 	count = txq->eq.qsize * 10 / 8;
1966 	txq->map_total = txq->map_avail = count;
1967 	txq->map_cidx = txq->map_pidx = 0;
1968 
1969 	txq->maps = malloc(count * sizeof(struct tx_map), M_CXGBE,
1970 	    M_ZERO | M_WAITOK);
1971 
1972 	txm = txq->maps;
1973 	for (i = 0; i < count; i++, txm++) {
1974 		rc = bus_dmamap_create(txq->tx_tag, 0, &txm->map);
1975 		if (rc != 0)
1976 			goto failed;
1977 	}
1978 
1979 	return (0);
1980 failed:
1981 	while (--i >= 0) {
1982 		txm--;
1983 		bus_dmamap_destroy(txq->tx_tag, txm->map);
1984 	}
1985 	KASSERT(txm == txq->maps, ("%s: EDOOFUS", __func__));
1986 
1987 	free(txq->maps, M_CXGBE);
1988 	txq->maps = NULL;
1989 
1990 	return (rc);
1991 }
1992 
1993 static void
1994 free_tx_maps(struct sge_txq *txq)
1995 {
1996 	struct tx_map *txm;
1997 	int i;
1998 
1999 	txm = txq->maps;
2000 	for (i = 0; i < txq->map_total; i++, txm++) {
2001 
2002 		if (txm->m) {
2003 			bus_dmamap_unload(txq->tx_tag, txm->map);
2004 			m_freem(txm->m);
2005 			txm->m = NULL;
2006 		}
2007 
2008 		bus_dmamap_destroy(txq->tx_tag, txm->map);
2009 	}
2010 
2011 	free(txq->maps, M_CXGBE);
2012 	txq->maps = NULL;
2013 }
2014 
2015 /*
2016  * We'll do immediate data tx for non-TSO, but only when not coalescing.  We're
2017  * willing to use up to 2 hardware descriptors, which works out to a maximum
2018  * of 96 bytes of immediate data.
2019  */
2020 #define IMM_LEN ( \
2021       2 * TX_EQ_ESIZE \
2022     - sizeof(struct fw_eth_tx_pkt_wr) \
2023     - sizeof(struct cpl_tx_pkt_core))
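/*
 * Assuming the usual 64 byte tx descriptor and the 16 byte fw_eth_tx_pkt_wr
 * and cpl_tx_pkt_core structures, this works out to 2 * 64 - 16 - 16 = 96
 * bytes, the limit mentioned above.  IMM_LEN itself is always derived from
 * the real structure sizes.
 */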
2024 
2025 /*
2026  * Returns non-zero on failure; nothing needs to be cleaned up in that case.
2027  *
2028  * Note 1: We always try to defrag the mbuf if required and return EFBIG only
2029  * if the resulting chain still has too many segments for a single tx SGL.
2030  *
2031  * Note 2: We'll pullup the mbuf chain if TSO is requested and the first mbuf
2032  * does not have the TCP header in it.
2033  */
2034 static int
2035 get_pkt_sgl(struct sge_txq *txq, struct mbuf **fp, struct sgl *sgl,
2036     int sgl_only)
2037 {
2038 	struct mbuf *m = *fp;
2039 	struct tx_map *txm;
2040 	int rc, defragged = 0, n;
2041 
2042 	TXQ_LOCK_ASSERT_OWNED(txq);
2043 
2044 	if (m->m_pkthdr.tso_segsz)
2045 		sgl_only = 1;	/* Do not allow immediate data with LSO */
2046 
2047 start:	sgl->nsegs = 0;
2048 
2049 	if (m->m_pkthdr.len <= IMM_LEN && !sgl_only)
2050 		return (0);	/* nsegs = 0 tells caller to use imm. tx */
2051 
2052 	if (txq->map_avail == 0) {
2053 		txq->no_dmamap++;
2054 		return (ENOMEM);
2055 	}
2056 	txm = &txq->maps[txq->map_pidx];
2057 
2058 	if (m->m_pkthdr.tso_segsz && m->m_len < 50) {
2059 		*fp = m_pullup(m, 50);
2060 		m = *fp;
2061 		if (m == NULL)
2062 			return (ENOBUFS);
2063 	}
2064 
2065 	rc = bus_dmamap_load_mbuf_sg(txq->tx_tag, txm->map, m, sgl->seg,
2066 	    &sgl->nsegs, BUS_DMA_NOWAIT);
2067 	if (rc == EFBIG && defragged == 0) {
2068 		m = m_defrag(m, M_DONTWAIT);
2069 		if (m == NULL)
2070 			return (EFBIG);
2071 
2072 		defragged = 1;
2073 		*fp = m;
2074 		goto start;
2075 	}
2076 	if (rc != 0)
2077 		return (rc);
2078 
2079 	txm->m = m;
2080 	txq->map_avail--;
2081 	if (++txq->map_pidx == txq->map_total)
2082 		txq->map_pidx = 0;
2083 
2084 	KASSERT(sgl->nsegs > 0 && sgl->nsegs <= TX_SGL_SEGS,
2085 	    ("%s: bad DMA mapping (%d segments)", __func__, sgl->nsegs));
2086 
2087 	/*
2088 	 * Store the # of flits required to hold this frame's SGL in nflits.  An
2089 	 * SGL has a (ULPTX header + len0, addr0) tuple optionally followed by
2090 	 * multiple (len[i] + len[i+1], addr[i], addr[i+1]) tuples.  If addr[i+1]
2091 	 * is not used then len[i+1] must be set to 0.
2092 	 */
2093 	n = sgl->nsegs - 1;
2094 	sgl->nflits = (3 * n) / 2 + (n & 1) + 2;
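	/*
	 * Example: nsegs = 4 gives n = 3 and nflits = (3 * 3) / 2 + 1 + 2 = 7,
	 * i.e. 2 flits for the header + first segment, 3 for the next pair of
	 * segments, and 2 for the odd segment left over.
	 */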
2095 
2096 	return (0);
2097 }
2098 
2100 /*
2101  * Releases all the txq resources used up in the specified sgl.
2102  */
2103 static int
2104 free_pkt_sgl(struct sge_txq *txq, struct sgl *sgl)
2105 {
2106 	struct tx_map *txm;
2107 
2108 	TXQ_LOCK_ASSERT_OWNED(txq);
2109 
2110 	if (sgl->nsegs == 0)
2111 		return (0);	/* didn't use any map */
2112 
2113 	/* 1 pkt uses exactly 1 map, back it out */
2114 
2115 	txq->map_avail++;
2116 	if (txq->map_pidx > 0)
2117 		txq->map_pidx--;
2118 	else
2119 		txq->map_pidx = txq->map_total - 1;
2120 
2121 	txm = &txq->maps[txq->map_pidx];
2122 	bus_dmamap_unload(txq->tx_tag, txm->map);
2123 	txm->m = NULL;
2124 
2125 	return (0);
2126 }
2127 
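/*
 * Writes the work request for a single frame into the descriptor ring: a
 * fw_eth_tx_pkt_wr header, an optional cpl_tx_pkt_lso for TSO, the
 * cpl_tx_pkt_core, and then either the frame's SGL or the frame itself as
 * immediate data.  Returns ENOMEM if the eq doesn't have enough descriptors
 * available for the request.
 */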
2128 static int
2129 write_txpkt_wr(struct port_info *pi, struct sge_txq *txq, struct mbuf *m,
2130     struct sgl *sgl)
2131 {
2132 	struct sge_eq *eq = &txq->eq;
2133 	struct fw_eth_tx_pkt_wr *wr;
2134 	struct cpl_tx_pkt_core *cpl;
2135 	uint32_t ctrl;	/* used in many unrelated places */
2136 	uint64_t ctrl1;
2137 	int nflits, ndesc, pktlen;
2138 	struct tx_sdesc *txsd;
2139 	caddr_t dst;
2140 
2141 	TXQ_LOCK_ASSERT_OWNED(txq);
2142 
2143 	pktlen = m->m_pkthdr.len;
2144 
2145 	/*
2146 	 * Do we have enough flits to send this frame out?
2147 	 */
2148 	ctrl = sizeof(struct cpl_tx_pkt_core);
2149 	if (m->m_pkthdr.tso_segsz) {
2150 		nflits = TXPKT_LSO_WR_HDR;
2151 		ctrl += sizeof(struct cpl_tx_pkt_lso);
2152 	} else
2153 		nflits = TXPKT_WR_HDR;
2154 	if (sgl->nsegs > 0)
2155 		nflits += sgl->nflits;
2156 	else {
2157 		nflits += howmany(pktlen, 8);
2158 		ctrl += pktlen;
2159 	}
2160 	ndesc = howmany(nflits, 8);
2161 	if (ndesc > eq->avail)
2162 		return (ENOMEM);
2163 
2164 	/* Firmware work request header */
2165 	wr = (void *)&eq->desc[eq->pidx];
2166 	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKT_WR) |
2167 	    V_FW_WR_IMMDLEN(ctrl));
2168 	ctrl = V_FW_WR_LEN16(howmany(nflits, 2));
2169 	if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) {
2170 		ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
2171 		eq->flags |= EQ_CRFLUSHED;
2172 	}
2173 
2174 	wr->equiq_to_len16 = htobe32(ctrl);
2175 	wr->r3 = 0;
2176 
2177 	if (m->m_pkthdr.tso_segsz) {
2178 		struct cpl_tx_pkt_lso *lso = (void *)(wr + 1);
2179 		struct ether_header *eh;
2180 		struct ip *ip;
2181 		struct tcphdr *tcp;
2182 
2183 		ctrl = V_LSO_OPCODE(CPL_TX_PKT_LSO) | F_LSO_FIRST_SLICE |
2184 		    F_LSO_LAST_SLICE;
2185 
2186 		eh = mtod(m, struct ether_header *);
2187 		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2188 			ctrl |= V_LSO_ETHHDR_LEN(1);
2189 			ip = (void *)((struct ether_vlan_header *)eh + 1);
2190 		} else
2191 			ip = (void *)(eh + 1);
2192 
2193 		tcp = (void *)((uintptr_t)ip + ip->ip_hl * 4);
2194 		ctrl |= V_LSO_IPHDR_LEN(ip->ip_hl) |
2195 		    V_LSO_TCPHDR_LEN(tcp->th_off);
2196 
2197 		lso->lso_ctrl = htobe32(ctrl);
2198 		lso->ipid_ofst = htobe16(0);
2199 		lso->mss = htobe16(m->m_pkthdr.tso_segsz);
2200 		lso->seqno_offset = htobe32(0);
2201 		lso->len = htobe32(pktlen);
2202 
2203 		cpl = (void *)(lso + 1);
2204 
2205 		txq->tso_wrs++;
2206 	} else
2207 		cpl = (void *)(wr + 1);
2208 
2209 	/* Checksum offload */
2210 	ctrl1 = 0;
2211 	if (!(m->m_pkthdr.csum_flags & CSUM_IP))
2212 		ctrl1 |= F_TXPKT_IPCSUM_DIS;
2213 	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))
2214 		ctrl1 |= F_TXPKT_L4CSUM_DIS;
2215 	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
2216 		txq->txcsum++;	/* some hardware assistance provided */
2217 
2218 	/* VLAN tag insertion */
2219 	if (m->m_flags & M_VLANTAG) {
2220 		ctrl1 |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
2221 		txq->vlan_insertion++;
2222 	}
2223 
2224 	/* CPL header */
2225 	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
2226 	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
2227 	cpl->pack = 0;
2228 	cpl->len = htobe16(pktlen);
2229 	cpl->ctrl1 = htobe64(ctrl1);
2230 
2231 	/* Software descriptor */
2232 	txsd = &txq->sdesc[eq->pidx];
2233 	txsd->desc_used = ndesc;
2234 
2235 	eq->pending += ndesc;
2236 	eq->avail -= ndesc;
2237 	eq->pidx += ndesc;
2238 	if (eq->pidx >= eq->cap)
2239 		eq->pidx -= eq->cap;
2240 
2241 	/* SGL */
2242 	dst = (void *)(cpl + 1);
2243 	if (sgl->nsegs > 0) {
2244 		txsd->credits = 1;
2245 		txq->sgl_wrs++;
2246 		write_sgl_to_txd(eq, sgl, &dst);
2247 	} else {
2248 		txsd->credits = 0;
2249 		txq->imm_wrs++;
2250 		for (; m; m = m->m_next) {
2251 			copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
2252 #ifdef INVARIANTS
2253 			pktlen -= m->m_len;
2254 #endif
2255 		}
2256 #ifdef INVARIANTS
2257 		KASSERT(pktlen == 0, ("%s: %d bytes left.", __func__, pktlen));
2258 #endif
2259 
2260 	}
2261 
2262 	txq->txpkt_wrs++;
2263 	return (0);
2264 }
2265 
2266 /*
2267  * Returns 0 to indicate that m has been accepted into a coalesced tx work
2268  * request.  It has either been folded into txpkts or txpkts was flushed and m
2269  * has started a new coalesced work request (as the first frame in a fresh
2270  * txpkts).
2271  *
2272  * Returns non-zero to indicate a failure; the caller is then responsible for
2273  * transmitting m.  If there was anything in txpkts it has already been flushed.
2274  */
2275 static int
2276 add_to_txpkts(struct port_info *pi, struct sge_txq *txq, struct txpkts *txpkts,
2277     struct mbuf *m, struct sgl *sgl)
2278 {
2279 	struct sge_eq *eq = &txq->eq;
2280 	int can_coalesce;
2281 	struct tx_sdesc *txsd;
2282 	int flits;
2283 
2284 	TXQ_LOCK_ASSERT_OWNED(txq);
2285 
2286 	if (txpkts->npkt > 0) {
2287 		flits = TXPKTS_PKT_HDR + sgl->nflits;
2288 		can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
2289 		    txpkts->nflits + flits <= TX_WR_FLITS &&
2290 		    txpkts->nflits + flits <= eq->avail * 8 &&
2291 		    txpkts->plen + m->m_pkthdr.len < 65536;
2292 
2293 		if (can_coalesce) {
2294 			txpkts->npkt++;
2295 			txpkts->nflits += flits;
2296 			txpkts->plen += m->m_pkthdr.len;
2297 
2298 			txsd = &txq->sdesc[eq->pidx];
2299 			txsd->credits++;
2300 
2301 			return (0);
2302 		}
2303 
2304 		/*
2305 		 * Couldn't coalesce m into txpkts.  The first order of business
2306 		 * is to send txpkts on its way.  Then we'll revisit m.
2307 		 */
2308 		write_txpkts_wr(txq, txpkts);
2309 	}
2310 
2311 	/*
2312 	 * Check if we can start a new coalesced tx work request with m as
2313 	 * the first packet in it.
2314 	 */
2315 
2316 	KASSERT(txpkts->npkt == 0, ("%s: txpkts not empty", __func__));
2317 
2318 	flits = TXPKTS_WR_HDR + sgl->nflits;
2319 	can_coalesce = m->m_pkthdr.tso_segsz == 0 &&
2320 	    flits <= eq->avail * 8 && flits <= TX_WR_FLITS;
2321 
2322 	if (can_coalesce == 0)
2323 		return (EINVAL);
2324 
2325 	/*
2326 	 * Start a fresh coalesced tx WR with m as the first frame in it.
2327 	 */
2328 	txpkts->npkt = 1;
2329 	txpkts->nflits = flits;
2330 	txpkts->flitp = &eq->desc[eq->pidx].flit[2];
2331 	txpkts->plen = m->m_pkthdr.len;
2332 
2333 	txsd = &txq->sdesc[eq->pidx];
2334 	txsd->credits = 1;
2335 
2336 	return (0);
2337 }
2338 
2339 /*
2340  * Note that write_txpkts_wr can never run out of hardware descriptors (but
2341  * write_txpkt_wr can).  add_to_txpkts ensures that a frame is accepted for
2342  * coalescing only if sufficient hardware descriptors are available.
2343  */
2344 static void
2345 write_txpkts_wr(struct sge_txq *txq, struct txpkts *txpkts)
2346 {
2347 	struct sge_eq *eq = &txq->eq;
2348 	struct fw_eth_tx_pkts_wr *wr;
2349 	struct tx_sdesc *txsd;
2350 	uint32_t ctrl;
2351 	int ndesc;
2352 
2353 	TXQ_LOCK_ASSERT_OWNED(txq);
2354 
2355 	ndesc = howmany(txpkts->nflits, 8);
2356 
2357 	wr = (void *)&eq->desc[eq->pidx];
2358 	wr->op_immdlen = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR) |
2359 	    V_FW_WR_IMMDLEN(0)); /* immdlen does not matter in this WR */
2360 	ctrl = V_FW_WR_LEN16(howmany(txpkts->nflits, 2));
2361 	if (eq->avail == ndesc && !(eq->flags & EQ_CRFLUSHED)) {
2362 		ctrl |= F_FW_WR_EQUEQ | F_FW_WR_EQUIQ;
2363 		eq->flags |= EQ_CRFLUSHED;
2364 	}
2365 	wr->equiq_to_len16 = htobe32(ctrl);
2366 	wr->plen = htobe16(txpkts->plen);
2367 	wr->npkt = txpkts->npkt;
2368 	wr->r3 = wr->type = 0;
2369 
2370 	/* Everything else already written */
2371 
2372 	txsd = &txq->sdesc[eq->pidx];
2373 	txsd->desc_used = ndesc;
2374 
2375 	KASSERT(eq->avail >= ndesc, ("%s: out of descriptors", __func__));
2376 
2377 	eq->pending += ndesc;
2378 	eq->avail -= ndesc;
2379 	eq->pidx += ndesc;
2380 	if (eq->pidx >= eq->cap)
2381 		eq->pidx -= eq->cap;
2382 
2383 	txq->txpkts_pkts += txpkts->npkt;
2384 	txq->txpkts_wrs++;
2385 	txpkts->npkt = 0;	/* emptied */
2386 }
2387 
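/*
 * Writes one frame's portion of a coalesced work request at txpkts->flitp: a
 * ULPTX master command, a ULPTX subcommand, a cpl_tx_pkt_core, and the
 * frame's SGL, wrapping around at the status page back to the start of the
 * descriptor ring as needed.
 */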
2388 static inline void
2389 write_ulp_cpl_sgl(struct port_info *pi, struct sge_txq *txq,
2390     struct txpkts *txpkts, struct mbuf *m, struct sgl *sgl)
2391 {
2392 	struct ulp_txpkt *ulpmc;
2393 	struct ulptx_idata *ulpsc;
2394 	struct cpl_tx_pkt_core *cpl;
2395 	struct sge_eq *eq = &txq->eq;
2396 	uintptr_t flitp, start, end;
2397 	uint64_t ctrl;
2398 	caddr_t dst;
2399 
2400 	KASSERT(txpkts->npkt > 0, ("%s: txpkts is empty", __func__));
2401 
2402 	start = (uintptr_t)eq->desc;
2403 	end = (uintptr_t)eq->spg;
2404 
2405 	/* Checksum offload */
2406 	ctrl = 0;
2407 	if (!(m->m_pkthdr.csum_flags & CSUM_IP))
2408 		ctrl |= F_TXPKT_IPCSUM_DIS;
2409 	if (!(m->m_pkthdr.csum_flags & (CSUM_TCP | CSUM_UDP)))
2410 		ctrl |= F_TXPKT_L4CSUM_DIS;
2411 	if (m->m_pkthdr.csum_flags & (CSUM_IP | CSUM_TCP | CSUM_UDP))
2412 		txq->txcsum++;	/* some hardware assistance provided */
2413 
2414 	/* VLAN tag insertion */
2415 	if (m->m_flags & M_VLANTAG) {
2416 		ctrl |= F_TXPKT_VLAN_VLD | V_TXPKT_VLAN(m->m_pkthdr.ether_vtag);
2417 		txq->vlan_insertion++;
2418 	}
2419 
2420 	/*
2421 	 * The previous packet's SGL must have ended at a 16 byte boundary (this
2422 	 * is required by the firmware/hardware).  It follows that flitp cannot
2423 	 * wrap around between the ULPTX master command and ULPTX subcommand (8
2424 	 * bytes each), and that it cannot wrap around in the middle of the
2425 	 * cpl_tx_pkt_core either.
2426 	 */
2427 	flitp = (uintptr_t)txpkts->flitp;
2428 	KASSERT((flitp & 0xf) == 0,
2429 	    ("%s: last SGL did not end at 16 byte boundary: %p",
2430 	    __func__, txpkts->flitp));
2431 
2432 	/* ULP master command */
2433 	ulpmc = (void *)flitp;
2434 	ulpmc->cmd_dest = htonl(V_ULPTX_CMD(ULP_TX_PKT) | V_ULP_TXPKT_DEST(0) |
2435 	    V_ULP_TXPKT_FID(eq->iqid));
2436 	ulpmc->len = htonl(howmany(sizeof(*ulpmc) + sizeof(*ulpsc) +
2437 	    sizeof(*cpl) + 8 * sgl->nflits, 16));
2438 
2439 	/* ULP subcommand */
2440 	ulpsc = (void *)(ulpmc + 1);
2441 	ulpsc->cmd_more = htobe32(V_ULPTX_CMD((u32)ULP_TX_SC_IMM) |
2442 	    F_ULP_TX_SC_MORE);
2443 	ulpsc->len = htobe32(sizeof(struct cpl_tx_pkt_core));
2444 
2445 	flitp += sizeof(*ulpmc) + sizeof(*ulpsc);
2446 	if (flitp == end)
2447 		flitp = start;
2448 
2449 	/* CPL_TX_PKT */
2450 	cpl = (void *)flitp;
2451 	cpl->ctrl0 = htobe32(V_TXPKT_OPCODE(CPL_TX_PKT) |
2452 	    V_TXPKT_INTF(pi->tx_chan) | V_TXPKT_PF(pi->adapter->pf));
2453 	cpl->pack = 0;
2454 	cpl->len = htobe16(m->m_pkthdr.len);
2455 	cpl->ctrl1 = htobe64(ctrl);
2456 
2457 	flitp += sizeof(*cpl);
2458 	if (flitp == end)
2459 		flitp = start;
2460 
2461 	/* SGL for this frame */
2462 	dst = (caddr_t)flitp;
2463 	txpkts->nflits += write_sgl_to_txd(eq, sgl, &dst);
2464 	txpkts->flitp = (void *)dst;
2465 
2466 	KASSERT(((uintptr_t)dst & 0xf) == 0,
2467 	    ("%s: SGL ends at %p (not a 16 byte boundary)", __func__, dst));
2468 }
2469 
2470 /*
2471  * If the SGL ends on an address that is not 16 byte aligned, this function will
2472  * add a 0 filled flit at the end.  It returns 1 in that case.
2473  */
2474 static int
2475 write_sgl_to_txd(struct sge_eq *eq, struct sgl *sgl, caddr_t *to)
2476 {
2477 	__be64 *flitp, *end;
2478 	struct ulptx_sgl *usgl;
2479 	bus_dma_segment_t *seg;
2480 	int i, padded;
2481 
2482 	KASSERT(sgl->nsegs > 0 && sgl->nflits > 0,
2483 	    ("%s: bad SGL - nsegs=%d, nflits=%d",
2484 	    __func__, sgl->nsegs, sgl->nflits));
2485 
2486 	KASSERT(((uintptr_t)(*to) & 0xf) == 0,
2487 	    ("%s: SGL must start at a 16 byte boundary: %p", __func__, *to));
2488 
2489 	flitp = (__be64 *)(*to);
2490 	end = flitp + sgl->nflits;
2491 	seg = &sgl->seg[0];
2492 	usgl = (void *)flitp;
2493 
2494 	/*
2495 	 * We start at a 16 byte boundary somewhere inside the tx descriptor
2496 	 * ring, so we're at least 16 bytes away from the status page.  There is
2497 	 * no chance of a wrap around in the middle of usgl (which is 16 bytes).
2498 	 */
2499 
2500 	usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
2501 	    V_ULPTX_NSGE(sgl->nsegs));
2502 	usgl->len0 = htobe32(seg->ds_len);
2503 	usgl->addr0 = htobe64(seg->ds_addr);
2504 	seg++;
2505 
2506 	if ((uintptr_t)end <= (uintptr_t)eq->spg) {
2507 
2508 		/* Won't wrap around at all */
2509 
2510 		for (i = 0; i < sgl->nsegs - 1; i++, seg++) {
2511 			usgl->sge[i / 2].len[i & 1] = htobe32(seg->ds_len);
2512 			usgl->sge[i / 2].addr[i & 1] = htobe64(seg->ds_addr);
2513 		}
2514 		if (i & 1)
2515 			usgl->sge[i / 2].len[1] = htobe32(0);
2516 	} else {
2517 
2518 		/* Will wrap somewhere in the rest of the SGL */
2519 
2520 		/* 2 flits already written, write the rest flit by flit */
2521 		flitp = (void *)(usgl + 1);
2522 		for (i = 0; i < sgl->nflits - 2; i++) {
2523 			if ((uintptr_t)flitp == (uintptr_t)eq->spg)
2524 				flitp = (void *)eq->desc;
2525 			*flitp++ = get_flit(seg, sgl->nsegs - 1, i);
2526 		}
2527 		end = flitp;
2528 	}
2529 
2530 	if ((uintptr_t)end & 0xf) {
2531 		*(uint64_t *)end = 0;
2532 		end++;
2533 		padded = 1;
2534 	} else
2535 		padded = 0;
2536 
2537 	if ((uintptr_t)end == (uintptr_t)eq->spg)
2538 		*to = (void *)eq->desc;
2539 	else
2540 		*to = (void *)end;
2541 
2542 	return (padded);
2543 }
2544 
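/*
 * Copies len bytes of immediate data into the descriptor ring at *to and
 * advances *to past them, wrapping from the status page back to the start of
 * the ring if necessary.
 */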
2545 static inline void
2546 copy_to_txd(struct sge_eq *eq, caddr_t from, caddr_t *to, int len)
2547 {
2548 	if ((uintptr_t)(*to) + len <= (uintptr_t)eq->spg) {
2549 		bcopy(from, *to, len);
2550 		(*to) += len;
2551 	} else {
2552 		int portion = (uintptr_t)eq->spg - (uintptr_t)(*to);
2553 
2554 		bcopy(from, *to, portion);
2555 		from += portion;
2556 		portion = len - portion;	/* remaining */
2557 		bcopy(from, (void *)eq->desc, portion);
2558 		(*to) = (caddr_t)eq->desc + portion;
2559 	}
2560 }
2561 
2562 static inline void
2563 ring_eq_db(struct adapter *sc, struct sge_eq *eq)
2564 {
2565 	wmb();
2566 	t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
2567 	    V_QID(eq->cntxt_id) | V_PIDX(eq->pending));
2568 	eq->pending = 0;
2569 }
2570 
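/*
 * Returns the number of tx descriptors the hardware is done with, based on
 * the consumer index it writes back to the queue's status page.
 */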
2571 static inline int
2572 reclaimable(struct sge_eq *eq)
2573 {
2574 	unsigned int cidx;
2575 
2576 	cidx = eq->spg->cidx;	/* stable snapshot */
2577 	cidx = be16_to_cpu(cidx);
2578 
2579 	if (cidx >= eq->cidx)
2580 		return (cidx - eq->cidx);
2581 	else
2582 		return (cidx + eq->cap - eq->cidx);
2583 }
2584 
2585 /*
2586  * There are "can_reclaim" tx descriptors ready to be reclaimed.  Reclaim as
2587  * many as possible but stop when there are around "n" mbufs to free.
2588  *
2589  * The actual number reclaimed is provided as the return value.
2590  */
2591 static int
2592 reclaim_tx_descs(struct sge_txq *txq, int can_reclaim, int n)
2593 {
2594 	struct tx_sdesc *txsd;
2595 	struct tx_map *txm;
2596 	unsigned int reclaimed, maps;
2597 	struct sge_eq *eq = &txq->eq;
2598 
2599 	EQ_LOCK_ASSERT_OWNED(eq);
2600 
2601 	if (can_reclaim == 0)
2602 		can_reclaim = reclaimable(eq);
2603 
2604 	maps = reclaimed = 0;
2605 	while (can_reclaim && maps < n) {
2606 		int ndesc;
2607 
2608 		txsd = &txq->sdesc[eq->cidx];
2609 		ndesc = txsd->desc_used;
2610 
2611 		/* Firmware doesn't return "partial" credits. */
2612 		KASSERT(can_reclaim >= ndesc,
2613 		    ("%s: unexpected number of credits: %d, %d",
2614 		    __func__, can_reclaim, ndesc));
2615 
2616 		maps += txsd->credits;
2617 
2618 		reclaimed += ndesc;
2619 		can_reclaim -= ndesc;
2620 
2621 		eq->cidx += ndesc;
2622 		if (__predict_false(eq->cidx >= eq->cap))
2623 			eq->cidx -= eq->cap;
2624 	}
2625 
2626 	txm = &txq->maps[txq->map_cidx];
2627 	if (maps)
2628 		prefetch(txm->m);
2629 
2630 	eq->avail += reclaimed;
2631 	KASSERT(eq->avail < eq->cap,	/* avail tops out at (cap - 1) */
2632 	    ("%s: too many descriptors available", __func__));
2633 
2634 	txq->map_avail += maps;
2635 	KASSERT(txq->map_avail <= txq->map_total,
2636 	    ("%s: too many maps available", __func__));
2637 
2638 	while (maps--) {
2639 		struct tx_map *next;
2640 
2641 		next = txm + 1;
2642 		if (__predict_false(txq->map_cidx + 1 == txq->map_total))
2643 			next = txq->maps;
2644 		prefetch(next->m);
2645 
2646 		bus_dmamap_unload(txq->tx_tag, txm->map);
2647 		m_freem(txm->m);
2648 		txm->m = NULL;
2649 
2650 		txm = next;
2651 		if (__predict_false(++txq->map_cidx == txq->map_total))
2652 			txq->map_cidx = 0;
2653 	}
2654 
2655 	return (reclaimed);
2656 }
2657 
2658 static void
2659 write_eqflush_wr(struct sge_eq *eq)
2660 {
2661 	struct fw_eq_flush_wr *wr;
2662 
2663 	EQ_LOCK_ASSERT_OWNED(eq);
2664 	KASSERT(eq->avail > 0, ("%s: no descriptors left.", __func__));
2665 
2666 	wr = (void *)&eq->desc[eq->pidx];
2667 	bzero(wr, sizeof(*wr));
2668 	wr->opcode = FW_EQ_FLUSH_WR;
2669 	wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(sizeof(*wr) / 16) |
2670 	    F_FW_WR_EQUEQ | F_FW_WR_EQUIQ);
2671 
2672 	eq->flags |= EQ_CRFLUSHED;
2673 	eq->pending++;
2674 	eq->avail--;
2675 	if (++eq->pidx == eq->cap)
2676 		eq->pidx = 0;
2677 }
2678 
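/*
 * Returns flit number idx of an SGL's remaining segments (everything after
 * the first, which is written separately); sgl points at those segments.
 * They are laid out as repeating 3-flit groups: a flit holding a pair of
 * lengths followed by one flit per address, which is what the idx / 3 and
 * idx % 3 arithmetic below selects.
 */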
2679 static __be64
2680 get_flit(bus_dma_segment_t *sgl, int nsegs, int idx)
2681 {
2682 	int i = (idx / 3) * 2;
2683 
2684 	switch (idx % 3) {
2685 	case 0: {
2686 		__be64 rc;
2687 
2688 		rc = htobe32(sgl[i].ds_len);
2689 		if (i + 1 < nsegs)
2690 			rc |= (uint64_t)htobe32(sgl[i + 1].ds_len) << 32;
2691 
2692 		return (rc);
2693 	}
2694 	case 1:
2695 		return htobe64(sgl[i].ds_addr);
2696 	case 2:
2697 		return htobe64(sgl[i + 1].ds_addr);
2698 	}
2699 
2700 	return (0);
2701 }
2702 
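/*
 * Picks the smallest freelist buffer size that can hold an MTU-sized frame
 * plus the FL_PKTSHIFT padding, falling back to the largest available size.
 */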
2703 static void
2704 set_fl_tag_idx(struct sge_fl *fl, int mtu)
2705 {
2706 	int i;
2707 
2708 	FL_LOCK_ASSERT_OWNED(fl);
2709 
2710 	for (i = 0; i < FL_BUF_SIZES - 1; i++) {
2711 		if (FL_BUF_SIZE(i) >= (mtu + FL_PKTSHIFT))
2712 			break;
2713 	}
2714 
2715 	fl->tag_idx = i;
2716 }
2717 
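/*
 * An egress update means a credit flush request (written by write_eqflush_wr
 * or piggybacked on a tx work request that used up the last available
 * descriptors) has completed.  Resume transmission on the txq via its
 * resume_tx task, or wake up free_txq if the queue is being torn down (see
 * the comment at the top of free_txq).
 */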
2718 static int
2719 handle_sge_egr_update(struct adapter *sc, const struct cpl_sge_egr_update *cpl)
2720 {
2721 	unsigned int qid = G_EGR_QID(ntohl(cpl->opcode_qid));
2722 	struct sge *s = &sc->sge;
2723 	struct sge_txq *txq;
2724 	struct port_info *pi;
2725 
2726 	txq = (void *)s->eqmap[qid - s->eq_start];
2727 	TXQ_LOCK(txq);
2728 	if (txq->eq.flags & EQ_CRFLUSHED) {
2729 		pi = txq->ifp->if_softc;
2730 		taskqueue_enqueue(pi->tq, &txq->resume_tx);
2731 		txq->egr_update++;
2732 	} else
2733 		wakeup_one(txq);	/* txq is going away, wakeup free_txq */
2734 	TXQ_UNLOCK(txq);
2735 
2736 	return (0);
2737 }
2738 
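/*
 * Dispatches a control message based on its RSS opcode: firmware replies,
 * SGE egress updates, and set-TCB (filter) replies are the only opcodes
 * expected here.
 */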
2739 static void
2740 handle_cpl(struct adapter *sc, struct sge_iq *iq)
2741 {
2742 	const struct rss_header *rss = (const void *)iq->cdesc;
2743 	const struct cpl_fw6_msg *cpl = (const void *)(rss + 1);
2744 
2745 	switch (rss->opcode) {
2746 	case CPL_FW4_MSG:
2747 	case CPL_FW6_MSG:
2748 		if (cpl->type == FW6_TYPE_CMD_RPL)
2749 			t4_handle_fw_rpl(sc, cpl->data);
2750 		break;
2751 
2752 	case CPL_SGE_EGR_UPDATE:
2753 		handle_sge_egr_update(sc, (const void *)cpl);
2754 		break;
2755 
2756 	case CPL_SET_TCB_RPL:
2757 		filter_rpl(sc, (const void *)cpl);
2758 		break;
2759 
2760 	default:
2761 		panic("%s: unexpected CPL opcode 0x%x", __func__, rss->opcode);
2762 	}
2763 }
2764 
2765 /*
2766  * m0 is freed on successful transmission.
2767  */
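/*
 * The request is copied into the control queue's descriptors as immediate
 * data.  EMSGSIZE is returned if it is too long for a single work request,
 * and EAGAIN if the queue doesn't have enough free descriptors even after
 * reclaiming completed ones.
 */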
2768 static int
2769 ctrl_tx(struct adapter *sc, struct sge_ctrlq *ctrlq, struct mbuf *m0)
2770 {
2771 	struct sge_eq *eq = &ctrlq->eq;
2772 	int rc = 0, ndesc;
2773 	int can_reclaim;
2774 	caddr_t dst;
2775 	struct mbuf *m;
2776 
2777 	M_ASSERTPKTHDR(m0);
2778 
2779 	if (m0->m_pkthdr.len > SGE_MAX_WR_LEN) {
2780 		log(LOG_ERR, "%s: %s work request too long (%d)",
2781 		    device_get_nameunit(sc->dev), __func__, m0->m_pkthdr.len);
2782 		return (EMSGSIZE);
2783 	}
2784 	ndesc = howmany(m0->m_pkthdr.len, CTRL_EQ_ESIZE);
2785 
2786 	EQ_LOCK(eq);
2787 
2788 	can_reclaim = reclaimable(eq);
2789 	eq->cidx += can_reclaim;
2790 	eq->avail += can_reclaim;
2791 	if (__predict_false(eq->cidx >= eq->cap))
2792 		eq->cidx -= eq->cap;
2793 
2794 	if (eq->avail < ndesc) {
2795 		rc = EAGAIN;
2796 		ctrlq->no_desc++;
2797 		goto failed;
2798 	}
2799 
2800 	dst = (void *)&eq->desc[eq->pidx];
2801 	for (m = m0; m; m = m->m_next)
2802 		copy_to_txd(eq, mtod(m, caddr_t), &dst, m->m_len);
2803 
2804 	eq->pidx += ndesc;
2805 	if (__predict_false(eq->pidx >= eq->cap))
2806 		eq->pidx -= eq->cap;
2807 
2808 	eq->pending += ndesc;
2809 	ring_eq_db(sc, eq);
2810 failed:
2811 	EQ_UNLOCK(eq);
2812 	if (rc == 0)
2813 		m_freem(m0);
2814 
2815 	return (rc);
2816 }
2817 
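/*
 * Reports a uint16_t (e.g. a queue index) through the int sysctl interface
 * by handing sysctl_handle_int a snapshot of the value.
 */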
2818 static int
2819 sysctl_uint16(SYSCTL_HANDLER_ARGS)
2820 {
2821 	uint16_t *id = arg1;
2822 	int i = *id;
2823 
2824 	return sysctl_handle_int(oidp, &i, 0, req);
2825 }
2826