xref: /freebsd/sys/dev/cxgbe/t4_netmap.c (revision 8ef24a0d4b28fe230e20637f56869cc4148cd2ca)
1 /*-
2  * Copyright (c) 2014 Chelsio Communications, Inc.
3  * All rights reserved.
4  * Written by: Navdeep Parhar <np@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 
34 #ifdef DEV_NETMAP
35 #include <sys/param.h>
36 #include <sys/bus.h>
37 #include <sys/eventhandler.h>
38 #include <sys/lock.h>
39 #include <sys/mbuf.h>
40 #include <sys/module.h>
41 #include <sys/selinfo.h>
42 #include <sys/socket.h>
43 #include <sys/sockio.h>
44 #include <machine/bus.h>
45 #include <net/ethernet.h>
46 #include <net/if.h>
47 #include <net/if_media.h>
48 #include <net/if_var.h>
49 #include <net/if_clone.h>
50 #include <net/if_types.h>
51 #include <net/netmap.h>
52 #include <dev/netmap/netmap_kern.h>
53 
54 #include "common/common.h"
55 #include "common/t4_regs.h"
56 #include "common/t4_regs_values.h"
57 
58 extern int fl_pad;	/* XXXNM */
59 
60 SYSCTL_NODE(_hw, OID_AUTO, cxgbe, CTLFLAG_RD, 0, "cxgbe netmap parameters");
61 
62 /*
63  * 0 = normal netmap rx
64  * 1 = black hole
65  * 2 = supermassive black hole (buffer packing enabled)
66  */
67 int black_hole = 0;
68 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_black_hole, CTLFLAG_RDTUN, &black_hole, 0,
69     "Sink incoming packets.");
70 
71 int rx_ndesc = 256;
72 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_rx_ndesc, CTLFLAG_RWTUN,
73     &rx_ndesc, 0, "# of rx descriptors after which the hw cidx is updated.");
74 
75 int holdoff_tmr_idx = 2;
76 SYSCTL_INT(_hw_cxgbe, OID_AUTO, nm_holdoff_tmr_idx, CTLFLAG_RWTUN,
77     &holdoff_tmr_idx, 0, "Holdoff timer index for netmap rx queues.");
78 
79 /*
80  * Congestion drops.
81  * -1: no congestion feedback (not recommended).
82  *  0: backpressure the channel instead of dropping packets right away.
83  *  1: no backpressure, drop packets for the congested queue immediately.
84  */
85 static int nm_cong_drop = 1;
86 TUNABLE_INT("hw.cxgbe.nm_cong_drop", &nm_cong_drop);
87 
88 /* netmap ifnet routines */
89 static void cxgbe_nm_init(void *);
90 static int cxgbe_nm_ioctl(struct ifnet *, unsigned long, caddr_t);
91 static int cxgbe_nm_transmit(struct ifnet *, struct mbuf *);
92 static void cxgbe_nm_qflush(struct ifnet *);
93 
94 static int cxgbe_nm_init_synchronized(struct vi_info *);
95 static int cxgbe_nm_uninit_synchronized(struct vi_info *);
96 
97 /* T4 netmap VI (ncxgbe) interface */
98 static int ncxgbe_probe(device_t);
99 static int ncxgbe_attach(device_t);
100 static int ncxgbe_detach(device_t);
101 static device_method_t ncxgbe_methods[] = {
102 	DEVMETHOD(device_probe,		ncxgbe_probe),
103 	DEVMETHOD(device_attach,	ncxgbe_attach),
104 	DEVMETHOD(device_detach,	ncxgbe_detach),
105 	{ 0, 0 }
106 };
107 static driver_t ncxgbe_driver = {
108 	"ncxgbe",
109 	ncxgbe_methods,
110 	sizeof(struct vi_info)
111 };
112 
113 /* T5 netmap VI (ncxl) interface */
114 static driver_t ncxl_driver = {
115 	"ncxl",
116 	ncxgbe_methods,
117 	sizeof(struct vi_info)
118 };
119 
120 static void
121 cxgbe_nm_init(void *arg)
122 {
123 	struct vi_info *vi = arg;
124 	struct adapter *sc = vi->pi->adapter;
125 
126 	if (begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4nminit") != 0)
127 		return;
128 	cxgbe_nm_init_synchronized(vi);
129 	end_synchronized_op(sc, 0);
130 
131 	return;
132 }
133 
134 static int
135 cxgbe_nm_init_synchronized(struct vi_info *vi)
136 {
137 	struct adapter *sc = vi->pi->adapter;
138 	struct ifnet *ifp = vi->ifp;
139 	int rc = 0;
140 
141 	ASSERT_SYNCHRONIZED_OP(sc);
142 
143 	if (ifp->if_drv_flags & IFF_DRV_RUNNING)
144 		return (0);	/* already running */
145 
146 	if (!(sc->flags & FULL_INIT_DONE) &&
147 	    ((rc = adapter_full_init(sc)) != 0))
148 		return (rc);	/* error message displayed already */
149 
150 	if (!(vi->flags & VI_INIT_DONE) &&
151 	    ((rc = vi_full_init(vi)) != 0))
152 		return (rc);	/* error message displayed already */
153 
154 	rc = update_mac_settings(ifp, XGMAC_ALL);
155 	if (rc)
156 		return (rc);	/* error message displayed already */
157 
158 	ifp->if_drv_flags |= IFF_DRV_RUNNING;
159 	callout_reset(&vi->tick, hz, vi_tick, vi);
160 
161 	return (rc);
162 }
163 
164 static int
165 cxgbe_nm_uninit_synchronized(struct vi_info *vi)
166 {
167 #ifdef INVARIANTS
168 	struct adapter *sc = vi->pi->adapter;
169 #endif
170 	struct ifnet *ifp = vi->ifp;
171 
172 	ASSERT_SYNCHRONIZED_OP(sc);
173 
174 	callout_stop(&vi->tick);
175 	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
176 
177 	return (0);
178 }
179 
180 static int
181 cxgbe_nm_ioctl(struct ifnet *ifp, unsigned long cmd, caddr_t data)
182 {
183 	int rc = 0, mtu, flags;
184 	struct vi_info *vi = ifp->if_softc;
185 	struct adapter *sc = vi->pi->adapter;
186 	struct ifreq *ifr = (struct ifreq *)data;
187 	uint32_t mask;
188 
189 	MPASS(vi->ifp == ifp);
190 
191 	switch (cmd) {
192 	case SIOCSIFMTU:
193 		mtu = ifr->ifr_mtu;
194 		if ((mtu < ETHERMIN) || (mtu > ETHERMTU_JUMBO))
195 			return (EINVAL);
196 
197 		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4nmtu");
198 		if (rc)
199 			return (rc);
200 		ifp->if_mtu = mtu;
201 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
202 			rc = update_mac_settings(ifp, XGMAC_MTU);
203 		end_synchronized_op(sc, 0);
204 		break;
205 
206 	case SIOCSIFFLAGS:
207 		rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4nflg");
208 		if (rc)
209 			return (rc);
210 
211 		if (ifp->if_flags & IFF_UP) {
212 			if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
213 				flags = vi->if_flags;
214 				if ((ifp->if_flags ^ flags) &
215 				    (IFF_PROMISC | IFF_ALLMULTI)) {
216 					rc = update_mac_settings(ifp,
217 					    XGMAC_PROMISC | XGMAC_ALLMULTI);
218 				}
219 			} else
220 				rc = cxgbe_nm_init_synchronized(vi);
221 			vi->if_flags = ifp->if_flags;
222 		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING)
223 			rc = cxgbe_nm_uninit_synchronized(vi);
224 		end_synchronized_op(sc, 0);
225 		break;
226 
227 	case SIOCADDMULTI:
228 	case SIOCDELMULTI: /* these two are called with a mutex held :-( */
229 		rc = begin_synchronized_op(sc, vi, HOLD_LOCK, "t4nmulti");
230 		if (rc)
231 			return (rc);
232 		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
233 			rc = update_mac_settings(ifp, XGMAC_MCADDRS);
234 		end_synchronized_op(sc, LOCK_HELD);
235 		break;
236 
237 	case SIOCSIFCAP:
238 		mask = ifr->ifr_reqcap ^ ifp->if_capenable;
239 		if (mask & IFCAP_TXCSUM) {
240 			ifp->if_capenable ^= IFCAP_TXCSUM;
241 			ifp->if_hwassist ^= (CSUM_TCP | CSUM_UDP | CSUM_IP);
242 		}
243 		if (mask & IFCAP_TXCSUM_IPV6) {
244 			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
245 			ifp->if_hwassist ^= (CSUM_UDP_IPV6 | CSUM_TCP_IPV6);
246 		}
247 		if (mask & IFCAP_RXCSUM)
248 			ifp->if_capenable ^= IFCAP_RXCSUM;
249 		if (mask & IFCAP_RXCSUM_IPV6)
250 			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
251 		break;
252 
253 	case SIOCSIFMEDIA:
254 	case SIOCGIFMEDIA:
255 		ifmedia_ioctl(ifp, ifr, &vi->media, cmd);
256 		break;
257 
258 	default:
259 		rc = ether_ioctl(ifp, cmd, data);
260 	}
261 
262 	return (rc);
263 }
264 
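/*
 * The netmap ifnet is not meant to carry traffic through the regular kernel
 * stack: mbufs handed to if_transmit are simply freed and qflush is a no-op.
 * All real tx/rx goes through the netmap rings via cxgbe_netmap_txsync and
 * cxgbe_netmap_rxsync below.
 */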
265 static int
266 cxgbe_nm_transmit(struct ifnet *ifp, struct mbuf *m)
267 {
268 
269 	m_freem(m);
270 	return (0);
271 }
272 
273 static void
274 cxgbe_nm_qflush(struct ifnet *ifp)
275 {
276 
277 	return;
278 }
279 
280 static int
281 alloc_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq, int cong)
282 {
283 	int rc, cntxt_id, i;
284 	__be32 v;
285 	struct adapter *sc = vi->pi->adapter;
286 	struct sge_params *sp = &sc->params.sge;
287 	struct netmap_adapter *na = NA(vi->ifp);
288 	struct fw_iq_cmd c;
289 
290 	MPASS(na != NULL);
291 	MPASS(nm_rxq->iq_desc != NULL);
292 	MPASS(nm_rxq->fl_desc != NULL);
293 
294 	bzero(nm_rxq->iq_desc, vi->qsize_rxq * IQ_ESIZE);
295 	bzero(nm_rxq->fl_desc, na->num_rx_desc * EQ_ESIZE + sp->spg_len);
296 
297 	bzero(&c, sizeof(c));
298 	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_IQ_CMD) | F_FW_CMD_REQUEST |
299 	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_IQ_CMD_PFN(sc->pf) |
300 	    V_FW_IQ_CMD_VFN(0));
301 	c.alloc_to_len16 = htobe32(F_FW_IQ_CMD_ALLOC | F_FW_IQ_CMD_IQSTART |
302 	    FW_LEN16(c));
303 	if (vi->flags & INTR_RXQ) {
304 		KASSERT(nm_rxq->intr_idx < sc->intr_count,
305 		    ("%s: invalid direct intr_idx %d", __func__,
306 		    nm_rxq->intr_idx));
307 		v = V_FW_IQ_CMD_IQANDSTINDEX(nm_rxq->intr_idx);
308 	} else {
309 		CXGBE_UNIMPLEMENTED(__func__);	/* XXXNM: needs review */
310 		v = V_FW_IQ_CMD_IQANDSTINDEX(nm_rxq->intr_idx) |
311 		    F_FW_IQ_CMD_IQANDST;
312 	}
313 	c.type_to_iqandstindex = htobe32(v |
314 	    V_FW_IQ_CMD_TYPE(FW_IQ_TYPE_FL_INT_CAP) |
315 	    V_FW_IQ_CMD_VIID(vi->viid) |
316 	    V_FW_IQ_CMD_IQANUD(X_UPDATEDELIVERY_INTERRUPT));
317 	c.iqdroprss_to_iqesize = htobe16(V_FW_IQ_CMD_IQPCIECH(vi->pi->tx_chan) |
318 	    F_FW_IQ_CMD_IQGTSMODE |
319 	    V_FW_IQ_CMD_IQINTCNTTHRESH(0) |
320 	    V_FW_IQ_CMD_IQESIZE(ilog2(IQ_ESIZE) - 4));
321 	c.iqsize = htobe16(vi->qsize_rxq);
322 	c.iqaddr = htobe64(nm_rxq->iq_ba);
323 	if (cong >= 0) {
324 		c.iqns_to_fl0congen = htobe32(F_FW_IQ_CMD_IQFLINTCONGEN |
325 		    V_FW_IQ_CMD_FL0CNGCHMAP(cong) | F_FW_IQ_CMD_FL0CONGCIF |
326 		    F_FW_IQ_CMD_FL0CONGEN);
327 	}
328 	c.iqns_to_fl0congen |=
329 	    htobe32(V_FW_IQ_CMD_FL0HOSTFCMODE(X_HOSTFCMODE_NONE) |
330 		F_FW_IQ_CMD_FL0FETCHRO | F_FW_IQ_CMD_FL0DATARO |
331 		(fl_pad ? F_FW_IQ_CMD_FL0PADEN : 0) |
332 		(black_hole == 2 ? F_FW_IQ_CMD_FL0PACKEN : 0));
333 	c.fl0dcaen_to_fl0cidxfthresh =
334 	    htobe16(V_FW_IQ_CMD_FL0FBMIN(X_FETCHBURSTMIN_128B) |
335 		V_FW_IQ_CMD_FL0FBMAX(X_FETCHBURSTMAX_512B));
336 	c.fl0size = htobe16(na->num_rx_desc / 8 + sp->spg_len / EQ_ESIZE);
337 	c.fl0addr = htobe64(nm_rxq->fl_ba);
338 
339 	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
340 	if (rc != 0) {
341 		device_printf(sc->dev,
342 		    "failed to create netmap ingress queue: %d\n", rc);
343 		return (rc);
344 	}
345 
346 	nm_rxq->iq_cidx = 0;
347 	MPASS(nm_rxq->iq_sidx == vi->qsize_rxq - sp->spg_len / IQ_ESIZE);
348 	nm_rxq->iq_gen = F_RSPD_GEN;
349 	nm_rxq->iq_cntxt_id = be16toh(c.iqid);
350 	nm_rxq->iq_abs_id = be16toh(c.physiqid);
351 	cntxt_id = nm_rxq->iq_cntxt_id - sc->sge.iq_start;
352 	if (cntxt_id >= sc->sge.niq) {
353 		panic("%s: nm_rxq->iq_cntxt_id (%d) more than the max (%d)",
354 		    __func__, cntxt_id, sc->sge.niq - 1);
355 	}
356 	sc->sge.iqmap[cntxt_id] = (void *)nm_rxq;
357 
358 	nm_rxq->fl_cntxt_id = be16toh(c.fl0id);
359 	nm_rxq->fl_pidx = nm_rxq->fl_cidx = 0;
360 	MPASS(nm_rxq->fl_sidx == na->num_rx_desc);
361 	cntxt_id = nm_rxq->fl_cntxt_id - sc->sge.eq_start;
362 	if (cntxt_id >= sc->sge.neq) {
363 		panic("%s: nm_rxq->fl_cntxt_id (%d) more than the max (%d)",
364 		    __func__, cntxt_id, sc->sge.neq - 1);
365 	}
366 	sc->sge.eqmap[cntxt_id] = (void *)nm_rxq;
367 
368 	nm_rxq->fl_db_val = V_QID(nm_rxq->fl_cntxt_id) |
369 	    sc->chip_params->sge_fl_db;
370 
371 	if (is_t5(sc) && cong >= 0) {
372 		uint32_t param, val;
373 
374 		param = V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DMAQ) |
375 		    V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DMAQ_CONM_CTXT) |
376 		    V_FW_PARAMS_PARAM_YZ(nm_rxq->iq_cntxt_id);
380 		if (cong == 0)
381 			val = 1 << 19;
382 		else {
383 			val = 2 << 19;
384 			for (i = 0; i < 4; i++) {
385 				if (cong & (1 << i))
386 					val |= 1 << (i << 2);
387 			}
388 		}
389 
390 		rc = -t4_set_params(sc, sc->mbox, sc->pf, 0, 1, &param, &val);
391 		if (rc != 0) {
392 			/* report error but carry on */
393 			device_printf(sc->dev,
394 			    "failed to set congestion manager context for "
395 			    "ingress queue %d: %d\n", nm_rxq->iq_cntxt_id, rc);
396 		}
397 	}
398 
399 	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
400 	    V_INGRESSQID(nm_rxq->iq_cntxt_id) |
401 	    V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx)));
402 
403 	return (rc);
404 }
405 
406 static int
407 free_nm_rxq_hwq(struct vi_info *vi, struct sge_nm_rxq *nm_rxq)
408 {
409 	struct adapter *sc = vi->pi->adapter;
410 	int rc;
411 
412 	rc = -t4_iq_free(sc, sc->mbox, sc->pf, 0, FW_IQ_TYPE_FL_INT_CAP,
413 	    nm_rxq->iq_cntxt_id, nm_rxq->fl_cntxt_id, 0xffff);
414 	if (rc != 0)
415 		device_printf(sc->dev, "%s: failed for iq %d, fl %d: %d\n",
416 		    __func__, nm_rxq->iq_cntxt_id, nm_rxq->fl_cntxt_id, rc);
417 	return (rc);
418 }
419 
420 static int
421 alloc_nm_txq_hwq(struct vi_info *vi, struct sge_nm_txq *nm_txq)
422 {
423 	int rc, cntxt_id;
424 	size_t len;
425 	struct adapter *sc = vi->pi->adapter;
426 	struct netmap_adapter *na = NA(vi->ifp);
427 	struct fw_eq_eth_cmd c;
428 
429 	MPASS(na != NULL);
430 	MPASS(nm_txq->desc != NULL);
431 
432 	len = na->num_tx_desc * EQ_ESIZE + sc->params.sge.spg_len;
433 	bzero(nm_txq->desc, len);
434 
435 	bzero(&c, sizeof(c));
436 	c.op_to_vfn = htobe32(V_FW_CMD_OP(FW_EQ_ETH_CMD) | F_FW_CMD_REQUEST |
437 	    F_FW_CMD_WRITE | F_FW_CMD_EXEC | V_FW_EQ_ETH_CMD_PFN(sc->pf) |
438 	    V_FW_EQ_ETH_CMD_VFN(0));
439 	c.alloc_to_len16 = htobe32(F_FW_EQ_ETH_CMD_ALLOC |
440 	    F_FW_EQ_ETH_CMD_EQSTART | FW_LEN16(c));
441 	c.autoequiqe_to_viid = htobe32(F_FW_EQ_ETH_CMD_AUTOEQUIQE |
442 	    F_FW_EQ_ETH_CMD_AUTOEQUEQE | V_FW_EQ_ETH_CMD_VIID(vi->viid));
443 	c.fetchszm_to_iqid =
444 	    htobe32(V_FW_EQ_ETH_CMD_HOSTFCMODE(X_HOSTFCMODE_NONE) |
445 		V_FW_EQ_ETH_CMD_PCIECHN(vi->pi->tx_chan) | F_FW_EQ_ETH_CMD_FETCHRO |
446 		V_FW_EQ_ETH_CMD_IQID(sc->sge.nm_rxq[nm_txq->iqidx].iq_cntxt_id));
447 	c.dcaen_to_eqsize = htobe32(V_FW_EQ_ETH_CMD_FBMIN(X_FETCHBURSTMIN_64B) |
448 		      V_FW_EQ_ETH_CMD_FBMAX(X_FETCHBURSTMAX_512B) |
449 		      V_FW_EQ_ETH_CMD_EQSIZE(len / EQ_ESIZE));
450 	c.eqaddr = htobe64(nm_txq->ba);
451 
452 	rc = -t4_wr_mbox(sc, sc->mbox, &c, sizeof(c), &c);
453 	if (rc != 0) {
454 		device_printf(vi->dev,
455 		    "failed to create netmap egress queue: %d\n", rc);
456 		return (rc);
457 	}
458 
459 	nm_txq->cntxt_id = G_FW_EQ_ETH_CMD_EQID(be32toh(c.eqid_pkd));
460 	cntxt_id = nm_txq->cntxt_id - sc->sge.eq_start;
461 	if (cntxt_id >= sc->sge.neq)
462 	    panic("%s: nm_txq->cntxt_id (%d) more than the max (%d)", __func__,
463 		cntxt_id, sc->sge.neq - 1);
464 	sc->sge.eqmap[cntxt_id] = (void *)nm_txq;
465 
466 	nm_txq->pidx = nm_txq->cidx = 0;
467 	MPASS(nm_txq->sidx == na->num_tx_desc);
468 	nm_txq->equiqidx = nm_txq->equeqidx = nm_txq->dbidx = 0;
469 
470 	nm_txq->doorbells = sc->doorbells;
471 	if (isset(&nm_txq->doorbells, DOORBELL_UDB) ||
472 	    isset(&nm_txq->doorbells, DOORBELL_UDBWC) ||
473 	    isset(&nm_txq->doorbells, DOORBELL_WCWR)) {
474 		uint32_t s_qpp = sc->params.sge.eq_s_qpp;
475 		uint32_t mask = (1 << s_qpp) - 1;
476 		volatile uint8_t *udb;
477 
478 		udb = sc->udbs_base + UDBS_DB_OFFSET;
479 		udb += (nm_txq->cntxt_id >> s_qpp) << PAGE_SHIFT;
480 		nm_txq->udb_qid = nm_txq->cntxt_id & mask;
481 		if (nm_txq->udb_qid >= PAGE_SIZE / UDBS_SEG_SIZE)
482 			clrbit(&nm_txq->doorbells, DOORBELL_WCWR);
483 		else {
484 			udb += nm_txq->udb_qid << UDBS_SEG_SHIFT;
485 			nm_txq->udb_qid = 0;
486 		}
487 		nm_txq->udb = (volatile void *)udb;
488 	}
489 
490 	return (rc);
491 }
492 
493 static int
494 free_nm_txq_hwq(struct vi_info *vi, struct sge_nm_txq *nm_txq)
495 {
496 	struct adapter *sc = vi->pi->adapter;
497 	int rc;
498 
499 	rc = -t4_eth_eq_free(sc, sc->mbox, sc->pf, 0, nm_txq->cntxt_id);
500 	if (rc != 0)
501 		device_printf(sc->dev, "%s: failed for eq %d: %d\n", __func__,
502 		    nm_txq->cntxt_id, rc);
503 	return (rc);
504 }
505 
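/*
 * Switch the VI into native netmap mode: pick the freelist buffer size that
 * matches netmap's buffers, create the hardware rx/tx queues sized to the
 * netmap rings, seed every freelist entry from the netmap slots, point the
 * VI's RSS table at the netmap rx queues, and finally enable the VI.
 */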
506 static int
507 cxgbe_netmap_on(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp,
508     struct netmap_adapter *na)
509 {
510 	struct netmap_slot *slot;
511 	struct sge_nm_rxq *nm_rxq;
512 	struct sge_nm_txq *nm_txq;
513 	int rc, i, j, hwidx;
514 	struct hw_buf_info *hwb;
515 	uint16_t *rss;
516 
517 	ASSERT_SYNCHRONIZED_OP(sc);
518 
519 	if ((vi->flags & VI_INIT_DONE) == 0 ||
520 	    (ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
521 		return (EAGAIN);
522 
523 	hwb = &sc->sge.hw_buf_info[0];
524 	for (i = 0; i < SGE_FLBUF_SIZES; i++, hwb++) {
525 		if (hwb->size == NETMAP_BUF_SIZE(na))
526 			break;
527 	}
528 	if (i >= SGE_FLBUF_SIZES) {
529 		if_printf(ifp, "no hwidx for netmap buffer size %d.\n",
530 		    NETMAP_BUF_SIZE(na));
531 		return (ENXIO);
532 	}
533 	hwidx = i;
534 
535 	/* Must set caps before calling netmap_reset */
536 	nm_set_native_flags(na);
537 
538 	for_each_nm_rxq(vi, i, nm_rxq) {
539 		alloc_nm_rxq_hwq(vi, nm_rxq, tnl_cong(vi->pi, nm_cong_drop));
540 		nm_rxq->fl_hwidx = hwidx;
541 		slot = netmap_reset(na, NR_RX, i, 0);
542 		MPASS(slot != NULL);	/* XXXNM: error check, not assert */
543 
544 		/* We deal with 8 bufs at a time */
545 		MPASS((na->num_rx_desc & 7) == 0);
546 		MPASS(na->num_rx_desc == nm_rxq->fl_sidx);
547 		for (j = 0; j < nm_rxq->fl_sidx; j++) {
548 			uint64_t ba;
549 
550 			PNMB(na, &slot[j], &ba);
551 			MPASS(ba != 0);
552 			nm_rxq->fl_desc[j] = htobe64(ba | hwidx);
553 		}
554 		j = nm_rxq->fl_pidx = nm_rxq->fl_sidx - 8;
555 		MPASS((j & 7) == 0);
556 		j /= 8;	/* driver pidx to hardware pidx */
557 		wmb();
558 		t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
559 		    nm_rxq->fl_db_val | V_PIDX(j));
560 	}
561 
562 	for_each_nm_txq(vi, i, nm_txq) {
563 		alloc_nm_txq_hwq(vi, nm_txq);
564 		slot = netmap_reset(na, NR_TX, i, 0);
565 		MPASS(slot != NULL);	/* XXXNM: error check, not assert */
566 	}
567 
568 	rss = malloc(vi->rss_size * sizeof (*rss), M_CXGBE, M_ZERO |
569 	    M_WAITOK);
570 	for (i = 0; i < vi->rss_size;) {
571 		for_each_nm_rxq(vi, j, nm_rxq) {
572 			rss[i++] = nm_rxq->iq_abs_id;
573 			if (i == vi->rss_size)
574 				break;
575 		}
576 	}
577 	rc = -t4_config_rss_range(sc, sc->mbox, vi->viid, 0, vi->rss_size,
578 	    rss, vi->rss_size);
579 	if (rc != 0)
580 		if_printf(ifp, "netmap rss_config failed: %d\n", rc);
581 	free(rss, M_CXGBE);
582 
583 	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, true, true);
584 	if (rc != 0)
585 		if_printf(ifp, "netmap enable_vi failed: %d\n", rc);
586 
587 	return (rc);
588 }
589 
590 static int
591 cxgbe_netmap_off(struct adapter *sc, struct vi_info *vi, struct ifnet *ifp,
592     struct netmap_adapter *na)
593 {
594 	int rc, i;
595 	struct sge_nm_txq *nm_txq;
596 	struct sge_nm_rxq *nm_rxq;
597 
598 	ASSERT_SYNCHRONIZED_OP(sc);
599 
600 	if ((vi->flags & VI_INIT_DONE) == 0)
601 		return (0);
602 
603 	rc = -t4_enable_vi(sc, sc->mbox, vi->viid, false, false);
604 	if (rc != 0)
605 		if_printf(ifp, "netmap disable_vi failed: %d\n", rc);
606 	nm_clear_native_flags(na);
607 
608 	for_each_nm_txq(vi, i, nm_txq) {
609 		struct sge_qstat *spg = (void *)&nm_txq->desc[nm_txq->sidx];
610 
611 		/* Wait for hw pidx to catch up ... */
612 		while (nm_txq->pidx != be16toh(spg->pidx))
613 			pause("nmpidx", 1);
614 
615 		/* ... and then for the cidx. */
616 		while (spg->pidx != spg->cidx)
617 			pause("nmcidx", 1);
618 
619 		free_nm_txq_hwq(vi, nm_txq);
620 	}
621 	for_each_nm_rxq(vi, i, nm_rxq) {
622 		free_nm_rxq_hwq(vi, nm_rxq);
623 	}
624 
625 	return (rc);
626 }
627 
628 static int
629 cxgbe_netmap_reg(struct netmap_adapter *na, int on)
630 {
631 	struct ifnet *ifp = na->ifp;
632 	struct vi_info *vi = ifp->if_softc;
633 	struct adapter *sc = vi->pi->adapter;
634 	int rc;
635 
636 	rc = begin_synchronized_op(sc, vi, SLEEP_OK | INTR_OK, "t4nmreg");
637 	if (rc != 0)
638 		return (rc);
639 	if (on)
640 		rc = cxgbe_netmap_on(sc, vi, ifp, na);
641 	else
642 		rc = cxgbe_netmap_off(sc, vi, ifp, na);
643 	end_synchronized_op(sc, 0);
644 
645 	return (rc);
646 }
647 
648 /* How many packets can a single type1 WR carry in n descriptors */
649 static inline int
650 ndesc_to_npkt(const int n)
651 {
652 
653 	MPASS(n > 0 && n <= SGE_MAX_WR_NDESC);
654 
655 	return (n * 2 - 1);
656 }
657 #define MAX_NPKT_IN_TYPE1_WR	(ndesc_to_npkt(SGE_MAX_WR_NDESC))
658 
659 /* Space (in descriptors) needed for a type1 WR that carries n packets */
660 static inline int
661 npkt_to_ndesc(const int n)
662 {
663 
664 	MPASS(n > 0 && n <= MAX_NPKT_IN_TYPE1_WR);
665 
666 	return ((n + 2) / 2);
667 }
668 
669 /* Space (in 16B units) needed for a type1 WR that carries n packets */
670 static inline int
671 npkt_to_len16(const int n)
672 {
673 
674 	MPASS(n > 0 && n <= MAX_NPKT_IN_TYPE1_WR);
675 
676 	return (n * 2 + 1);
677 }
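/*
 * A worked example of the arithmetic above (a sketch, assuming EQ_ESIZE is
 * 64, SGE_MAX_WR_NDESC is 8, and each packet consumes a 16B cpl_tx_pkt_core
 * plus a 16B single-sge ulptx_sgl, with one 16B WR header up front):
 *
 *   npkt_to_len16(n) = (16 + 32n) / 16        = 2n + 1
 *   npkt_to_ndesc(n) = howmany(16 + 32n, 64)  = (n + 2) / 2
 *   ndesc_to_npkt(d) = (64d - 16) / 32        = 2d - 1
 *
 * so one descriptor carries a single packet, two descriptors carry three,
 * and a maximal type1 WR of 8 descriptors carries 15 packets.
 */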
678 
679 #define NMIDXDIFF(q, idx) IDXDIFF((q)->pidx, (q)->idx, (q)->sidx)
680 
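/*
 * Ring the doorbell for the descriptors written since the last time it was
 * rung, i.e. [dbidx, pidx).  Depending on what the queue supports this is a
 * write-combined copy of the WR itself (only usable when a single new
 * descriptor is pending), a write to the queue's user doorbell, or a write
 * to the kernel doorbell register.
 */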
681 static void
682 ring_nm_txq_db(struct adapter *sc, struct sge_nm_txq *nm_txq)
683 {
684 	int n;
685 	u_int db = nm_txq->doorbells;
686 
687 	MPASS(nm_txq->pidx != nm_txq->dbidx);
688 
689 	n = NMIDXDIFF(nm_txq, dbidx);
690 	if (n > 1)
691 		clrbit(&db, DOORBELL_WCWR);
692 	wmb();
693 
694 	switch (ffs(db) - 1) {
695 	case DOORBELL_UDB:
696 		*nm_txq->udb = htole32(V_QID(nm_txq->udb_qid) | V_PIDX(n));
697 		break;
698 
699 	case DOORBELL_WCWR: {
700 		volatile uint64_t *dst, *src;
701 
702 		/*
703 		 * Queues whose 128B doorbell segment fits in the page do not
704 		 * use relative qid (udb_qid is always 0).  Only queues with
705 		 * doorbell segments can do WCWR.
706 		 */
707 		KASSERT(nm_txq->udb_qid == 0 && n == 1,
708 		    ("%s: inappropriate doorbell (0x%x, %d, %d) for nm_txq %p",
709 		    __func__, nm_txq->doorbells, n, nm_txq->pidx, nm_txq));
710 
711 		dst = (volatile void *)((uintptr_t)nm_txq->udb +
712 		    UDBS_WR_OFFSET - UDBS_DB_OFFSET);
713 		src = (void *)&nm_txq->desc[nm_txq->dbidx];
714 		while (src != (void *)&nm_txq->desc[nm_txq->dbidx + 1])
715 			*dst++ = *src++;
716 		wmb();
717 		break;
718 	}
719 
720 	case DOORBELL_UDBWC:
721 		*nm_txq->udb = htole32(V_QID(nm_txq->udb_qid) | V_PIDX(n));
722 		wmb();
723 		break;
724 
725 	case DOORBELL_KDB:
726 		t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
727 		    V_QID(nm_txq->cntxt_id) | V_PIDX(n));
728 		break;
729 	}
730 	nm_txq->dbidx = nm_txq->pidx;
731 }
732 
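/*
 * When non-zero (the default), requests for egress queue credit updates
 * (F_FW_WR_EQUIQ/F_FW_WR_EQUEQ) are only piggybacked onto a WR after enough
 * descriptors have accumulated, rather than on the final WR of every call to
 * cxgbe_nm_tx.
 */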
733 int lazy_tx_credit_flush = 1;
734 
735 /*
736  * Write work requests to send 'npkt' frames and ring the doorbell to send them
737  * on their way.  No need to check for wraparound.
738  */
739 static void
740 cxgbe_nm_tx(struct adapter *sc, struct sge_nm_txq *nm_txq,
741     struct netmap_kring *kring, int npkt, int npkt_remaining, int txcsum)
742 {
743 	struct netmap_ring *ring = kring->ring;
744 	struct netmap_slot *slot;
745 	const u_int lim = kring->nkr_num_slots - 1;
746 	struct fw_eth_tx_pkts_wr *wr = (void *)&nm_txq->desc[nm_txq->pidx];
747 	uint16_t len;
748 	uint64_t ba;
749 	struct cpl_tx_pkt_core *cpl;
750 	struct ulptx_sgl *usgl;
751 	int i, n;
752 
753 	while (npkt) {
754 		n = min(npkt, MAX_NPKT_IN_TYPE1_WR);
755 		len = 0;
756 
757 		wr = (void *)&nm_txq->desc[nm_txq->pidx];
758 		wr->op_pkd = htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR));
759 		wr->equiq_to_len16 = htobe32(V_FW_WR_LEN16(npkt_to_len16(n)));
760 		wr->npkt = n;
761 		wr->r3 = 0;
762 		wr->type = 1;
763 		cpl = (void *)(wr + 1);
764 
765 		for (i = 0; i < n; i++) {
766 			slot = &ring->slot[kring->nr_hwcur];
767 			PNMB(kring->na, slot, &ba);
768 			MPASS(ba != 0);
769 
770 			cpl->ctrl0 = nm_txq->cpl_ctrl0;
771 			cpl->pack = 0;
772 			cpl->len = htobe16(slot->len);
773 			/*
774 			 * netmap(4) says "netmap does not use features such as
775 			 * checksum offloading, TCP segmentation offloading,
776 			 * encryption, VLAN encapsulation/decapsulation, etc."
777 			 *
778 			 * So the ncxl interfaces have tx hardware checksumming
779 			 * disabled by default.  But you can override netmap by
780 			 * enabling IFCAP_TXCSUM on the interface manually.
781 			 */
782 			cpl->ctrl1 = txcsum ? 0 :
783 			    htobe64(F_TXPKT_IPCSUM_DIS | F_TXPKT_L4CSUM_DIS);
784 
785 			usgl = (void *)(cpl + 1);
786 			usgl->cmd_nsge = htobe32(V_ULPTX_CMD(ULP_TX_SC_DSGL) |
787 			    V_ULPTX_NSGE(1));
788 			usgl->len0 = htobe32(slot->len);
789 			usgl->addr0 = htobe64(ba);
790 
791 			slot->flags &= ~(NS_REPORT | NS_BUF_CHANGED);
792 			cpl = (void *)(usgl + 1);
793 			MPASS(slot->len + len <= UINT16_MAX);
794 			len += slot->len;
795 			kring->nr_hwcur = nm_next(kring->nr_hwcur, lim);
796 		}
797 		wr->plen = htobe16(len);
798 
799 		npkt -= n;
800 		nm_txq->pidx += npkt_to_ndesc(n);
801 		MPASS(nm_txq->pidx <= nm_txq->sidx);
802 		if (__predict_false(nm_txq->pidx == nm_txq->sidx)) {
803 			/*
804 			 * This routine doesn't know how to write WRs that wrap
805 			 * around.  Make sure it wasn't asked to.
806 			 */
807 			MPASS(npkt == 0);
808 			nm_txq->pidx = 0;
809 		}
810 
811 		if (npkt == 0 && npkt_remaining == 0) {
812 			/* All done. */
813 			if (lazy_tx_credit_flush == 0) {
814 				wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ |
815 				    F_FW_WR_EQUIQ);
816 				nm_txq->equeqidx = nm_txq->pidx;
817 				nm_txq->equiqidx = nm_txq->pidx;
818 			}
819 			ring_nm_txq_db(sc, nm_txq);
820 			return;
821 		}
822 
823 		if (NMIDXDIFF(nm_txq, equiqidx) >= nm_txq->sidx / 2) {
824 			wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ |
825 			    F_FW_WR_EQUIQ);
826 			nm_txq->equeqidx = nm_txq->pidx;
827 			nm_txq->equiqidx = nm_txq->pidx;
828 		} else if (NMIDXDIFF(nm_txq, equeqidx) >= 64) {
829 			wr->equiq_to_len16 |= htobe32(F_FW_WR_EQUEQ);
830 			nm_txq->equeqidx = nm_txq->pidx;
831 		}
832 		if (NMIDXDIFF(nm_txq, dbidx) >= 2 * SGE_MAX_WR_NDESC)
833 			ring_nm_txq_db(sc, nm_txq);
834 	}
835 
836 	/* Will get called again. */
837 	MPASS(npkt_remaining);
838 }
839 
840 /* How many contiguous free descriptors starting at pidx */
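/*
 * One descriptor is deliberately left unused when the queue would otherwise
 * fill up completely, so that pidx never catches up with cidx and a full
 * ring cannot be mistaken for an empty one.
 */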
841 static inline int
842 contiguous_ndesc_available(struct sge_nm_txq *nm_txq)
843 {
844 
845 	if (nm_txq->cidx > nm_txq->pidx)
846 		return (nm_txq->cidx - nm_txq->pidx - 1);
847 	else if (nm_txq->cidx > 0)
848 		return (nm_txq->sidx - nm_txq->pidx);
849 	else
850 		return (nm_txq->sidx - nm_txq->pidx - 1);
851 }
852 
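/*
 * Walk the WRs that the hardware has consumed (its cidx is read from the
 * status page at the end of the ring) and return the total number of
 * packets, and therefore netmap slots, that those WRs carried.
 */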
853 static int
854 reclaim_nm_tx_desc(struct sge_nm_txq *nm_txq)
855 {
856 	struct sge_qstat *spg = (void *)&nm_txq->desc[nm_txq->sidx];
857 	uint16_t hw_cidx = spg->cidx;	/* snapshot */
858 	struct fw_eth_tx_pkts_wr *wr;
859 	int n = 0;
860 
861 	hw_cidx = be16toh(hw_cidx);
862 
863 	while (nm_txq->cidx != hw_cidx) {
864 		wr = (void *)&nm_txq->desc[nm_txq->cidx];
865 
866 		MPASS(wr->op_pkd == htobe32(V_FW_WR_OP(FW_ETH_TX_PKTS_WR)));
867 		MPASS(wr->type == 1);
868 		MPASS(wr->npkt > 0 && wr->npkt <= MAX_NPKT_IN_TYPE1_WR);
869 
870 		n += wr->npkt;
871 		nm_txq->cidx += npkt_to_ndesc(wr->npkt);
872 
873 		/*
874 		 * We never sent a WR that wrapped around so the credits coming
875 		 * back, WR by WR, should never cause the cidx to wrap around
876 		 * either.
877 		 */
878 		MPASS(nm_txq->cidx <= nm_txq->sidx);
879 		if (__predict_false(nm_txq->cidx == nm_txq->sidx))
880 			nm_txq->cidx = 0;
881 	}
882 
883 	return (n);
884 }
885 
886 static int
887 cxgbe_netmap_txsync(struct netmap_kring *kring, int flags)
888 {
889 	struct netmap_adapter *na = kring->na;
890 	struct ifnet *ifp = na->ifp;
891 	struct vi_info *vi = ifp->if_softc;
892 	struct adapter *sc = vi->pi->adapter;
893 	struct sge_nm_txq *nm_txq = &sc->sge.nm_txq[vi->first_txq + kring->ring_id];
894 	const u_int head = kring->rhead;
895 	u_int reclaimed = 0;
896 	int n, d, npkt_remaining, ndesc_remaining, txcsum;
897 
898 	/*
899 	 * Tx was at kring->nr_hwcur last time around and now we need to advance
900 	 * to kring->rhead.  Note that the driver's pidx moves independently of
901 	 * netmap's kring->nr_hwcur (pidx counts descriptors and the relation
902 	 * between descriptors and frames isn't 1:1).
903 	 */
904 
905 	npkt_remaining = head >= kring->nr_hwcur ? head - kring->nr_hwcur :
906 	    kring->nkr_num_slots - kring->nr_hwcur + head;
907 	txcsum = ifp->if_capenable & (IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6);
908 	while (npkt_remaining) {
909 		reclaimed += reclaim_nm_tx_desc(nm_txq);
910 		ndesc_remaining = contiguous_ndesc_available(nm_txq);
911 		/* Can't run out of descriptors with packets still remaining */
912 		MPASS(ndesc_remaining > 0);
913 
914 		/* # of desc needed to tx all remaining packets */
915 		d = (npkt_remaining / MAX_NPKT_IN_TYPE1_WR) * SGE_MAX_WR_NDESC;
916 		if (npkt_remaining % MAX_NPKT_IN_TYPE1_WR)
917 			d += npkt_to_ndesc(npkt_remaining % MAX_NPKT_IN_TYPE1_WR);
918 
919 		if (d <= ndesc_remaining)
920 			n = npkt_remaining;
921 		else {
922 			/* Can't send all, calculate how many can be sent */
923 			n = (ndesc_remaining / SGE_MAX_WR_NDESC) *
924 			    MAX_NPKT_IN_TYPE1_WR;
925 			if (ndesc_remaining % SGE_MAX_WR_NDESC)
926 				n += ndesc_to_npkt(ndesc_remaining % SGE_MAX_WR_NDESC);
927 		}
928 
929 		/* Send n packets and update nm_txq->pidx and kring->nr_hwcur */
930 		npkt_remaining -= n;
931 		cxgbe_nm_tx(sc, nm_txq, kring, n, npkt_remaining, txcsum);
932 	}
933 	MPASS(npkt_remaining == 0);
934 	MPASS(kring->nr_hwcur == head);
935 	MPASS(nm_txq->dbidx == nm_txq->pidx);
936 
937 	/*
938 	 * Second part: reclaim buffers for completed transmissions.
939 	 */
940 	if (reclaimed || flags & NAF_FORCE_RECLAIM || nm_kr_txempty(kring)) {
941 		reclaimed += reclaim_nm_tx_desc(nm_txq);
942 		kring->nr_hwtail += reclaimed;
943 		if (kring->nr_hwtail >= kring->nkr_num_slots)
944 			kring->nr_hwtail -= kring->nkr_num_slots;
945 	}
946 
947 	return (0);
948 }
949 
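/*
 * rxsync: when an update is due, nr_hwtail is advanced to the freelist cidx
 * maintained by t4_nm_intr, which is where newly received frames end, and
 * the buffers that userspace has released (nr_hwcur up to rhead) are
 * recycled back into the freelist, always in multiples of 8.
 */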
950 static int
951 cxgbe_netmap_rxsync(struct netmap_kring *kring, int flags)
952 {
953 	struct netmap_adapter *na = kring->na;
954 	struct netmap_ring *ring = kring->ring;
955 	struct ifnet *ifp = na->ifp;
956 	struct vi_info *vi = ifp->if_softc;
957 	struct adapter *sc = vi->pi->adapter;
958 	struct sge_nm_rxq *nm_rxq = &sc->sge.nm_rxq[vi->first_rxq + kring->ring_id];
959 	u_int const head = kring->rhead;
960 	u_int n;
961 	int force_update = (flags & NAF_FORCE_READ) || kring->nr_kflags & NKR_PENDINTR;
962 
963 	if (black_hole)
964 		return (0);	/* No updates ever. */
965 
966 	if (netmap_no_pendintr || force_update) {
967 		kring->nr_hwtail = atomic_load_acq_32(&nm_rxq->fl_cidx);
968 		kring->nr_kflags &= ~NKR_PENDINTR;
969 	}
970 
971 	/* Userspace done with buffers from kring->nr_hwcur to head */
972 	n = head >= kring->nr_hwcur ? head - kring->nr_hwcur :
973 	    kring->nkr_num_slots - kring->nr_hwcur + head;
974 	n &= ~7U;
975 	if (n > 0) {
976 		u_int fl_pidx = nm_rxq->fl_pidx;
977 		struct netmap_slot *slot = &ring->slot[fl_pidx];
978 		uint64_t ba;
979 		int i, dbinc = 0, hwidx = nm_rxq->fl_hwidx;
980 
981 		/*
982 		 * We always deal with 8 buffers at a time.  We must have
983 		 * stopped at a multiple-of-8 index (fl_pidx) last time around and
984 		 * we must have a whole multiple of 8 buffers to give to the freelist.
985 		 */
986 		MPASS((fl_pidx & 7) == 0);
987 		MPASS((n & 7) == 0);
988 
989 		IDXINCR(kring->nr_hwcur, n, kring->nkr_num_slots);
990 		IDXINCR(nm_rxq->fl_pidx, n, nm_rxq->fl_sidx);
991 
992 		while (n > 0) {
993 			for (i = 0; i < 8; i++, fl_pidx++, slot++) {
994 				PNMB(na, slot, &ba);
995 				MPASS(ba != 0);
996 				nm_rxq->fl_desc[fl_pidx] = htobe64(ba | hwidx);
997 				slot->flags &= ~NS_BUF_CHANGED;
998 				MPASS(fl_pidx <= nm_rxq->fl_sidx);
999 			}
1000 			n -= 8;
1001 			if (fl_pidx == nm_rxq->fl_sidx) {
1002 				fl_pidx = 0;
1003 				slot = &ring->slot[0];
1004 			}
1005 			if (++dbinc == 8 && n >= 32) {
1006 				wmb();
1007 				t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
1008 				    nm_rxq->fl_db_val | V_PIDX(dbinc));
1009 				dbinc = 0;
1010 			}
1011 		}
1012 		MPASS(nm_rxq->fl_pidx == fl_pidx);
1013 
1014 		if (dbinc > 0) {
1015 			wmb();
1016 			t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
1017 			    nm_rxq->fl_db_val | V_PIDX(dbinc));
1018 		}
1019 	}
1020 
1021 	return (0);
1022 }
1023 
1024 static int
1025 ncxgbe_probe(device_t dev)
1026 {
1027 	char buf[128];
1028 	struct vi_info *vi = device_get_softc(dev);
1029 
1030 	snprintf(buf, sizeof(buf), "port %d netmap vi", vi->pi->port_id);
1031 	device_set_desc_copy(dev, buf);
1032 
1033 	return (BUS_PROBE_DEFAULT);
1034 }
1035 
1036 static int
1037 ncxgbe_attach(device_t dev)
1038 {
1039 	struct vi_info *vi;
1040 	struct port_info *pi;
1041 	struct adapter *sc;
1042 	struct netmap_adapter na;
1043 	struct ifnet *ifp;
1044 	int rc;
1045 
1046 	vi = device_get_softc(dev);
1047 	pi = vi->pi;
1048 	sc = pi->adapter;
1049 
1050 	/*
1051 	 * Allocate a virtual interface exclusively for netmap use.  Give it the
1052 	 * MAC address normally reserved for use by a TOE interface.  (The TOE
1053 	 * driver on FreeBSD doesn't use it).
1054 	 */
1055 	rc = t4_alloc_vi_func(sc, sc->mbox, pi->tx_chan, sc->pf, 0, 1,
1056 	    vi->hw_addr, &vi->rss_size, FW_VI_FUNC_OFLD, 0);
1057 	if (rc < 0) {
1058 		device_printf(dev, "unable to allocate netmap virtual "
1059 		    "interface for port %d: %d\n", pi->port_id, -rc);
1060 		return (-rc);
1061 	}
1062 	vi->viid = rc;
1063 	vi->xact_addr_filt = -1;
1064 	callout_init(&vi->tick, 1);
1065 
1066 	ifp = if_alloc(IFT_ETHER);
1067 	if (ifp == NULL) {
1068 		device_printf(dev, "Cannot allocate netmap ifnet\n");
1069 		return (ENOMEM);
1070 	}
1071 	vi->ifp = ifp;
1072 	ifp->if_softc = vi;
1073 
1074 	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1075 	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
1076 
1077 	ifp->if_init = cxgbe_nm_init;
1078 	ifp->if_ioctl = cxgbe_nm_ioctl;
1079 	ifp->if_transmit = cxgbe_nm_transmit;
1080 	ifp->if_qflush = cxgbe_nm_qflush;
1081 	ifp->if_get_counter = cxgbe_get_counter;
1082 
1083 	/*
1084 	 * netmap(4) says "netmap does not use features such as checksum
1085 	 * offloading, TCP segmentation offloading, encryption, VLAN
1086 	 * encapsulation/decapsulation, etc."
1087 	 *
1088 	 * By default we comply with the statement above.  But we do declare the
1089 	 * ifnet capable of L3/L4 checksumming so that a user can override
1090 	 * netmap and have the hardware do the L3/L4 checksums.
1091 	 */
1092 	ifp->if_capabilities = IFCAP_HWCSUM | IFCAP_JUMBO_MTU |
1093 	    IFCAP_HWCSUM_IPV6;
1094 	ifp->if_capenable = 0;
1095 	ifp->if_hwassist = 0;
1096 
1097 	/* vi->media has already been setup by the caller */
1098 
1099 	ether_ifattach(ifp, vi->hw_addr);
1100 
1101 	device_printf(dev, "%d txq, %d rxq (netmap)\n", vi->ntxq, vi->nrxq);
1102 
1103 	vi_sysctls(vi);
1104 
1105 	/*
1106 	 * Register with netmap in the kernel.
1107 	 */
1108 	bzero(&na, sizeof(na));
1109 
1110 	na.ifp = ifp;
1111 	na.na_flags = NAF_BDG_MAYSLEEP;
1112 
1113 	/* Netmap doesn't know about the space reserved for the status page. */
1114 	na.num_tx_desc = vi->qsize_txq - sc->params.sge.spg_len / EQ_ESIZE;
1115 
1116 	/*
1117 	 * The freelist's cidx/pidx drives netmap's rx cidx/pidx.  So
1118 	 * num_rx_desc is based on the number of buffers that can be held in the
1119 	 * freelist, and not the number of entries in the iq.  (These two are
1120 	 * not exactly the same due to the space taken up by the status page).
1121 	 */
1122 	na.num_rx_desc = rounddown(vi->qsize_rxq, 8);
1123 	na.nm_txsync = cxgbe_netmap_txsync;
1124 	na.nm_rxsync = cxgbe_netmap_rxsync;
1125 	na.nm_register = cxgbe_netmap_reg;
1126 	na.num_tx_rings = vi->ntxq;
1127 	na.num_rx_rings = vi->nrxq;
1128 	netmap_attach(&na);	/* This adds IFCAP_NETMAP to if_capabilities */
1129 
1130 	return (0);
1131 }
1132 
1133 static int
1134 ncxgbe_detach(device_t dev)
1135 {
1136 	struct vi_info *vi;
1137 	struct adapter *sc;
1138 
1139 	vi = device_get_softc(dev);
1140 	sc = vi->pi->adapter;
1141 
1142 	doom_vi(sc, vi);
1143 
1144 	netmap_detach(vi->ifp);
1145 	ether_ifdetach(vi->ifp);
1146 	cxgbe_nm_uninit_synchronized(vi);
1147 	callout_drain(&vi->tick);
1148 	vi_full_uninit(vi);
1149 	ifmedia_removeall(&vi->media);
1150 	if_free(vi->ifp);
1151 	vi->ifp = NULL;
1152 	t4_free_vi(sc, sc->mbox, sc->pf, 0, vi->viid);
1153 
1154 	end_synchronized_op(sc, 0);
1155 
1156 	return (0);
1157 }
1158 
1159 static void
1160 handle_nm_fw6_msg(struct adapter *sc, struct ifnet *ifp,
1161     const struct cpl_fw6_msg *cpl)
1162 {
1163 	const struct cpl_sge_egr_update *egr;
1164 	uint32_t oq;
1165 	struct sge_nm_txq *nm_txq;
1166 
1167 	if (cpl->type != FW_TYPE_RSSCPL && cpl->type != FW6_TYPE_RSSCPL)
1168 		panic("%s: FW_TYPE 0x%x on nm_rxq.", __func__, cpl->type);
1169 
1170 	/* data[0] is RSS header */
1171 	egr = (const void *)&cpl->data[1];
1172 	oq = be32toh(egr->opcode_qid);
1173 	MPASS(G_CPL_OPCODE(oq) == CPL_SGE_EGR_UPDATE);
1174 	nm_txq = (void *)sc->sge.eqmap[G_EGR_QID(oq) - sc->sge.eq_start];
1175 
1176 	netmap_tx_irq(ifp, nm_txq->nid);
1177 }
1178 
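/*
 * Interrupt handler for a netmap rx queue.  It walks the ingress queue,
 * turning CPL_RX_PKT entries into netmap slot updates and handing egress
 * updates to handle_nm_fw6_msg.  Every rx_ndesc entries the freelist cidx is
 * published and the queue's GTS register is updated; in black hole mode the
 * buffers are recycled straight back to the freelist without ever waking
 * netmap.
 */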
1179 void
1180 t4_nm_intr(void *arg)
1181 {
1182 	struct sge_nm_rxq *nm_rxq = arg;
1183 	struct vi_info *vi = nm_rxq->vi;
1184 	struct adapter *sc = vi->pi->adapter;
1185 	struct ifnet *ifp = vi->ifp;
1186 	struct netmap_adapter *na = NA(ifp);
1187 	struct netmap_kring *kring = &na->rx_rings[nm_rxq->nid];
1188 	struct netmap_ring *ring = kring->ring;
1189 	struct iq_desc *d = &nm_rxq->iq_desc[nm_rxq->iq_cidx];
1190 	uint32_t lq;
1191 	u_int n = 0, work = 0;
1192 	uint8_t opcode;
1193 	uint32_t fl_cidx = atomic_load_acq_32(&nm_rxq->fl_cidx);
1194 	u_int fl_credits = fl_cidx & 7;
1195 
1196 	while ((d->rsp.u.type_gen & F_RSPD_GEN) == nm_rxq->iq_gen) {
1197 
1198 		rmb();
1199 
1200 		lq = be32toh(d->rsp.pldbuflen_qid);
1201 		opcode = d->rss.opcode;
1202 
1203 		switch (G_RSPD_TYPE(d->rsp.u.type_gen)) {
1204 		case X_RSPD_TYPE_FLBUF:
1205 			if (black_hole != 2) {
1206 				/* No buffer packing so new buf every time */
1207 				MPASS(lq & F_RSPD_NEWBUF);
1208 			}
1209 
1210 			/* fall through */
1211 
1212 		case X_RSPD_TYPE_CPL:
1213 			MPASS(opcode < NUM_CPL_CMDS);
1214 
1215 			switch (opcode) {
1216 			case CPL_FW4_MSG:
1217 			case CPL_FW6_MSG:
1218 				handle_nm_fw6_msg(sc, ifp,
1219 				    (const void *)&d->cpl[0]);
1220 				break;
1221 			case CPL_RX_PKT:
1222 				ring->slot[fl_cidx].len = G_RSPD_LEN(lq) -
1223 				    sc->params.sge.fl_pktshift;
1224 				ring->slot[fl_cidx].flags = kring->nkr_slot_flags;
1225 				fl_cidx += (lq & F_RSPD_NEWBUF) ? 1 : 0;
1226 				fl_credits += (lq & F_RSPD_NEWBUF) ? 1 : 0;
1227 				if (__predict_false(fl_cidx == nm_rxq->fl_sidx))
1228 					fl_cidx = 0;
1229 				break;
1230 			default:
1231 				panic("%s: unexpected opcode 0x%x on nm_rxq %p",
1232 				    __func__, opcode, nm_rxq);
1233 			}
1234 			break;
1235 
1236 		case X_RSPD_TYPE_INTR:
1237 			/* Not equipped to handle forwarded interrupts. */
1238 			panic("%s: netmap queue received interrupt for iq %u\n",
1239 			    __func__, lq);
1240 
1241 		default:
1242 			panic("%s: illegal response type %d on nm_rxq %p",
1243 			    __func__, G_RSPD_TYPE(d->rsp.u.type_gen), nm_rxq);
1244 		}
1245 
1246 		d++;
1247 		if (__predict_false(++nm_rxq->iq_cidx == nm_rxq->iq_sidx)) {
1248 			nm_rxq->iq_cidx = 0;
1249 			d = &nm_rxq->iq_desc[0];
1250 			nm_rxq->iq_gen ^= F_RSPD_GEN;
1251 		}
1252 
1253 		if (__predict_false(++n == rx_ndesc)) {
1254 			atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx);
1255 			if (black_hole && fl_credits >= 8) {
1256 				fl_credits /= 8;
1257 				IDXINCR(nm_rxq->fl_pidx, fl_credits * 8,
1258 				    nm_rxq->fl_sidx);
1259 				t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
1260 				    nm_rxq->fl_db_val | V_PIDX(fl_credits));
1261 				fl_credits = fl_cidx & 7;
1262 			} else if (!black_hole) {
1263 				netmap_rx_irq(ifp, nm_rxq->nid, &work);
1264 				MPASS(work != 0);
1265 			}
1266 			t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS),
1267 			    V_CIDXINC(n) | V_INGRESSQID(nm_rxq->iq_cntxt_id) |
1268 			    V_SEINTARM(V_QINTR_TIMER_IDX(X_TIMERREG_UPDATE_CIDX)));
1269 			n = 0;
1270 		}
1271 	}
1272 
1273 	atomic_store_rel_32(&nm_rxq->fl_cidx, fl_cidx);
1274 	if (black_hole) {
1275 		fl_credits /= 8;
1276 		IDXINCR(nm_rxq->fl_pidx, fl_credits * 8, nm_rxq->fl_sidx);
1277 		t4_write_reg(sc, MYPF_REG(A_SGE_PF_KDOORBELL),
1278 		    nm_rxq->fl_db_val | V_PIDX(fl_credits));
1279 	} else
1280 		netmap_rx_irq(ifp, nm_rxq->nid, &work);
1281 
1282 	t4_write_reg(sc, MYPF_REG(A_SGE_PF_GTS), V_CIDXINC(n) |
1283 	    V_INGRESSQID((u32)nm_rxq->iq_cntxt_id) |
1284 	    V_SEINTARM(V_QINTR_TIMER_IDX(holdoff_tmr_idx)));
1285 }
1286 
1287 static devclass_t ncxgbe_devclass, ncxl_devclass;
1288 
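/*
 * ncxgbe attaches as a child of each T4 port (cxgbe) and ncxl as a child of
 * each T5 port (cxl); both share the same device methods and a vi_info
 * softc.
 */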
1289 DRIVER_MODULE(ncxgbe, cxgbe, ncxgbe_driver, ncxgbe_devclass, 0, 0);
1290 MODULE_VERSION(ncxgbe, 1);
1291 
1292 DRIVER_MODULE(ncxl, cxl, ncxl_driver, ncxl_devclass, 0, 0);
1293 MODULE_VERSION(ncxl, 1);
1294 #endif
1295