xref: /freebsd/sys/dev/cxgbe/t4_vf.c (revision 7c1b51d6dc2e165ae7333373513b080f17cf79bd)
1 /*-
2  * Copyright (c) 2016 Chelsio Communications, Inc.
3  * All rights reserved.
4  * Written by: John Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 __FBSDID("$FreeBSD$");
30 
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33 
34 #include <sys/param.h>
35 #include <sys/bus.h>
36 #include <sys/conf.h>
37 #include <sys/kernel.h>
38 #include <sys/module.h>
39 #include <sys/priv.h>
40 #include <dev/pci/pcivar.h>
41 #if defined(__i386__) || defined(__amd64__)
42 #include <vm/vm.h>
43 #include <vm/pmap.h>
44 #endif
45 
46 #include "common/common.h"
47 #include "common/t4_regs.h"
48 #include "t4_ioctl.h"
49 #include "t4_mp_ring.h"
50 
51 /*
52  * Some notes:
53  *
54  * The Virtual Interfaces are connected to an internal switch on the chip
55  * which allows VIs attached to the same port to talk to each other even when
56  * the port link is down.  As a result, we might want to always report a
57  * VF's link as being "up".
58  *
59  * XXX: Add a TUNABLE and possible per-device sysctl for this?
60  */
61 
/*
 * Outcome of the interrupt and queue sizing performed at attach time:
 * which interrupt type was selected, how many vectors are used, and how
 * many NIC queues each 10G and each 1G port receives.
 */
struct intrs_and_queues {
	/* Interrupt delivery. */
	uint16_t intr_type;		/* MSI, or MSI-X */
	uint16_t nirq;			/* Total # of vectors */
	uint16_t intr_flags_10g;	/* Interrupt flags for each 10G port */
	uint16_t intr_flags_1g;		/* Interrupt flags for each 1G port */

	/* Per-port NIC queue counts. */
	uint16_t ntxq10g;		/* # of NIC txq's for each 10G port */
	uint16_t nrxq10g;		/* # of NIC rxq's for each 10G port */
	uint16_t ntxq1g;		/* # of NIC txq's for each 1G port */
	uint16_t nrxq1g;		/* # of NIC rxq's for each 1G port */
};
72 
/*
 * PCI device IDs claimed by the VF nexus probe routines, one table per
 * chip generation.  Each entry pairs a device ID with the description
 * string that the matching probe routine hands to device_set_desc().
 *
 * The descriptions are string literals and are never written to, so the
 * pointer is const-qualified.
 */
struct {
	uint16_t device;
	const char *desc;
} t4vf_pciids[] = {
	{0x4800, "Chelsio T440-dbg VF"},
	{0x4801, "Chelsio T420-CR VF"},
	{0x4802, "Chelsio T422-CR VF"},
	{0x4803, "Chelsio T440-CR VF"},
	{0x4804, "Chelsio T420-BCH VF"},
	{0x4805, "Chelsio T440-BCH VF"},
	{0x4806, "Chelsio T440-CH VF"},
	{0x4807, "Chelsio T420-SO VF"},
	{0x4808, "Chelsio T420-CX VF"},
	{0x4809, "Chelsio T420-BT VF"},
	{0x480a, "Chelsio T404-BT VF"},
	{0x480e, "Chelsio T440-LP-CR VF"},
}, t5vf_pciids[] = {
	{0x5800, "Chelsio T580-dbg VF"},
	{0x5801, "Chelsio T520-CR VF"},		/* 2 x 10G */
	{0x5802, "Chelsio T522-CR VF"},		/* 2 x 10G, 2 x 1G */
	{0x5803, "Chelsio T540-CR VF"},		/* 4 x 10G */
	{0x5807, "Chelsio T520-SO VF"},		/* 2 x 10G, nomem */
	{0x5809, "Chelsio T520-BT VF"},		/* 2 x 10GBaseT */
	{0x580a, "Chelsio T504-BT VF"},		/* 4 x 1G */
	{0x580d, "Chelsio T580-CR VF"},		/* 2 x 40G */
	{0x580e, "Chelsio T540-LP-CR VF"},	/* 4 x 10G */
	{0x5810, "Chelsio T580-LP-CR VF"},	/* 2 x 40G */
	{0x5811, "Chelsio T520-LL-CR VF"},	/* 2 x 10G */
	{0x5812, "Chelsio T560-CR VF"},		/* 1 x 40G, 2 x 10G */
	{0x5814, "Chelsio T580-LP-SO-CR VF"},	/* 2 x 40G, nomem */
	{0x5815, "Chelsio T502-BT VF"},		/* 2 x 1G */
#ifdef notyet
	/* Compiled in only when "notyet" is defined. */
	{0x5804, "Chelsio T520-BCH VF"},
	{0x5805, "Chelsio T540-BCH VF"},
	{0x5806, "Chelsio T540-CH VF"},
	{0x5808, "Chelsio T520-CX VF"},
	{0x580b, "Chelsio B520-SR VF"},
	{0x580c, "Chelsio B504-BT VF"},
	{0x580f, "Chelsio Amsterdam VF"},
	{0x5813, "Chelsio T580-CHR VF"},
#endif
}, t6vf_pciids[] = {
	{0x6800, "Chelsio T6-DBG-25 VF"},	/* 2 x 10/25G, debug */
	{0x6801, "Chelsio T6225-CR VF"},	/* 2 x 10/25G */
	{0x6802, "Chelsio T6225-SO-CR VF"},	/* 2 x 10/25G, nomem */
	{0x6803, "Chelsio T6425-CR VF"},	/* 4 x 10/25G */
	{0x6804, "Chelsio T6425-SO-CR VF"},	/* 4 x 10/25G, nomem */
	{0x6805, "Chelsio T6225-OCP-SO VF"},	/* 2 x 10/25G, nomem */
	{0x6806, "Chelsio T62100-OCP-SO VF"},	/* 2 x 40/50/100G, nomem */
	{0x6807, "Chelsio T62100-LP-CR VF"},	/* 2 x 40/50/100G */
	{0x6808, "Chelsio T62100-SO-CR VF"},	/* 2 x 40/50/100G, nomem */
	{0x6809, "Chelsio T6210-BT VF"},	/* 2 x 10GBASE-T */
	{0x680d, "Chelsio T62100-CR VF"},	/* 2 x 40/50/100G */
	{0x6810, "Chelsio T6-DBG-100 VF"},	/* 2 x 40/50/100G, debug */
	{0x6811, "Chelsio T6225-LL-CR VF"},	/* 2 x 10/25G */
	{0x6814, "Chelsio T61100-OCP-SO VF"},	/* 1 x 40/50/100G, nomem */
	{0x6815, "Chelsio T6201-BT VF"},	/* 2 x 1000BASE-T */

	/* Custom */
	{0x6880, "Chelsio T6225 80 VF"},
	{0x6881, "Chelsio T62100 81 VF"},
};
135 
136 static d_ioctl_t t4vf_ioctl;
137 
138 static struct cdevsw t4vf_cdevsw = {
139        .d_version = D_VERSION,
140        .d_ioctl = t4vf_ioctl,
141        .d_name = "t4vf",
142 };
143 
144 static int
145 t4vf_probe(device_t dev)
146 {
147 	uint16_t d;
148 	size_t i;
149 
150 	d = pci_get_device(dev);
151 	for (i = 0; i < nitems(t4vf_pciids); i++) {
152 		if (d == t4vf_pciids[i].device) {
153 			device_set_desc(dev, t4vf_pciids[i].desc);
154 			return (BUS_PROBE_DEFAULT);
155 		}
156 	}
157 	return (ENXIO);
158 }
159 
160 static int
161 t5vf_probe(device_t dev)
162 {
163 	uint16_t d;
164 	size_t i;
165 
166 	d = pci_get_device(dev);
167 	for (i = 0; i < nitems(t5vf_pciids); i++) {
168 		if (d == t5vf_pciids[i].device) {
169 			device_set_desc(dev, t5vf_pciids[i].desc);
170 			return (BUS_PROBE_DEFAULT);
171 		}
172 	}
173 	return (ENXIO);
174 }
175 
176 static int
177 t6vf_probe(device_t dev)
178 {
179 	uint16_t d;
180 	size_t i;
181 
182 	d = pci_get_device(dev);
183 	for (i = 0; i < nitems(t6vf_pciids); i++) {
184 		if (d == t6vf_pciids[i].device) {
185 			device_set_desc(dev, t6vf_pciids[i].desc);
186 			return (BUS_PROBE_DEFAULT);
187 		}
188 	}
189 	return (ENXIO);
190 }
191 
/*
 * Helpers that build a firmware parameter identifier by OR-ing a mnemonic
 * field with a parameter index field.  "param" is the suffix of a
 * FW_PARAMS_PARAM_DEV_* (respectively FW_PARAMS_PARAM_PFVF_*) constant,
 * e.g. FW_PARAM_DEV(FWREV) uses FW_PARAMS_PARAM_DEV_FWREV.
 *
 * Both macros are #undef'ed again once the parameter routines below no
 * longer need them.
 */
#define FW_PARAM_DEV(param) \
	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##param))
#define FW_PARAM_PFVF(param) \
	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##param))
198 
199 static int
200 get_params__pre_init(struct adapter *sc)
201 {
202 	int rc;
203 	uint32_t param[3], val[3];
204 
205 	param[0] = FW_PARAM_DEV(FWREV);
206 	param[1] = FW_PARAM_DEV(TPREV);
207 	param[2] = FW_PARAM_DEV(CCLK);
208 	rc = -t4vf_query_params(sc, nitems(param), param, val);
209 	if (rc != 0) {
210 		device_printf(sc->dev,
211 		    "failed to query parameters (pre_init): %d.\n", rc);
212 		return (rc);
213 	}
214 
215 	sc->params.fw_vers = val[0];
216 	sc->params.tp_vers = val[1];
217 	sc->params.vpd.cclk = val[2];
218 
219 	snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
220 	    G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
221 	    G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
222 	    G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
223 	    G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
224 
225 	snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u",
226 	    G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers),
227 	    G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers),
228 	    G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers),
229 	    G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers));
230 
231 	return (0);
232 }
233 
234 static int
235 get_params__post_init(struct adapter *sc)
236 {
237 	int rc;
238 
239 	rc = -t4vf_get_sge_params(sc);
240 	if (rc != 0) {
241 		device_printf(sc->dev,
242 		    "unable to retrieve adapter SGE parameters: %d\n", rc);
243 		return (rc);
244 	}
245 
246 	rc = -t4vf_get_rss_glb_config(sc);
247 	if (rc != 0) {
248 		device_printf(sc->dev,
249 		    "unable to retrieve adapter RSS parameters: %d\n", rc);
250 		return (rc);
251 	}
252 	if (sc->params.rss.mode != FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
253 		device_printf(sc->dev,
254 		    "unable to operate with global RSS mode %d\n",
255 		    sc->params.rss.mode);
256 		return (EINVAL);
257 	}
258 
259 	rc = t4_read_chip_settings(sc);
260 	if (rc != 0)
261 		return (rc);
262 
263 	/*
264 	 * Grab our Virtual Interface resource allocation, extract the
265 	 * features that we're interested in and do a bit of sanity testing on
266 	 * what we discover.
267 	 */
268 	rc = -t4vf_get_vfres(sc);
269 	if (rc != 0) {
270 		device_printf(sc->dev,
271 		    "unable to get virtual interface resources: %d\n", rc);
272 		return (rc);
273 	}
274 
275 	/*
276 	 * Check for various parameter sanity issues.
277 	 */
278 	if (sc->params.vfres.pmask == 0) {
279 		device_printf(sc->dev, "no port access configured/usable!\n");
280 		return (EINVAL);
281 	}
282 	if (sc->params.vfres.nvi == 0) {
283 		device_printf(sc->dev,
284 		    "no virtual interfaces configured/usable!\n");
285 		return (EINVAL);
286 	}
287 	sc->params.portvec = sc->params.vfres.pmask;
288 
289 	return (0);
290 }
291 
292 static int
293 set_params__post_init(struct adapter *sc)
294 {
295 	uint32_t param, val;
296 
297 	/* ask for encapsulated CPLs */
298 	param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
299 	val = 1;
300 	(void)t4vf_set_params(sc, 1, &param, &val);
301 
302 	return (0);
303 }
304 
/* The parameter-name helpers are not used past this point in the file. */
#undef FW_PARAM_PFVF
#undef FW_PARAM_DEV
307 
308 static int
309 cfg_itype_and_nqueues(struct adapter *sc, int n10g, int n1g,
310     struct intrs_and_queues *iaq)
311 {
312 	struct vf_resources *vfres;
313 	int nrxq10g, nrxq1g, nrxq;
314 	int ntxq10g, ntxq1g, ntxq;
315 	int itype, iq_avail, navail, rc;
316 
317 	/*
318 	 * Figure out the layout of queues across our VIs and ensure
319 	 * we can allocate enough interrupts for our layout.
320 	 */
321 	vfres = &sc->params.vfres;
322 	bzero(iaq, sizeof(*iaq));
323 
324 	for (itype = INTR_MSIX; itype != 0; itype >>= 1) {
325 		if (itype == INTR_INTX)
326 			continue;
327 
328 		if (itype == INTR_MSIX)
329 			navail = pci_msix_count(sc->dev);
330 		else
331 			navail = pci_msi_count(sc->dev);
332 
333 		if (navail == 0)
334 			continue;
335 
336 		iaq->intr_type = itype;
337 		iaq->intr_flags_10g = 0;
338 		iaq->intr_flags_1g = 0;
339 
340 		/*
341 		 * XXX: The Linux driver reserves an Ingress Queue for
342 		 * forwarded interrupts when using MSI (but not MSI-X).
343 		 * It seems it just always asks for 2 interrupts and
344 		 * forwards all rxqs to the forwarded interrupt.
345 		 *
346 		 * We must reserve one IRQ for the for the firmware
347 		 * event queue.
348 		 *
349 		 * Every rxq requires an ingress queue with a free
350 		 * list and interrupts and an egress queue.  Every txq
351 		 * requires an ETH egress queue.
352 		 */
353 		iaq->nirq = T4VF_EXTRA_INTR;
354 
355 		/*
356 		 * First, determine how many queues we can allocate.
357 		 * Start by finding the upper bound on rxqs from the
358 		 * limit on ingress queues.
359 		 */
360 		iq_avail = vfres->niqflint - iaq->nirq;
361 		if (iq_avail < n10g + n1g) {
362 			device_printf(sc->dev,
363 			    "Not enough ingress queues (%d) for %d ports\n",
364 			    vfres->niqflint, n10g + n1g);
365 			return (ENXIO);
366 		}
367 
368 		/*
369 		 * Try to honor the cap on interrupts.  If there aren't
370 		 * enough interrupts for at least one interrupt per
371 		 * port, then don't bother, we will just forward all
372 		 * interrupts to one interrupt in that case.
373 		 */
374 		if (iaq->nirq + n10g + n1g <= navail) {
375 			if (iq_avail > navail - iaq->nirq)
376 				iq_avail = navail - iaq->nirq;
377 		}
378 
379 		nrxq10g = t4_nrxq10g;
380 		nrxq1g = t4_nrxq1g;
381 		nrxq = n10g * nrxq10g + n1g * nrxq1g;
382 		if (nrxq > iq_avail && nrxq1g > 1) {
383 			/* Too many ingress queues.  Try just 1 for 1G. */
384 			nrxq1g = 1;
385 			nrxq = n10g * nrxq10g + n1g * nrxq1g;
386 		}
387 		if (nrxq > iq_avail) {
388 			/*
389 			 * Still too many ingress queues.  Use what we
390 			 * can for each 10G port.
391 			 */
392 			nrxq10g = (iq_avail - n1g) / n10g;
393 			nrxq = n10g * nrxq10g + n1g * nrxq1g;
394 		}
395 		KASSERT(nrxq <= iq_avail, ("too many ingress queues"));
396 
397 		/*
398 		 * Next, determine the upper bound on txqs from the limit
399 		 * on ETH queues.
400 		 */
401 		if (vfres->nethctrl < n10g + n1g) {
402 			device_printf(sc->dev,
403 			    "Not enough ETH queues (%d) for %d ports\n",
404 			    vfres->nethctrl, n10g + n1g);
405 			return (ENXIO);
406 		}
407 
408 		ntxq10g = t4_ntxq10g;
409 		ntxq1g = t4_ntxq1g;
410 		ntxq = n10g * ntxq10g + n1g * ntxq1g;
411 		if (ntxq > vfres->nethctrl) {
412 			/* Too many ETH queues.  Try just 1 for 1G. */
413 			ntxq1g = 1;
414 			ntxq = n10g * ntxq10g + n1g * ntxq1g;
415 		}
416 		if (ntxq > vfres->nethctrl) {
417 			/*
418 			 * Still too many ETH queues.  Use what we
419 			 * can for each 10G port.
420 			 */
421 			ntxq10g = (vfres->nethctrl - n1g) / n10g;
422 			ntxq = n10g * ntxq10g + n1g * ntxq1g;
423 		}
424 		KASSERT(ntxq <= vfres->nethctrl, ("too many ETH queues"));
425 
426 		/*
427 		 * Finally, ensure we have enough egress queues.
428 		 */
429 		if (vfres->neq < (n10g + n1g) * 2) {
430 			device_printf(sc->dev,
431 			    "Not enough egress queues (%d) for %d ports\n",
432 			    vfres->neq, n10g + n1g);
433 			return (ENXIO);
434 		}
435 		if (nrxq + ntxq > vfres->neq) {
436 			/* Just punt and use 1 for everything. */
437 			nrxq1g = ntxq1g = nrxq10g = ntxq10g = 1;
438 			nrxq = n10g * nrxq10g + n1g * nrxq1g;
439 			ntxq = n10g * ntxq10g + n1g * ntxq1g;
440 		}
441 		KASSERT(nrxq <= iq_avail, ("too many ingress queues"));
442 		KASSERT(ntxq <= vfres->nethctrl, ("too many ETH queues"));
443 		KASSERT(nrxq + ntxq <= vfres->neq, ("too many egress queues"));
444 
445 		/*
446 		 * Do we have enough interrupts?  For MSI the interrupts
447 		 * have to be a power of 2 as well.
448 		 */
449 		iaq->nirq += nrxq;
450 		iaq->ntxq10g = ntxq10g;
451 		iaq->ntxq1g = ntxq1g;
452 		iaq->nrxq10g = nrxq10g;
453 		iaq->nrxq1g = nrxq1g;
454 		if (iaq->nirq <= navail &&
455 		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
456 			navail = iaq->nirq;
457 			if (itype == INTR_MSIX)
458 				rc = pci_alloc_msix(sc->dev, &navail);
459 			else
460 				rc = pci_alloc_msi(sc->dev, &navail);
461 			if (rc != 0) {
462 				device_printf(sc->dev,
463 		    "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n",
464 				    itype, rc, iaq->nirq, navail);
465 				return (rc);
466 			}
467 			if (navail == iaq->nirq) {
468 				iaq->intr_flags_10g = INTR_RXQ;
469 				iaq->intr_flags_1g = INTR_RXQ;
470 				return (0);
471 			}
472 			pci_release_msi(sc->dev);
473 		}
474 
475 		/* Fall back to a single interrupt. */
476 		iaq->nirq = 1;
477 		navail = iaq->nirq;
478 		if (itype == INTR_MSIX)
479 			rc = pci_alloc_msix(sc->dev, &navail);
480 		else
481 			rc = pci_alloc_msi(sc->dev, &navail);
482 		if (rc != 0)
483 			device_printf(sc->dev,
484 		    "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n",
485 			    itype, rc, iaq->nirq, navail);
486 		iaq->intr_flags_10g = 0;
487 		iaq->intr_flags_1g = 0;
488 		return (rc);
489 	}
490 
491 	device_printf(sc->dev,
492 	    "failed to find a usable interrupt type.  "
493 	    "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types,
494 	    pci_msix_count(sc->dev), pci_msi_count(sc->dev));
495 
496 	return (ENXIO);
497 }
498 
/*
 * Attach routine used by the T4, T5 and T6 VF nexus drivers.
 *
 * In order, it: initializes the softc and its locks, maps BARs 0 and 4,
 * prepares the adapter, creates the nexus character device, resets the
 * function through the firmware, reads the operating parameters, maps
 * BAR 2, creates the DMA tag, allocates one port_info (with one VI) per
 * port and adds a child device for it, sizes interrupts and queues,
 * distributes the queues over the VIs, sets up the interrupt handlers and
 * finally attaches the child devices.
 *
 * Returns 0 on success.  Any failure jumps to "done", where
 * t4_detach_common() is called before the error is returned.
 */
static int
t4vf_attach(device_t dev)
{
	struct adapter *sc;
	int rc = 0, i, j, n10g, n1g, rqidx, tqidx;
	struct make_dev_args mda;
	struct intrs_and_queues iaq;
	struct sge *s;

	sc = device_get_softc(dev);
	sc->dev = dev;
	pci_enable_busmaster(dev);
	pci_set_max_read_req(dev, 4096);
	sc->params.pci.mps = pci_get_max_payload(dev);

	sc->flags |= IS_VF;

	sc->sge_gts_reg = VF_SGE_REG(A_SGE_VF_GTS);
	sc->sge_kdoorbell_reg = VF_SGE_REG(A_SGE_VF_KDOORBELL);
	snprintf(sc->lockname, sizeof(sc->lockname), "%s",
	    device_get_nameunit(dev));
	mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
	t4_add_adapter(sc);

	mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
	TAILQ_INIT(&sc->sfl);
	callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);

	mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF);

	rc = t4_map_bars_0_and_4(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = -t4vf_prep_adapter(sc);
	if (rc != 0)
		goto done;

	t4_init_devnames(sc);
	if (sc->names == NULL) {
		rc = ENOTSUP;
		goto done; /* error message displayed already */
	}

	/*
	 * Leave the 'pf' and 'mbox' values as zero.  This ensures
	 * that various firmware messages do not set the fields which
	 * is the correct thing to do for a VF.
	 */

	memset(sc->chan_map, 0xff, sizeof(sc->chan_map));

	/*
	 * Create the nexus character device.  A failure here is only
	 * logged; attach carries on and rc is overwritten below.
	 */
	make_dev_args_init(&mda);
	mda.mda_devsw = &t4vf_cdevsw;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
	if (rc != 0)
		device_printf(dev, "failed to create nexus char device: %d.\n",
		    rc);

#if defined(__i386__)
	if ((cpu_feature & CPUID_CX8) == 0) {
		device_printf(dev, "64 bit atomics not available.\n");
		rc = ENOTSUP;
		goto done;
	}
#endif

	/*
	 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
	 * 2.6.31 and later we can't call pci_reset_function() in order to
	 * issue an FLR because of a self-deadlock on the device semaphore.
	 * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
	 * cases where they're needed -- for instance, some versions of KVM
	 * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
	 * use the firmware based reset in order to reset any per function
	 * state.
	 */
	rc = -t4vf_fw_reset(sc);
	if (rc != 0) {
		device_printf(dev, "FW reset failed: %d\n", rc);
		goto done;
	}
	sc->flags |= FW_OK;

	/*
	 * Grab basic operational parameters.  These will predominantly have
	 * been set up by the Physical Function Driver or will be hard coded
	 * into the adapter.  We just have to live with them ...  Note that
	 * we _must_ get our VPD parameters before our SGE parameters because
	 * we need to know the adapter's core clock from the VPD in order to
	 * properly decode the SGE Timer Values.
	 */
	rc = get_params__pre_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */
	rc = get_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = set_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = t4_map_bar_2(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = t4_create_dma_tag(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	/*
	 * The number of "ports" which we support is equal to the number of
	 * Virtual Interfaces with which we've been provisioned.
	 */
	sc->params.nports = imin(sc->params.vfres.nvi, MAX_NPORTS);

	/*
	 * We may have been provisioned with more VIs than the number of
	 * ports we're allowed to access (our Port Access Rights Mask).
	 * Just use a single VI for each port.
	 */
	sc->params.nports = imin(sc->params.nports,
	    bitcount32(sc->params.vfres.pmask));

#ifdef notyet
	/*
	 * XXX: The Linux VF driver will lower nports if it thinks there
	 * are too few resources in vfres (niqflint, nethctrl, neq).
	 */
#endif

	/*
	 * First pass over all the ports - allocate VIs and initialize some
	 * basic parameters like mac address, port type, etc.  We also figure
	 * out whether a port is 10G or 1G and use that information when
	 * calculating how many interrupts to attempt to allocate.
	 */
	n10g = n1g = 0;
	for_each_port(sc, i) {
		struct port_info *pi;

		pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
		sc->port[i] = pi;

		/* These must be set before t4_port_init */
		pi->adapter = sc;
		pi->port_id = i;
		pi->nvi = 1;
		pi->vi = malloc(sizeof(struct vi_info) * pi->nvi, M_CXGBE,
		    M_ZERO | M_WAITOK);

		/*
		 * Allocate the "main" VI and initialize parameters
		 * like mac addr.
		 */
		rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i);
		if (rc != 0) {
			device_printf(dev, "unable to initialize port %d: %d\n",
			    i, rc);
			/* Undo this port's allocations before bailing out. */
			free(pi->vi, M_CXGBE);
			free(pi, M_CXGBE);
			sc->port[i] = NULL;
			goto done;
		}

		/* No t4_link_start. */

		snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
		    device_get_nameunit(dev), i);
		mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
		sc->chan_map[pi->tx_chan] = i;

		/* Anything with a top speed below 10 is counted as 1G. */
		if (port_top_speed(pi) >= 10) {
			n10g++;
		} else {
			n1g++;
		}

		pi->dev = device_add_child(dev, sc->names->vf_ifnet_name, -1);
		if (pi->dev == NULL) {
			device_printf(dev,
			    "failed to add device for port %d.\n", i);
			rc = ENXIO;
			goto done;
		}
		pi->vi[0].dev = pi->dev;
		device_set_softc(pi->dev, pi);
	}

	/*
	 * Interrupt type, # of interrupts, # of rx/tx queues, etc.
	 */
	rc = cfg_itype_and_nqueues(sc, n10g, n1g, &iaq);
	if (rc != 0)
		goto done; /* error message displayed already */

	sc->intr_type = iaq.intr_type;
	sc->intr_count = iaq.nirq;

	s = &sc->sge;
	s->nrxq = n10g * iaq.nrxq10g + n1g * iaq.nrxq1g;
	s->ntxq = n10g * iaq.ntxq10g + n1g * iaq.ntxq1g;
	s->neq = s->ntxq + s->nrxq;	/* the free list in an rxq is an eq */
	s->neq += sc->params.nports + 1;/* ctrl queues: 1 per port + 1 mgmt */
	s->niq = s->nrxq + 1;		/* 1 extra for firmware event queue */

	s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->iqmap = malloc(s->niq * sizeof(struct sge_iq *), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->eqmap = malloc(s->neq * sizeof(struct sge_eq *), M_CXGBE,
	    M_ZERO | M_WAITOK);

	sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
	    M_ZERO | M_WAITOK);

	/*
	 * Second pass over the ports.  This time we know the number of rx and
	 * tx queues that each port should get.
	 */
	rqidx = tqidx = 0;
	for_each_port(sc, i) {
		struct port_info *pi = sc->port[i];
		struct vi_info *vi;

		if (pi == NULL)
			continue;

		for_each_vi(pi, j, vi) {
			vi->pi = pi;
			vi->qsize_rxq = t4_qsize_rxq;
			vi->qsize_txq = t4_qsize_txq;

			/* Queues are handed out as consecutive index ranges. */
			vi->first_rxq = rqidx;
			vi->first_txq = tqidx;
			if (port_top_speed(pi) >= 10) {
				vi->tmr_idx = t4_tmr_idx_10g;
				vi->pktc_idx = t4_pktc_idx_10g;
				vi->flags |= iaq.intr_flags_10g & INTR_RXQ;
				vi->nrxq = j == 0 ? iaq.nrxq10g : 1;
				vi->ntxq = j == 0 ? iaq.ntxq10g : 1;
			} else {
				vi->tmr_idx = t4_tmr_idx_1g;
				vi->pktc_idx = t4_pktc_idx_1g;
				vi->flags |= iaq.intr_flags_1g & INTR_RXQ;
				vi->nrxq = j == 0 ? iaq.nrxq1g : 1;
				vi->ntxq = j == 0 ? iaq.ntxq1g : 1;
			}
			rqidx += vi->nrxq;
			tqidx += vi->ntxq;

			vi->rsrv_noflowq = 0;
		}
	}

	rc = t4_setup_intr_handlers(sc);
	if (rc != 0) {
		device_printf(dev,
		    "failed to setup interrupt handlers: %d\n", rc);
		goto done;
	}

	rc = bus_generic_attach(dev);
	if (rc != 0) {
		device_printf(dev,
		    "failed to attach all child ports: %d\n", rc);
		goto done;
	}

	device_printf(dev,
	    "%d ports, %d %s interrupt%s, %d eq, %d iq\n",
	    sc->params.nports, sc->intr_count, sc->intr_type == INTR_MSIX ?
	    "MSI-X" : "MSI", sc->intr_count > 1 ? "s" : "", sc->sge.neq,
	    sc->sge.niq);

done:
	/* Common exit: tear everything down on error, add sysctls on success. */
	if (rc != 0)
		t4_detach_common(dev);
	else
		t4_sysctls(sc);

	return (rc);
}
789 
790 static void
791 get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
792 {
793 
794 	/* 0x3f is used as the revision for VFs. */
795 	regs->version = chip_id(sc) | (0x3f << 10);
796 	t4_get_regs(sc, buf, regs->len);
797 }
798 
799 static void
800 t4_clr_vi_stats(struct adapter *sc)
801 {
802 	int reg;
803 
804 	for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
805 	     reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
806 		t4_write_reg(sc, VF_MPS_REG(reg), 0);
807 }
808 
809 static int
810 t4vf_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
811     struct thread *td)
812 {
813 	int rc;
814 	struct adapter *sc = dev->si_drv1;
815 
816 	rc = priv_check(td, PRIV_DRIVER);
817 	if (rc != 0)
818 		return (rc);
819 
820 	switch (cmd) {
821 	case CHELSIO_T4_GETREG: {
822 		struct t4_reg *edata = (struct t4_reg *)data;
823 
824 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
825 			return (EFAULT);
826 
827 		if (edata->size == 4)
828 			edata->val = t4_read_reg(sc, edata->addr);
829 		else if (edata->size == 8)
830 			edata->val = t4_read_reg64(sc, edata->addr);
831 		else
832 			return (EINVAL);
833 
834 		break;
835 	}
836 	case CHELSIO_T4_SETREG: {
837 		struct t4_reg *edata = (struct t4_reg *)data;
838 
839 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
840 			return (EFAULT);
841 
842 		if (edata->size == 4) {
843 			if (edata->val & 0xffffffff00000000)
844 				return (EINVAL);
845 			t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
846 		} else if (edata->size == 8)
847 			t4_write_reg64(sc, edata->addr, edata->val);
848 		else
849 			return (EINVAL);
850 		break;
851 	}
852 	case CHELSIO_T4_REGDUMP: {
853 		struct t4_regdump *regs = (struct t4_regdump *)data;
854 		int reglen = t4_get_regs_len(sc);
855 		uint8_t *buf;
856 
857 		if (regs->len < reglen) {
858 			regs->len = reglen; /* hint to the caller */
859 			return (ENOBUFS);
860 		}
861 
862 		regs->len = reglen;
863 		buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
864 		get_regs(sc, regs, buf);
865 		rc = copyout(buf, regs->data, reglen);
866 		free(buf, M_CXGBE);
867 		break;
868 	}
869 	case CHELSIO_T4_CLEAR_STATS: {
870 		int i, v;
871 		u_int port_id = *(uint32_t *)data;
872 		struct port_info *pi;
873 		struct vi_info *vi;
874 
875 		if (port_id >= sc->params.nports)
876 			return (EINVAL);
877 		pi = sc->port[port_id];
878 
879 		/* MAC stats */
880 		pi->tx_parse_error = 0;
881 		t4_clr_vi_stats(sc);
882 
883 		/*
884 		 * Since this command accepts a port, clear stats for
885 		 * all VIs on this port.
886 		 */
887 		for_each_vi(pi, v, vi) {
888 			if (vi->flags & VI_INIT_DONE) {
889 				struct sge_rxq *rxq;
890 				struct sge_txq *txq;
891 
892 				for_each_rxq(vi, i, rxq) {
893 #if defined(INET) || defined(INET6)
894 					rxq->lro.lro_queued = 0;
895 					rxq->lro.lro_flushed = 0;
896 #endif
897 					rxq->rxcsum = 0;
898 					rxq->vlan_extraction = 0;
899 				}
900 
901 				for_each_txq(vi, i, txq) {
902 					txq->txcsum = 0;
903 					txq->tso_wrs = 0;
904 					txq->vlan_insertion = 0;
905 					txq->imm_wrs = 0;
906 					txq->sgl_wrs = 0;
907 					txq->txpkt_wrs = 0;
908 					txq->txpkts0_wrs = 0;
909 					txq->txpkts1_wrs = 0;
910 					txq->txpkts0_pkts = 0;
911 					txq->txpkts1_pkts = 0;
912 					mp_ring_reset_stats(txq->r);
913 				}
914 			}
915 		}
916 		break;
917 	}
918 	case CHELSIO_T4_SCHED_CLASS:
919 		rc = t4_set_sched_class(sc, (struct t4_sched_params *)data);
920 		break;
921 	case CHELSIO_T4_SCHED_QUEUE:
922 		rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data);
923 		break;
924 	default:
925 		rc = ENOTTY;
926 	}
927 
928 	return (rc);
929 }
930 
/*
 * Device methods of the T4 VF nexus: its own probe routine, the attach
 * routine shared by all VF nexus drivers, and t4_detach_common for detach.
 */
static device_method_t t4vf_methods[] = {
	DEVMETHOD(device_probe,		t4vf_probe),
	DEVMETHOD(device_attach,	t4vf_attach),
	DEVMETHOD(device_detach,	t4_detach_common),

	DEVMETHOD_END
};

/* T4 VF nexus driver; t4vf_attach uses the softc as a struct adapter. */
static driver_t t4vf_driver = {
	"t4vf",
	t4vf_methods,
	sizeof(struct adapter)
};
944 
/*
 * Device methods of the T5 VF nexus: its own probe routine, the attach
 * routine shared by all VF nexus drivers, and t4_detach_common for detach.
 */
static device_method_t t5vf_methods[] = {
	DEVMETHOD(device_probe,		t5vf_probe),
	DEVMETHOD(device_attach,	t4vf_attach),
	DEVMETHOD(device_detach,	t4_detach_common),

	DEVMETHOD_END
};

/* T5 VF nexus driver; t4vf_attach uses the softc as a struct adapter. */
static driver_t t5vf_driver = {
	"t5vf",
	t5vf_methods,
	sizeof(struct adapter)
};
958 
/*
 * Device methods of the T6 VF nexus: its own probe routine, the attach
 * routine shared by all VF nexus drivers, and t4_detach_common for detach.
 */
static device_method_t t6vf_methods[] = {
	DEVMETHOD(device_probe,		t6vf_probe),
	DEVMETHOD(device_attach,	t4vf_attach),
	DEVMETHOD(device_detach,	t4_detach_common),

	DEVMETHOD_END
};

/* T6 VF nexus driver; t4vf_attach uses the softc as a struct adapter. */
static driver_t t6vf_driver = {
	"t6vf",
	t6vf_methods,
	sizeof(struct adapter)
};
972 
/*
 * Drivers for the per-port child devices, one per chip generation (they
 * are registered under the t4vf, t5vf and t6vf nexus drivers at the end of
 * this file).  All three share the cxgbe_methods table, which is not
 * defined in this file; t4vf_attach sets each child's softc to its
 * struct port_info.
 */
static driver_t cxgbev_driver = {
	"cxgbev",
	cxgbe_methods,
	sizeof(struct port_info)
};

static driver_t cxlv_driver = {
	"cxlv",
	cxgbe_methods,
	sizeof(struct port_info)
};

static driver_t ccv_driver = {
	"ccv",
	cxgbe_methods,
	sizeof(struct port_info)
};
990 
/* Device classes for the three nexus drivers and the three port drivers. */
static devclass_t t4vf_devclass, t5vf_devclass, t6vf_devclass;
static devclass_t cxgbev_devclass, cxlv_devclass, ccv_devclass;

/*
 * Nexus drivers: each is registered on the pci bus and declares a
 * dependency on the matching t4nex/t5nex/t6nex module.
 */
DRIVER_MODULE(t4vf, pci, t4vf_driver, t4vf_devclass, 0, 0);
MODULE_VERSION(t4vf, 1);
MODULE_DEPEND(t4vf, t4nex, 1, 1, 1);

DRIVER_MODULE(t5vf, pci, t5vf_driver, t5vf_devclass, 0, 0);
MODULE_VERSION(t5vf, 1);
MODULE_DEPEND(t5vf, t5nex, 1, 1, 1);

DRIVER_MODULE(t6vf, pci, t6vf_driver, t6vf_devclass, 0, 0);
MODULE_VERSION(t6vf, 1);
MODULE_DEPEND(t6vf, t6nex, 1, 1, 1);

/* Port drivers: each is registered under its chip's VF nexus driver. */
DRIVER_MODULE(cxgbev, t4vf, cxgbev_driver, cxgbev_devclass, 0, 0);
MODULE_VERSION(cxgbev, 1);

DRIVER_MODULE(cxlv, t5vf, cxlv_driver, cxlv_devclass, 0, 0);
MODULE_VERSION(cxlv, 1);

DRIVER_MODULE(ccv, t6vf, ccv_driver, ccv_devclass, 0, 0);
MODULE_VERSION(ccv, 1);
1014