xref: /freebsd/sys/dev/cxgbe/t4_vf.c (revision 1ba1c783296bc651e1af3174a1643dff39edf05a)
1 /*-
2  * Copyright (c) 2016 Chelsio Communications, Inc.
3  * All rights reserved.
4  * Written by: John Baldwin <jhb@FreeBSD.org>
5  *
6  * Redistribution and use in source and binary forms, with or without
7  * modification, are permitted provided that the following conditions
8  * are met:
9  * 1. Redistributions of source code must retain the above copyright
10  *    notice, this list of conditions and the following disclaimer.
11  * 2. Redistributions in binary form must reproduce the above copyright
12  *    notice, this list of conditions and the following disclaimer in the
13  *    documentation and/or other materials provided with the distribution.
14  *
15  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
19  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25  * SUCH DAMAGE.
26  */
27 
28 #include <sys/cdefs.h>
29 #include "opt_inet.h"
30 #include "opt_inet6.h"
31 
32 #include <sys/param.h>
33 #include <sys/bus.h>
34 #include <sys/conf.h>
35 #include <sys/kernel.h>
36 #include <sys/module.h>
37 #include <sys/priv.h>
38 #include <dev/pci/pcivar.h>
39 #if defined(__i386__) || defined(__amd64__)
40 #include <vm/vm.h>
41 #include <vm/pmap.h>
42 #endif
43 
44 #include "common/common.h"
45 #include "common/t4_regs.h"
46 #include "t4_ioctl.h"
47 #include "t4_mp_ring.h"
48 
49 /*
50  * Some notes:
51  *
52  * The Virtual Interfaces are connected to an internal switch on the chip
53  * which allows VIs attached to the same port to talk to each other even when
54  * the port link is down.  As a result, we might want to always report a
55  * VF's link as being "up".
56  *
57  * XXX: Add a TUNABLE and possible per-device sysctl for this?
58  */
59 
/*
 * Interrupt and queue sizing decided during attach.  Filled in by
 * cfg_itype_and_nqueues() and consumed by t4vf_attach() to size the SGE
 * queue arrays and the interrupt array.
 */
struct intrs_and_queues {
	uint16_t intr_type;	/* MSI, or MSI-X */
	uint16_t nirq;		/* Total # of vectors */
	uint16_t ntxq;		/* # of NIC txq's for each port */
	uint16_t nrxq;		/* # of NIC rxq's for each port */
};
66 
/*
 * PCI device IDs of the Virtual Functions handled by this driver, one table
 * per chip generation.  Each entry pairs a device ID with the description
 * string reported for it at probe time.  The descriptions are string
 * literals and are only ever read, so they are held through a pointer to
 * const.
 */
struct {
	uint16_t device;
	const char *desc;
} t4vf_pciids[] = {
	{0x4800, "Chelsio T440-dbg VF"},
	{0x4801, "Chelsio T420-CR VF"},
	{0x4802, "Chelsio T422-CR VF"},
	{0x4803, "Chelsio T440-CR VF"},
	{0x4804, "Chelsio T420-BCH VF"},
	{0x4805, "Chelsio T440-BCH VF"},
	{0x4806, "Chelsio T440-CH VF"},
	{0x4807, "Chelsio T420-SO VF"},
	{0x4808, "Chelsio T420-CX VF"},
	{0x4809, "Chelsio T420-BT VF"},
	{0x480a, "Chelsio T404-BT VF"},
	{0x480e, "Chelsio T440-LP-CR VF"},
}, t5vf_pciids[] = {
	{0x5800, "Chelsio T580-dbg VF"},
	{0x5801, "Chelsio T520-CR VF"},		/* 2 x 10G */
	{0x5802, "Chelsio T522-CR VF"},		/* 2 x 10G, 2 X 1G */
	{0x5803, "Chelsio T540-CR VF"},		/* 4 x 10G */
	{0x5807, "Chelsio T520-SO VF"},		/* 2 x 10G, nomem */
	{0x5809, "Chelsio T520-BT VF"},		/* 2 x 10GBaseT */
	{0x580a, "Chelsio T504-BT VF"},		/* 4 x 1G */
	{0x580d, "Chelsio T580-CR VF"},		/* 2 x 40G */
	{0x580e, "Chelsio T540-LP-CR VF"},	/* 4 x 10G */
	{0x5810, "Chelsio T580-LP-CR VF"},	/* 2 x 40G */
	{0x5811, "Chelsio T520-LL-CR VF"},	/* 2 x 10G */
	{0x5812, "Chelsio T560-CR VF"},		/* 1 x 40G, 2 x 10G */
	{0x5814, "Chelsio T580-LP-SO-CR VF"},	/* 2 x 40G, nomem */
	{0x5815, "Chelsio T502-BT VF"},		/* 2 x 1G */
	{0x5818, "Chelsio T540-BT VF"},		/* 4 x 10GBaseT */
	{0x5819, "Chelsio T540-LP-BT VF"},	/* 4 x 10GBaseT */
	{0x581a, "Chelsio T540-SO-BT VF"},	/* 4 x 10GBaseT, nomem */
	{0x581b, "Chelsio T540-SO-CR VF"},	/* 4 x 10G, nomem */
}, t6vf_pciids[] = {
	{0x6800, "Chelsio T6-DBG-25 VF"},	/* 2 x 10/25G, debug */
	{0x6801, "Chelsio T6225-CR VF"},	/* 2 x 10/25G */
	{0x6802, "Chelsio T6225-SO-CR VF"},	/* 2 x 10/25G, nomem */
	{0x6803, "Chelsio T6425-CR VF"},	/* 4 x 10/25G */
	{0x6804, "Chelsio T6425-SO-CR VF"},	/* 4 x 10/25G, nomem */
	{0x6805, "Chelsio T6225-SO-OCP3 VF"},	/* 2 x 10/25G, nomem */
	{0x6806, "Chelsio T6225-OCP3 VF"},	/* 2 x 10/25G */
	{0x6807, "Chelsio T62100-LP-CR VF"},	/* 2 x 40/50/100G */
	{0x6808, "Chelsio T62100-SO-CR VF"},	/* 2 x 40/50/100G, nomem */
	{0x6809, "Chelsio T6210-BT VF"},	/* 2 x 10GBASE-T */
	{0x680d, "Chelsio T62100-CR VF"},	/* 2 x 40/50/100G */
	{0x6810, "Chelsio T6-DBG-100 VF"},	/* 2 x 40/50/100G, debug */
	{0x6811, "Chelsio T6225-LL-CR VF"},	/* 2 x 10/25G */
	{0x6814, "Chelsio T62100-SO-OCP3 VF"},	/* 2 x 40/50/100G, nomem */
	{0x6815, "Chelsio T6201-BT VF"},	/* 2 x 1000BASE-T */

	/* Custom */
	{0x6880, "Chelsio T6225 80 VF"},
	{0x6881, "Chelsio T62100 81 VF"},
	{0x6882, "Chelsio T6225-CR 82 VF"},
	{0x6883, "Chelsio T62100-CR 83 VF"},
	{0x6884, "Chelsio T64100-CR 84 VF"},
	{0x6885, "Chelsio T6240-SO 85 VF"},
	{0x6886, "Chelsio T6225-SO-CR 86 VF"},
	{0x6887, "Chelsio T6225-CR 87 VF"},
}, t7vf_pciids[] = {
	{0xd800, "Chelsio T7 FPGA VF"},		/* T7 PE12K FPGA */
	{0x7800, "Chelsio T72200-DBG VF"},	/* 2 x 200G, debug */
	{0x7801, "Chelsio T7250 VF"},		/* 2 x 10/25/50G, 1 mem */
	{0x7802, "Chelsio S7250 VF"},		/* 2 x 10/25/50G, nomem */
	{0x7803, "Chelsio T7450 VF"},		/* 4 x 10/25/50G, 1 mem */
	{0x7804, "Chelsio S7450 VF"},		/* 4 x 10/25/50G, nomem */
	{0x7805, "Chelsio T72200 VF"},		/* 2 x 40/100/200G, 1 mem */
	{0x7806, "Chelsio S72200 VF"},		/* 2 x 40/100/200G, nomem */
	{0x7807, "Chelsio T72200-FH VF"},	/* 2 x 40/100/200G, 2 mem */
	{0x7808, "Chelsio T71400 VF"},		/* 1 x 400G, nomem */
	{0x7809, "Chelsio S7210-BT VF"},	/* 2 x 10GBASE-T, nomem */
	{0x780a, "Chelsio T7450-RC VF"},	/* 4 x 10/25/50G, 1 mem, RC */
	{0x780b, "Chelsio T72200-RC VF"},	/* 2 x 40/100/200G, 1 mem, RC */
	{0x780c, "Chelsio T72200-FH-RC VF"},	/* 2 x 40/100/200G, 2 mem, RC */
	{0x780d, "Chelsio S72200-OCP3 VF"},	/* 2 x 40/100/200G OCP3 */
	{0x780e, "Chelsio S7450-OCP3 VF"},	/* 4 x 1/20/25/50G OCP3 */
	{0x780f, "Chelsio S7410-BT-OCP3 VF"},	/* 4 x 10GBASE-T OCP3 */
	{0x7810, "Chelsio S7210-BT-A VF"},	/* 2 x 10GBASE-T */
	{0x7811, "Chelsio T7_MAYRA_7 VF"},	/* Motherboard */

	{0x7880, "Custom T7 VF"},
};
151 
152 static d_ioctl_t t4vf_ioctl;
153 
154 static struct cdevsw t4vf_cdevsw = {
155        .d_version = D_VERSION,
156        .d_ioctl = t4vf_ioctl,
157        .d_name = "t4vf",
158 };
159 
160 static int
161 t4vf_probe(device_t dev)
162 {
163 	uint16_t d;
164 	size_t i;
165 
166 	d = pci_get_device(dev);
167 	for (i = 0; i < nitems(t4vf_pciids); i++) {
168 		if (d == t4vf_pciids[i].device) {
169 			device_set_desc(dev, t4vf_pciids[i].desc);
170 			return (BUS_PROBE_DEFAULT);
171 		}
172 	}
173 	return (ENXIO);
174 }
175 
176 static int
177 t5vf_probe(device_t dev)
178 {
179 	uint16_t d;
180 	size_t i;
181 
182 	d = pci_get_device(dev);
183 	for (i = 0; i < nitems(t5vf_pciids); i++) {
184 		if (d == t5vf_pciids[i].device) {
185 			device_set_desc(dev, t5vf_pciids[i].desc);
186 			return (BUS_PROBE_DEFAULT);
187 		}
188 	}
189 	return (ENXIO);
190 }
191 
192 static int
193 t6vf_probe(device_t dev)
194 {
195 	uint16_t d;
196 	size_t i;
197 
198 	d = pci_get_device(dev);
199 	for (i = 0; i < nitems(t6vf_pciids); i++) {
200 		if (d == t6vf_pciids[i].device) {
201 			device_set_desc(dev, t6vf_pciids[i].desc);
202 			return (BUS_PROBE_DEFAULT);
203 		}
204 	}
205 	return (ENXIO);
206 }
207 
208 static int
209 chvf_probe(device_t dev)
210 {
211 	uint16_t d;
212 	size_t i;
213 
214 	d = pci_get_device(dev);
215 	for (i = 0; i < nitems(t7vf_pciids); i++) {
216 		if (d == t7vf_pciids[i].device) {
217 			device_set_desc(dev, t7vf_pciids[i].desc);
218 			return (BUS_PROBE_DEFAULT);
219 		}
220 	}
221 	return (ENXIO);
222 }
223 
/*
 * Shorthand for building firmware parameter identifiers: the DEV or PFVF
 * mnemonic combined with the named parameter index.  Both macros are
 * #undef'd once the parameter helpers below have been defined.
 */
#define FW_PARAM_DEV(p) \
	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_DEV) | \
	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_DEV_##p))
#define FW_PARAM_PFVF(p) \
	(V_FW_PARAMS_MNEM(FW_PARAMS_MNEM_PFVF) | \
	 V_FW_PARAMS_PARAM_X(FW_PARAMS_PARAM_PFVF_##p))
230 
231 static int
232 get_params__pre_init(struct adapter *sc)
233 {
234 	int rc;
235 	uint32_t param[3], val[3];
236 
237 	param[0] = FW_PARAM_DEV(FWREV);
238 	param[1] = FW_PARAM_DEV(TPREV);
239 	param[2] = FW_PARAM_DEV(CCLK);
240 	rc = -t4vf_query_params(sc, nitems(param), param, val);
241 	if (rc != 0) {
242 		device_printf(sc->dev,
243 		    "failed to query parameters (pre_init): %d.\n", rc);
244 		return (rc);
245 	}
246 
247 	sc->params.fw_vers = val[0];
248 	sc->params.tp_vers = val[1];
249 	sc->params.vpd.cclk = val[2];
250 
251 	snprintf(sc->fw_version, sizeof(sc->fw_version), "%u.%u.%u.%u",
252 	    G_FW_HDR_FW_VER_MAJOR(sc->params.fw_vers),
253 	    G_FW_HDR_FW_VER_MINOR(sc->params.fw_vers),
254 	    G_FW_HDR_FW_VER_MICRO(sc->params.fw_vers),
255 	    G_FW_HDR_FW_VER_BUILD(sc->params.fw_vers));
256 
257 	snprintf(sc->tp_version, sizeof(sc->tp_version), "%u.%u.%u.%u",
258 	    G_FW_HDR_FW_VER_MAJOR(sc->params.tp_vers),
259 	    G_FW_HDR_FW_VER_MINOR(sc->params.tp_vers),
260 	    G_FW_HDR_FW_VER_MICRO(sc->params.tp_vers),
261 	    G_FW_HDR_FW_VER_BUILD(sc->params.tp_vers));
262 
263 	return (0);
264 }
265 
266 static int
267 get_params__post_init(struct adapter *sc)
268 {
269 	int rc;
270 	uint32_t param, val;
271 
272 	rc = -t4vf_get_sge_params(sc);
273 	if (rc != 0) {
274 		device_printf(sc->dev,
275 		    "unable to retrieve adapter SGE parameters: %d\n", rc);
276 		return (rc);
277 	}
278 
279 	rc = -t4vf_get_rss_glb_config(sc);
280 	if (rc != 0) {
281 		device_printf(sc->dev,
282 		    "unable to retrieve adapter RSS parameters: %d\n", rc);
283 		return (rc);
284 	}
285 	if (sc->params.rss.mode != FW_RSS_GLB_CONFIG_CMD_MODE_BASICVIRTUAL) {
286 		device_printf(sc->dev,
287 		    "unable to operate with global RSS mode %d\n",
288 		    sc->params.rss.mode);
289 		return (EINVAL);
290 	}
291 
292 	/*
293 	 * Grab our Virtual Interface resource allocation, extract the
294 	 * features that we're interested in and do a bit of sanity testing on
295 	 * what we discover.
296 	 */
297 	rc = -t4vf_get_vfres(sc);
298 	if (rc != 0) {
299 		device_printf(sc->dev,
300 		    "unable to get virtual interface resources: %d\n", rc);
301 		return (rc);
302 	}
303 
304 	/*
305 	 * Check for various parameter sanity issues.
306 	 */
307 	if (sc->params.vfres.pmask == 0) {
308 		device_printf(sc->dev, "no port access configured/usable!\n");
309 		return (EINVAL);
310 	}
311 	if (sc->params.vfres.nvi == 0) {
312 		device_printf(sc->dev,
313 		    "no virtual interfaces configured/usable!\n");
314 		return (EINVAL);
315 	}
316 	sc->params.portvec = sc->params.vfres.pmask;
317 
318 	param = FW_PARAM_PFVF(MAX_PKTS_PER_ETH_TX_PKTS_WR);
319 	rc = -t4vf_query_params(sc, 1, &param, &val);
320 	if (rc == 0)
321 		sc->params.max_pkts_per_eth_tx_pkts_wr = val;
322 	else
323 		sc->params.max_pkts_per_eth_tx_pkts_wr = 14;
324 
325 	rc = t4_verify_chip_settings(sc);
326 	if (rc != 0)
327 		return (rc);
328 	t4_init_rx_buf_info(sc);
329 
330 	return (0);
331 }
332 
333 static int
334 set_params__post_init(struct adapter *sc)
335 {
336 	uint32_t param, val;
337 
338 	/* ask for encapsulated CPLs */
339 	param = FW_PARAM_PFVF(CPLFW4MSG_ENCAP);
340 	val = 1;
341 	(void)t4vf_set_params(sc, 1, &param, &val);
342 
343 	/* Enable 32b port caps if the firmware supports it. */
344 	param = FW_PARAM_PFVF(PORT_CAPS32);
345 	val = 1;
346 	if (t4vf_set_params(sc, 1, &param, &val) == 0)
347 		sc->params.port_caps32 = 1;
348 
349 	return (0);
350 }
351 
352 #undef FW_PARAM_PFVF
353 #undef FW_PARAM_DEV
354 
355 static int
356 cfg_itype_and_nqueues(struct adapter *sc, struct intrs_and_queues *iaq)
357 {
358 	struct vf_resources *vfres;
359 	int nrxq, ntxq, nports;
360 	int itype, iq_avail, navail, rc;
361 
362 	/*
363 	 * Figure out the layout of queues across our VIs and ensure
364 	 * we can allocate enough interrupts for our layout.
365 	 */
366 	vfres = &sc->params.vfres;
367 	nports = sc->params.nports;
368 	bzero(iaq, sizeof(*iaq));
369 
370 	for (itype = INTR_MSIX; itype != 0; itype >>= 1) {
371 		if (itype == INTR_INTX)
372 			continue;
373 
374 		if (itype == INTR_MSIX)
375 			navail = pci_msix_count(sc->dev);
376 		else
377 			navail = pci_msi_count(sc->dev);
378 
379 		if (navail == 0)
380 			continue;
381 
382 		iaq->intr_type = itype;
383 
384 		/*
385 		 * XXX: The Linux driver reserves an Ingress Queue for
386 		 * forwarded interrupts when using MSI (but not MSI-X).
387 		 * It seems it just always asks for 2 interrupts and
388 		 * forwards all rxqs to the forwarded interrupt.
389 		 *
390 		 * We must reserve one IRQ for the for the firmware
391 		 * event queue.
392 		 *
393 		 * Every rxq requires an ingress queue with a free
394 		 * list and interrupts and an egress queue.  Every txq
395 		 * requires an ETH egress queue.
396 		 */
397 		iaq->nirq = T4VF_EXTRA_INTR;
398 
399 		/*
400 		 * First, determine how many queues we can allocate.
401 		 * Start by finding the upper bound on rxqs from the
402 		 * limit on ingress queues.
403 		 */
404 		iq_avail = vfres->niqflint - iaq->nirq;
405 		if (iq_avail < nports) {
406 			device_printf(sc->dev,
407 			    "Not enough ingress queues (%d) for %d ports\n",
408 			    vfres->niqflint, nports);
409 			return (ENXIO);
410 		}
411 
412 		/*
413 		 * Try to honor the cap on interrupts.  If there aren't
414 		 * enough interrupts for at least one interrupt per
415 		 * port, then don't bother, we will just forward all
416 		 * interrupts to one interrupt in that case.
417 		 */
418 		if (iaq->nirq + nports <= navail) {
419 			if (iq_avail > navail - iaq->nirq)
420 				iq_avail = navail - iaq->nirq;
421 		}
422 
423 		nrxq = nports * t4_nrxq;
424 		if (nrxq > iq_avail) {
425 			/*
426 			 * Too many ingress queues.  Use what we can.
427 			 */
428 			nrxq = (iq_avail / nports) * nports;
429 		}
430 		KASSERT(nrxq <= iq_avail, ("too many ingress queues"));
431 
432 		/*
433 		 * Next, determine the upper bound on txqs from the limit
434 		 * on ETH queues.
435 		 */
436 		if (vfres->nethctrl < nports) {
437 			device_printf(sc->dev,
438 			    "Not enough ETH queues (%d) for %d ports\n",
439 			    vfres->nethctrl, nports);
440 			return (ENXIO);
441 		}
442 
443 		ntxq = nports * t4_ntxq;
444 		if (ntxq > vfres->nethctrl) {
445 			/*
446 			 * Too many ETH queues.  Use what we can.
447 			 */
448 			ntxq = (vfres->nethctrl / nports) * nports;
449 		}
450 		KASSERT(ntxq <= vfres->nethctrl, ("too many ETH queues"));
451 
452 		/*
453 		 * Finally, ensure we have enough egress queues.
454 		 */
455 		if (vfres->neq < nports * 2) {
456 			device_printf(sc->dev,
457 			    "Not enough egress queues (%d) for %d ports\n",
458 			    vfres->neq, nports);
459 			return (ENXIO);
460 		}
461 		if (nrxq + ntxq > vfres->neq) {
462 			/* Just punt and use 1 for everything. */
463 			nrxq = ntxq = nports;
464 		}
465 		KASSERT(nrxq <= iq_avail, ("too many ingress queues"));
466 		KASSERT(ntxq <= vfres->nethctrl, ("too many ETH queues"));
467 		KASSERT(nrxq + ntxq <= vfres->neq, ("too many egress queues"));
468 
469 		/*
470 		 * Do we have enough interrupts?  For MSI the interrupts
471 		 * have to be a power of 2 as well.
472 		 */
473 		iaq->nirq += nrxq;
474 		iaq->ntxq = ntxq;
475 		iaq->nrxq = nrxq;
476 		if (iaq->nirq <= navail &&
477 		    (itype != INTR_MSI || powerof2(iaq->nirq))) {
478 			navail = iaq->nirq;
479 			if (itype == INTR_MSIX)
480 				rc = pci_alloc_msix(sc->dev, &navail);
481 			else
482 				rc = pci_alloc_msi(sc->dev, &navail);
483 			if (rc != 0) {
484 				device_printf(sc->dev,
485 		    "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n",
486 				    itype, rc, iaq->nirq, navail);
487 				return (rc);
488 			}
489 			if (navail == iaq->nirq) {
490 				return (0);
491 			}
492 			pci_release_msi(sc->dev);
493 		}
494 
495 		/* Fall back to a single interrupt. */
496 		iaq->nirq = 1;
497 		navail = iaq->nirq;
498 		if (itype == INTR_MSIX)
499 			rc = pci_alloc_msix(sc->dev, &navail);
500 		else
501 			rc = pci_alloc_msi(sc->dev, &navail);
502 		if (rc != 0)
503 			device_printf(sc->dev,
504 		    "failed to allocate vectors:%d, type=%d, req=%d, rcvd=%d\n",
505 			    itype, rc, iaq->nirq, navail);
506 		return (rc);
507 	}
508 
509 	device_printf(sc->dev,
510 	    "failed to find a usable interrupt type.  "
511 	    "allowed=%d, msi-x=%d, msi=%d, intx=1", t4_intr_types,
512 	    pci_msix_count(sc->dev), pci_msi_count(sc->dev));
513 
514 	return (ENXIO);
515 }
516 
/*
 * Attach method shared by all of the VF nexus drivers in this file.
 *
 * Initializes the softc and its locks, maps the BARs, creates the control
 * character device, resets the function through the firmware, reads the
 * operating parameters, creates one port_info (with a single VI and a
 * child device) per usable port, chooses the interrupt type and queue
 * counts, allocates the queue and interrupt bookkeeping arrays and
 * attaches the child devices.
 *
 * Returns 0 on success.  On any failure the partially constructed state
 * is torn down with t4_detach_common() and the error is returned.
 */
static int
t4vf_attach(device_t dev)
{
	struct adapter *sc;
	int rc = 0, i, j, rqidx, tqidx, n, p, pmask;
	struct make_dev_args mda;
	struct intrs_and_queues iaq;
	struct sge *s;

	sc = device_get_softc(dev);
	sc->dev = dev;
	sysctl_ctx_init(&sc->ctx);
	pci_enable_busmaster(dev);
	pci_set_max_read_req(dev, 4096);
	sc->params.pci.mps = pci_get_max_payload(dev);

	sc->flags |= IS_VF;
	TUNABLE_INT_FETCH("hw.cxgbe.dflags", &sc->debug_flags);

	/* VFs use their own register offsets for GTS and the doorbell. */
	sc->sge_gts_reg = VF_SGE_REG(A_SGE_VF_GTS);
	sc->sge_kdoorbell_reg = VF_SGE_REG(A_SGE_VF_KDOORBELL);
	snprintf(sc->lockname, sizeof(sc->lockname), "%s",
	    device_get_nameunit(dev));
	mtx_init(&sc->sc_lock, sc->lockname, 0, MTX_DEF);
	t4_add_adapter(sc);

	mtx_init(&sc->sfl_lock, "starving freelists", 0, MTX_DEF);
	TAILQ_INIT(&sc->sfl);
	callout_init_mtx(&sc->sfl_callout, &sc->sfl_lock, 0);

	mtx_init(&sc->reg_lock, "indirect register access", 0, MTX_DEF);

	rc = t4_map_bars_0_and_4(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = -t4vf_prep_adapter(sc);
	if (rc != 0)
		goto done;

	t4_init_devnames(sc);
	if (sc->names == NULL) {
		rc = ENOTSUP;
		goto done; /* error message displayed already */
	}

	/*
	 * Leave the 'pf' and 'mbox' values as zero.  This ensures
	 * that various firmware messages do not set the fields which
	 * is the correct thing to do for a VF.
	 */

	memset(sc->chan_map, 0xff, sizeof(sc->chan_map));

	/*
	 * Create the control device node.  A failure here is only logged;
	 * rc is overwritten by the next step and attach carries on.
	 */
	make_dev_args_init(&mda);
	mda.mda_devsw = &t4vf_cdevsw;
	mda.mda_uid = UID_ROOT;
	mda.mda_gid = GID_WHEEL;
	mda.mda_mode = 0600;
	mda.mda_si_drv1 = sc;
	rc = make_dev_s(&mda, &sc->cdev, "%s", device_get_nameunit(dev));
	if (rc != 0)
		device_printf(dev, "failed to create nexus char device: %d.\n",
		    rc);

#if defined(__i386__)
	if ((cpu_feature & CPUID_CX8) == 0) {
		device_printf(dev, "64 bit atomics not available.\n");
		rc = ENOTSUP;
		goto done;
	}
#endif

	/*
	 * Some environments do not properly handle PCIE FLRs -- e.g. in Linux
	 * 2.6.31 and later we can't call pci_reset_function() in order to
	 * issue an FLR because of a self-deadlock on the device semaphore.
	 * Meanwhile, the OS infrastructure doesn't issue FLRs in all the
	 * cases where they're needed -- for instance, some versions of KVM
	 * fail to reset "Assigned Devices" when the VM reboots.  Therefore we
	 * use the firmware based reset in order to reset any per function
	 * state.
	 */
	rc = -t4vf_fw_reset(sc);
	if (rc != 0) {
		device_printf(dev, "FW reset failed: %d\n", rc);
		goto done;
	}
	sc->flags |= FW_OK;

	/*
	 * Grab basic operational parameters.  These will predominantly have
	 * been set up by the Physical Function Driver or will be hard coded
	 * into the adapter.  We just have to live with them ...  Note that
	 * we _must_ get our VPD parameters before our SGE parameters because
	 * we need to know the adapter's core clock from the VPD in order to
	 * properly decode the SGE Timer Values.
	 */
	rc = get_params__pre_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */
	rc = get_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = set_params__post_init(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = t4_map_bar_2(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = t4_adj_doorbells(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	rc = t4_create_dma_tag(sc);
	if (rc != 0)
		goto done; /* error message displayed already */

	/*
	 * The number of "ports" which we support is equal to the number of
	 * Virtual Interfaces with which we've been provisioned.
	 */
	sc->params.nports = imin(sc->params.vfres.nvi, MAX_NPORTS);

	/*
	 * We may have been provisioned with more VIs than the number of
	 * ports we're allowed to access (our Port Access Rights Mask).
	 * Just use a single VI for each port.
	 */
	sc->params.nports = imin(sc->params.nports,
	    bitcount32(sc->params.vfres.pmask));

#ifdef notyet
	/*
	 * XXX: The Linux VF driver will lower nports if it thinks there
	 * are too few resources in vfres (niqflint, nethctrl, neq).
	 */
#endif

	/*
	 * First pass over all the ports - allocate VIs and initialize some
	 * basic parameters like mac address, port type, etc.
	 */
	pmask = sc->params.vfres.pmask;
	for_each_port(sc, i) {
		struct port_info *pi;
		uint8_t mac[ETHER_ADDR_LEN];

		pi = malloc(sizeof(*pi), M_CXGBE, M_ZERO | M_WAITOK);
		sc->port[i] = pi;

		/* These must be set before t4_port_init */
		pi->adapter = sc;
		pi->port_id = i;
		pi->nvi = 1;
		pi->vi = malloc(sizeof(struct vi_info) * pi->nvi, M_CXGBE,
		    M_ZERO | M_WAITOK);

		/*
		 * Allocate the "main" VI and initialize parameters
		 * like mac addr.
		 */
		rc = -t4_port_init(sc, sc->mbox, sc->pf, 0, i);
		if (rc != 0) {
			device_printf(dev, "unable to initialize port %d: %d\n",
			    i, rc);
			/* Undo this port's allocations before bailing out. */
			free(pi->vi, M_CXGBE);
			free(pi, M_CXGBE);
			sc->port[i] = NULL;
			goto done;
		}

		/*
		 * Prefer the MAC address set by the PF, if there is one.
		 * The lowest bit still set in pmask selects the port to
		 * query; it is cleared afterwards so that the next
		 * iteration uses the next accessible port.
		 */
		n = 1;
		p = ffs(pmask) - 1;
		MPASS(p >= 0);
		rc = t4vf_get_vf_mac(sc, p, &n, mac);
		if (rc == 0 && n == 1)
			t4_os_set_hw_addr(pi, mac);
		pmask &= ~(1 << p);

		sc->vlan_id = t4vf_get_vf_vlan(sc);

		/* No t4_link_start. */

		snprintf(pi->lockname, sizeof(pi->lockname), "%sp%d",
		    device_get_nameunit(dev), i);
		mtx_init(&pi->pi_lock, pi->lockname, 0, MTX_DEF);
		sc->chan_map[pi->tx_chan] = i;

		/* All VIs on this port share this media. */
		ifmedia_init(&pi->media, IFM_IMASK, cxgbe_media_change,
		    cxgbe_media_status);

		pi->dev = device_add_child(dev, sc->names->vf_ifnet_name, DEVICE_UNIT_ANY);
		if (pi->dev == NULL) {
			device_printf(dev,
			    "failed to add device for port %d.\n", i);
			rc = ENXIO;
			goto done;
		}
		pi->vi[0].dev = pi->dev;
		device_set_softc(pi->dev, pi);
	}

	/*
	 * Interrupt type, # of interrupts, # of rx/tx queues, etc.
	 */
	rc = cfg_itype_and_nqueues(sc, &iaq);
	if (rc != 0)
		goto done; /* error message displayed already */

	sc->intr_type = iaq.intr_type;
	sc->intr_count = iaq.nirq;

	/* iaq.nrxq and iaq.ntxq are used here as per-port counts. */
	s = &sc->sge;
	s->nrxq = sc->params.nports * iaq.nrxq;
	s->ntxq = sc->params.nports * iaq.ntxq;
	s->neq = s->ntxq + s->nrxq;	/* the free list in an rxq is an eq */
	s->neq += sc->params.nports;	/* ctrl queues: 1 per port */
	s->niq = s->nrxq + 1;		/* 1 extra for firmware event queue */

	s->iqmap_sz = s->niq;
	s->eqmap_sz = s->neq;

	s->rxq = malloc(s->nrxq * sizeof(struct sge_rxq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->txq = malloc(s->ntxq * sizeof(struct sge_txq), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->iqmap = malloc(s->iqmap_sz * sizeof(struct sge_iq *), M_CXGBE,
	    M_ZERO | M_WAITOK);
	s->eqmap = malloc(s->eqmap_sz * sizeof(struct sge_eq *), M_CXGBE,
	    M_ZERO | M_WAITOK);

	sc->irq = malloc(sc->intr_count * sizeof(struct irq), M_CXGBE,
	    M_ZERO | M_WAITOK);

	/*
	 * Second pass over the ports.  This time we know the number of rx and
	 * tx queues that each port should get.
	 */
	rqidx = tqidx = 0;
	for_each_port(sc, i) {
		struct port_info *pi = sc->port[i];
		struct vi_info *vi;

		if (pi == NULL)
			continue;

		for_each_vi(pi, j, vi) {
			vi->pi = pi;
			vi->adapter = sc;
			vi->qsize_rxq = t4_qsize_rxq;
			vi->qsize_txq = t4_qsize_txq;

			/* Each VI gets a contiguous slice of the queue arrays. */
			vi->first_rxq = rqidx;
			vi->first_txq = tqidx;
			vi->tmr_idx = t4_tmr_idx;
			vi->pktc_idx = t4_pktc_idx;
			vi->nrxq = j == 0 ? iaq.nrxq: 1;
			vi->ntxq = j == 0 ? iaq.ntxq: 1;

			rqidx += vi->nrxq;
			tqidx += vi->ntxq;

			vi->rsrv_noflowq = 0;
		}
	}

	rc = t4_setup_intr_handlers(sc);
	if (rc != 0) {
		device_printf(dev,
		    "failed to setup interrupt handlers: %d\n", rc);
		goto done;
	}

	bus_attach_children(dev);

	device_printf(dev,
	    "%d ports, %d %s interrupt%s, %d eq, %d iq\n",
	    sc->params.nports, sc->intr_count, sc->intr_type == INTR_MSIX ?
	    "MSI-X" : "MSI", sc->intr_count > 1 ? "s" : "", sc->sge.neq,
	    sc->sge.niq);

done:
	/* Single exit: tear everything down on error, add sysctls on success. */
	if (rc != 0)
		t4_detach_common(dev);
	else
		t4_sysctls(sc);

	return (rc);
}
812 
813 static void
814 get_regs(struct adapter *sc, struct t4_regdump *regs, uint8_t *buf)
815 {
816 
817 	/* 0x3f is used as the revision for VFs. */
818 	regs->version = chip_id(sc) | (0x3f << 10);
819 	t4_get_regs(sc, buf, regs->len);
820 }
821 
822 static void
823 t4_clr_vi_stats(struct adapter *sc)
824 {
825 	int reg;
826 
827 	for (reg = A_MPS_VF_STAT_TX_VF_BCAST_BYTES_L;
828 	     reg <= A_MPS_VF_STAT_RX_VF_ERR_FRAMES_H; reg += 4)
829 		t4_write_reg(sc, VF_MPS_REG(reg), 0);
830 }
831 
832 static int
833 t4vf_ioctl(struct cdev *dev, unsigned long cmd, caddr_t data, int fflag,
834     struct thread *td)
835 {
836 	int rc;
837 	struct adapter *sc = dev->si_drv1;
838 
839 	rc = priv_check(td, PRIV_DRIVER);
840 	if (rc != 0)
841 		return (rc);
842 
843 	switch (cmd) {
844 	case CHELSIO_T4_GETREG: {
845 		struct t4_reg *edata = (struct t4_reg *)data;
846 
847 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
848 			return (EFAULT);
849 
850 		if (edata->size == 4)
851 			edata->val = t4_read_reg(sc, edata->addr);
852 		else if (edata->size == 8)
853 			edata->val = t4_read_reg64(sc, edata->addr);
854 		else
855 			return (EINVAL);
856 
857 		break;
858 	}
859 	case CHELSIO_T4_SETREG: {
860 		struct t4_reg *edata = (struct t4_reg *)data;
861 
862 		if ((edata->addr & 0x3) != 0 || edata->addr >= sc->mmio_len)
863 			return (EFAULT);
864 
865 		if (edata->size == 4) {
866 			if (edata->val & 0xffffffff00000000)
867 				return (EINVAL);
868 			t4_write_reg(sc, edata->addr, (uint32_t) edata->val);
869 		} else if (edata->size == 8)
870 			t4_write_reg64(sc, edata->addr, edata->val);
871 		else
872 			return (EINVAL);
873 		break;
874 	}
875 	case CHELSIO_T4_REGDUMP: {
876 		struct t4_regdump *regs = (struct t4_regdump *)data;
877 		int reglen = t4_get_regs_len(sc);
878 		uint8_t *buf;
879 
880 		if (regs->len < reglen) {
881 			regs->len = reglen; /* hint to the caller */
882 			return (ENOBUFS);
883 		}
884 
885 		regs->len = reglen;
886 		buf = malloc(reglen, M_CXGBE, M_WAITOK | M_ZERO);
887 		get_regs(sc, regs, buf);
888 		rc = copyout(buf, regs->data, reglen);
889 		free(buf, M_CXGBE);
890 		break;
891 	}
892 	case CHELSIO_T4_CLEAR_STATS: {
893 		int i, v;
894 		u_int port_id = *(uint32_t *)data;
895 		struct port_info *pi;
896 		struct vi_info *vi;
897 
898 		if (port_id >= sc->params.nports)
899 			return (EINVAL);
900 		pi = sc->port[port_id];
901 
902 		/* MAC stats */
903 		pi->tx_parse_error = 0;
904 		t4_clr_vi_stats(sc);
905 
906 		/*
907 		 * Since this command accepts a port, clear stats for
908 		 * all VIs on this port.
909 		 */
910 		for_each_vi(pi, v, vi) {
911 			if (vi->flags & VI_INIT_DONE) {
912 				struct sge_rxq *rxq;
913 				struct sge_txq *txq;
914 
915 				for_each_rxq(vi, i, rxq) {
916 #if defined(INET) || defined(INET6)
917 					rxq->lro.lro_queued = 0;
918 					rxq->lro.lro_flushed = 0;
919 #endif
920 					rxq->rxcsum = 0;
921 					rxq->vlan_extraction = 0;
922 				}
923 
924 				for_each_txq(vi, i, txq) {
925 					txq->txcsum = 0;
926 					txq->tso_wrs = 0;
927 					txq->vlan_insertion = 0;
928 					txq->imm_wrs = 0;
929 					txq->sgl_wrs = 0;
930 					txq->txpkt_wrs = 0;
931 					txq->txpkts0_wrs = 0;
932 					txq->txpkts1_wrs = 0;
933 					txq->txpkts0_pkts = 0;
934 					txq->txpkts1_pkts = 0;
935 					txq->txpkts_flush = 0;
936 					mp_ring_reset_stats(txq->r);
937 				}
938 			}
939 		}
940 		break;
941 	}
942 	case CHELSIO_T4_SCHED_CLASS:
943 		rc = t4_set_sched_class(sc, (struct t4_sched_params *)data);
944 		break;
945 	case CHELSIO_T4_SCHED_QUEUE:
946 		rc = t4_set_sched_queue(sc, (struct t4_sched_queue *)data);
947 		break;
948 	default:
949 		rc = ENOTTY;
950 	}
951 
952 	return (rc);
953 }
954 
955 static device_method_t t4vf_methods[] = {
956 	DEVMETHOD(device_probe,		t4vf_probe),
957 	DEVMETHOD(device_attach,	t4vf_attach),
958 	DEVMETHOD(device_detach,	t4_detach_common),
959 
960 	DEVMETHOD_END
961 };
962 
963 static driver_t t4vf_driver = {
964 	"t4vf",
965 	t4vf_methods,
966 	sizeof(struct adapter)
967 };
968 
969 static device_method_t t5vf_methods[] = {
970 	DEVMETHOD(device_probe,		t5vf_probe),
971 	DEVMETHOD(device_attach,	t4vf_attach),
972 	DEVMETHOD(device_detach,	t4_detach_common),
973 
974 	DEVMETHOD_END
975 };
976 
977 static driver_t t5vf_driver = {
978 	"t5vf",
979 	t5vf_methods,
980 	sizeof(struct adapter)
981 };
982 
983 static device_method_t t6vf_methods[] = {
984 	DEVMETHOD(device_probe,		t6vf_probe),
985 	DEVMETHOD(device_attach,	t4vf_attach),
986 	DEVMETHOD(device_detach,	t4_detach_common),
987 
988 	DEVMETHOD_END
989 };
990 
991 static driver_t t6vf_driver = {
992 	"t6vf",
993 	t6vf_methods,
994 	sizeof(struct adapter)
995 };
996 
997 static device_method_t chvf_methods[] = {
998 	DEVMETHOD(device_probe,		chvf_probe),
999 	DEVMETHOD(device_attach,	t4vf_attach),
1000 	DEVMETHOD(device_detach,	t4_detach_common),
1001 
1002 	DEVMETHOD_END
1003 };
1004 
1005 static driver_t chvf_driver = {
1006 	"chvf",
1007 	chvf_methods,
1008 	sizeof(struct adapter)
1009 };
1010 
1011 static driver_t cxgbev_driver = {
1012 	"cxgbev",
1013 	cxgbe_methods,
1014 	sizeof(struct port_info)
1015 };
1016 
1017 static driver_t cxlv_driver = {
1018 	"cxlv",
1019 	cxgbe_methods,
1020 	sizeof(struct port_info)
1021 };
1022 
1023 static driver_t ccv_driver = {
1024 	"ccv",
1025 	cxgbe_methods,
1026 	sizeof(struct port_info)
1027 };
1028 
1029 static driver_t chev_driver = {
1030 	"chev",
1031 	cxgbe_methods,
1032 	sizeof(struct port_info)
1033 };
1034 
/*
 * Module registration.
 *
 * The four VF nexus drivers are registered on the pci bus.  Each declares
 * a dependency on a *nex module (t4nex, t5nex, t6nex, chnex) -- presumably
 * the matching PF nexus driver, which supplies the shared code used here;
 * confirm against the PF driver.
 */
DRIVER_MODULE(t4vf, pci, t4vf_driver, 0, 0);
MODULE_VERSION(t4vf, 1);
MODULE_DEPEND(t4vf, t4nex, 1, 1, 1);

DRIVER_MODULE(t5vf, pci, t5vf_driver, 0, 0);
MODULE_VERSION(t5vf, 1);
MODULE_DEPEND(t5vf, t5nex, 1, 1, 1);

DRIVER_MODULE(t6vf, pci, t6vf_driver, 0, 0);
MODULE_VERSION(t6vf, 1);
MODULE_DEPEND(t6vf, t6nex, 1, 1, 1);

DRIVER_MODULE(chvf, pci, chvf_driver, 0, 0);
MODULE_VERSION(chvf, 1);
MODULE_DEPEND(chvf, chnex, 1, 1, 1);

/* The port drivers are registered on their respective VF nexus drivers. */
DRIVER_MODULE(cxgbev, t4vf, cxgbev_driver, 0, 0);
MODULE_VERSION(cxgbev, 1);

DRIVER_MODULE(cxlv, t5vf, cxlv_driver, 0, 0);
MODULE_VERSION(cxlv, 1);

DRIVER_MODULE(ccv, t6vf, ccv_driver, 0, 0);
MODULE_VERSION(ccv, 1);

DRIVER_MODULE(chev, chvf, chev_driver, 0, 0);
MODULE_VERSION(chev, 1);
1062