xref: /freebsd/sys/dev/vnic/nicvf_main.c (revision 031beb4e239bfce798af17f5fe8dba8bcaf13d99)
1 /*
2  * Copyright (C) 2015 Cavium Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  */
27 #include <sys/cdefs.h>
28 #include "opt_inet.h"
29 #include "opt_inet6.h"
30 
31 #include <sys/param.h>
32 #include <sys/systm.h>
33 #include <sys/bitset.h>
34 #include <sys/bitstring.h>
35 #include <sys/bus.h>
36 #include <sys/endian.h>
37 #include <sys/kernel.h>
38 #include <sys/malloc.h>
39 #include <sys/mbuf.h>
40 #include <sys/module.h>
41 #include <sys/rman.h>
42 #include <sys/pciio.h>
43 #include <sys/pcpu.h>
44 #include <sys/proc.h>
45 #include <sys/socket.h>
46 #include <sys/sockio.h>
47 #include <sys/stdatomic.h>
48 #include <sys/cpuset.h>
49 #include <sys/lock.h>
50 #include <sys/mutex.h>
51 #include <sys/smp.h>
52 #include <sys/taskqueue.h>
53 
54 #include <net/bpf.h>
55 #include <net/ethernet.h>
56 #include <net/if.h>
57 #include <net/if_var.h>
58 #include <net/if_arp.h>
59 #include <net/if_dl.h>
60 #include <net/if_media.h>
61 #include <net/if_types.h>
62 #include <net/if_vlan_var.h>
63 
64 #include <netinet/in.h>
65 #include <netinet/ip.h>
66 #include <netinet/if_ether.h>
67 #include <netinet/tcp_lro.h>
68 
69 #include <dev/pci/pcireg.h>
70 #include <dev/pci/pcivar.h>
71 
72 #include <sys/dnv.h>
73 #include <sys/nv.h>
74 #include <sys/iov_schema.h>
75 
76 #include <machine/bus.h>
77 
78 #include "thunder_bgx.h"
79 #include "nic_reg.h"
80 #include "nic.h"
81 #include "nicvf_queues.h"
82 
/* Description string set on the device when the probe matches. */
#define	VNIC_VF_DEVSTR	"Cavium Thunder NIC Virtual Function Driver"

/* Resource ID used to allocate the VF register memory window. */
#define	VNIC_VF_REG_RID	PCIR_BAR(PCI_CFG_REG_BAR_NUM)

/*
 * Core lock protecting the interface settings.
 * It is an sx(9) lock stored in the softc and is always taken exclusively.
 */
#define	NICVF_CORE_LOCK_INIT(nic)					\
	sx_init(&(nic)->core_sx, device_get_nameunit((nic)->dev))
#define	NICVF_CORE_LOCK_DESTROY(nic)	sx_destroy(&(nic)->core_sx)
#define	NICVF_CORE_LOCK(nic)		sx_xlock(&(nic)->core_sx)
#define	NICVF_CORE_UNLOCK(nic)		sx_xunlock(&(nic)->core_sx)
#define	NICVF_CORE_LOCK_ASSERT(nic)	sx_assert(&(nic)->core_sx, SA_XLOCKED)

/* Link speed values that nic->speed is compared against (media status). */
#define	SPEED_10	10
#define	SPEED_100	100
#define	SPEED_1000	1000
#define	SPEED_10000	10000
#define	SPEED_40000	40000
104 
105 MALLOC_DEFINE(M_NICVF, "nicvf", "ThunderX VNIC VF dynamic memory");
106 
107 static int nicvf_probe(device_t);
108 static int nicvf_attach(device_t);
109 static int nicvf_detach(device_t);
110 
111 static device_method_t nicvf_methods[] = {
112 	/* Device interface */
113 	DEVMETHOD(device_probe,		nicvf_probe),
114 	DEVMETHOD(device_attach,	nicvf_attach),
115 	DEVMETHOD(device_detach,	nicvf_detach),
116 
117 	DEVMETHOD_END,
118 };
119 
120 static driver_t nicvf_driver = {
121 	"vnic",
122 	nicvf_methods,
123 	sizeof(struct nicvf),
124 };
125 
126 DRIVER_MODULE(vnicvf, pci, nicvf_driver, 0, 0);
127 MODULE_VERSION(vnicvf, 1);
128 MODULE_DEPEND(vnicvf, pci, 1, 1, 1);
129 MODULE_DEPEND(vnicvf, ether, 1, 1, 1);
130 MODULE_DEPEND(vnicvf, vnicpf, 1, 1, 1);
131 
132 static int nicvf_allocate_misc_interrupt(struct nicvf *);
133 static int nicvf_enable_misc_interrupt(struct nicvf *);
134 static int nicvf_allocate_net_interrupts(struct nicvf *);
135 static void nicvf_release_all_interrupts(struct nicvf *);
136 static int nicvf_update_hw_max_frs(struct nicvf *, int);
137 static int nicvf_hw_set_mac_addr(struct nicvf *, uint8_t *);
138 static void nicvf_config_cpi(struct nicvf *);
139 static int nicvf_rss_init(struct nicvf *);
140 static int nicvf_init_resources(struct nicvf *);
141 
142 static int nicvf_setup_ifnet(struct nicvf *);
143 static int nicvf_setup_ifmedia(struct nicvf *);
144 static void nicvf_hw_addr_random(uint8_t *);
145 
146 static int nicvf_if_ioctl(if_t, u_long, caddr_t);
147 static void nicvf_if_init(void *);
148 static void nicvf_if_init_locked(struct nicvf *);
149 static int nicvf_if_transmit(if_t, struct mbuf *);
150 static void nicvf_if_qflush(if_t);
151 static uint64_t nicvf_if_getcounter(if_t, ift_counter);
152 
153 static int nicvf_stop_locked(struct nicvf *);
154 
155 static void nicvf_media_status(if_t, struct ifmediareq *);
156 static int nicvf_media_change(if_t);
157 
158 static void nicvf_tick_stats(void *);
159 
160 static int
161 nicvf_probe(device_t dev)
162 {
163 	uint16_t vendor_id;
164 	uint16_t device_id;
165 
166 	vendor_id = pci_get_vendor(dev);
167 	device_id = pci_get_device(dev);
168 
169 	if (vendor_id != PCI_VENDOR_ID_CAVIUM)
170 		return (ENXIO);
171 
172 	if (device_id == PCI_DEVICE_ID_THUNDER_NIC_VF ||
173 	    device_id == PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF) {
174 		device_set_desc(dev, VNIC_VF_DEVSTR);
175 		return (BUS_PROBE_DEFAULT);
176 	}
177 
178 	return (ENXIO);
179 }
180 
181 static int
182 nicvf_attach(device_t dev)
183 {
184 	struct nicvf *nic;
185 	int rid, qcount;
186 	int err = 0;
187 	uint8_t hwaddr[ETHER_ADDR_LEN];
188 	uint8_t zeromac[] = {[0 ... (ETHER_ADDR_LEN - 1)] = 0};
189 
190 	nic = device_get_softc(dev);
191 	nic->dev = dev;
192 	nic->pnicvf = nic;
193 
194 	NICVF_CORE_LOCK_INIT(nic);
195 	/* Enable HW TSO on Pass2 */
196 	if (!pass1_silicon(dev))
197 		nic->hw_tso = TRUE;
198 
199 	rid = VNIC_VF_REG_RID;
200 	nic->reg_base = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
201 	    RF_ACTIVE);
202 	if (nic->reg_base == NULL) {
203 		device_printf(dev, "Could not allocate registers memory\n");
204 		return (ENXIO);
205 	}
206 
207 	qcount = MAX_CMP_QUEUES_PER_QS;
208 	nic->max_queues = qcount;
209 
210 	err = nicvf_set_qset_resources(nic);
211 	if (err != 0)
212 		goto err_free_res;
213 
214 	/* Check if PF is alive and get MAC address for this VF */
215 	err = nicvf_allocate_misc_interrupt(nic);
216 	if (err != 0)
217 		goto err_free_res;
218 
219 	NICVF_CORE_LOCK(nic);
220 	err = nicvf_enable_misc_interrupt(nic);
221 	NICVF_CORE_UNLOCK(nic);
222 	if (err != 0)
223 		goto err_release_intr;
224 
225 	err = nicvf_allocate_net_interrupts(nic);
226 	if (err != 0) {
227 		device_printf(dev,
228 		    "Could not allocate network interface interrupts\n");
229 		goto err_free_ifnet;
230 	}
231 
232 	/* If no MAC address was obtained we generate random one */
233 	if (memcmp(nic->hwaddr, zeromac, ETHER_ADDR_LEN) == 0) {
234 		nicvf_hw_addr_random(hwaddr);
235 		memcpy(nic->hwaddr, hwaddr, ETHER_ADDR_LEN);
236 		NICVF_CORE_LOCK(nic);
237 		nicvf_hw_set_mac_addr(nic, hwaddr);
238 		NICVF_CORE_UNLOCK(nic);
239 	}
240 
241 	/* Configure CPI alorithm */
242 	nic->cpi_alg = CPI_ALG_NONE;
243 	NICVF_CORE_LOCK(nic);
244 	nicvf_config_cpi(nic);
245 	/* Configure receive side scaling */
246 	if (nic->qs->rq_cnt > 1)
247 		nicvf_rss_init(nic);
248 	NICVF_CORE_UNLOCK(nic);
249 
250 	err = nicvf_setup_ifnet(nic);
251 	if (err != 0) {
252 		device_printf(dev, "Could not set-up ifnet\n");
253 		goto err_release_intr;
254 	}
255 
256 	err = nicvf_setup_ifmedia(nic);
257 	if (err != 0) {
258 		device_printf(dev, "Could not set-up ifmedia\n");
259 		goto err_free_ifnet;
260 	}
261 
262 	mtx_init(&nic->stats_mtx, "VNIC stats", NULL, MTX_DEF);
263 	callout_init_mtx(&nic->stats_callout, &nic->stats_mtx, 0);
264 
265 	ether_ifattach(nic->ifp, nic->hwaddr);
266 
267 	return (0);
268 
269 err_free_ifnet:
270 	if_free(nic->ifp);
271 err_release_intr:
272 	nicvf_release_all_interrupts(nic);
273 err_free_res:
274 	bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(nic->reg_base),
275 	    nic->reg_base);
276 
277 	return (err);
278 }
279 
280 static int
281 nicvf_detach(device_t dev)
282 {
283 	struct nicvf *nic;
284 
285 	nic = device_get_softc(dev);
286 
287 	NICVF_CORE_LOCK(nic);
288 	/* Shut down the port and release ring resources */
289 	nicvf_stop_locked(nic);
290 	/* Release stats lock */
291 	mtx_destroy(&nic->stats_mtx);
292 	/* Release interrupts */
293 	nicvf_release_all_interrupts(nic);
294 	/* Release memory resource */
295 	if (nic->reg_base != NULL) {
296 		bus_release_resource(dev, SYS_RES_MEMORY,
297 		    rman_get_rid(nic->reg_base), nic->reg_base);
298 	}
299 
300 	/* Remove all ifmedia configurations */
301 	ifmedia_removeall(&nic->if_media);
302 	/* Free this ifnet */
303 	if_free(nic->ifp);
304 	NICVF_CORE_UNLOCK(nic);
305 	/* Finally destroy the lock */
306 	NICVF_CORE_LOCK_DESTROY(nic);
307 
308 	return (0);
309 }
310 
311 static void
312 nicvf_hw_addr_random(uint8_t *hwaddr)
313 {
314 	uint32_t rnd;
315 	uint8_t addr[ETHER_ADDR_LEN];
316 
317 	/*
318 	 * Create randomized MAC address.
319 	 * Set 'bsd' + random 24 low-order bits.
320 	 */
321 	rnd = arc4random() & 0x00ffffff;
322 	addr[0] = 'b';
323 	addr[1] = 's';
324 	addr[2] = 'd';
325 	addr[3] = rnd >> 16;
326 	addr[4] = rnd >> 8;
327 	addr[5] = rnd >> 0;
328 
329 	memcpy(hwaddr, addr, ETHER_ADDR_LEN);
330 }
331 
332 static int
333 nicvf_setup_ifnet(struct nicvf *nic)
334 {
335 	if_t ifp;
336 
337 	ifp = if_alloc(IFT_ETHER);
338 	if (ifp == NULL) {
339 		device_printf(nic->dev, "Could not allocate ifnet structure\n");
340 		return (ENOMEM);
341 	}
342 
343 	nic->ifp = ifp;
344 
345 	if_setsoftc(ifp, nic);
346 	if_initname(ifp, device_get_name(nic->dev), device_get_unit(nic->dev));
347 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
348 
349 	if_settransmitfn(ifp, nicvf_if_transmit);
350 	if_setqflushfn(ifp, nicvf_if_qflush);
351 	if_setioctlfn(ifp, nicvf_if_ioctl);
352 	if_setinitfn(ifp, nicvf_if_init);
353 	if_setgetcounterfn(ifp, nicvf_if_getcounter);
354 
355 	if_setmtu(ifp, ETHERMTU);
356 
357 	/* Reset caps */
358 	if_setcapabilities(ifp, 0);
359 
360 	/* Set the default values */
361 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU | IFCAP_JUMBO_MTU, 0);
362 	if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
363 	if (nic->hw_tso) {
364 		/* TSO */
365 		if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
366 		/* TSO parameters */
367 		if_sethwtsomax(ifp, NICVF_TSO_MAXSIZE);
368 		if_sethwtsomaxsegcount(ifp, NICVF_TSO_NSEGS);
369 		if_sethwtsomaxsegsize(ifp, MCLBYTES);
370 	}
371 	/* IP/TCP/UDP HW checksums */
372 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM, 0);
373 	if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0);
374 	/*
375 	 * HW offload enable
376 	 */
377 	if_clearhwassist(ifp);
378 	if_sethwassistbits(ifp, (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP), 0);
379 	if (nic->hw_tso)
380 		if_sethwassistbits(ifp, (CSUM_TSO), 0);
381 	if_setcapenable(ifp, if_getcapabilities(ifp));
382 
383 	return (0);
384 }
385 
386 static int
387 nicvf_setup_ifmedia(struct nicvf *nic)
388 {
389 
390 	ifmedia_init(&nic->if_media, IFM_IMASK, nicvf_media_change,
391 	    nicvf_media_status);
392 
393 	/*
394 	 * Advertise availability of all possible connection types,
395 	 * even though not all are possible at the same time.
396 	 */
397 
398 	ifmedia_add(&nic->if_media, (IFM_ETHER | IFM_10_T | IFM_FDX),
399 	    0, NULL);
400 	ifmedia_add(&nic->if_media, (IFM_ETHER | IFM_100_TX | IFM_FDX),
401 	    0, NULL);
402 	ifmedia_add(&nic->if_media, (IFM_ETHER | IFM_1000_T | IFM_FDX),
403 	    0, NULL);
404 	ifmedia_add(&nic->if_media, (IFM_ETHER | IFM_10G_SR | IFM_FDX),
405 	    0, NULL);
406 	ifmedia_add(&nic->if_media, (IFM_ETHER | IFM_40G_CR4 | IFM_FDX),
407 	    0, NULL);
408 	ifmedia_add(&nic->if_media, (IFM_ETHER | IFM_AUTO | IFM_FDX),
409 	    0, NULL);
410 
411 	ifmedia_set(&nic->if_media, (IFM_ETHER | IFM_AUTO | IFM_FDX));
412 
413 	return (0);
414 }
415 
416 static int
417 nicvf_if_ioctl(if_t ifp, u_long cmd, caddr_t data)
418 {
419 	struct nicvf *nic;
420 	struct rcv_queue *rq;
421 	struct ifreq *ifr;
422 	uint32_t flags;
423 	int mask, err;
424 	int rq_idx;
425 #if defined(INET) || defined(INET6)
426 	struct ifaddr *ifa;
427 	boolean_t avoid_reset = FALSE;
428 #endif
429 
430 	nic = if_getsoftc(ifp);
431 	ifr = (struct ifreq *)data;
432 #if defined(INET) || defined(INET6)
433 	ifa = (struct ifaddr *)data;
434 #endif
435 	err = 0;
436 	switch (cmd) {
437 	case SIOCSIFADDR:
438 #ifdef INET
439 		if (ifa->ifa_addr->sa_family == AF_INET)
440 			avoid_reset = TRUE;
441 #endif
442 #ifdef INET6
443 		if (ifa->ifa_addr->sa_family == AF_INET6)
444 			avoid_reset = TRUE;
445 #endif
446 
447 #if defined(INET) || defined(INET6)
448 		/* Avoid reinitialization unless it's necessary */
449 		if (avoid_reset) {
450 			if_setflagbits(ifp, IFF_UP, 0);
451 			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
452 				nicvf_if_init(nic);
453 #ifdef INET
454 			if (!(if_getflags(ifp) & IFF_NOARP))
455 				arp_ifinit(ifp, ifa);
456 #endif
457 
458 			return (0);
459 		}
460 #endif
461 		err = ether_ioctl(ifp, cmd, data);
462 		break;
463 	case SIOCSIFMTU:
464 		if (ifr->ifr_mtu < NIC_HW_MIN_FRS ||
465 		    ifr->ifr_mtu > NIC_HW_MAX_FRS) {
466 			err = EINVAL;
467 		} else {
468 			NICVF_CORE_LOCK(nic);
469 			err = nicvf_update_hw_max_frs(nic, ifr->ifr_mtu);
470 			if (err == 0)
471 				if_setmtu(ifp, ifr->ifr_mtu);
472 			NICVF_CORE_UNLOCK(nic);
473 		}
474 		break;
475 	case SIOCSIFFLAGS:
476 		NICVF_CORE_LOCK(nic);
477 		flags = if_getflags(ifp);
478 		if (flags & IFF_UP) {
479 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
480 				if ((flags ^ nic->if_flags) & IFF_PROMISC) {
481 					/* Change promiscous mode */
482 #if 0 /* XXX */
483 					nicvf_set_promiscous(nic);
484 #endif
485 				}
486 
487 				if ((flags ^ nic->if_flags) & IFF_ALLMULTI) {
488 					/* Change multicasting settings */
489 #if 0 /* XXX */
490 					nicvf_set_multicast(nic);
491 #endif
492 				}
493 			} else {
494 				nicvf_if_init_locked(nic);
495 			}
496 		} else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
497 			nicvf_stop_locked(nic);
498 
499 		nic->if_flags = flags;
500 		NICVF_CORE_UNLOCK(nic);
501 		break;
502 
503 	case SIOCADDMULTI:
504 	case SIOCDELMULTI:
505 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
506 #if 0
507 			NICVF_CORE_LOCK(nic);
508 			/* ARM64TODO */
509 			nicvf_set_multicast(nic);
510 			NICVF_CORE_UNLOCK(nic);
511 #endif
512 		}
513 		break;
514 
515 	case SIOCSIFMEDIA:
516 	case SIOCGIFMEDIA:
517 		err = ifmedia_ioctl(ifp, ifr, &nic->if_media, cmd);
518 		break;
519 
520 	case SIOCSIFCAP:
521 		mask = if_getcapenable(ifp) ^ ifr->ifr_reqcap;
522 		if (mask & IFCAP_VLAN_MTU) {
523 			/* No work to do except acknowledge the change took. */
524 			if_togglecapenable(ifp, IFCAP_VLAN_MTU);
525 		}
526 		if (mask & IFCAP_TXCSUM)
527 			if_togglecapenable(ifp, IFCAP_TXCSUM);
528 		if (mask & IFCAP_RXCSUM)
529 			if_togglecapenable(ifp, IFCAP_RXCSUM);
530 		if ((mask & IFCAP_TSO4) && nic->hw_tso)
531 			if_togglecapenable(ifp, IFCAP_TSO4);
532 		if (mask & IFCAP_LRO) {
533 			/*
534 			 * Lock the driver for a moment to avoid
535 			 * mismatch in per-queue settings.
536 			 */
537 			NICVF_CORE_LOCK(nic);
538 			if_togglecapenable(ifp, IFCAP_LRO);
539 			if ((if_getdrvflags(nic->ifp) & IFF_DRV_RUNNING) != 0) {
540 				/*
541 				 * Now disable LRO for subsequent packets.
542 				 * Atomicity of this change is not necessary
543 				 * as we don't need precise toggle of this
544 				 * feature for all threads processing the
545 				 * completion queue.
546 				 */
547 				for (rq_idx = 0;
548 				    rq_idx < nic->qs->rq_cnt; rq_idx++) {
549 					rq = &nic->qs->rq[rq_idx];
550 					rq->lro_enabled = !rq->lro_enabled;
551 				}
552 			}
553 			NICVF_CORE_UNLOCK(nic);
554 		}
555 
556 		break;
557 
558 	default:
559 		err = ether_ioctl(ifp, cmd, data);
560 		break;
561 	}
562 
563 	return (err);
564 }
565 
566 static void
567 nicvf_if_init_locked(struct nicvf *nic)
568 {
569 	struct queue_set *qs = nic->qs;
570 	if_t ifp;
571 	int qidx;
572 	int err;
573 	caddr_t if_addr;
574 
575 	NICVF_CORE_LOCK_ASSERT(nic);
576 	ifp = nic->ifp;
577 
578 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)
579 		nicvf_stop_locked(nic);
580 
581 	err = nicvf_enable_misc_interrupt(nic);
582 	if (err != 0) {
583 		if_printf(ifp, "Could not reenable Mbox interrupt\n");
584 		return;
585 	}
586 
587 	/* Get the latest MAC address */
588 	if_addr = if_getlladdr(ifp);
589 	/* Update MAC address if changed */
590 	if (memcmp(nic->hwaddr, if_addr, ETHER_ADDR_LEN) != 0) {
591 		memcpy(nic->hwaddr, if_addr, ETHER_ADDR_LEN);
592 		nicvf_hw_set_mac_addr(nic, if_addr);
593 	}
594 
595 	/* Initialize the queues */
596 	err = nicvf_init_resources(nic);
597 	if (err != 0)
598 		goto error;
599 
600 	/* Make sure queue initialization is written */
601 	wmb();
602 
603 	nicvf_reg_write(nic, NIC_VF_INT, ~0UL);
604 	/* Enable Qset err interrupt */
605 	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
606 
607 	/* Enable completion queue interrupt */
608 	for (qidx = 0; qidx < qs->cq_cnt; qidx++)
609 		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);
610 
611 	/* Enable RBDR threshold interrupt */
612 	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
613 		nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);
614 
615 	nic->drv_stats.txq_stop = 0;
616 	nic->drv_stats.txq_wake = 0;
617 
618 	/* Activate network interface */
619 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
620 
621 	/* Schedule callout to update stats */
622 	callout_reset(&nic->stats_callout, hz, nicvf_tick_stats, nic);
623 
624 	return;
625 
626 error:
627 	/* Something went very wrong. Disable this ifnet for good */
628 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
629 }
630 
/*
 * ifnet init callback: takes the core lock and runs the locked
 * initialization routine.  'if_softc' is the driver's struct nicvf.
 */
static void
nicvf_if_init(void *if_softc)
{
	struct nicvf *sc;

	sc = if_softc;

	NICVF_CORE_LOCK(sc);
	nicvf_if_init_locked(sc);
	NICVF_CORE_UNLOCK(sc);
}
640 
641 static int
642 nicvf_if_transmit(if_t ifp, struct mbuf *mbuf)
643 {
644 	struct nicvf *nic = if_getsoftc(ifp);
645 	struct queue_set *qs = nic->qs;
646 	struct snd_queue *sq;
647 	struct mbuf *mtmp;
648 	int qidx;
649 	int err = 0;
650 
651 	if (__predict_false(qs == NULL)) {
652 		panic("%s: missing queue set for %s", __func__,
653 		    device_get_nameunit(nic->dev));
654 	}
655 
656 	/* Select queue */
657 	if (M_HASHTYPE_GET(mbuf) != M_HASHTYPE_NONE)
658 		qidx = mbuf->m_pkthdr.flowid % qs->sq_cnt;
659 	else
660 		qidx = curcpu % qs->sq_cnt;
661 
662 	sq = &qs->sq[qidx];
663 
664 	if (mbuf->m_next != NULL &&
665 	    (mbuf->m_pkthdr.csum_flags &
666 	    (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)) != 0) {
667 		if (M_WRITABLE(mbuf) == 0) {
668 			mtmp = m_dup(mbuf, M_NOWAIT);
669 			m_freem(mbuf);
670 			if (mtmp == NULL)
671 				return (ENOBUFS);
672 			mbuf = mtmp;
673 		}
674 	}
675 
676 	err = drbr_enqueue(ifp, sq->br, mbuf);
677 	if (((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
678 	    IFF_DRV_RUNNING) || !nic->link_up || (err != 0)) {
679 		/*
680 		 * Try to enqueue packet to the ring buffer.
681 		 * If the driver is not active, link down or enqueue operation
682 		 * failed, return with the appropriate error code.
683 		 */
684 		return (err);
685 	}
686 
687 	if (NICVF_TX_TRYLOCK(sq) != 0) {
688 		err = nicvf_xmit_locked(sq);
689 		NICVF_TX_UNLOCK(sq);
690 		return (err);
691 	} else
692 		taskqueue_enqueue(sq->snd_taskq, &sq->snd_task);
693 
694 	return (0);
695 }
696 
697 static void
698 nicvf_if_qflush(if_t ifp)
699 {
700 	struct nicvf *nic;
701 	struct queue_set *qs;
702 	struct snd_queue *sq;
703 	struct mbuf *mbuf;
704 	size_t idx;
705 
706 	nic = if_getsoftc(ifp);
707 	qs = nic->qs;
708 
709 	for (idx = 0; idx < qs->sq_cnt; idx++) {
710 		sq = &qs->sq[idx];
711 		NICVF_TX_LOCK(sq);
712 		while ((mbuf = buf_ring_dequeue_sc(sq->br)) != NULL)
713 			m_freem(mbuf);
714 		NICVF_TX_UNLOCK(sq);
715 	}
716 	if_qflush(ifp);
717 }
718 
719 static uint64_t
720 nicvf_if_getcounter(if_t ifp, ift_counter cnt)
721 {
722 	struct nicvf *nic;
723 	struct nicvf_hw_stats *hw_stats;
724 	struct nicvf_drv_stats *drv_stats;
725 
726 	nic = if_getsoftc(ifp);
727 	hw_stats = &nic->hw_stats;
728 	drv_stats = &nic->drv_stats;
729 
730 	switch (cnt) {
731 	case IFCOUNTER_IPACKETS:
732 		return (drv_stats->rx_frames_ok);
733 	case IFCOUNTER_OPACKETS:
734 		return (drv_stats->tx_frames_ok);
735 	case IFCOUNTER_IBYTES:
736 		return (hw_stats->rx_bytes);
737 	case IFCOUNTER_OBYTES:
738 		return (hw_stats->tx_bytes_ok);
739 	case IFCOUNTER_IMCASTS:
740 		return (hw_stats->rx_mcast_frames);
741 	case IFCOUNTER_COLLISIONS:
742 		return (0);
743 	case IFCOUNTER_IQDROPS:
744 		return (drv_stats->rx_drops);
745 	case IFCOUNTER_OQDROPS:
746 		return (drv_stats->tx_drops);
747 	default:
748 		return (if_get_counter_default(ifp, cnt));
749 	}
750 
751 }
752 
753 static void
754 nicvf_media_status(if_t ifp, struct ifmediareq *ifmr)
755 {
756 	struct nicvf *nic = if_getsoftc(ifp);
757 
758 	NICVF_CORE_LOCK(nic);
759 
760 	ifmr->ifm_status = IFM_AVALID;
761 	ifmr->ifm_active = IFM_ETHER;
762 
763 	if (nic->link_up) {
764 		/* Device attached to working network */
765 		ifmr->ifm_status |= IFM_ACTIVE;
766 	}
767 
768 	switch (nic->speed) {
769 	case SPEED_10:
770 		ifmr->ifm_active |= IFM_10_T;
771 		break;
772 	case SPEED_100:
773 		ifmr->ifm_active |= IFM_100_TX;
774 		break;
775 	case SPEED_1000:
776 		ifmr->ifm_active |= IFM_1000_T;
777 		break;
778 	case SPEED_10000:
779 		ifmr->ifm_active |= IFM_10G_SR;
780 		break;
781 	case SPEED_40000:
782 		ifmr->ifm_active |= IFM_40G_CR4;
783 		break;
784 	default:
785 		ifmr->ifm_active |= IFM_AUTO;
786 		break;
787 	}
788 
789 	if (nic->duplex)
790 		ifmr->ifm_active |= IFM_FDX;
791 	else
792 		ifmr->ifm_active |= IFM_HDX;
793 
794 	NICVF_CORE_UNLOCK(nic);
795 }
796 
797 static int
798 nicvf_media_change(if_t ifp __unused)
799 {
800 
801 	return (0);
802 }
803 
804 /* Register read/write APIs */
805 void
806 nicvf_reg_write(struct nicvf *nic, bus_space_handle_t offset, uint64_t val)
807 {
808 
809 	bus_write_8(nic->reg_base, offset, val);
810 }
811 
812 uint64_t
813 nicvf_reg_read(struct nicvf *nic, uint64_t offset)
814 {
815 
816 	return (bus_read_8(nic->reg_base, offset));
817 }
818 
819 void
820 nicvf_queue_reg_write(struct nicvf *nic, bus_space_handle_t offset,
821     uint64_t qidx, uint64_t val)
822 {
823 
824 	bus_write_8(nic->reg_base, offset + (qidx << NIC_Q_NUM_SHIFT), val);
825 }
826 
827 uint64_t
828 nicvf_queue_reg_read(struct nicvf *nic, bus_space_handle_t offset,
829     uint64_t qidx)
830 {
831 
832 	return (bus_read_8(nic->reg_base, offset + (qidx << NIC_Q_NUM_SHIFT)));
833 }
834 
835 /* VF -> PF mailbox communication */
836 static void
837 nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx)
838 {
839 	uint64_t *msg = (uint64_t *)mbx;
840 
841 	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 0, msg[0]);
842 	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 8, msg[1]);
843 }
844 
845 int
846 nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
847 {
848 	int timeout = NIC_MBOX_MSG_TIMEOUT * 10;
849 	int sleep = 2;
850 
851 	NICVF_CORE_LOCK_ASSERT(nic);
852 
853 	nic->pf_acked = FALSE;
854 	nic->pf_nacked = FALSE;
855 
856 	nicvf_write_to_mbx(nic, mbx);
857 
858 	/* Wait for previous message to be acked, timeout 2sec */
859 	while (!nic->pf_acked) {
860 		if (nic->pf_nacked)
861 			return (EINVAL);
862 
863 		DELAY(sleep * 1000);
864 
865 		if (nic->pf_acked)
866 			break;
867 		timeout -= sleep;
868 		if (!timeout) {
869 			device_printf(nic->dev,
870 				   "PF didn't ack to mbox msg %d from VF%d\n",
871 				   (mbx->msg.msg & 0xFF), nic->vf_id);
872 
873 			return (EBUSY);
874 		}
875 	}
876 	return (0);
877 }
878 
879 /*
880  * Checks if VF is able to comminicate with PF
881  * and also gets the VNIC number this VF is associated to.
882  */
883 static int
884 nicvf_check_pf_ready(struct nicvf *nic)
885 {
886 	union nic_mbx mbx = {};
887 
888 	mbx.msg.msg = NIC_MBOX_MSG_READY;
889 	if (nicvf_send_msg_to_pf(nic, &mbx)) {
890 		device_printf(nic->dev,
891 			   "PF didn't respond to READY msg\n");
892 		return 0;
893 	}
894 
895 	return 1;
896 }
897 
898 static void
899 nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx)
900 {
901 
902 	if (bgx->rx)
903 		nic->bgx_stats.rx_stats[bgx->idx] = bgx->stats;
904 	else
905 		nic->bgx_stats.tx_stats[bgx->idx] = bgx->stats;
906 }
907 
908 static void
909 nicvf_handle_mbx_intr(struct nicvf *nic)
910 {
911 	union nic_mbx mbx = {};
912 	uint64_t *mbx_data;
913 	uint64_t mbx_addr;
914 	int i;
915 
916 	mbx_addr = NIC_VF_PF_MAILBOX_0_1;
917 	mbx_data = (uint64_t *)&mbx;
918 
919 	for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) {
920 		*mbx_data = nicvf_reg_read(nic, mbx_addr);
921 		mbx_data++;
922 		mbx_addr += sizeof(uint64_t);
923 	}
924 
925 	switch (mbx.msg.msg) {
926 	case NIC_MBOX_MSG_READY:
927 		nic->pf_acked = TRUE;
928 		nic->vf_id = mbx.nic_cfg.vf_id & 0x7F;
929 		nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F;
930 		nic->node = mbx.nic_cfg.node_id;
931 		memcpy(nic->hwaddr, mbx.nic_cfg.mac_addr, ETHER_ADDR_LEN);
932 		nic->loopback_supported = mbx.nic_cfg.loopback_supported;
933 		nic->link_up = FALSE;
934 		nic->duplex = 0;
935 		nic->speed = 0;
936 		break;
937 	case NIC_MBOX_MSG_ACK:
938 		nic->pf_acked = TRUE;
939 		break;
940 	case NIC_MBOX_MSG_NACK:
941 		nic->pf_nacked = TRUE;
942 		break;
943 	case NIC_MBOX_MSG_RSS_SIZE:
944 		nic->rss_info.rss_size = mbx.rss_size.ind_tbl_size;
945 		nic->pf_acked = TRUE;
946 		break;
947 	case NIC_MBOX_MSG_BGX_STATS:
948 		nicvf_read_bgx_stats(nic, &mbx.bgx_stats);
949 		nic->pf_acked = TRUE;
950 		break;
951 	case NIC_MBOX_MSG_BGX_LINK_CHANGE:
952 		nic->pf_acked = TRUE;
953 		nic->link_up = mbx.link_status.link_up;
954 		nic->duplex = mbx.link_status.duplex;
955 		nic->speed = mbx.link_status.speed;
956 		if (nic->link_up) {
957 			if_setbaudrate(nic->ifp, nic->speed * 1000000);
958 			if_link_state_change(nic->ifp, LINK_STATE_UP);
959 		} else {
960 			if_setbaudrate(nic->ifp, 0);
961 			if_link_state_change(nic->ifp, LINK_STATE_DOWN);
962 		}
963 		break;
964 	default:
965 		device_printf(nic->dev,
966 			   "Invalid message from PF, msg 0x%x\n", mbx.msg.msg);
967 		break;
968 	}
969 	nicvf_clear_intr(nic, NICVF_INTR_MBOX, 0);
970 }
971 
972 static int
973 nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
974 {
975 	union nic_mbx mbx = {};
976 
977 	mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS;
978 	mbx.frs.max_frs = mtu;
979 	mbx.frs.vf_id = nic->vf_id;
980 
981 	return nicvf_send_msg_to_pf(nic, &mbx);
982 }
983 
984 static int
985 nicvf_hw_set_mac_addr(struct nicvf *nic, uint8_t *hwaddr)
986 {
987 	union nic_mbx mbx = {};
988 
989 	mbx.mac.msg = NIC_MBOX_MSG_SET_MAC;
990 	mbx.mac.vf_id = nic->vf_id;
991 	memcpy(mbx.mac.mac_addr, hwaddr, ETHER_ADDR_LEN);
992 
993 	return (nicvf_send_msg_to_pf(nic, &mbx));
994 }
995 
996 static void
997 nicvf_config_cpi(struct nicvf *nic)
998 {
999 	union nic_mbx mbx = {};
1000 
1001 	mbx.cpi_cfg.msg = NIC_MBOX_MSG_CPI_CFG;
1002 	mbx.cpi_cfg.vf_id = nic->vf_id;
1003 	mbx.cpi_cfg.cpi_alg = nic->cpi_alg;
1004 	mbx.cpi_cfg.rq_cnt = nic->qs->rq_cnt;
1005 
1006 	nicvf_send_msg_to_pf(nic, &mbx);
1007 }
1008 
1009 static void
1010 nicvf_get_rss_size(struct nicvf *nic)
1011 {
1012 	union nic_mbx mbx = {};
1013 
1014 	mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
1015 	mbx.rss_size.vf_id = nic->vf_id;
1016 	nicvf_send_msg_to_pf(nic, &mbx);
1017 }
1018 
1019 static void
1020 nicvf_config_rss(struct nicvf *nic)
1021 {
1022 	union nic_mbx mbx = {};
1023 	struct nicvf_rss_info *rss;
1024 	int ind_tbl_len;
1025 	int i, nextq;
1026 
1027 	rss = &nic->rss_info;
1028 	ind_tbl_len = rss->rss_size;
1029 	nextq = 0;
1030 
1031 	mbx.rss_cfg.vf_id = nic->vf_id;
1032 	mbx.rss_cfg.hash_bits = rss->hash_bits;
1033 	while (ind_tbl_len != 0) {
1034 		mbx.rss_cfg.tbl_offset = nextq;
1035 		mbx.rss_cfg.tbl_len = MIN(ind_tbl_len,
1036 		    RSS_IND_TBL_LEN_PER_MBX_MSG);
1037 		mbx.rss_cfg.msg = mbx.rss_cfg.tbl_offset ?
1038 		    NIC_MBOX_MSG_RSS_CFG_CONT : NIC_MBOX_MSG_RSS_CFG;
1039 
1040 		for (i = 0; i < mbx.rss_cfg.tbl_len; i++)
1041 			mbx.rss_cfg.ind_tbl[i] = rss->ind_tbl[nextq++];
1042 
1043 		nicvf_send_msg_to_pf(nic, &mbx);
1044 
1045 		ind_tbl_len -= mbx.rss_cfg.tbl_len;
1046 	}
1047 }
1048 
1049 static void
1050 nicvf_set_rss_key(struct nicvf *nic)
1051 {
1052 	struct nicvf_rss_info *rss;
1053 	uint64_t key_addr;
1054 	int idx;
1055 
1056 	rss = &nic->rss_info;
1057 	key_addr = NIC_VNIC_RSS_KEY_0_4;
1058 
1059 	for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) {
1060 		nicvf_reg_write(nic, key_addr, rss->key[idx]);
1061 		key_addr += sizeof(uint64_t);
1062 	}
1063 }
1064 
1065 static int
1066 nicvf_rss_init(struct nicvf *nic)
1067 {
1068 	struct nicvf_rss_info *rss;
1069 	int idx;
1070 
1071 	nicvf_get_rss_size(nic);
1072 
1073 	rss = &nic->rss_info;
1074 	if (nic->cpi_alg != CPI_ALG_NONE) {
1075 		rss->enable = FALSE;
1076 		rss->hash_bits = 0;
1077 		return (ENXIO);
1078 	}
1079 
1080 	rss->enable = TRUE;
1081 
1082 	/* Using the HW reset value for now */
1083 	rss->key[0] = 0xFEED0BADFEED0BADUL;
1084 	rss->key[1] = 0xFEED0BADFEED0BADUL;
1085 	rss->key[2] = 0xFEED0BADFEED0BADUL;
1086 	rss->key[3] = 0xFEED0BADFEED0BADUL;
1087 	rss->key[4] = 0xFEED0BADFEED0BADUL;
1088 
1089 	nicvf_set_rss_key(nic);
1090 
1091 	rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA;
1092 	nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss->cfg);
1093 
1094 	rss->hash_bits = fls(rss->rss_size) - 1;
1095 	for (idx = 0; idx < rss->rss_size; idx++)
1096 		rss->ind_tbl[idx] = idx % nic->rx_queues;
1097 
1098 	nicvf_config_rss(nic);
1099 
1100 	return (0);
1101 }
1102 
1103 static int
1104 nicvf_init_resources(struct nicvf *nic)
1105 {
1106 	int err;
1107 	union nic_mbx mbx = {};
1108 
1109 	mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
1110 
1111 	/* Enable Qset */
1112 	nicvf_qset_config(nic, TRUE);
1113 
1114 	/* Initialize queues and HW for data transfer */
1115 	err = nicvf_config_data_transfer(nic, TRUE);
1116 	if (err) {
1117 		device_printf(nic->dev,
1118 		    "Failed to alloc/config VF's QSet resources\n");
1119 		return (err);
1120 	}
1121 
1122 	/* Send VF config done msg to PF */
1123 	nicvf_write_to_mbx(nic, &mbx);
1124 
1125 	return (0);
1126 }
1127 
1128 static void
1129 nicvf_misc_intr_handler(void *arg)
1130 {
1131 	struct nicvf *nic = (struct nicvf *)arg;
1132 	uint64_t intr;
1133 
1134 	intr = nicvf_reg_read(nic, NIC_VF_INT);
1135 	/* Check for spurious interrupt */
1136 	if (!(intr & NICVF_INTR_MBOX_MASK))
1137 		return;
1138 
1139 	nicvf_handle_mbx_intr(nic);
1140 }
1141 
1142 static int
1143 nicvf_intr_handler(void *arg)
1144 {
1145 	struct nicvf *nic;
1146 	struct cmp_queue *cq;
1147 	int qidx;
1148 
1149 	cq = (struct cmp_queue *)arg;
1150 	nic = cq->nic;
1151 	qidx = cq->idx;
1152 
1153 	/* Disable interrupts */
1154 	nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
1155 
1156 	taskqueue_enqueue(cq->cmp_taskq, &cq->cmp_task);
1157 
1158 	/* Clear interrupt */
1159 	nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
1160 
1161 	return (FILTER_HANDLED);
1162 }
1163 
1164 static void
1165 nicvf_rbdr_intr_handler(void *arg)
1166 {
1167 	struct nicvf *nic;
1168 	struct queue_set *qs;
1169 	struct rbdr *rbdr;
1170 	int qidx;
1171 
1172 	nic = (struct nicvf *)arg;
1173 
1174 	/* Disable RBDR interrupt and schedule softirq */
1175 	for (qidx = 0; qidx < nic->qs->rbdr_cnt; qidx++) {
1176 		if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx))
1177 			continue;
1178 		nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
1179 
1180 		qs = nic->qs;
1181 		rbdr = &qs->rbdr[qidx];
1182 		taskqueue_enqueue(rbdr->rbdr_taskq, &rbdr->rbdr_task_nowait);
1183 		/* Clear interrupt */
1184 		nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
1185 	}
1186 }
1187 
1188 static void
1189 nicvf_qs_err_intr_handler(void *arg)
1190 {
1191 	struct nicvf *nic = (struct nicvf *)arg;
1192 	struct queue_set *qs = nic->qs;
1193 
1194 	/* Disable Qset err interrupt and schedule softirq */
1195 	nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
1196 	taskqueue_enqueue(qs->qs_err_taskq, &qs->qs_err_task);
1197 	nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);
1198 
1199 }
1200 
1201 static int
1202 nicvf_enable_msix(struct nicvf *nic)
1203 {
1204 	struct pci_devinfo *dinfo;
1205 	int rid, count;
1206 	int ret;
1207 
1208 	dinfo = device_get_ivars(nic->dev);
1209 	rid = dinfo->cfg.msix.msix_table_bar;
1210 	nic->msix_table_res =
1211 	    bus_alloc_resource_any(nic->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
1212 	if (nic->msix_table_res == NULL) {
1213 		device_printf(nic->dev,
1214 		    "Could not allocate memory for MSI-X table\n");
1215 		return (ENXIO);
1216 	}
1217 
1218 	count = nic->num_vec = NIC_VF_MSIX_VECTORS;
1219 
1220 	ret = pci_alloc_msix(nic->dev, &count);
1221 	if ((ret != 0) || (count != nic->num_vec)) {
1222 		device_printf(nic->dev,
1223 		    "Request for #%d msix vectors failed, error: %d\n",
1224 		    nic->num_vec, ret);
1225 		return (ret);
1226 	}
1227 
1228 	nic->msix_enabled = 1;
1229 	return (0);
1230 }
1231 
1232 static void
1233 nicvf_disable_msix(struct nicvf *nic)
1234 {
1235 
1236 	if (nic->msix_enabled) {
1237 		pci_release_msi(nic->dev);
1238 		nic->msix_enabled = 0;
1239 		nic->num_vec = 0;
1240 	}
1241 }
1242 
1243 static void
1244 nicvf_release_all_interrupts(struct nicvf *nic)
1245 {
1246 	struct resource *res;
1247 	int irq;
1248 	int err __diagused;
1249 
1250 	/* Free registered interrupts */
1251 	for (irq = 0; irq < nic->num_vec; irq++) {
1252 		res = nic->msix_entries[irq].irq_res;
1253 		if (res == NULL)
1254 			continue;
1255 		/* Teardown interrupt first */
1256 		if (nic->msix_entries[irq].handle != NULL) {
1257 			err = bus_teardown_intr(nic->dev,
1258 			    nic->msix_entries[irq].irq_res,
1259 			    nic->msix_entries[irq].handle);
1260 			KASSERT(err == 0,
1261 			    ("ERROR: Unable to teardown interrupt %d", irq));
1262 			nic->msix_entries[irq].handle = NULL;
1263 		}
1264 
1265 		bus_release_resource(nic->dev, SYS_RES_IRQ,
1266 			    rman_get_rid(res), nic->msix_entries[irq].irq_res);
1267 		nic->msix_entries[irq].irq_res = NULL;
1268 	}
1269 	/* Disable MSI-X */
1270 	nicvf_disable_msix(nic);
1271 }
1272 
1273 /*
1274  * Initialize MSIX vectors and register MISC interrupt.
1275  * Send READY message to PF to check if its alive
1276  */
1277 static int
1278 nicvf_allocate_misc_interrupt(struct nicvf *nic)
1279 {
1280 	struct resource *res;
1281 	int irq, rid;
1282 	int ret = 0;
1283 
1284 	/* Return if mailbox interrupt is already registered */
1285 	if (nic->msix_enabled)
1286 		return (0);
1287 
1288 	/* Enable MSI-X */
1289 	if (nicvf_enable_msix(nic) != 0)
1290 		return (ENXIO);
1291 
1292 	irq = NICVF_INTR_ID_MISC;
1293 	rid = irq + 1;
1294 	nic->msix_entries[irq].irq_res = bus_alloc_resource_any(nic->dev,
1295 	    SYS_RES_IRQ, &rid, (RF_SHAREABLE | RF_ACTIVE));
1296 	if (nic->msix_entries[irq].irq_res == NULL) {
1297 		device_printf(nic->dev,
1298 		    "Could not allocate Mbox interrupt for VF%d\n",
1299 		    device_get_unit(nic->dev));
1300 		return (ENXIO);
1301 	}
1302 
1303 	ret = bus_setup_intr(nic->dev, nic->msix_entries[irq].irq_res,
1304 	    (INTR_MPSAFE | INTR_TYPE_MISC), NULL, nicvf_misc_intr_handler, nic,
1305 	    &nic->msix_entries[irq].handle);
1306 	if (ret != 0) {
1307 		res = nic->msix_entries[irq].irq_res;
1308 		bus_release_resource(nic->dev, SYS_RES_IRQ,
1309 			    rman_get_rid(res), res);
1310 		nic->msix_entries[irq].irq_res = NULL;
1311 		return (ret);
1312 	}
1313 
1314 	return (0);
1315 }
1316 
1317 static int
1318 nicvf_enable_misc_interrupt(struct nicvf *nic)
1319 {
1320 
1321 	/* Enable mailbox interrupt */
1322 	nicvf_enable_intr(nic, NICVF_INTR_MBOX, 0);
1323 
1324 	/* Check if VF is able to communicate with PF */
1325 	if (!nicvf_check_pf_ready(nic)) {
1326 		nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
1327 		return (ENXIO);
1328 	}
1329 
1330 	return (0);
1331 }
1332 
1333 static void
1334 nicvf_release_net_interrupts(struct nicvf *nic)
1335 {
1336 	struct resource *res;
1337 	int irq;
1338 	int err;
1339 
1340 	for_each_cq_irq(irq) {
1341 		res = nic->msix_entries[irq].irq_res;
1342 		if (res == NULL)
1343 			continue;
1344 		/* Teardown active interrupts first */
1345 		if (nic->msix_entries[irq].handle != NULL) {
1346 			err = bus_teardown_intr(nic->dev,
1347 			    nic->msix_entries[irq].irq_res,
1348 			    nic->msix_entries[irq].handle);
1349 			KASSERT(err == 0,
1350 			    ("ERROR: Unable to teardown CQ interrupt %d",
1351 			    (irq - NICVF_INTR_ID_CQ)));
1352 			if (err != 0)
1353 				continue;
1354 		}
1355 
1356 		/* Release resource */
1357 		bus_release_resource(nic->dev, SYS_RES_IRQ, rman_get_rid(res),
1358 		    res);
1359 		nic->msix_entries[irq].irq_res = NULL;
1360 	}
1361 
1362 	for_each_rbdr_irq(irq) {
1363 		res = nic->msix_entries[irq].irq_res;
1364 		if (res == NULL)
1365 			continue;
1366 		/* Teardown active interrupts first */
1367 		if (nic->msix_entries[irq].handle != NULL) {
1368 			err = bus_teardown_intr(nic->dev,
1369 			    nic->msix_entries[irq].irq_res,
1370 			    nic->msix_entries[irq].handle);
1371 			KASSERT(err == 0,
1372 			    ("ERROR: Unable to teardown RDBR interrupt %d",
1373 			    (irq - NICVF_INTR_ID_RBDR)));
1374 			if (err != 0)
1375 				continue;
1376 		}
1377 
1378 		/* Release resource */
1379 		bus_release_resource(nic->dev, SYS_RES_IRQ, rman_get_rid(res),
1380 		    res);
1381 		nic->msix_entries[irq].irq_res = NULL;
1382 	}
1383 
1384 	irq = NICVF_INTR_ID_QS_ERR;
1385 	res = nic->msix_entries[irq].irq_res;
1386 	if (res != NULL) {
1387 		/* Teardown active interrupts first */
1388 		if (nic->msix_entries[irq].handle != NULL) {
1389 			err = bus_teardown_intr(nic->dev,
1390 			    nic->msix_entries[irq].irq_res,
1391 			    nic->msix_entries[irq].handle);
1392 			KASSERT(err == 0,
1393 			    ("ERROR: Unable to teardown QS Error interrupt %d",
1394 			    irq));
1395 			if (err != 0)
1396 				return;
1397 		}
1398 
1399 		/* Release resource */
1400 		bus_release_resource(nic->dev, SYS_RES_IRQ, rman_get_rid(res),
1401 		    res);
1402 		nic->msix_entries[irq].irq_res = NULL;
1403 	}
1404 }
1405 
1406 static int
1407 nicvf_allocate_net_interrupts(struct nicvf *nic)
1408 {
1409 	u_int cpuid;
1410 	int irq, rid;
1411 	int qidx;
1412 	int ret = 0;
1413 
1414 	/* MSI-X must be configured by now */
1415 	if (!nic->msix_enabled) {
1416 		device_printf(nic->dev, "Cannot alloacte queue interrups. "
1417 		    "MSI-X interrupts disabled.\n");
1418 		return (ENXIO);
1419 	}
1420 
1421 	/* Register CQ interrupts */
1422 	for_each_cq_irq(irq) {
1423 		if (irq >= (NICVF_INTR_ID_CQ + nic->qs->cq_cnt))
1424 			break;
1425 
1426 		qidx = irq - NICVF_INTR_ID_CQ;
1427 		rid = irq + 1;
1428 		nic->msix_entries[irq].irq_res = bus_alloc_resource_any(nic->dev,
1429 		    SYS_RES_IRQ, &rid, (RF_SHAREABLE | RF_ACTIVE));
1430 		if (nic->msix_entries[irq].irq_res == NULL) {
1431 			device_printf(nic->dev,
1432 			    "Could not allocate CQ interrupt %d for VF%d\n",
1433 			    (irq - NICVF_INTR_ID_CQ), device_get_unit(nic->dev));
1434 			ret = ENXIO;
1435 			goto error;
1436 		}
1437 		ret = bus_setup_intr(nic->dev, nic->msix_entries[irq].irq_res,
1438 		    (INTR_MPSAFE | INTR_TYPE_NET), nicvf_intr_handler,
1439 		    NULL, &nic->qs->cq[qidx], &nic->msix_entries[irq].handle);
1440 		if (ret != 0) {
1441 			device_printf(nic->dev,
1442 			    "Could not setup CQ interrupt %d for VF%d\n",
1443 			    (irq - NICVF_INTR_ID_CQ), device_get_unit(nic->dev));
1444 			goto error;
1445 		}
1446 		cpuid = (device_get_unit(nic->dev) * CMP_QUEUE_CNT) + qidx;
1447 		cpuid %= mp_ncpus;
1448 		/*
1449 		 * Save CPU ID for later use when system-wide RSS is enabled.
1450 		 * It will be used to pit the CQ task to the same CPU that got
1451 		 * interrupted.
1452 		 */
1453 		nic->qs->cq[qidx].cmp_cpuid = cpuid;
1454 		if (bootverbose) {
1455 			device_printf(nic->dev, "bind CQ%d IRQ to CPU%d\n",
1456 			    qidx, cpuid);
1457 		}
1458 		/* Bind interrupts to the given CPU */
1459 		bus_bind_intr(nic->dev, nic->msix_entries[irq].irq_res, cpuid);
1460 	}
1461 
1462 	/* Register RBDR interrupt */
1463 	for_each_rbdr_irq(irq) {
1464 		if (irq >= (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt))
1465 			break;
1466 
1467 		rid = irq + 1;
1468 		nic->msix_entries[irq].irq_res = bus_alloc_resource_any(nic->dev,
1469 		    SYS_RES_IRQ, &rid, (RF_SHAREABLE | RF_ACTIVE));
1470 		if (nic->msix_entries[irq].irq_res == NULL) {
1471 			device_printf(nic->dev,
1472 			    "Could not allocate RBDR interrupt %d for VF%d\n",
1473 			    (irq - NICVF_INTR_ID_RBDR),
1474 			    device_get_unit(nic->dev));
1475 			ret = ENXIO;
1476 			goto error;
1477 		}
1478 		ret = bus_setup_intr(nic->dev, nic->msix_entries[irq].irq_res,
1479 		    (INTR_MPSAFE | INTR_TYPE_NET), NULL,
1480 		    nicvf_rbdr_intr_handler, nic,
1481 		    &nic->msix_entries[irq].handle);
1482 		if (ret != 0) {
1483 			device_printf(nic->dev,
1484 			    "Could not setup RBDR interrupt %d for VF%d\n",
1485 			    (irq - NICVF_INTR_ID_RBDR),
1486 			    device_get_unit(nic->dev));
1487 			goto error;
1488 		}
1489 	}
1490 
1491 	/* Register QS error interrupt */
1492 	irq = NICVF_INTR_ID_QS_ERR;
1493 	rid = irq + 1;
1494 	nic->msix_entries[irq].irq_res = bus_alloc_resource_any(nic->dev,
1495 	    SYS_RES_IRQ, &rid, (RF_SHAREABLE | RF_ACTIVE));
1496 	if (nic->msix_entries[irq].irq_res == NULL) {
1497 		device_printf(nic->dev,
1498 		    "Could not allocate QS Error interrupt for VF%d\n",
1499 		    device_get_unit(nic->dev));
1500 		ret = ENXIO;
1501 		goto error;
1502 	}
1503 	ret = bus_setup_intr(nic->dev, nic->msix_entries[irq].irq_res,
1504 	    (INTR_MPSAFE | INTR_TYPE_NET), NULL, nicvf_qs_err_intr_handler,
1505 	    nic, &nic->msix_entries[irq].handle);
1506 	if (ret != 0) {
1507 		device_printf(nic->dev,
1508 		    "Could not setup QS Error interrupt for VF%d\n",
1509 		    device_get_unit(nic->dev));
1510 		goto error;
1511 	}
1512 
1513 	return (0);
1514 error:
1515 	nicvf_release_net_interrupts(nic);
1516 	return (ret);
1517 }
1518 
1519 static int
1520 nicvf_stop_locked(struct nicvf *nic)
1521 {
1522 	if_t ifp;
1523 	int qidx;
1524 	struct queue_set *qs = nic->qs;
1525 	union nic_mbx mbx = {};
1526 
1527 	NICVF_CORE_LOCK_ASSERT(nic);
1528 	/* Stop callout. Can block here since holding SX lock */
1529 	callout_drain(&nic->stats_callout);
1530 
1531 	ifp = nic->ifp;
1532 
1533 	mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
1534 	nicvf_send_msg_to_pf(nic, &mbx);
1535 
1536 	/* Disable RBDR & QS error interrupts */
1537 	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
1538 		nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
1539 		nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
1540 	}
1541 	nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
1542 	nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);
1543 
1544 	/* Deactivate network interface */
1545 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
1546 
1547 	/* Free resources */
1548 	nicvf_config_data_transfer(nic, FALSE);
1549 
1550 	/* Disable HW Qset */
1551 	nicvf_qset_config(nic, FALSE);
1552 
1553 	/* disable mailbox interrupt */
1554 	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
1555 
1556 	return (0);
1557 }
1558 
1559 static void
1560 nicvf_update_stats(struct nicvf *nic)
1561 {
1562 	int qidx;
1563 	struct nicvf_hw_stats *stats = &nic->hw_stats;
1564 	struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
1565 	struct queue_set *qs = nic->qs;
1566 
1567 #define	GET_RX_STATS(reg) \
1568     nicvf_reg_read(nic, NIC_VNIC_RX_STAT_0_13 | ((reg) << 3))
1569 #define GET_TX_STATS(reg) \
1570     nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | ((reg) << 3))
1571 
1572 	stats->rx_bytes = GET_RX_STATS(RX_OCTS);
1573 	stats->rx_ucast_frames = GET_RX_STATS(RX_UCAST);
1574 	stats->rx_bcast_frames = GET_RX_STATS(RX_BCAST);
1575 	stats->rx_mcast_frames = GET_RX_STATS(RX_MCAST);
1576 	stats->rx_fcs_errors = GET_RX_STATS(RX_FCS);
1577 	stats->rx_l2_errors = GET_RX_STATS(RX_L2ERR);
1578 	stats->rx_drop_red = GET_RX_STATS(RX_RED);
1579 	stats->rx_drop_red_bytes = GET_RX_STATS(RX_RED_OCTS);
1580 	stats->rx_drop_overrun = GET_RX_STATS(RX_ORUN);
1581 	stats->rx_drop_overrun_bytes = GET_RX_STATS(RX_ORUN_OCTS);
1582 	stats->rx_drop_bcast = GET_RX_STATS(RX_DRP_BCAST);
1583 	stats->rx_drop_mcast = GET_RX_STATS(RX_DRP_MCAST);
1584 	stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST);
1585 	stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST);
1586 
1587 	stats->tx_bytes_ok = GET_TX_STATS(TX_OCTS);
1588 	stats->tx_ucast_frames_ok = GET_TX_STATS(TX_UCAST);
1589 	stats->tx_bcast_frames_ok = GET_TX_STATS(TX_BCAST);
1590 	stats->tx_mcast_frames_ok = GET_TX_STATS(TX_MCAST);
1591 	stats->tx_drops = GET_TX_STATS(TX_DROP);
1592 
1593 	drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok +
1594 	    stats->tx_bcast_frames_ok + stats->tx_mcast_frames_ok;
1595 	drv_stats->rx_drops = stats->rx_drop_red + stats->rx_drop_overrun;
1596 	drv_stats->tx_drops = stats->tx_drops;
1597 
1598 	/* Update RQ and SQ stats */
1599 	for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1600 		nicvf_update_rq_stats(nic, qidx);
1601 	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1602 		nicvf_update_sq_stats(nic, qidx);
1603 }
1604 
1605 static void
1606 nicvf_tick_stats(void *arg)
1607 {
1608 	struct nicvf *nic;
1609 
1610 	nic = (struct nicvf *)arg;
1611 
1612 	/* Read the statistics */
1613 	nicvf_update_stats(nic);
1614 
1615 	callout_reset(&nic->stats_callout, hz, nicvf_tick_stats, nic);
1616 }
1617