xref: /freebsd/sys/dev/vnic/nicvf_main.c (revision 924226fba12cc9a228c73b956e1b7fa24c60b055)
1 /*
2  * Copyright (C) 2015 Cavium Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  *
26  * $FreeBSD$
27  *
28  */
29 #include <sys/cdefs.h>
30 __FBSDID("$FreeBSD$");
31 
32 #include "opt_inet.h"
33 #include "opt_inet6.h"
34 
35 #include <sys/param.h>
36 #include <sys/systm.h>
37 #include <sys/bitset.h>
38 #include <sys/bitstring.h>
39 #include <sys/bus.h>
40 #include <sys/endian.h>
41 #include <sys/kernel.h>
42 #include <sys/malloc.h>
43 #include <sys/mbuf.h>
44 #include <sys/module.h>
45 #include <sys/rman.h>
46 #include <sys/pciio.h>
47 #include <sys/pcpu.h>
48 #include <sys/proc.h>
49 #include <sys/socket.h>
50 #include <sys/sockio.h>
51 #include <sys/stdatomic.h>
52 #include <sys/cpuset.h>
53 #include <sys/lock.h>
54 #include <sys/mutex.h>
55 #include <sys/smp.h>
56 #include <sys/taskqueue.h>
57 
58 #include <net/bpf.h>
59 #include <net/ethernet.h>
60 #include <net/if.h>
61 #include <net/if_var.h>
62 #include <net/if_arp.h>
63 #include <net/if_dl.h>
64 #include <net/if_media.h>
65 #include <net/if_types.h>
66 #include <net/if_vlan_var.h>
67 
68 #include <netinet/in.h>
69 #include <netinet/ip.h>
70 #include <netinet/if_ether.h>
71 #include <netinet/tcp_lro.h>
72 
73 #include <dev/pci/pcireg.h>
74 #include <dev/pci/pcivar.h>
75 
76 #include <sys/dnv.h>
77 #include <sys/nv.h>
78 #include <sys/iov_schema.h>
79 
80 #include <machine/bus.h>
81 
82 #include "thunder_bgx.h"
83 #include "nic_reg.h"
84 #include "nic.h"
85 #include "nicvf_queues.h"
86 
/* Device description string set by nicvf_probe(). */
87 #define	VNIC_VF_DEVSTR		"Cavium Thunder NIC Virtual Function Driver"
88 
/* Resource ID of the register BAR that nicvf_attach() maps. */
89 #define	VNIC_VF_REG_RID		PCIR_BAR(PCI_CFG_REG_BAR_NUM)
90 
91 /* Lock for core interface settings */
/*
 * The core lock is an sx(9) lock stored in the softc (core_sx) and is
 * always taken exclusively.  It is named after the device unit.
 */
92 #define	NICVF_CORE_LOCK_INIT(nic)				\
93     sx_init(&(nic)->core_sx, device_get_nameunit((nic)->dev))
94 
95 #define	NICVF_CORE_LOCK_DESTROY(nic)				\
96     sx_destroy(&(nic)->core_sx)
97 
98 #define	NICVF_CORE_LOCK(nic)		sx_xlock(&(nic)->core_sx)
99 #define	NICVF_CORE_UNLOCK(nic)		sx_xunlock(&(nic)->core_sx)
100 
/* Asserts that the caller holds the core lock exclusively. */
101 #define	NICVF_CORE_LOCK_ASSERT(nic)	sx_assert(&(nic)->core_sx, SA_XLOCKED)
102 
/*
 * Link speed values compared against nic->speed in nicvf_media_status().
 * The mailbox handler multiplies nic->speed by 1000000 to obtain the
 * baudrate, so these are presumably in Mbps.
 */
103 #define	SPEED_10	10
104 #define	SPEED_100	100
105 #define	SPEED_1000	1000
106 #define	SPEED_10000	10000
107 #define	SPEED_40000	40000
108 
/* malloc(9) type for the VF driver's dynamic allocations. */
109 MALLOC_DEFINE(M_NICVF, "nicvf", "ThunderX VNIC VF dynamic memory");
110 
/* Newbus device interface entry points implemented below. */
111 static int nicvf_probe(device_t);
112 static int nicvf_attach(device_t);
113 static int nicvf_detach(device_t);
114 
/* Method table binding the device interface to this driver's handlers. */
115 static device_method_t nicvf_methods[] = {
116 	/* Device interface */
117 	DEVMETHOD(device_probe,		nicvf_probe),
118 	DEVMETHOD(device_attach,	nicvf_attach),
119 	DEVMETHOD(device_detach,	nicvf_detach),
120 
121 	DEVMETHOD_END,
122 };
123 
/*
 * Driver description: devices are named "vnic<unit>" and the softc is a
 * struct nicvf.
 */
124 static driver_t nicvf_driver = {
125 	"vnic",
126 	nicvf_methods,
127 	sizeof(struct nicvf),
128 };
129 
/*
 * Register the driver on the pci bus and declare the modules it depends on
 * (pci, ether and the vnicpf physical function driver).
 */
130 DRIVER_MODULE(vnicvf, pci, nicvf_driver, 0, 0);
131 MODULE_VERSION(vnicvf, 1);
132 MODULE_DEPEND(vnicvf, pci, 1, 1, 1);
133 MODULE_DEPEND(vnicvf, ether, 1, 1, 1);
134 MODULE_DEPEND(vnicvf, vnicpf, 1, 1, 1);
135 
/*
 * Forward declarations of file-local helpers, grouped by purpose:
 * interrupt and hardware setup, ifnet/ifmedia setup, ifnet callbacks,
 * stop, media callbacks and the statistics callout.
 */
136 static int nicvf_allocate_misc_interrupt(struct nicvf *);
137 static int nicvf_enable_misc_interrupt(struct nicvf *);
138 static int nicvf_allocate_net_interrupts(struct nicvf *);
139 static void nicvf_release_all_interrupts(struct nicvf *);
140 static int nicvf_update_hw_max_frs(struct nicvf *, int);
141 static int nicvf_hw_set_mac_addr(struct nicvf *, uint8_t *);
142 static void nicvf_config_cpi(struct nicvf *);
143 static int nicvf_rss_init(struct nicvf *);
144 static int nicvf_init_resources(struct nicvf *);
145 
146 static int nicvf_setup_ifnet(struct nicvf *);
147 static int nicvf_setup_ifmedia(struct nicvf *);
148 static void nicvf_hw_addr_random(uint8_t *);
149 
/* Callbacks installed on the ifnet by nicvf_setup_ifnet(). */
150 static int nicvf_if_ioctl(struct ifnet *, u_long, caddr_t);
151 static void nicvf_if_init(void *);
152 static void nicvf_if_init_locked(struct nicvf *);
153 static int nicvf_if_transmit(struct ifnet *, struct mbuf *);
154 static void nicvf_if_qflush(struct ifnet *);
155 static uint64_t nicvf_if_getcounter(struct ifnet *, ift_counter);
156 
157 static int nicvf_stop_locked(struct nicvf *);
158 
/* Callbacks installed on the ifmedia by nicvf_setup_ifmedia(). */
159 static void nicvf_media_status(struct ifnet *, struct ifmediareq *);
160 static int nicvf_media_change(struct ifnet *);
161 
/* Callout handler scheduled by nicvf_if_init_locked(). */
162 static void nicvf_tick_stats(void *);
163 
164 static int
165 nicvf_probe(device_t dev)
166 {
167 	uint16_t vendor_id;
168 	uint16_t device_id;
169 
170 	vendor_id = pci_get_vendor(dev);
171 	device_id = pci_get_device(dev);
172 
173 	if (vendor_id != PCI_VENDOR_ID_CAVIUM)
174 		return (ENXIO);
175 
176 	if (device_id == PCI_DEVICE_ID_THUNDER_NIC_VF ||
177 	    device_id == PCI_DEVICE_ID_THUNDER_PASS1_NIC_VF) {
178 		device_set_desc(dev, VNIC_VF_DEVSTR);
179 		return (BUS_PROBE_DEFAULT);
180 	}
181 
182 	return (ENXIO);
183 }
184 
185 static int
186 nicvf_attach(device_t dev)
187 {
188 	struct nicvf *nic;
189 	int rid, qcount;
190 	int err = 0;
191 	uint8_t hwaddr[ETHER_ADDR_LEN];
192 	uint8_t zeromac[] = {[0 ... (ETHER_ADDR_LEN - 1)] = 0};
193 
194 	nic = device_get_softc(dev);
195 	nic->dev = dev;
196 	nic->pnicvf = nic;
197 
198 	NICVF_CORE_LOCK_INIT(nic);
199 	/* Enable HW TSO on Pass2 */
200 	if (!pass1_silicon(dev))
201 		nic->hw_tso = TRUE;
202 
203 	rid = VNIC_VF_REG_RID;
204 	nic->reg_base = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
205 	    RF_ACTIVE);
206 	if (nic->reg_base == NULL) {
207 		device_printf(dev, "Could not allocate registers memory\n");
208 		return (ENXIO);
209 	}
210 
211 	qcount = MAX_CMP_QUEUES_PER_QS;
212 	nic->max_queues = qcount;
213 
214 	err = nicvf_set_qset_resources(nic);
215 	if (err != 0)
216 		goto err_free_res;
217 
218 	/* Check if PF is alive and get MAC address for this VF */
219 	err = nicvf_allocate_misc_interrupt(nic);
220 	if (err != 0)
221 		goto err_free_res;
222 
223 	NICVF_CORE_LOCK(nic);
224 	err = nicvf_enable_misc_interrupt(nic);
225 	NICVF_CORE_UNLOCK(nic);
226 	if (err != 0)
227 		goto err_release_intr;
228 
229 	err = nicvf_allocate_net_interrupts(nic);
230 	if (err != 0) {
231 		device_printf(dev,
232 		    "Could not allocate network interface interrupts\n");
233 		goto err_free_ifnet;
234 	}
235 
236 	/* If no MAC address was obtained we generate random one */
237 	if (memcmp(nic->hwaddr, zeromac, ETHER_ADDR_LEN) == 0) {
238 		nicvf_hw_addr_random(hwaddr);
239 		memcpy(nic->hwaddr, hwaddr, ETHER_ADDR_LEN);
240 		NICVF_CORE_LOCK(nic);
241 		nicvf_hw_set_mac_addr(nic, hwaddr);
242 		NICVF_CORE_UNLOCK(nic);
243 	}
244 
245 	/* Configure CPI alorithm */
246 	nic->cpi_alg = CPI_ALG_NONE;
247 	NICVF_CORE_LOCK(nic);
248 	nicvf_config_cpi(nic);
249 	/* Configure receive side scaling */
250 	if (nic->qs->rq_cnt > 1)
251 		nicvf_rss_init(nic);
252 	NICVF_CORE_UNLOCK(nic);
253 
254 	err = nicvf_setup_ifnet(nic);
255 	if (err != 0) {
256 		device_printf(dev, "Could not set-up ifnet\n");
257 		goto err_release_intr;
258 	}
259 
260 	err = nicvf_setup_ifmedia(nic);
261 	if (err != 0) {
262 		device_printf(dev, "Could not set-up ifmedia\n");
263 		goto err_free_ifnet;
264 	}
265 
266 	mtx_init(&nic->stats_mtx, "VNIC stats", NULL, MTX_DEF);
267 	callout_init_mtx(&nic->stats_callout, &nic->stats_mtx, 0);
268 
269 	ether_ifattach(nic->ifp, nic->hwaddr);
270 
271 	return (0);
272 
273 err_free_ifnet:
274 	if_free(nic->ifp);
275 err_release_intr:
276 	nicvf_release_all_interrupts(nic);
277 err_free_res:
278 	bus_release_resource(dev, SYS_RES_MEMORY, rman_get_rid(nic->reg_base),
279 	    nic->reg_base);
280 
281 	return (err);
282 }
283 
284 static int
285 nicvf_detach(device_t dev)
286 {
287 	struct nicvf *nic;
288 
289 	nic = device_get_softc(dev);
290 
291 	NICVF_CORE_LOCK(nic);
292 	/* Shut down the port and release ring resources */
293 	nicvf_stop_locked(nic);
294 	/* Release stats lock */
295 	mtx_destroy(&nic->stats_mtx);
296 	/* Release interrupts */
297 	nicvf_release_all_interrupts(nic);
298 	/* Release memory resource */
299 	if (nic->reg_base != NULL) {
300 		bus_release_resource(dev, SYS_RES_MEMORY,
301 		    rman_get_rid(nic->reg_base), nic->reg_base);
302 	}
303 
304 	/* Remove all ifmedia configurations */
305 	ifmedia_removeall(&nic->if_media);
306 	/* Free this ifnet */
307 	if_free(nic->ifp);
308 	NICVF_CORE_UNLOCK(nic);
309 	/* Finally destroy the lock */
310 	NICVF_CORE_LOCK_DESTROY(nic);
311 
312 	return (0);
313 }
314 
315 static void
316 nicvf_hw_addr_random(uint8_t *hwaddr)
317 {
318 	uint32_t rnd;
319 	uint8_t addr[ETHER_ADDR_LEN];
320 
321 	/*
322 	 * Create randomized MAC address.
323 	 * Set 'bsd' + random 24 low-order bits.
324 	 */
325 	rnd = arc4random() & 0x00ffffff;
326 	addr[0] = 'b';
327 	addr[1] = 's';
328 	addr[2] = 'd';
329 	addr[3] = rnd >> 16;
330 	addr[4] = rnd >> 8;
331 	addr[5] = rnd >> 0;
332 
333 	memcpy(hwaddr, addr, ETHER_ADDR_LEN);
334 }
335 
336 static int
337 nicvf_setup_ifnet(struct nicvf *nic)
338 {
339 	struct ifnet *ifp;
340 
341 	ifp = if_alloc(IFT_ETHER);
342 	if (ifp == NULL) {
343 		device_printf(nic->dev, "Could not allocate ifnet structure\n");
344 		return (ENOMEM);
345 	}
346 
347 	nic->ifp = ifp;
348 
349 	if_setsoftc(ifp, nic);
350 	if_initname(ifp, device_get_name(nic->dev), device_get_unit(nic->dev));
351 	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
352 
353 	if_settransmitfn(ifp, nicvf_if_transmit);
354 	if_setqflushfn(ifp, nicvf_if_qflush);
355 	if_setioctlfn(ifp, nicvf_if_ioctl);
356 	if_setinitfn(ifp, nicvf_if_init);
357 	if_setgetcounterfn(ifp, nicvf_if_getcounter);
358 
359 	if_setmtu(ifp, ETHERMTU);
360 
361 	/* Reset caps */
362 	if_setcapabilities(ifp, 0);
363 
364 	/* Set the default values */
365 	if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU | IFCAP_JUMBO_MTU, 0);
366 	if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
367 	if (nic->hw_tso) {
368 		/* TSO */
369 		if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
370 		/* TSO parameters */
371 		if_sethwtsomax(ifp, NICVF_TSO_MAXSIZE);
372 		if_sethwtsomaxsegcount(ifp, NICVF_TSO_NSEGS);
373 		if_sethwtsomaxsegsize(ifp, MCLBYTES);
374 	}
375 	/* IP/TCP/UDP HW checksums */
376 	if_setcapabilitiesbit(ifp, IFCAP_HWCSUM, 0);
377 	if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0);
378 	/*
379 	 * HW offload enable
380 	 */
381 	if_clearhwassist(ifp);
382 	if_sethwassistbits(ifp, (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP), 0);
383 	if (nic->hw_tso)
384 		if_sethwassistbits(ifp, (CSUM_TSO), 0);
385 	if_setcapenable(ifp, if_getcapabilities(ifp));
386 
387 	return (0);
388 }
389 
390 static int
391 nicvf_setup_ifmedia(struct nicvf *nic)
392 {
393 
394 	ifmedia_init(&nic->if_media, IFM_IMASK, nicvf_media_change,
395 	    nicvf_media_status);
396 
397 	/*
398 	 * Advertise availability of all possible connection types,
399 	 * even though not all are possible at the same time.
400 	 */
401 
402 	ifmedia_add(&nic->if_media, (IFM_ETHER | IFM_10_T | IFM_FDX),
403 	    0, NULL);
404 	ifmedia_add(&nic->if_media, (IFM_ETHER | IFM_100_TX | IFM_FDX),
405 	    0, NULL);
406 	ifmedia_add(&nic->if_media, (IFM_ETHER | IFM_1000_T | IFM_FDX),
407 	    0, NULL);
408 	ifmedia_add(&nic->if_media, (IFM_ETHER | IFM_10G_SR | IFM_FDX),
409 	    0, NULL);
410 	ifmedia_add(&nic->if_media, (IFM_ETHER | IFM_40G_CR4 | IFM_FDX),
411 	    0, NULL);
412 	ifmedia_add(&nic->if_media, (IFM_ETHER | IFM_AUTO | IFM_FDX),
413 	    0, NULL);
414 
415 	ifmedia_set(&nic->if_media, (IFM_ETHER | IFM_AUTO | IFM_FDX));
416 
417 	return (0);
418 }
419 
420 static int
421 nicvf_if_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
422 {
423 	struct nicvf *nic;
424 	struct rcv_queue *rq;
425 	struct ifreq *ifr;
426 	uint32_t flags;
427 	int mask, err;
428 	int rq_idx;
429 #if defined(INET) || defined(INET6)
430 	struct ifaddr *ifa;
431 	boolean_t avoid_reset = FALSE;
432 #endif
433 
434 	nic = if_getsoftc(ifp);
435 	ifr = (struct ifreq *)data;
436 #if defined(INET) || defined(INET6)
437 	ifa = (struct ifaddr *)data;
438 #endif
439 	err = 0;
440 	switch (cmd) {
441 	case SIOCSIFADDR:
442 #ifdef INET
443 		if (ifa->ifa_addr->sa_family == AF_INET)
444 			avoid_reset = TRUE;
445 #endif
446 #ifdef INET6
447 		if (ifa->ifa_addr->sa_family == AF_INET6)
448 			avoid_reset = TRUE;
449 #endif
450 
451 #if defined(INET) || defined(INET6)
452 		/* Avoid reinitialization unless it's necessary */
453 		if (avoid_reset) {
454 			if_setflagbits(ifp, IFF_UP, 0);
455 			if (!(if_getdrvflags(ifp) & IFF_DRV_RUNNING))
456 				nicvf_if_init(nic);
457 #ifdef INET
458 			if (!(if_getflags(ifp) & IFF_NOARP))
459 				arp_ifinit(ifp, ifa);
460 #endif
461 
462 			return (0);
463 		}
464 #endif
465 		err = ether_ioctl(ifp, cmd, data);
466 		break;
467 	case SIOCSIFMTU:
468 		if (ifr->ifr_mtu < NIC_HW_MIN_FRS ||
469 		    ifr->ifr_mtu > NIC_HW_MAX_FRS) {
470 			err = EINVAL;
471 		} else {
472 			NICVF_CORE_LOCK(nic);
473 			err = nicvf_update_hw_max_frs(nic, ifr->ifr_mtu);
474 			if (err == 0)
475 				if_setmtu(ifp, ifr->ifr_mtu);
476 			NICVF_CORE_UNLOCK(nic);
477 		}
478 		break;
479 	case SIOCSIFFLAGS:
480 		NICVF_CORE_LOCK(nic);
481 		flags = if_getflags(ifp);
482 		if (flags & IFF_UP) {
483 			if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
484 				if ((flags ^ nic->if_flags) & IFF_PROMISC) {
485 					/* Change promiscous mode */
486 #if 0 /* XXX */
487 					nicvf_set_promiscous(nic);
488 #endif
489 				}
490 
491 				if ((flags ^ nic->if_flags) & IFF_ALLMULTI) {
492 					/* Change multicasting settings */
493 #if 0 /* XXX */
494 					nicvf_set_multicast(nic);
495 #endif
496 				}
497 			} else {
498 				nicvf_if_init_locked(nic);
499 			}
500 		} else if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
501 			nicvf_stop_locked(nic);
502 
503 		nic->if_flags = flags;
504 		NICVF_CORE_UNLOCK(nic);
505 		break;
506 
507 	case SIOCADDMULTI:
508 	case SIOCDELMULTI:
509 		if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
510 #if 0
511 			NICVF_CORE_LOCK(nic);
512 			/* ARM64TODO */
513 			nicvf_set_multicast(nic);
514 			NICVF_CORE_UNLOCK(nic);
515 #endif
516 		}
517 		break;
518 
519 	case SIOCSIFMEDIA:
520 	case SIOCGIFMEDIA:
521 		err = ifmedia_ioctl(ifp, ifr, &nic->if_media, cmd);
522 		break;
523 
524 	case SIOCSIFCAP:
525 		mask = if_getcapenable(ifp) ^ ifr->ifr_reqcap;
526 		if (mask & IFCAP_VLAN_MTU) {
527 			/* No work to do except acknowledge the change took. */
528 			if_togglecapenable(ifp, IFCAP_VLAN_MTU);
529 		}
530 		if (mask & IFCAP_TXCSUM)
531 			if_togglecapenable(ifp, IFCAP_TXCSUM);
532 		if (mask & IFCAP_RXCSUM)
533 			if_togglecapenable(ifp, IFCAP_RXCSUM);
534 		if ((mask & IFCAP_TSO4) && nic->hw_tso)
535 			if_togglecapenable(ifp, IFCAP_TSO4);
536 		if (mask & IFCAP_LRO) {
537 			/*
538 			 * Lock the driver for a moment to avoid
539 			 * mismatch in per-queue settings.
540 			 */
541 			NICVF_CORE_LOCK(nic);
542 			if_togglecapenable(ifp, IFCAP_LRO);
543 			if ((if_getdrvflags(nic->ifp) & IFF_DRV_RUNNING) != 0) {
544 				/*
545 				 * Now disable LRO for subsequent packets.
546 				 * Atomicity of this change is not necessary
547 				 * as we don't need precise toggle of this
548 				 * feature for all threads processing the
549 				 * completion queue.
550 				 */
551 				for (rq_idx = 0;
552 				    rq_idx < nic->qs->rq_cnt; rq_idx++) {
553 					rq = &nic->qs->rq[rq_idx];
554 					rq->lro_enabled = !rq->lro_enabled;
555 				}
556 			}
557 			NICVF_CORE_UNLOCK(nic);
558 		}
559 
560 		break;
561 
562 	default:
563 		err = ether_ioctl(ifp, cmd, data);
564 		break;
565 	}
566 
567 	return (err);
568 }
569 
570 static void
571 nicvf_if_init_locked(struct nicvf *nic)
572 {
573 	struct queue_set *qs = nic->qs;
574 	struct ifnet *ifp;
575 	int qidx;
576 	int err;
577 	caddr_t if_addr;
578 
579 	NICVF_CORE_LOCK_ASSERT(nic);
580 	ifp = nic->ifp;
581 
582 	if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0)
583 		nicvf_stop_locked(nic);
584 
585 	err = nicvf_enable_misc_interrupt(nic);
586 	if (err != 0) {
587 		if_printf(ifp, "Could not reenable Mbox interrupt\n");
588 		return;
589 	}
590 
591 	/* Get the latest MAC address */
592 	if_addr = if_getlladdr(ifp);
593 	/* Update MAC address if changed */
594 	if (memcmp(nic->hwaddr, if_addr, ETHER_ADDR_LEN) != 0) {
595 		memcpy(nic->hwaddr, if_addr, ETHER_ADDR_LEN);
596 		nicvf_hw_set_mac_addr(nic, if_addr);
597 	}
598 
599 	/* Initialize the queues */
600 	err = nicvf_init_resources(nic);
601 	if (err != 0)
602 		goto error;
603 
604 	/* Make sure queue initialization is written */
605 	wmb();
606 
607 	nicvf_reg_write(nic, NIC_VF_INT, ~0UL);
608 	/* Enable Qset err interrupt */
609 	nicvf_enable_intr(nic, NICVF_INTR_QS_ERR, 0);
610 
611 	/* Enable completion queue interrupt */
612 	for (qidx = 0; qidx < qs->cq_cnt; qidx++)
613 		nicvf_enable_intr(nic, NICVF_INTR_CQ, qidx);
614 
615 	/* Enable RBDR threshold interrupt */
616 	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++)
617 		nicvf_enable_intr(nic, NICVF_INTR_RBDR, qidx);
618 
619 	nic->drv_stats.txq_stop = 0;
620 	nic->drv_stats.txq_wake = 0;
621 
622 	/* Activate network interface */
623 	if_setdrvflagbits(ifp, IFF_DRV_RUNNING, IFF_DRV_OACTIVE);
624 
625 	/* Schedule callout to update stats */
626 	callout_reset(&nic->stats_callout, hz, nicvf_tick_stats, nic);
627 
628 	return;
629 
630 error:
631 	/* Something went very wrong. Disable this ifnet for good */
632 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
633 }
634 
/*
 * ifnet init callback: takes the core lock and runs
 * nicvf_if_init_locked().  if_softc is the struct nicvf of the interface.
 */
static void
nicvf_if_init(void *if_softc)
{
	struct nicvf *nic;

	nic = if_softc;

	NICVF_CORE_LOCK(nic);
	nicvf_if_init_locked(nic);
	NICVF_CORE_UNLOCK(nic);
}
644 
645 static int
646 nicvf_if_transmit(struct ifnet *ifp, struct mbuf *mbuf)
647 {
648 	struct nicvf *nic = if_getsoftc(ifp);
649 	struct queue_set *qs = nic->qs;
650 	struct snd_queue *sq;
651 	struct mbuf *mtmp;
652 	int qidx;
653 	int err = 0;
654 
655 	if (__predict_false(qs == NULL)) {
656 		panic("%s: missing queue set for %s", __func__,
657 		    device_get_nameunit(nic->dev));
658 	}
659 
660 	/* Select queue */
661 	if (M_HASHTYPE_GET(mbuf) != M_HASHTYPE_NONE)
662 		qidx = mbuf->m_pkthdr.flowid % qs->sq_cnt;
663 	else
664 		qidx = curcpu % qs->sq_cnt;
665 
666 	sq = &qs->sq[qidx];
667 
668 	if (mbuf->m_next != NULL &&
669 	    (mbuf->m_pkthdr.csum_flags &
670 	    (CSUM_IP | CSUM_TCP | CSUM_UDP | CSUM_SCTP)) != 0) {
671 		if (M_WRITABLE(mbuf) == 0) {
672 			mtmp = m_dup(mbuf, M_NOWAIT);
673 			m_freem(mbuf);
674 			if (mtmp == NULL)
675 				return (ENOBUFS);
676 			mbuf = mtmp;
677 		}
678 	}
679 
680 	err = drbr_enqueue(ifp, sq->br, mbuf);
681 	if (((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
682 	    IFF_DRV_RUNNING) || !nic->link_up || (err != 0)) {
683 		/*
684 		 * Try to enqueue packet to the ring buffer.
685 		 * If the driver is not active, link down or enqueue operation
686 		 * failed, return with the appropriate error code.
687 		 */
688 		return (err);
689 	}
690 
691 	if (NICVF_TX_TRYLOCK(sq) != 0) {
692 		err = nicvf_xmit_locked(sq);
693 		NICVF_TX_UNLOCK(sq);
694 		return (err);
695 	} else
696 		taskqueue_enqueue(sq->snd_taskq, &sq->snd_task);
697 
698 	return (0);
699 }
700 
701 static void
702 nicvf_if_qflush(struct ifnet *ifp)
703 {
704 	struct nicvf *nic;
705 	struct queue_set *qs;
706 	struct snd_queue *sq;
707 	struct mbuf *mbuf;
708 	size_t idx;
709 
710 	nic = if_getsoftc(ifp);
711 	qs = nic->qs;
712 
713 	for (idx = 0; idx < qs->sq_cnt; idx++) {
714 		sq = &qs->sq[idx];
715 		NICVF_TX_LOCK(sq);
716 		while ((mbuf = buf_ring_dequeue_sc(sq->br)) != NULL)
717 			m_freem(mbuf);
718 		NICVF_TX_UNLOCK(sq);
719 	}
720 	if_qflush(ifp);
721 }
722 
723 static uint64_t
724 nicvf_if_getcounter(struct ifnet *ifp, ift_counter cnt)
725 {
726 	struct nicvf *nic;
727 	struct nicvf_hw_stats *hw_stats;
728 	struct nicvf_drv_stats *drv_stats;
729 
730 	nic = if_getsoftc(ifp);
731 	hw_stats = &nic->hw_stats;
732 	drv_stats = &nic->drv_stats;
733 
734 	switch (cnt) {
735 	case IFCOUNTER_IPACKETS:
736 		return (drv_stats->rx_frames_ok);
737 	case IFCOUNTER_OPACKETS:
738 		return (drv_stats->tx_frames_ok);
739 	case IFCOUNTER_IBYTES:
740 		return (hw_stats->rx_bytes);
741 	case IFCOUNTER_OBYTES:
742 		return (hw_stats->tx_bytes_ok);
743 	case IFCOUNTER_IMCASTS:
744 		return (hw_stats->rx_mcast_frames);
745 	case IFCOUNTER_COLLISIONS:
746 		return (0);
747 	case IFCOUNTER_IQDROPS:
748 		return (drv_stats->rx_drops);
749 	case IFCOUNTER_OQDROPS:
750 		return (drv_stats->tx_drops);
751 	default:
752 		return (if_get_counter_default(ifp, cnt));
753 	}
754 
755 }
756 
757 static void
758 nicvf_media_status(struct ifnet *ifp, struct ifmediareq *ifmr)
759 {
760 	struct nicvf *nic = if_getsoftc(ifp);
761 
762 	NICVF_CORE_LOCK(nic);
763 
764 	ifmr->ifm_status = IFM_AVALID;
765 	ifmr->ifm_active = IFM_ETHER;
766 
767 	if (nic->link_up) {
768 		/* Device attached to working network */
769 		ifmr->ifm_status |= IFM_ACTIVE;
770 	}
771 
772 	switch (nic->speed) {
773 	case SPEED_10:
774 		ifmr->ifm_active |= IFM_10_T;
775 		break;
776 	case SPEED_100:
777 		ifmr->ifm_active |= IFM_100_TX;
778 		break;
779 	case SPEED_1000:
780 		ifmr->ifm_active |= IFM_1000_T;
781 		break;
782 	case SPEED_10000:
783 		ifmr->ifm_active |= IFM_10G_SR;
784 		break;
785 	case SPEED_40000:
786 		ifmr->ifm_active |= IFM_40G_CR4;
787 		break;
788 	default:
789 		ifmr->ifm_active |= IFM_AUTO;
790 		break;
791 	}
792 
793 	if (nic->duplex)
794 		ifmr->ifm_active |= IFM_FDX;
795 	else
796 		ifmr->ifm_active |= IFM_HDX;
797 
798 	NICVF_CORE_UNLOCK(nic);
799 }
800 
801 static int
802 nicvf_media_change(struct ifnet *ifp __unused)
803 {
804 
805 	return (0);
806 }
807 
808 /* Register read/write APIs */
809 void
810 nicvf_reg_write(struct nicvf *nic, bus_space_handle_t offset, uint64_t val)
811 {
812 
813 	bus_write_8(nic->reg_base, offset, val);
814 }
815 
816 uint64_t
817 nicvf_reg_read(struct nicvf *nic, uint64_t offset)
818 {
819 
820 	return (bus_read_8(nic->reg_base, offset));
821 }
822 
823 void
824 nicvf_queue_reg_write(struct nicvf *nic, bus_space_handle_t offset,
825     uint64_t qidx, uint64_t val)
826 {
827 
828 	bus_write_8(nic->reg_base, offset + (qidx << NIC_Q_NUM_SHIFT), val);
829 }
830 
831 uint64_t
832 nicvf_queue_reg_read(struct nicvf *nic, bus_space_handle_t offset,
833     uint64_t qidx)
834 {
835 
836 	return (bus_read_8(nic->reg_base, offset + (qidx << NIC_Q_NUM_SHIFT)));
837 }
838 
839 /* VF -> PF mailbox communication */
840 static void
841 nicvf_write_to_mbx(struct nicvf *nic, union nic_mbx *mbx)
842 {
843 	uint64_t *msg = (uint64_t *)mbx;
844 
845 	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 0, msg[0]);
846 	nicvf_reg_write(nic, NIC_VF_PF_MAILBOX_0_1 + 8, msg[1]);
847 }
848 
849 int
850 nicvf_send_msg_to_pf(struct nicvf *nic, union nic_mbx *mbx)
851 {
852 	int timeout = NIC_MBOX_MSG_TIMEOUT * 10;
853 	int sleep = 2;
854 
855 	NICVF_CORE_LOCK_ASSERT(nic);
856 
857 	nic->pf_acked = FALSE;
858 	nic->pf_nacked = FALSE;
859 
860 	nicvf_write_to_mbx(nic, mbx);
861 
862 	/* Wait for previous message to be acked, timeout 2sec */
863 	while (!nic->pf_acked) {
864 		if (nic->pf_nacked)
865 			return (EINVAL);
866 
867 		DELAY(sleep * 1000);
868 
869 		if (nic->pf_acked)
870 			break;
871 		timeout -= sleep;
872 		if (!timeout) {
873 			device_printf(nic->dev,
874 				   "PF didn't ack to mbox msg %d from VF%d\n",
875 				   (mbx->msg.msg & 0xFF), nic->vf_id);
876 
877 			return (EBUSY);
878 		}
879 	}
880 	return (0);
881 }
882 
883 /*
884  * Checks if VF is able to comminicate with PF
885  * and also gets the VNIC number this VF is associated to.
886  */
887 static int
888 nicvf_check_pf_ready(struct nicvf *nic)
889 {
890 	union nic_mbx mbx = {};
891 
892 	mbx.msg.msg = NIC_MBOX_MSG_READY;
893 	if (nicvf_send_msg_to_pf(nic, &mbx)) {
894 		device_printf(nic->dev,
895 			   "PF didn't respond to READY msg\n");
896 		return 0;
897 	}
898 
899 	return 1;
900 }
901 
902 static void
903 nicvf_read_bgx_stats(struct nicvf *nic, struct bgx_stats_msg *bgx)
904 {
905 
906 	if (bgx->rx)
907 		nic->bgx_stats.rx_stats[bgx->idx] = bgx->stats;
908 	else
909 		nic->bgx_stats.tx_stats[bgx->idx] = bgx->stats;
910 }
911 
912 static void
913 nicvf_handle_mbx_intr(struct nicvf *nic)
914 {
915 	union nic_mbx mbx = {};
916 	uint64_t *mbx_data;
917 	uint64_t mbx_addr;
918 	int i;
919 
920 	mbx_addr = NIC_VF_PF_MAILBOX_0_1;
921 	mbx_data = (uint64_t *)&mbx;
922 
923 	for (i = 0; i < NIC_PF_VF_MAILBOX_SIZE; i++) {
924 		*mbx_data = nicvf_reg_read(nic, mbx_addr);
925 		mbx_data++;
926 		mbx_addr += sizeof(uint64_t);
927 	}
928 
929 	switch (mbx.msg.msg) {
930 	case NIC_MBOX_MSG_READY:
931 		nic->pf_acked = TRUE;
932 		nic->vf_id = mbx.nic_cfg.vf_id & 0x7F;
933 		nic->tns_mode = mbx.nic_cfg.tns_mode & 0x7F;
934 		nic->node = mbx.nic_cfg.node_id;
935 		memcpy(nic->hwaddr, mbx.nic_cfg.mac_addr, ETHER_ADDR_LEN);
936 		nic->loopback_supported = mbx.nic_cfg.loopback_supported;
937 		nic->link_up = FALSE;
938 		nic->duplex = 0;
939 		nic->speed = 0;
940 		break;
941 	case NIC_MBOX_MSG_ACK:
942 		nic->pf_acked = TRUE;
943 		break;
944 	case NIC_MBOX_MSG_NACK:
945 		nic->pf_nacked = TRUE;
946 		break;
947 	case NIC_MBOX_MSG_RSS_SIZE:
948 		nic->rss_info.rss_size = mbx.rss_size.ind_tbl_size;
949 		nic->pf_acked = TRUE;
950 		break;
951 	case NIC_MBOX_MSG_BGX_STATS:
952 		nicvf_read_bgx_stats(nic, &mbx.bgx_stats);
953 		nic->pf_acked = TRUE;
954 		break;
955 	case NIC_MBOX_MSG_BGX_LINK_CHANGE:
956 		nic->pf_acked = TRUE;
957 		nic->link_up = mbx.link_status.link_up;
958 		nic->duplex = mbx.link_status.duplex;
959 		nic->speed = mbx.link_status.speed;
960 		if (nic->link_up) {
961 			if_setbaudrate(nic->ifp, nic->speed * 1000000);
962 			if_link_state_change(nic->ifp, LINK_STATE_UP);
963 		} else {
964 			if_setbaudrate(nic->ifp, 0);
965 			if_link_state_change(nic->ifp, LINK_STATE_DOWN);
966 		}
967 		break;
968 	default:
969 		device_printf(nic->dev,
970 			   "Invalid message from PF, msg 0x%x\n", mbx.msg.msg);
971 		break;
972 	}
973 	nicvf_clear_intr(nic, NICVF_INTR_MBOX, 0);
974 }
975 
976 static int
977 nicvf_update_hw_max_frs(struct nicvf *nic, int mtu)
978 {
979 	union nic_mbx mbx = {};
980 
981 	mbx.frs.msg = NIC_MBOX_MSG_SET_MAX_FRS;
982 	mbx.frs.max_frs = mtu;
983 	mbx.frs.vf_id = nic->vf_id;
984 
985 	return nicvf_send_msg_to_pf(nic, &mbx);
986 }
987 
988 static int
989 nicvf_hw_set_mac_addr(struct nicvf *nic, uint8_t *hwaddr)
990 {
991 	union nic_mbx mbx = {};
992 
993 	mbx.mac.msg = NIC_MBOX_MSG_SET_MAC;
994 	mbx.mac.vf_id = nic->vf_id;
995 	memcpy(mbx.mac.mac_addr, hwaddr, ETHER_ADDR_LEN);
996 
997 	return (nicvf_send_msg_to_pf(nic, &mbx));
998 }
999 
1000 static void
1001 nicvf_config_cpi(struct nicvf *nic)
1002 {
1003 	union nic_mbx mbx = {};
1004 
1005 	mbx.cpi_cfg.msg = NIC_MBOX_MSG_CPI_CFG;
1006 	mbx.cpi_cfg.vf_id = nic->vf_id;
1007 	mbx.cpi_cfg.cpi_alg = nic->cpi_alg;
1008 	mbx.cpi_cfg.rq_cnt = nic->qs->rq_cnt;
1009 
1010 	nicvf_send_msg_to_pf(nic, &mbx);
1011 }
1012 
1013 static void
1014 nicvf_get_rss_size(struct nicvf *nic)
1015 {
1016 	union nic_mbx mbx = {};
1017 
1018 	mbx.rss_size.msg = NIC_MBOX_MSG_RSS_SIZE;
1019 	mbx.rss_size.vf_id = nic->vf_id;
1020 	nicvf_send_msg_to_pf(nic, &mbx);
1021 }
1022 
1023 static void
1024 nicvf_config_rss(struct nicvf *nic)
1025 {
1026 	union nic_mbx mbx = {};
1027 	struct nicvf_rss_info *rss;
1028 	int ind_tbl_len;
1029 	int i, nextq;
1030 
1031 	rss = &nic->rss_info;
1032 	ind_tbl_len = rss->rss_size;
1033 	nextq = 0;
1034 
1035 	mbx.rss_cfg.vf_id = nic->vf_id;
1036 	mbx.rss_cfg.hash_bits = rss->hash_bits;
1037 	while (ind_tbl_len != 0) {
1038 		mbx.rss_cfg.tbl_offset = nextq;
1039 		mbx.rss_cfg.tbl_len = MIN(ind_tbl_len,
1040 		    RSS_IND_TBL_LEN_PER_MBX_MSG);
1041 		mbx.rss_cfg.msg = mbx.rss_cfg.tbl_offset ?
1042 		    NIC_MBOX_MSG_RSS_CFG_CONT : NIC_MBOX_MSG_RSS_CFG;
1043 
1044 		for (i = 0; i < mbx.rss_cfg.tbl_len; i++)
1045 			mbx.rss_cfg.ind_tbl[i] = rss->ind_tbl[nextq++];
1046 
1047 		nicvf_send_msg_to_pf(nic, &mbx);
1048 
1049 		ind_tbl_len -= mbx.rss_cfg.tbl_len;
1050 	}
1051 }
1052 
1053 static void
1054 nicvf_set_rss_key(struct nicvf *nic)
1055 {
1056 	struct nicvf_rss_info *rss;
1057 	uint64_t key_addr;
1058 	int idx;
1059 
1060 	rss = &nic->rss_info;
1061 	key_addr = NIC_VNIC_RSS_KEY_0_4;
1062 
1063 	for (idx = 0; idx < RSS_HASH_KEY_SIZE; idx++) {
1064 		nicvf_reg_write(nic, key_addr, rss->key[idx]);
1065 		key_addr += sizeof(uint64_t);
1066 	}
1067 }
1068 
1069 static int
1070 nicvf_rss_init(struct nicvf *nic)
1071 {
1072 	struct nicvf_rss_info *rss;
1073 	int idx;
1074 
1075 	nicvf_get_rss_size(nic);
1076 
1077 	rss = &nic->rss_info;
1078 	if (nic->cpi_alg != CPI_ALG_NONE) {
1079 		rss->enable = FALSE;
1080 		rss->hash_bits = 0;
1081 		return (ENXIO);
1082 	}
1083 
1084 	rss->enable = TRUE;
1085 
1086 	/* Using the HW reset value for now */
1087 	rss->key[0] = 0xFEED0BADFEED0BADUL;
1088 	rss->key[1] = 0xFEED0BADFEED0BADUL;
1089 	rss->key[2] = 0xFEED0BADFEED0BADUL;
1090 	rss->key[3] = 0xFEED0BADFEED0BADUL;
1091 	rss->key[4] = 0xFEED0BADFEED0BADUL;
1092 
1093 	nicvf_set_rss_key(nic);
1094 
1095 	rss->cfg = RSS_IP_HASH_ENA | RSS_TCP_HASH_ENA | RSS_UDP_HASH_ENA;
1096 	nicvf_reg_write(nic, NIC_VNIC_RSS_CFG, rss->cfg);
1097 
1098 	rss->hash_bits = fls(rss->rss_size) - 1;
1099 	for (idx = 0; idx < rss->rss_size; idx++)
1100 		rss->ind_tbl[idx] = idx % nic->rx_queues;
1101 
1102 	nicvf_config_rss(nic);
1103 
1104 	return (0);
1105 }
1106 
1107 static int
1108 nicvf_init_resources(struct nicvf *nic)
1109 {
1110 	int err;
1111 	union nic_mbx mbx = {};
1112 
1113 	mbx.msg.msg = NIC_MBOX_MSG_CFG_DONE;
1114 
1115 	/* Enable Qset */
1116 	nicvf_qset_config(nic, TRUE);
1117 
1118 	/* Initialize queues and HW for data transfer */
1119 	err = nicvf_config_data_transfer(nic, TRUE);
1120 	if (err) {
1121 		device_printf(nic->dev,
1122 		    "Failed to alloc/config VF's QSet resources\n");
1123 		return (err);
1124 	}
1125 
1126 	/* Send VF config done msg to PF */
1127 	nicvf_write_to_mbx(nic, &mbx);
1128 
1129 	return (0);
1130 }
1131 
1132 static void
1133 nicvf_misc_intr_handler(void *arg)
1134 {
1135 	struct nicvf *nic = (struct nicvf *)arg;
1136 	uint64_t intr;
1137 
1138 	intr = nicvf_reg_read(nic, NIC_VF_INT);
1139 	/* Check for spurious interrupt */
1140 	if (!(intr & NICVF_INTR_MBOX_MASK))
1141 		return;
1142 
1143 	nicvf_handle_mbx_intr(nic);
1144 }
1145 
1146 static int
1147 nicvf_intr_handler(void *arg)
1148 {
1149 	struct nicvf *nic;
1150 	struct cmp_queue *cq;
1151 	int qidx;
1152 
1153 	cq = (struct cmp_queue *)arg;
1154 	nic = cq->nic;
1155 	qidx = cq->idx;
1156 
1157 	/* Disable interrupts */
1158 	nicvf_disable_intr(nic, NICVF_INTR_CQ, qidx);
1159 
1160 	taskqueue_enqueue(cq->cmp_taskq, &cq->cmp_task);
1161 
1162 	/* Clear interrupt */
1163 	nicvf_clear_intr(nic, NICVF_INTR_CQ, qidx);
1164 
1165 	return (FILTER_HANDLED);
1166 }
1167 
1168 static void
1169 nicvf_rbdr_intr_handler(void *arg)
1170 {
1171 	struct nicvf *nic;
1172 	struct queue_set *qs;
1173 	struct rbdr *rbdr;
1174 	int qidx;
1175 
1176 	nic = (struct nicvf *)arg;
1177 
1178 	/* Disable RBDR interrupt and schedule softirq */
1179 	for (qidx = 0; qidx < nic->qs->rbdr_cnt; qidx++) {
1180 		if (!nicvf_is_intr_enabled(nic, NICVF_INTR_RBDR, qidx))
1181 			continue;
1182 		nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
1183 
1184 		qs = nic->qs;
1185 		rbdr = &qs->rbdr[qidx];
1186 		taskqueue_enqueue(rbdr->rbdr_taskq, &rbdr->rbdr_task_nowait);
1187 		/* Clear interrupt */
1188 		nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
1189 	}
1190 }
1191 
1192 static void
1193 nicvf_qs_err_intr_handler(void *arg)
1194 {
1195 	struct nicvf *nic = (struct nicvf *)arg;
1196 	struct queue_set *qs = nic->qs;
1197 
1198 	/* Disable Qset err interrupt and schedule softirq */
1199 	nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
1200 	taskqueue_enqueue(qs->qs_err_taskq, &qs->qs_err_task);
1201 	nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);
1202 
1203 }
1204 
1205 static int
1206 nicvf_enable_msix(struct nicvf *nic)
1207 {
1208 	struct pci_devinfo *dinfo;
1209 	int rid, count;
1210 	int ret;
1211 
1212 	dinfo = device_get_ivars(nic->dev);
1213 	rid = dinfo->cfg.msix.msix_table_bar;
1214 	nic->msix_table_res =
1215 	    bus_alloc_resource_any(nic->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE);
1216 	if (nic->msix_table_res == NULL) {
1217 		device_printf(nic->dev,
1218 		    "Could not allocate memory for MSI-X table\n");
1219 		return (ENXIO);
1220 	}
1221 
1222 	count = nic->num_vec = NIC_VF_MSIX_VECTORS;
1223 
1224 	ret = pci_alloc_msix(nic->dev, &count);
1225 	if ((ret != 0) || (count != nic->num_vec)) {
1226 		device_printf(nic->dev,
1227 		    "Request for #%d msix vectors failed, error: %d\n",
1228 		    nic->num_vec, ret);
1229 		return (ret);
1230 	}
1231 
1232 	nic->msix_enabled = 1;
1233 	return (0);
1234 }
1235 
1236 static void
1237 nicvf_disable_msix(struct nicvf *nic)
1238 {
1239 
1240 	if (nic->msix_enabled) {
1241 		pci_release_msi(nic->dev);
1242 		nic->msix_enabled = 0;
1243 		nic->num_vec = 0;
1244 	}
1245 }
1246 
1247 static void
1248 nicvf_release_all_interrupts(struct nicvf *nic)
1249 {
1250 	struct resource *res;
1251 	int irq;
1252 	int err __diagused;
1253 
1254 	/* Free registered interrupts */
1255 	for (irq = 0; irq < nic->num_vec; irq++) {
1256 		res = nic->msix_entries[irq].irq_res;
1257 		if (res == NULL)
1258 			continue;
1259 		/* Teardown interrupt first */
1260 		if (nic->msix_entries[irq].handle != NULL) {
1261 			err = bus_teardown_intr(nic->dev,
1262 			    nic->msix_entries[irq].irq_res,
1263 			    nic->msix_entries[irq].handle);
1264 			KASSERT(err == 0,
1265 			    ("ERROR: Unable to teardown interrupt %d", irq));
1266 			nic->msix_entries[irq].handle = NULL;
1267 		}
1268 
1269 		bus_release_resource(nic->dev, SYS_RES_IRQ,
1270 			    rman_get_rid(res), nic->msix_entries[irq].irq_res);
1271 		nic->msix_entries[irq].irq_res = NULL;
1272 	}
1273 	/* Disable MSI-X */
1274 	nicvf_disable_msix(nic);
1275 }
1276 
1277 /*
1278  * Initialize MSIX vectors and register MISC interrupt.
1279  * Send READY message to PF to check if its alive
1280  */
1281 static int
1282 nicvf_allocate_misc_interrupt(struct nicvf *nic)
1283 {
1284 	struct resource *res;
1285 	int irq, rid;
1286 	int ret = 0;
1287 
1288 	/* Return if mailbox interrupt is already registered */
1289 	if (nic->msix_enabled)
1290 		return (0);
1291 
1292 	/* Enable MSI-X */
1293 	if (nicvf_enable_msix(nic) != 0)
1294 		return (ENXIO);
1295 
1296 	irq = NICVF_INTR_ID_MISC;
1297 	rid = irq + 1;
1298 	nic->msix_entries[irq].irq_res = bus_alloc_resource_any(nic->dev,
1299 	    SYS_RES_IRQ, &rid, (RF_SHAREABLE | RF_ACTIVE));
1300 	if (nic->msix_entries[irq].irq_res == NULL) {
1301 		device_printf(nic->dev,
1302 		    "Could not allocate Mbox interrupt for VF%d\n",
1303 		    device_get_unit(nic->dev));
1304 		return (ENXIO);
1305 	}
1306 
1307 	ret = bus_setup_intr(nic->dev, nic->msix_entries[irq].irq_res,
1308 	    (INTR_MPSAFE | INTR_TYPE_MISC), NULL, nicvf_misc_intr_handler, nic,
1309 	    &nic->msix_entries[irq].handle);
1310 	if (ret != 0) {
1311 		res = nic->msix_entries[irq].irq_res;
1312 		bus_release_resource(nic->dev, SYS_RES_IRQ,
1313 			    rman_get_rid(res), res);
1314 		nic->msix_entries[irq].irq_res = NULL;
1315 		return (ret);
1316 	}
1317 
1318 	return (0);
1319 }
1320 
1321 static int
1322 nicvf_enable_misc_interrupt(struct nicvf *nic)
1323 {
1324 
1325 	/* Enable mailbox interrupt */
1326 	nicvf_enable_intr(nic, NICVF_INTR_MBOX, 0);
1327 
1328 	/* Check if VF is able to communicate with PF */
1329 	if (!nicvf_check_pf_ready(nic)) {
1330 		nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
1331 		return (ENXIO);
1332 	}
1333 
1334 	return (0);
1335 }
1336 
1337 static void
1338 nicvf_release_net_interrupts(struct nicvf *nic)
1339 {
1340 	struct resource *res;
1341 	int irq;
1342 	int err;
1343 
1344 	for_each_cq_irq(irq) {
1345 		res = nic->msix_entries[irq].irq_res;
1346 		if (res == NULL)
1347 			continue;
1348 		/* Teardown active interrupts first */
1349 		if (nic->msix_entries[irq].handle != NULL) {
1350 			err = bus_teardown_intr(nic->dev,
1351 			    nic->msix_entries[irq].irq_res,
1352 			    nic->msix_entries[irq].handle);
1353 			KASSERT(err == 0,
1354 			    ("ERROR: Unable to teardown CQ interrupt %d",
1355 			    (irq - NICVF_INTR_ID_CQ)));
1356 			if (err != 0)
1357 				continue;
1358 		}
1359 
1360 		/* Release resource */
1361 		bus_release_resource(nic->dev, SYS_RES_IRQ, rman_get_rid(res),
1362 		    res);
1363 		nic->msix_entries[irq].irq_res = NULL;
1364 	}
1365 
1366 	for_each_rbdr_irq(irq) {
1367 		res = nic->msix_entries[irq].irq_res;
1368 		if (res == NULL)
1369 			continue;
1370 		/* Teardown active interrupts first */
1371 		if (nic->msix_entries[irq].handle != NULL) {
1372 			err = bus_teardown_intr(nic->dev,
1373 			    nic->msix_entries[irq].irq_res,
1374 			    nic->msix_entries[irq].handle);
1375 			KASSERT(err == 0,
1376 			    ("ERROR: Unable to teardown RDBR interrupt %d",
1377 			    (irq - NICVF_INTR_ID_RBDR)));
1378 			if (err != 0)
1379 				continue;
1380 		}
1381 
1382 		/* Release resource */
1383 		bus_release_resource(nic->dev, SYS_RES_IRQ, rman_get_rid(res),
1384 		    res);
1385 		nic->msix_entries[irq].irq_res = NULL;
1386 	}
1387 
1388 	irq = NICVF_INTR_ID_QS_ERR;
1389 	res = nic->msix_entries[irq].irq_res;
1390 	if (res != NULL) {
1391 		/* Teardown active interrupts first */
1392 		if (nic->msix_entries[irq].handle != NULL) {
1393 			err = bus_teardown_intr(nic->dev,
1394 			    nic->msix_entries[irq].irq_res,
1395 			    nic->msix_entries[irq].handle);
1396 			KASSERT(err == 0,
1397 			    ("ERROR: Unable to teardown QS Error interrupt %d",
1398 			    irq));
1399 			if (err != 0)
1400 				return;
1401 		}
1402 
1403 		/* Release resource */
1404 		bus_release_resource(nic->dev, SYS_RES_IRQ, rman_get_rid(res),
1405 		    res);
1406 		nic->msix_entries[irq].irq_res = NULL;
1407 	}
1408 }
1409 
1410 static int
1411 nicvf_allocate_net_interrupts(struct nicvf *nic)
1412 {
1413 	u_int cpuid;
1414 	int irq, rid;
1415 	int qidx;
1416 	int ret = 0;
1417 
1418 	/* MSI-X must be configured by now */
1419 	if (!nic->msix_enabled) {
1420 		device_printf(nic->dev, "Cannot alloacte queue interrups. "
1421 		    "MSI-X interrupts disabled.\n");
1422 		return (ENXIO);
1423 	}
1424 
1425 	/* Register CQ interrupts */
1426 	for_each_cq_irq(irq) {
1427 		if (irq >= (NICVF_INTR_ID_CQ + nic->qs->cq_cnt))
1428 			break;
1429 
1430 		qidx = irq - NICVF_INTR_ID_CQ;
1431 		rid = irq + 1;
1432 		nic->msix_entries[irq].irq_res = bus_alloc_resource_any(nic->dev,
1433 		    SYS_RES_IRQ, &rid, (RF_SHAREABLE | RF_ACTIVE));
1434 		if (nic->msix_entries[irq].irq_res == NULL) {
1435 			device_printf(nic->dev,
1436 			    "Could not allocate CQ interrupt %d for VF%d\n",
1437 			    (irq - NICVF_INTR_ID_CQ), device_get_unit(nic->dev));
1438 			ret = ENXIO;
1439 			goto error;
1440 		}
1441 		ret = bus_setup_intr(nic->dev, nic->msix_entries[irq].irq_res,
1442 		    (INTR_MPSAFE | INTR_TYPE_NET), nicvf_intr_handler,
1443 		    NULL, &nic->qs->cq[qidx], &nic->msix_entries[irq].handle);
1444 		if (ret != 0) {
1445 			device_printf(nic->dev,
1446 			    "Could not setup CQ interrupt %d for VF%d\n",
1447 			    (irq - NICVF_INTR_ID_CQ), device_get_unit(nic->dev));
1448 			goto error;
1449 		}
1450 		cpuid = (device_get_unit(nic->dev) * CMP_QUEUE_CNT) + qidx;
1451 		cpuid %= mp_ncpus;
1452 		/*
1453 		 * Save CPU ID for later use when system-wide RSS is enabled.
1454 		 * It will be used to pit the CQ task to the same CPU that got
1455 		 * interrupted.
1456 		 */
1457 		nic->qs->cq[qidx].cmp_cpuid = cpuid;
1458 		if (bootverbose) {
1459 			device_printf(nic->dev, "bind CQ%d IRQ to CPU%d\n",
1460 			    qidx, cpuid);
1461 		}
1462 		/* Bind interrupts to the given CPU */
1463 		bus_bind_intr(nic->dev, nic->msix_entries[irq].irq_res, cpuid);
1464 	}
1465 
1466 	/* Register RBDR interrupt */
1467 	for_each_rbdr_irq(irq) {
1468 		if (irq >= (NICVF_INTR_ID_RBDR + nic->qs->rbdr_cnt))
1469 			break;
1470 
1471 		rid = irq + 1;
1472 		nic->msix_entries[irq].irq_res = bus_alloc_resource_any(nic->dev,
1473 		    SYS_RES_IRQ, &rid, (RF_SHAREABLE | RF_ACTIVE));
1474 		if (nic->msix_entries[irq].irq_res == NULL) {
1475 			device_printf(nic->dev,
1476 			    "Could not allocate RBDR interrupt %d for VF%d\n",
1477 			    (irq - NICVF_INTR_ID_RBDR),
1478 			    device_get_unit(nic->dev));
1479 			ret = ENXIO;
1480 			goto error;
1481 		}
1482 		ret = bus_setup_intr(nic->dev, nic->msix_entries[irq].irq_res,
1483 		    (INTR_MPSAFE | INTR_TYPE_NET), NULL,
1484 		    nicvf_rbdr_intr_handler, nic,
1485 		    &nic->msix_entries[irq].handle);
1486 		if (ret != 0) {
1487 			device_printf(nic->dev,
1488 			    "Could not setup RBDR interrupt %d for VF%d\n",
1489 			    (irq - NICVF_INTR_ID_RBDR),
1490 			    device_get_unit(nic->dev));
1491 			goto error;
1492 		}
1493 	}
1494 
1495 	/* Register QS error interrupt */
1496 	irq = NICVF_INTR_ID_QS_ERR;
1497 	rid = irq + 1;
1498 	nic->msix_entries[irq].irq_res = bus_alloc_resource_any(nic->dev,
1499 	    SYS_RES_IRQ, &rid, (RF_SHAREABLE | RF_ACTIVE));
1500 	if (nic->msix_entries[irq].irq_res == NULL) {
1501 		device_printf(nic->dev,
1502 		    "Could not allocate QS Error interrupt for VF%d\n",
1503 		    device_get_unit(nic->dev));
1504 		ret = ENXIO;
1505 		goto error;
1506 	}
1507 	ret = bus_setup_intr(nic->dev, nic->msix_entries[irq].irq_res,
1508 	    (INTR_MPSAFE | INTR_TYPE_NET), NULL, nicvf_qs_err_intr_handler,
1509 	    nic, &nic->msix_entries[irq].handle);
1510 	if (ret != 0) {
1511 		device_printf(nic->dev,
1512 		    "Could not setup QS Error interrupt for VF%d\n",
1513 		    device_get_unit(nic->dev));
1514 		goto error;
1515 	}
1516 
1517 	return (0);
1518 error:
1519 	nicvf_release_net_interrupts(nic);
1520 	return (ret);
1521 }
1522 
1523 static int
1524 nicvf_stop_locked(struct nicvf *nic)
1525 {
1526 	struct ifnet *ifp;
1527 	int qidx;
1528 	struct queue_set *qs = nic->qs;
1529 	union nic_mbx mbx = {};
1530 
1531 	NICVF_CORE_LOCK_ASSERT(nic);
1532 	/* Stop callout. Can block here since holding SX lock */
1533 	callout_drain(&nic->stats_callout);
1534 
1535 	ifp = nic->ifp;
1536 
1537 	mbx.msg.msg = NIC_MBOX_MSG_SHUTDOWN;
1538 	nicvf_send_msg_to_pf(nic, &mbx);
1539 
1540 	/* Disable RBDR & QS error interrupts */
1541 	for (qidx = 0; qidx < qs->rbdr_cnt; qidx++) {
1542 		nicvf_disable_intr(nic, NICVF_INTR_RBDR, qidx);
1543 		nicvf_clear_intr(nic, NICVF_INTR_RBDR, qidx);
1544 	}
1545 	nicvf_disable_intr(nic, NICVF_INTR_QS_ERR, 0);
1546 	nicvf_clear_intr(nic, NICVF_INTR_QS_ERR, 0);
1547 
1548 	/* Deactivate network interface */
1549 	if_setdrvflagbits(ifp, IFF_DRV_OACTIVE, IFF_DRV_RUNNING);
1550 
1551 	/* Free resources */
1552 	nicvf_config_data_transfer(nic, FALSE);
1553 
1554 	/* Disable HW Qset */
1555 	nicvf_qset_config(nic, FALSE);
1556 
1557 	/* disable mailbox interrupt */
1558 	nicvf_disable_intr(nic, NICVF_INTR_MBOX, 0);
1559 
1560 	return (0);
1561 }
1562 
1563 static void
1564 nicvf_update_stats(struct nicvf *nic)
1565 {
1566 	int qidx;
1567 	struct nicvf_hw_stats *stats = &nic->hw_stats;
1568 	struct nicvf_drv_stats *drv_stats = &nic->drv_stats;
1569 	struct queue_set *qs = nic->qs;
1570 
1571 #define	GET_RX_STATS(reg) \
1572     nicvf_reg_read(nic, NIC_VNIC_RX_STAT_0_13 | ((reg) << 3))
1573 #define GET_TX_STATS(reg) \
1574     nicvf_reg_read(nic, NIC_VNIC_TX_STAT_0_4 | ((reg) << 3))
1575 
1576 	stats->rx_bytes = GET_RX_STATS(RX_OCTS);
1577 	stats->rx_ucast_frames = GET_RX_STATS(RX_UCAST);
1578 	stats->rx_bcast_frames = GET_RX_STATS(RX_BCAST);
1579 	stats->rx_mcast_frames = GET_RX_STATS(RX_MCAST);
1580 	stats->rx_fcs_errors = GET_RX_STATS(RX_FCS);
1581 	stats->rx_l2_errors = GET_RX_STATS(RX_L2ERR);
1582 	stats->rx_drop_red = GET_RX_STATS(RX_RED);
1583 	stats->rx_drop_red_bytes = GET_RX_STATS(RX_RED_OCTS);
1584 	stats->rx_drop_overrun = GET_RX_STATS(RX_ORUN);
1585 	stats->rx_drop_overrun_bytes = GET_RX_STATS(RX_ORUN_OCTS);
1586 	stats->rx_drop_bcast = GET_RX_STATS(RX_DRP_BCAST);
1587 	stats->rx_drop_mcast = GET_RX_STATS(RX_DRP_MCAST);
1588 	stats->rx_drop_l3_bcast = GET_RX_STATS(RX_DRP_L3BCAST);
1589 	stats->rx_drop_l3_mcast = GET_RX_STATS(RX_DRP_L3MCAST);
1590 
1591 	stats->tx_bytes_ok = GET_TX_STATS(TX_OCTS);
1592 	stats->tx_ucast_frames_ok = GET_TX_STATS(TX_UCAST);
1593 	stats->tx_bcast_frames_ok = GET_TX_STATS(TX_BCAST);
1594 	stats->tx_mcast_frames_ok = GET_TX_STATS(TX_MCAST);
1595 	stats->tx_drops = GET_TX_STATS(TX_DROP);
1596 
1597 	drv_stats->tx_frames_ok = stats->tx_ucast_frames_ok +
1598 	    stats->tx_bcast_frames_ok + stats->tx_mcast_frames_ok;
1599 	drv_stats->rx_drops = stats->rx_drop_red + stats->rx_drop_overrun;
1600 	drv_stats->tx_drops = stats->tx_drops;
1601 
1602 	/* Update RQ and SQ stats */
1603 	for (qidx = 0; qidx < qs->rq_cnt; qidx++)
1604 		nicvf_update_rq_stats(nic, qidx);
1605 	for (qidx = 0; qidx < qs->sq_cnt; qidx++)
1606 		nicvf_update_sq_stats(nic, qidx);
1607 }
1608 
1609 static void
1610 nicvf_tick_stats(void *arg)
1611 {
1612 	struct nicvf *nic;
1613 
1614 	nic = (struct nicvf *)arg;
1615 
1616 	/* Read the statistics */
1617 	nicvf_update_stats(nic);
1618 
1619 	callout_reset(&nic->stats_callout, hz, nicvf_tick_stats, nic);
1620 }
1621