xref: /freebsd/sys/dev/vmware/vmxnet3/if_vmx.c (revision 28f4385e45a2681c14bd04b83fe1796eaefe8265)
1 /*-
2  * Copyright (c) 2013 Tsubai Masanari
3  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4  * Copyright (c) 2018 Patrick Kelsey
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
19  */
20 
21 /* Driver for VMware vmxnet3 virtual ethernet devices. */
22 
23 #include <sys/cdefs.h>
24 __FBSDID("$FreeBSD$");
25 
26 #include <sys/param.h>
27 #include <sys/systm.h>
28 #include <sys/kernel.h>
29 #include <sys/endian.h>
30 #include <sys/sockio.h>
31 #include <sys/mbuf.h>
32 #include <sys/malloc.h>
33 #include <sys/module.h>
34 #include <sys/socket.h>
35 #include <sys/sysctl.h>
36 #include <sys/smp.h>
37 #include <vm/vm.h>
38 #include <vm/pmap.h>
39 
40 #include <net/ethernet.h>
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/if_arp.h>
44 #include <net/if_dl.h>
45 #include <net/if_types.h>
46 #include <net/if_media.h>
47 #include <net/if_vlan_var.h>
48 #include <net/iflib.h>
49 
50 #include <netinet/in_systm.h>
51 #include <netinet/in.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip6.h>
54 #include <netinet6/ip6_var.h>
55 #include <netinet/udp.h>
56 #include <netinet/tcp.h>
57 
58 #include <machine/bus.h>
59 #include <machine/resource.h>
60 #include <sys/bus.h>
61 #include <sys/rman.h>
62 
63 #include <dev/pci/pcireg.h>
64 #include <dev/pci/pcivar.h>
65 
66 #include "ifdi_if.h"
67 
68 #include "if_vmxreg.h"
69 #include "if_vmxvar.h"
70 
71 #include "opt_inet.h"
72 #include "opt_inet6.h"
73 
74 
75 #define VMXNET3_VMWARE_VENDOR_ID	0x15AD
76 #define VMXNET3_VMWARE_DEVICE_ID	0x07B0
77 
78 static pci_vendor_info_t vmxnet3_vendor_info_array[] =
79 {
80 	PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
81 	/* required last entry */
82 	PVID_END
83 };
84 
85 static void	*vmxnet3_register(device_t);
86 static int	vmxnet3_attach_pre(if_ctx_t);
87 static int	vmxnet3_msix_intr_assign(if_ctx_t, int);
88 static void	vmxnet3_free_irqs(struct vmxnet3_softc *);
89 static int	vmxnet3_attach_post(if_ctx_t);
90 static int	vmxnet3_detach(if_ctx_t);
91 static int	vmxnet3_shutdown(if_ctx_t);
92 static int	vmxnet3_suspend(if_ctx_t);
93 static int	vmxnet3_resume(if_ctx_t);
94 
95 static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
96 static void	vmxnet3_free_resources(struct vmxnet3_softc *);
97 static int	vmxnet3_check_version(struct vmxnet3_softc *);
98 static void	vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
99 
100 static int	vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
101 static void	vmxnet3_init_txq(struct vmxnet3_softc *, int);
102 static int	vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
103 static void	vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
104 static int	vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
105 static void	vmxnet3_queues_free(if_ctx_t);
106 
107 static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
108 static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
109 static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
110 static void	vmxnet3_free_mcast_table(struct vmxnet3_softc *);
111 static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
112 static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
113 static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
114 static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
115 static void	vmxnet3_free_data(struct vmxnet3_softc *);
116 
117 static void	vmxnet3_evintr(struct vmxnet3_softc *);
118 static int	vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
119 static void	vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
120 static int	vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
121 static int	vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
122 static int	vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
123 static void	vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
124 static void	vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
125 static int	vmxnet3_legacy_intr(void *);
126 static int	vmxnet3_rxq_intr(void *);
127 static int	vmxnet3_event_intr(void *);
128 
129 static void	vmxnet3_stop(if_ctx_t);
130 
131 static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
132 static void	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
133 static void	vmxnet3_reinit_queues(struct vmxnet3_softc *);
134 static int	vmxnet3_enable_device(struct vmxnet3_softc *);
135 static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
136 static void	vmxnet3_init(if_ctx_t);
137 static void	vmxnet3_multi_set(if_ctx_t);
138 static int	vmxnet3_mtu_set(if_ctx_t, uint32_t);
139 static void	vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
140 static int	vmxnet3_media_change(if_ctx_t);
141 static int	vmxnet3_promisc_set(if_ctx_t, int);
142 static uint64_t	vmxnet3_get_counter(if_ctx_t, ift_counter);
143 static void	vmxnet3_update_admin_status(if_ctx_t);
144 static void	vmxnet3_txq_timer(if_ctx_t, uint16_t);
145 
146 static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
147 		    uint16_t);
148 static void	vmxnet3_vlan_register(if_ctx_t, uint16_t);
149 static void	vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
150 static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);
151 
152 static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
153 static int	vmxnet3_link_is_up(struct vmxnet3_softc *);
154 static void	vmxnet3_link_status(struct vmxnet3_softc *);
155 static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
156 static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
157 
158 static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
159 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
160 static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
161 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
162 static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
163 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
164 static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
165 
166 static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
167 		    uint32_t);
168 static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
169 static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
170 		    uint32_t);
171 static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
172 static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
173 
174 static int	vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
175 static int	vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
176 static void	vmxnet3_link_intr_enable(if_ctx_t);
177 static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
178 static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
179 static void	vmxnet3_intr_enable_all(if_ctx_t);
180 static void	vmxnet3_intr_disable_all(if_ctx_t);
181 
182 typedef enum {
183 	VMXNET3_BARRIER_RD,
184 	VMXNET3_BARRIER_WR,
185 	VMXNET3_BARRIER_RDWR,
186 } vmxnet3_barrier_t;
187 
188 static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
189 
190 
191 static device_method_t vmxnet3_methods[] = {
192 	/* Device interface */
193 	DEVMETHOD(device_register, vmxnet3_register),
194 	DEVMETHOD(device_probe, iflib_device_probe),
195 	DEVMETHOD(device_attach, iflib_device_attach),
196 	DEVMETHOD(device_detach, iflib_device_detach),
197 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
198 	DEVMETHOD(device_suspend, iflib_device_suspend),
199 	DEVMETHOD(device_resume, iflib_device_resume),
200 	DEVMETHOD_END
201 };
202 
203 static driver_t vmxnet3_driver = {
204 	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
205 };
206 
207 static devclass_t vmxnet3_devclass;
208 DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
209 IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
210 MODULE_VERSION(vmx, 2);
211 
212 MODULE_DEPEND(vmx, pci, 1, 1, 1);
213 MODULE_DEPEND(vmx, ether, 1, 1, 1);
214 MODULE_DEPEND(vmx, iflib, 1, 1, 1);
215 
216 static device_method_t vmxnet3_iflib_methods[] = {
217 	DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
218 	DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
219 	DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
220 
221 	DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
222 	DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
223 	DEVMETHOD(ifdi_detach, vmxnet3_detach),
224 
225 	DEVMETHOD(ifdi_init, vmxnet3_init),
226 	DEVMETHOD(ifdi_stop, vmxnet3_stop),
227 	DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
228 	DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
229 	DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
230 	DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
231 	DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
232 	DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
233 	DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
234 	DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
235 
236 	DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
237 	DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
238 	DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
239 	DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
240 	DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
241 	DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
242 
243 	DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
244 	DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
245 
246 	DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
247 	DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
248 	DEVMETHOD(ifdi_resume, vmxnet3_resume),
249 
250 	DEVMETHOD_END
251 };
252 
253 static driver_t vmxnet3_iflib_driver = {
254 	"vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
255 };
256 
257 struct if_txrx vmxnet3_txrx = {
258 	.ift_txd_encap = vmxnet3_isc_txd_encap,
259 	.ift_txd_flush = vmxnet3_isc_txd_flush,
260 	.ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
261 	.ift_rxd_available = vmxnet3_isc_rxd_available,
262 	.ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
263 	.ift_rxd_refill = vmxnet3_isc_rxd_refill,
264 	.ift_rxd_flush = vmxnet3_isc_rxd_flush,
265 	.ift_legacy_intr = vmxnet3_legacy_intr
266 };
267 
268 static struct if_shared_ctx vmxnet3_sctx_init = {
269 	.isc_magic = IFLIB_MAGIC,
270 	.isc_q_align = 512,
271 
272 	.isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
273 	.isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
274 	.isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
275 	.isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
276 
277 	/*
278 	 * These values are used to configure the busdma tag used for
279 	 * receive descriptors.  Each receive descriptor only points to one
280 	 * buffer.
281 	 */
282 	.isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
283 	.isc_rx_nsegments = 1,  /* One mapping per descriptor */
284 	.isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
285 
286 	.isc_admin_intrcnt = 1,
287 	.isc_vendor_info = vmxnet3_vendor_info_array,
288 	.isc_driver_version = "2",
289 	.isc_driver = &vmxnet3_iflib_driver,
290 	.isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ,
291 
292 	/*
293 	 * Number of receive queues per receive queue set, with associated
294 	 * descriptor settings for each.
295 	 */
296 	.isc_nrxqs = 3,
297 	.isc_nfl = 2, /* one free list for each receive command queue */
298 	.isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
299 	.isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
300 	.isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
301 
302 	/*
303 	 * Number of transmit queues per transmit queue set, with associated
304 	 * descriptor settings for each.
305 	 */
306 	.isc_ntxqs = 2,
307 	.isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
308 	.isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
309 	.isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
310 };
311 
312 static void *
313 vmxnet3_register(device_t dev)
314 {
315 	return (&vmxnet3_sctx_init);
316 }
317 
318 static int
319 vmxnet3_attach_pre(if_ctx_t ctx)
320 {
321 	device_t dev;
322 	if_softc_ctx_t scctx;
323 	struct vmxnet3_softc *sc;
324 	uint32_t intr_config;
325 	int error;
326 
327 	dev = iflib_get_dev(ctx);
328 	sc = iflib_get_softc(ctx);
329 	sc->vmx_dev = dev;
330 	sc->vmx_ctx = ctx;
331 	sc->vmx_sctx = iflib_get_sctx(ctx);
332 	sc->vmx_scctx = iflib_get_softc_ctx(ctx);
333 	sc->vmx_ifp = iflib_get_ifp(ctx);
334 	sc->vmx_media = iflib_get_media(ctx);
335 	scctx = sc->vmx_scctx;
336 
337 	scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
338 	scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
339 	/* isc_tx_tso_size_max doesn't include possible vlan header */
340 	scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
341 	scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
342 	scctx->isc_txrx = &vmxnet3_txrx;
343 
344 	/* If 0, the iflib tunable was not set, so set to the default */
345 	if (scctx->isc_nrxqsets == 0)
346 		scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
347 	scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
348 
349 	/* If 0, the iflib tunable was not set, so set to the default */
350 	if (scctx->isc_ntxqsets == 0)
351 		scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
352 	scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
353 
354 	/*
355 	 * Enforce that the transmit completion queue descriptor count is
356 	 * the same as the transmit command queue descriptor count.
357 	 */
358 	scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
359 	scctx->isc_txqsizes[0] =
360 	    sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
361 	scctx->isc_txqsizes[1] =
362 	    sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
363 
364 	/*
365 	 * Enforce that the receive completion queue descriptor count is the
366 	 * sum of the receive command queue descriptor counts, and that the
367 	 * second receive command queue descriptor count is the same as the
368 	 * first one.
369 	 */
370 	scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
371 	scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
372 	scctx->isc_rxqsizes[0] =
373 	    sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
374 	scctx->isc_rxqsizes[1] =
375 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
376 	scctx->isc_rxqsizes[2] =
377 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
378 
379 	scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
380 
381 	/* Map PCI BARs */
382 	error = vmxnet3_alloc_resources(sc);
383 	if (error)
384 		goto fail;
385 
386 	/* Check device versions */
387 	error = vmxnet3_check_version(sc);
388 	if (error)
389 		goto fail;
390 
391 	/*
392 	 * The interrupt mode can be set in the hypervisor configuration via
393 	 * the parameter ethernet<N>.intrMode.
394 	 */
395 	intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
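	/*
	 * The low two bits of intr_config select the interrupt type
	 * (handled by the switch below); bits 3:2 select the interrupt
	 * mask mode.
	 */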
396 	sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
397 
398 	/*
399 	 * Configure the softc context to attempt to configure the interrupt
400 	 * mode now indicated by intr_config.  iflib will follow the usual
401 	 * fallback path MSIX -> MSI -> LEGACY, starting at the configured
402 	 * starting mode.
403 	 */
404 	switch (intr_config & 0x03) {
405 	case VMXNET3_IT_AUTO:
406 	case VMXNET3_IT_MSIX:
407 		scctx->isc_msix_bar = pci_msix_table_bar(dev);
408 		break;
409 	case VMXNET3_IT_MSI:
410 		scctx->isc_msix_bar = -1;
411 		scctx->isc_disable_msix = 1;
412 		break;
413 	case VMXNET3_IT_LEGACY:
414 		scctx->isc_msix_bar = 0;
415 		break;
416 	}
417 
418 	scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
419 	scctx->isc_capabilities = scctx->isc_capenable =
420 	    IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
421 	    IFCAP_TSO4 | IFCAP_TSO6 |
422 	    IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
423 	    IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
424 	    IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
425 	    IFCAP_JUMBO_MTU;
426 
427 	/* These capabilities are not enabled by default. */
428 	scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
429 
430 	vmxnet3_get_lladdr(sc);
431 	iflib_set_mac(ctx, sc->vmx_lladdr);
432 
433 	return (0);
434 fail:
435 	/*
436 	 * We must completely clean up anything allocated above as iflib
437 	 * will not invoke any other driver entry points as a result of this
438 	 * failure.
439 	 */
440 	vmxnet3_free_resources(sc);
441 
442 	return (error);
443 }
444 
445 static int
446 vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
447 {
448 	struct vmxnet3_softc *sc;
449 	if_softc_ctx_t scctx;
450 	struct vmxnet3_rxqueue *rxq;
451 	int error;
452 	int i;
453 	char irq_name[16];
454 
455 	sc = iflib_get_softc(ctx);
456 	scctx = sc->vmx_scctx;
457 
458 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
459 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
460 
461 		rxq = &sc->vmx_rxq[i];
462 		error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
463 		    IFLIB_INTR_RX, vmxnet3_rxq_intr, rxq, i, irq_name);
464 		if (error) {
465 			device_printf(iflib_get_dev(ctx),
466 			    "Failed to register rxq %d interrupt handler\n", i);
467 			return (error);
468 		}
469 	}
470 
471 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
472 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
473 
474 		/*
475 		 * Don't provide the corresponding rxq irq for reference -
476 		 * we want the transmit task to be attached to a task queue
477 		 * that is different from the one used by the corresponding
478 		 * rxq irq.  That is because the TX doorbell writes are very
479 		 * expensive as virtualized MMIO operations, so we want to
480 		 * be able to defer them to another core when possible so
481 		 * that they don't steal receive processing cycles during
482 		 * stack turnarounds like TCP ACK generation.  The other
483 		 * piece to this approach is enabling the iflib abdicate
484 		 * option (currently via an interface-specific
485 		 * tunable/sysctl).
486 		 */
487 		iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
488 		    irq_name);
489 	}
490 
491 	error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
492 	    scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
493 	    "event");
494 	if (error) {
495 		device_printf(iflib_get_dev(ctx),
496 		    "Failed to register event interrupt handler\n");
497 		return (error);
498 	}
499 
500 	return (0);
501 }
502 
503 static void
504 vmxnet3_free_irqs(struct vmxnet3_softc *sc)
505 {
506 	if_softc_ctx_t scctx;
507 	struct vmxnet3_rxqueue *rxq;
508 	int i;
509 
510 	scctx = sc->vmx_scctx;
511 
512 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
513 		rxq = &sc->vmx_rxq[i];
514 		iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
515 	}
516 
517 	iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
518 }
519 
520 static int
521 vmxnet3_attach_post(if_ctx_t ctx)
522 {
523 	device_t dev;
524 	if_softc_ctx_t scctx;
525 	struct vmxnet3_softc *sc;
526 	int error;
527 
528 	dev = iflib_get_dev(ctx);
529 	scctx = iflib_get_softc_ctx(ctx);
530 	sc = iflib_get_softc(ctx);
531 
532 	if (scctx->isc_nrxqsets > 1)
533 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
534 
535 	error = vmxnet3_alloc_data(sc);
536 	if (error)
537 		goto fail;
538 
539 	vmxnet3_set_interrupt_idx(sc);
540 	vmxnet3_setup_sysctl(sc);
541 
542 	ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
543 	ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
544 
545 fail:
546 	return (error);
547 }
548 
549 static int
550 vmxnet3_detach(if_ctx_t ctx)
551 {
552 	struct vmxnet3_softc *sc;
553 
554 	sc = iflib_get_softc(ctx);
555 
556 	vmxnet3_free_irqs(sc);
557 	vmxnet3_free_data(sc);
558 	vmxnet3_free_resources(sc);
559 
560 	return (0);
561 }
562 
563 static int
564 vmxnet3_shutdown(if_ctx_t ctx)
565 {
566 
567 	return (0);
568 }
569 
570 static int
571 vmxnet3_suspend(if_ctx_t ctx)
572 {
573 
574 	return (0);
575 }
576 
577 static int
578 vmxnet3_resume(if_ctx_t ctx)
579 {
580 
581 	return (0);
582 }
583 
584 static int
585 vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
586 {
587 	device_t dev;
588 	int rid;
589 
590 	dev = sc->vmx_dev;
591 
592 	rid = PCIR_BAR(0);
593 	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
594 	    RF_ACTIVE);
595 	if (sc->vmx_res0 == NULL) {
596 		device_printf(dev,
597 		    "could not map BAR0 memory\n");
598 		return (ENXIO);
599 	}
600 
601 	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
602 	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
603 
604 	rid = PCIR_BAR(1);
605 	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
606 	    RF_ACTIVE);
607 	if (sc->vmx_res1 == NULL) {
608 		device_printf(dev,
609 		    "could not map BAR1 memory\n");
610 		return (ENXIO);
611 	}
612 
613 	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
614 	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
615 
616 	return (0);
617 }
618 
619 static void
620 vmxnet3_free_resources(struct vmxnet3_softc *sc)
621 {
622 	device_t dev;
623 	int rid;
624 
625 	dev = sc->vmx_dev;
626 
627 	if (sc->vmx_res0 != NULL) {
628 		rid = PCIR_BAR(0);
629 		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res0);
630 		sc->vmx_res0 = NULL;
631 	}
632 
633 	if (sc->vmx_res1 != NULL) {
634 		rid = PCIR_BAR(1);
635 		bus_release_resource(dev, SYS_RES_MEMORY, rid, sc->vmx_res1);
636 		sc->vmx_res1 = NULL;
637 	}
638 }
639 
640 static int
641 vmxnet3_check_version(struct vmxnet3_softc *sc)
642 {
643 	device_t dev;
644 	uint32_t version;
645 
646 	dev = sc->vmx_dev;
647 
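	/*
	 * Bit 0 of each version register indicates that revision 1 is
	 * supported; writing 1 back selects that revision.
	 */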
648 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
649 	if ((version & 0x01) == 0) {
650 		device_printf(dev, "unsupported hardware version %#x\n",
651 		    version);
652 		return (ENOTSUP);
653 	}
654 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
655 
656 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
657 	if ((version & 0x01) == 0) {
658 		device_printf(dev, "unsupported UPT version %#x\n", version);
659 		return (ENOTSUP);
660 	}
661 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
662 
663 	return (0);
664 }
665 
666 static void
667 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
668 {
669 	if_softc_ctx_t scctx;
670 	struct vmxnet3_txqueue *txq;
671 	struct vmxnet3_txq_shared *txs;
672 	struct vmxnet3_rxqueue *rxq;
673 	struct vmxnet3_rxq_shared *rxs;
674 	int intr_idx;
675 	int i;
676 
677 	scctx = sc->vmx_scctx;
678 
679 	/*
680 	 * There is either one interrupt, or there is one interrupt per
681 	 * receive queue.  If there is one interrupt, then all interrupt
682 	 * indexes are zero.  If there is one interrupt per receive queue,
683 	 * the transmit queue interrupt indexes are assigned the receive
684 	 * queue interrupt indexes in round-robin fashion.
685 	 *
686 	 * The event interrupt is always the last interrupt index.
687 	 */
688 	sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
689 
690 	intr_idx = 0;
691 	for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
692 		rxq = &sc->vmx_rxq[i];
693 		rxs = rxq->vxrxq_rs;
694 		rxq->vxrxq_intr_idx = intr_idx;
695 		rxs->intr_idx = rxq->vxrxq_intr_idx;
696 	}
697 
698 	/*
699 	 * Assign the tx queues' interrupt indexes above what we are actually
700 	 * using.  These interrupts will never be enabled.
701 	 */
702 	intr_idx = scctx->isc_vectors;
703 	for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
704 		txq = &sc->vmx_txq[i];
705 		txs = txq->vxtxq_ts;
706 		txq->vxtxq_intr_idx = intr_idx;
707 		txs->intr_idx = txq->vxtxq_intr_idx;
708 	}
709 }
710 
711 static int
712 vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
713 {
714 	if_softc_ctx_t scctx;
715 	int size;
716 	int error;
717 
718 	scctx = sc->vmx_scctx;
719 
720 	/*
721 	 * The txq and rxq shared data areas must be allocated contiguously
722 	 * as vmxnet3_driver_shared contains only a single address member
723 	 * for the shared queue data area.
724 	 */
725 	size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
726 	    scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
727 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
728 	if (error) {
729 		device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
730 		return (error);
731 	}
732 
733 	return (0);
734 }
735 
736 static void
737 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
738 {
739 	struct vmxnet3_txqueue *txq;
740 	struct vmxnet3_comp_ring *txc;
741 	struct vmxnet3_txring *txr;
742 	if_softc_ctx_t scctx;
743 
744 	txq = &sc->vmx_txq[q];
745 	txc = &txq->vxtxq_comp_ring;
746 	txr = &txq->vxtxq_cmd_ring;
747 	scctx = sc->vmx_scctx;
748 
749 	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
750 	    device_get_nameunit(sc->vmx_dev), q);
751 
752 	txq->vxtxq_sc = sc;
753 	txq->vxtxq_id = q;
754 	txc->vxcr_ndesc = scctx->isc_ntxd[0];
755 	txr->vxtxr_ndesc = scctx->isc_ntxd[1];
756 }
757 
758 static int
759 vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
760     int ntxqs, int ntxqsets)
761 {
762 	struct vmxnet3_softc *sc;
763 	int q;
764 	int error;
765 	caddr_t kva;
766 
767 	sc = iflib_get_softc(ctx);
768 
769 	/* Allocate the array of transmit queues */
770 	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
771 	    ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
772 	if (sc->vmx_txq == NULL)
773 		return (ENOMEM);
774 
775 	/* Initialize driver state for each transmit queue */
776 	for (q = 0; q < ntxqsets; q++)
777 		vmxnet3_init_txq(sc, q);
778 
779 	/*
780 	 * Allocate queue state that is shared with the device.  This check
781 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
782 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
783 	 * order iflib invokes those routines in.
784 	 */
785 	if (sc->vmx_qs_dma.idi_size == 0) {
786 		error = vmxnet3_queues_shared_alloc(sc);
787 		if (error)
788 			return (error);
789 	}
790 
791 	kva = sc->vmx_qs_dma.idi_vaddr;
792 	for (q = 0; q < ntxqsets; q++) {
793 		sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
794 		kva += sizeof(struct vmxnet3_txq_shared);
795 	}
796 
797 	/* Record descriptor ring vaddrs and paddrs */
798 	for (q = 0; q < ntxqsets; q++) {
799 		struct vmxnet3_txqueue *txq;
800 		struct vmxnet3_txring *txr;
801 		struct vmxnet3_comp_ring *txc;
802 
803 		txq = &sc->vmx_txq[q];
804 		txc = &txq->vxtxq_comp_ring;
805 		txr = &txq->vxtxq_cmd_ring;
806 
807 		/* Completion ring */
808 		txc->vxcr_u.txcd =
809 		    (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
810 		txc->vxcr_paddr = paddrs[q * ntxqs + 0];
811 
812 		/* Command ring */
813 		txr->vxtxr_txd =
814 		    (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
815 		txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
816 	}
817 
818 	return (0);
819 }
820 
821 static void
822 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
823 {
824 	struct vmxnet3_rxqueue *rxq;
825 	struct vmxnet3_comp_ring *rxc;
826 	struct vmxnet3_rxring *rxr;
827 	if_softc_ctx_t scctx;
828 	int i;
829 
830 	rxq = &sc->vmx_rxq[q];
831 	rxc = &rxq->vxrxq_comp_ring;
832 	scctx = sc->vmx_scctx;
833 
834 	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
835 	    device_get_nameunit(sc->vmx_dev), q);
836 
837 	rxq->vxrxq_sc = sc;
838 	rxq->vxrxq_id = q;
839 
840 	/*
841 	 * First rxq is the completion queue, so there are nrxqs - 1 command
842 	 * rings starting at iflib queue id 1.
843 	 */
844 	rxc->vxcr_ndesc = scctx->isc_nrxd[0];
845 	for (i = 0; i < nrxqs - 1; i++) {
846 		rxr = &rxq->vxrxq_cmd_ring[i];
847 		rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
848 	}
849 }
850 
851 static int
852 vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
853     int nrxqs, int nrxqsets)
854 {
855 	struct vmxnet3_softc *sc;
856 	if_softc_ctx_t scctx;
857 	int q;
858 	int i;
859 	int error;
860 	caddr_t kva;
861 
862 	sc = iflib_get_softc(ctx);
863 	scctx = sc->vmx_scctx;
864 
865 	/* Allocate the array of receive queues */
866 	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
867 	    nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
868 	if (sc->vmx_rxq == NULL)
869 		return (ENOMEM);
870 
871 	/* Initialize driver state for each receive queue */
872 	for (q = 0; q < nrxqsets; q++)
873 		vmxnet3_init_rxq(sc, q, nrxqs);
874 
875 	/*
876 	 * Allocate queue state that is shared with the device.  This check
877 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
878 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
879 	 * order iflib invokes those routines in.
880 	 */
881 	if (sc->vmx_qs_dma.idi_size == 0) {
882 		error = vmxnet3_queues_shared_alloc(sc);
883 		if (error)
884 			return (error);
885 	}
886 
887 	kva = sc->vmx_qs_dma.idi_vaddr +
888 	    scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
889 	for (q = 0; q < nrxqsets; q++) {
890 		sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
891 		kva += sizeof(struct vmxnet3_rxq_shared);
892 	}
893 
894 	/* Record descriptor ring vaddrs and paddrs */
895 	for (q = 0; q < nrxqsets; q++) {
896 		struct vmxnet3_rxqueue *rxq;
897 		struct vmxnet3_rxring *rxr;
898 		struct vmxnet3_comp_ring *rxc;
899 
900 		rxq = &sc->vmx_rxq[q];
901 		rxc = &rxq->vxrxq_comp_ring;
902 
903 		/* Completion ring */
904 		rxc->vxcr_u.rxcd =
905 		    (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
906 		rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
907 
908 		/* Command ring(s) */
909 		for (i = 0; i < nrxqs - 1; i++) {
910 			rxr = &rxq->vxrxq_cmd_ring[i];
911 
912 			rxr->vxrxr_rxd =
913 			    (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
914 			rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
915 		}
916 	}
917 
918 	return (0);
919 }
920 
921 static void
922 vmxnet3_queues_free(if_ctx_t ctx)
923 {
924 	struct vmxnet3_softc *sc;
925 
926 	sc = iflib_get_softc(ctx);
927 
928 	/* Free queue state area that is shared with the device */
929 	if (sc->vmx_qs_dma.idi_size != 0) {
930 		iflib_dma_free(&sc->vmx_qs_dma);
931 		sc->vmx_qs_dma.idi_size = 0;
932 	}
933 
934 	/* Free array of receive queues */
935 	if (sc->vmx_rxq != NULL) {
936 		free(sc->vmx_rxq, M_DEVBUF);
937 		sc->vmx_rxq = NULL;
938 	}
939 
940 	/* Free array of transmit queues */
941 	if (sc->vmx_txq != NULL) {
942 		free(sc->vmx_txq, M_DEVBUF);
943 		sc->vmx_txq = NULL;
944 	}
945 }
946 
947 static int
948 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
949 {
950 	device_t dev;
951 	size_t size;
952 	int error;
953 
954 	dev = sc->vmx_dev;
955 
956 	/* Top level state structure shared with the device */
957 	size = sizeof(struct vmxnet3_driver_shared);
958 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
959 	if (error) {
960 		device_printf(dev, "cannot alloc shared memory\n");
961 		return (error);
962 	}
963 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
964 
965 	/* RSS table state shared with the device */
966 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
967 		size = sizeof(struct vmxnet3_rss_shared);
968 		error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
969 		    &sc->vmx_rss_dma, 0);
970 		if (error) {
971 			device_printf(dev, "cannot alloc rss shared memory\n");
972 			return (error);
973 		}
974 		sc->vmx_rss =
975 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
976 	}
977 
978 	return (0);
979 }
980 
981 static void
982 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
983 {
984 
985 	/* Free RSS table state shared with the device */
986 	if (sc->vmx_rss != NULL) {
987 		iflib_dma_free(&sc->vmx_rss_dma);
988 		sc->vmx_rss = NULL;
989 	}
990 
991 	/* Free top level state structure shared with the device */
992 	if (sc->vmx_ds != NULL) {
993 		iflib_dma_free(&sc->vmx_ds_dma);
994 		sc->vmx_ds = NULL;
995 	}
996 }
997 
998 static int
999 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1000 {
1001 	int error;
1002 
1003 	/* Multicast table state shared with the device */
1004 	error = iflib_dma_alloc_align(sc->vmx_ctx,
1005 	    VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
1006 	if (error)
1007 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1008 	else
1009 		sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
1010 
1011 	return (error);
1012 }
1013 
1014 static void
1015 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1016 {
1017 
1018 	/* Free multicast table state shared with the device */
1019 	if (sc->vmx_mcast != NULL) {
1020 		iflib_dma_free(&sc->vmx_mcast_dma);
1021 		sc->vmx_mcast = NULL;
1022 	}
1023 }
1024 
1025 static void
1026 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1027 {
1028 	struct vmxnet3_driver_shared *ds;
1029 	if_shared_ctx_t sctx;
1030 	if_softc_ctx_t scctx;
1031 	struct vmxnet3_txqueue *txq;
1032 	struct vmxnet3_txq_shared *txs;
1033 	struct vmxnet3_rxqueue *rxq;
1034 	struct vmxnet3_rxq_shared *rxs;
1035 	int i;
1036 
1037 	ds = sc->vmx_ds;
1038 	sctx = sc->vmx_sctx;
1039 	scctx = sc->vmx_scctx;
1040 
1041 	/*
1042 	 * Initialize fields of the shared data that remain the same across
1043 	 * reinits.  Note the shared data is zeroed when allocated.
1044 	 */
1045 
1046 	ds->magic = VMXNET3_REV1_MAGIC;
1047 
1048 	/* DriverInfo */
1049 	ds->version = VMXNET3_DRIVER_VERSION;
1050 	ds->guest = VMXNET3_GOS_FREEBSD |
1051 #ifdef __LP64__
1052 	    VMXNET3_GOS_64BIT;
1053 #else
1054 	    VMXNET3_GOS_32BIT;
1055 #endif
1056 	ds->vmxnet3_revision = 1;
1057 	ds->upt_version = 1;
1058 
1059 	/* Misc. conf */
1060 	ds->driver_data = vtophys(sc);
1061 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1062 	ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
1063 	ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
1064 	ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
1065 
1066 	/* RSS conf */
1067 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1068 		ds->rss.version = 1;
1069 		ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
1070 		ds->rss.len = sc->vmx_rss_dma.idi_size;
1071 	}
1072 
1073 	/* Interrupt control. */
1074 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1075 	/*
1076 	 * Total number of interrupt indexes we are using in the shared
1077 	 * config data, even though we don't actually allocate MSIX
1078 	 * resources for the tx queues.  Some versions of the device will
1079 	 * fail to initialize successfully if interrupt indexes are used in
1080 	 * the shared config that exceed the number of interrupts configured
1081 	 * here.
1082 	 */
1083 	ds->nintr = (scctx->isc_vectors == 1) ?
1084 	    1 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
1085 	ds->evintr = sc->vmx_event_intr_idx;
1086 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1087 
1088 	for (i = 0; i < ds->nintr; i++)
1089 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1090 
1091 	/* Receive filter. */
1092 	ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
1093 	ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
1094 
1095 	/* Tx queues */
1096 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
1097 		txq = &sc->vmx_txq[i];
1098 		txs = txq->vxtxq_ts;
1099 
1100 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
1101 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1102 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
1103 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1104 		txs->driver_data = vtophys(txq);
1105 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1106 	}
1107 
1108 	/* Rx queues */
1109 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
1110 		rxq = &sc->vmx_rxq[i];
1111 		rxs = rxq->vxrxq_rs;
1112 
1113 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
1114 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1115 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
1116 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1117 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
1118 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1119 		rxs->driver_data = vtophys(rxq);
1120 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1121 	}
1122 }
1123 
1124 static void
1125 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1126 {
1127 	/*
1128 	 * Use the same key as the Linux driver until FreeBSD can do
1129 	 * RSS (presumably Toeplitz) in software.
1130 	 */
1131 	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1132 	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1133 	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1134 	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1135 	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1136 	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1137 	};
1138 
1139 	struct vmxnet3_driver_shared *ds;
1140 	if_softc_ctx_t scctx;
1141 	struct vmxnet3_rss_shared *rss;
1142 	int i;
1143 
1144 	ds = sc->vmx_ds;
1145 	scctx = sc->vmx_scctx;
1146 	rss = sc->vmx_rss;
1147 
1148 	rss->hash_type =
1149 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1150 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1151 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1152 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1153 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1154 	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1155 
1156 	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1157 		rss->ind_table[i] = i % scctx->isc_nrxqsets;
1158 }
1159 
1160 static void
1161 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1162 {
1163 	struct ifnet *ifp;
1164 	struct vmxnet3_driver_shared *ds;
1165 	if_softc_ctx_t scctx;
1166 
1167 	ifp = sc->vmx_ifp;
1168 	ds = sc->vmx_ds;
1169 	scctx = sc->vmx_scctx;
1170 
1171 	ds->mtu = ifp->if_mtu;
1172 	ds->ntxqueue = scctx->isc_ntxqsets;
1173 	ds->nrxqueue = scctx->isc_nrxqsets;
1174 
1175 	ds->upt_features = 0;
1176 	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1177 		ds->upt_features |= UPT1_F_CSUM;
1178 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1179 		ds->upt_features |= UPT1_F_VLAN;
1180 	if (ifp->if_capenable & IFCAP_LRO)
1181 		ds->upt_features |= UPT1_F_LRO;
1182 
1183 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1184 		ds->upt_features |= UPT1_F_RSS;
1185 		vmxnet3_reinit_rss_shared_data(sc);
1186 	}
1187 
1188 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
1189 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1190 	    (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
1191 }
1192 
1193 static int
1194 vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1195 {
1196 	int error;
1197 
1198 	error = vmxnet3_alloc_shared_data(sc);
1199 	if (error)
1200 		return (error);
1201 
1202 	error = vmxnet3_alloc_mcast_table(sc);
1203 	if (error)
1204 		return (error);
1205 
1206 	vmxnet3_init_shared_data(sc);
1207 
1208 	return (0);
1209 }
1210 
1211 static void
1212 vmxnet3_free_data(struct vmxnet3_softc *sc)
1213 {
1214 
1215 	vmxnet3_free_mcast_table(sc);
1216 	vmxnet3_free_shared_data(sc);
1217 }
1218 
1219 static void
1220 vmxnet3_evintr(struct vmxnet3_softc *sc)
1221 {
1222 	device_t dev;
1223 	struct vmxnet3_txq_shared *ts;
1224 	struct vmxnet3_rxq_shared *rs;
1225 	uint32_t event;
1226 
1227 	dev = sc->vmx_dev;
1228 
1229 	/* Clear events. */
1230 	event = sc->vmx_ds->event;
1231 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1232 
1233 	if (event & VMXNET3_EVENT_LINK)
1234 		vmxnet3_link_status(sc);
1235 
1236 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1237 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1238 		ts = sc->vmx_txq[0].vxtxq_ts;
1239 		if (ts->stopped != 0)
1240 			device_printf(dev, "Tx queue error %#x\n", ts->error);
1241 		rs = sc->vmx_rxq[0].vxrxq_rs;
1242 		if (rs->stopped != 0)
1243 			device_printf(dev, "Rx queue error %#x\n", rs->error);
1244 
1245 		/* XXX - rely on iflib watchdog to reset us? */
1246 		device_printf(dev, "Rx/Tx queue error event ... "
1247 		    "waiting for iflib watchdog reset\n");
1248 	}
1249 
1250 	if (event & VMXNET3_EVENT_DIC)
1251 		device_printf(dev, "device implementation change event\n");
1252 	if (event & VMXNET3_EVENT_DEBUG)
1253 		device_printf(dev, "debug event\n");
1254 }
1255 
1256 static int
1257 vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
1258 {
1259 	struct vmxnet3_softc *sc;
1260 	struct vmxnet3_txqueue *txq;
1261 	struct vmxnet3_txring *txr;
1262 	struct vmxnet3_txdesc *txd, *sop;
1263 	bus_dma_segment_t *segs;
1264 	int nsegs;
1265 	int pidx;
1266 	int hdrlen;
1267 	int i;
1268 	int gen;
1269 
1270 	sc = vsc;
1271 	txq = &sc->vmx_txq[pi->ipi_qsidx];
1272 	txr = &txq->vxtxq_cmd_ring;
1273 	segs = pi->ipi_segs;
1274 	nsegs = pi->ipi_nsegs;
1275 	pidx = pi->ipi_pidx;
1276 
1277 	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
1278 	    ("%s: packet with too many segments %d", __func__, nsegs));
1279 
1280 	sop = &txr->vxtxr_txd[pidx];
1281 	gen = txr->vxtxr_gen ^ 1;	/* Still owned by the cpu */
1282 
1283 	for (i = 0; i < nsegs; i++) {
1284 		txd = &txr->vxtxr_txd[pidx];
1285 
1286 		txd->addr = segs[i].ds_addr;
1287 		txd->len = segs[i].ds_len;
1288 		txd->gen = gen;
1289 		txd->dtype = 0;
1290 		txd->offload_mode = VMXNET3_OM_NONE;
1291 		txd->offload_pos = 0;
1292 		txd->hlen = 0;
1293 		txd->eop = 0;
1294 		txd->compreq = 0;
1295 		txd->vtag_mode = 0;
1296 		txd->vtag = 0;
1297 
1298 		if (++pidx == txr->vxtxr_ndesc) {
1299 			pidx = 0;
1300 			txr->vxtxr_gen ^= 1;
1301 		}
1302 		gen = txr->vxtxr_gen;
1303 	}
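	/*
	 * Mark the last descriptor of the packet and request a completion
	 * for it if iflib asked for a TX interrupt.
	 */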
1304 	txd->eop = 1;
1305 	txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
1306 	pi->ipi_new_pidx = pidx;
1307 
1308 	/*
1309 	 * VLAN
1310 	 */
1311 	if (pi->ipi_mflags & M_VLANTAG) {
1312 		sop->vtag_mode = 1;
1313 		sop->vtag = pi->ipi_vtag;
1314 	}
1315 
1316 	/*
1317 	 * TSO and checksum offloads
1318 	 */
1319 	hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
1320 	if (pi->ipi_csum_flags & CSUM_TSO) {
1321 		sop->offload_mode = VMXNET3_OM_TSO;
1322 		sop->hlen = hdrlen;
1323 		sop->offload_pos = pi->ipi_tso_segsz;
1324 	} else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
1325 	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
1326 		sop->offload_mode = VMXNET3_OM_CSUM;
1327 		sop->hlen = hdrlen;
1328 		sop->offload_pos = hdrlen +
1329 		    ((pi->ipi_ipproto == IPPROTO_TCP) ?
1330 			offsetof(struct tcphdr, th_sum) :
1331 			offsetof(struct udphdr, uh_sum));
1332 	}
1333 
1334 	/* Finally, change the ownership. */
1335 	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
1336 	sop->gen ^= 1;
1337 
1338 	return (0);
1339 }
1340 
1341 static void
1342 vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
1343 {
1344 	struct vmxnet3_softc *sc;
1345 	struct vmxnet3_txqueue *txq;
1346 
1347 	sc = vsc;
1348 	txq = &sc->vmx_txq[txqid];
1349 
1350 	/*
1351 	 * pidx is what we last set ipi_new_pidx to in
1352 	 * vmxnet3_isc_txd_encap()
1353 	 */
1354 
1355 	/*
1356 	 * Avoid expensive register updates if the flush request is
1357 	 * redundant.
1358 	 */
1359 	if (txq->vxtxq_last_flush == pidx)
1360 		return;
1361 	txq->vxtxq_last_flush = pidx;
1362 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
1363 }
1364 
1365 static int
1366 vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
1367 {
1368 	struct vmxnet3_softc *sc;
1369 	struct vmxnet3_txqueue *txq;
1370 	struct vmxnet3_comp_ring *txc;
1371 	struct vmxnet3_txcompdesc *txcd;
1372 	struct vmxnet3_txring *txr;
1373 	int processed;
1374 
1375 	sc = vsc;
1376 	txq = &sc->vmx_txq[txqid];
1377 	txc = &txq->vxtxq_comp_ring;
1378 	txr = &txq->vxtxq_cmd_ring;
1379 
1380 	/*
1381 	 * If clear is true, we need to report the number of TX command ring
1382 	 * descriptors that have been processed by the device.  If clear is
1383 	 * false, we just need to report whether or not at least one TX
1384 	 * command ring descriptor has been processed by the device.
1385 	 */
1386 	processed = 0;
1387 	for (;;) {
1388 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1389 		if (txcd->gen != txc->vxcr_gen)
1390 			break;
1391 		else if (!clear)
1392 			return (1);
1393 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1394 
1395 		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1396 			txc->vxcr_next = 0;
1397 			txc->vxcr_gen ^= 1;
1398 		}
1399 
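		/*
		 * eop_idx is the command ring index of the last descriptor
		 * of the completed packet; account for the ring wrapping
		 * past vxtxr_next.
		 */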
1400 		if (txcd->eop_idx < txr->vxtxr_next)
1401 			processed += txr->vxtxr_ndesc -
1402 			    (txr->vxtxr_next - txcd->eop_idx) + 1;
1403 		else
1404 			processed += txcd->eop_idx - txr->vxtxr_next + 1;
1405 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1406 	}
1407 
1408 	return (processed);
1409 }
1410 
1411 static int
1412 vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
1413 {
1414 	struct vmxnet3_softc *sc;
1415 	struct vmxnet3_rxqueue *rxq;
1416 	struct vmxnet3_comp_ring *rxc;
1417 	struct vmxnet3_rxcompdesc *rxcd;
1418 	int avail;
1419 	int completed_gen;
1420 #ifdef INVARIANTS
1421 	int expect_sop = 1;
1422 #endif
1423 	sc = vsc;
1424 	rxq = &sc->vmx_rxq[rxqid];
1425 	rxc = &rxq->vxrxq_comp_ring;
1426 
1427 	avail = 0;
1428 	completed_gen = rxc->vxcr_gen;
1429 	for (;;) {
1430 		rxcd = &rxc->vxcr_u.rxcd[idx];
1431 		if (rxcd->gen != completed_gen)
1432 			break;
1433 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1434 
1435 #ifdef INVARIANTS
1436 		if (expect_sop)
1437 			KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1438 		else
1439 			KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
1440 		expect_sop = rxcd->eop;
1441 #endif
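		/*
		 * Only complete packets with a non-zero length count
		 * against the budget; zero-length completions are skipped
		 * in vmxnet3_isc_rxd_pkt_get().
		 */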
1442 		if (rxcd->eop && (rxcd->len != 0))
1443 			avail++;
1444 		if (avail > budget)
1445 			break;
1446 		if (++idx == rxc->vxcr_ndesc) {
1447 			idx = 0;
1448 			completed_gen ^= 1;
1449 		}
1450 	}
1451 
1452 	return (avail);
1453 }
1454 
1455 static int
1456 vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
1457 {
1458 	struct vmxnet3_softc *sc;
1459 	if_softc_ctx_t scctx;
1460 	struct vmxnet3_rxqueue *rxq;
1461 	struct vmxnet3_comp_ring *rxc;
1462 	struct vmxnet3_rxcompdesc *rxcd;
1463 	struct vmxnet3_rxring *rxr;
1464 	struct vmxnet3_rxdesc *rxd;
1465 	if_rxd_frag_t frag;
1466 	int cqidx;
1467 	uint16_t total_len;
1468 	uint8_t nfrags;
1469 	uint8_t flid;
1470 
1471 	sc = vsc;
1472 	scctx = sc->vmx_scctx;
1473 	rxq = &sc->vmx_rxq[ri->iri_qsidx];
1474 	rxc = &rxq->vxrxq_comp_ring;
1475 
1476 	/*
1477 	 * Get a single packet starting at the given index in the completion
1478 	 * queue.  That we have been called indicates that
1479 	 * vmxnet3_isc_rxd_available() has already verified that either
1480 	 * there is a complete packet available starting at the given index,
1481 	 * or there are one or more zero length packets starting at the
1482 	 * given index followed by a complete packet, so no verification of
1483 	 * ownership of the descriptors (and no associated read barrier) is
1484 	 * required here.
1485 	 */
1486 	cqidx = ri->iri_cidx;
1487 	rxcd = &rxc->vxcr_u.rxcd[cqidx];
1488 	while (rxcd->len == 0) {
1489 		KASSERT(rxcd->sop && rxcd->eop,
1490 		    ("%s: zero-length packet without both sop and eop set",
1491 			__func__));
1492 		if (++cqidx == rxc->vxcr_ndesc) {
1493 			cqidx = 0;
1494 			rxc->vxcr_gen ^= 1;
1495 		}
1496 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1497 	}
1498 	KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1499 
1500 	/*
1501 	 * RSS and flow ID
1502 	 */
1503 	ri->iri_flowid = rxcd->rss_hash;
1504 	switch (rxcd->rss_type) {
1505 	case VMXNET3_RCD_RSS_TYPE_NONE:
1506 		ri->iri_flowid = ri->iri_qsidx;
1507 		ri->iri_rsstype = M_HASHTYPE_NONE;
1508 		break;
1509 	case VMXNET3_RCD_RSS_TYPE_IPV4:
1510 		ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
1511 		break;
1512 	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
1513 		ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
1514 		break;
1515 	case VMXNET3_RCD_RSS_TYPE_IPV6:
1516 		ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
1517 		break;
1518 	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
1519 		ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
1520 		break;
1521 	default:
1522 		ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1523 		break;
1524 	}
1525 
1526 	/* VLAN */
1527 	if (rxcd->vlan) {
1528 		ri->iri_flags |= M_VLANTAG;
1529 		ri->iri_vtag = rxcd->vtag;
1530 	}
1531 
1532 	/* Checksum offload */
1533 	if (!rxcd->no_csum) {
1534 		uint32_t csum_flags = 0;
1535 
1536 		if (rxcd->ipv4) {
1537 			csum_flags |= CSUM_IP_CHECKED;
1538 			if (rxcd->ipcsum_ok)
1539 				csum_flags |= CSUM_IP_VALID;
1540 		}
1541 		if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
1542 			csum_flags |= CSUM_L4_CALC;
1543 			if (rxcd->csum_ok) {
1544 				csum_flags |= CSUM_L4_VALID;
1545 				ri->iri_csum_data = 0xffff;
1546 			}
1547 		}
1548 		ri->iri_csum_flags = csum_flags;
1549 	}
1550 
1551 	/*
1552 	 * The queue numbering scheme used for rxcd->qid is as follows:
1553 	 *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
1554 	 *  - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
1555 	 *
1556 	 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
1557 	 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
1558 	 * indicates command ring (and flid) 1.
1559 	 */
1560 	nfrags = 0;
1561 	total_len = 0;
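	/*
	 * Walk the completion descriptors for this packet, recording one
	 * iflib fragment per descriptor, until eop is seen.
	 */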
1562 	do {
1563 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1564 		KASSERT(rxcd->gen == rxc->vxcr_gen,
1565 		    ("%s: generation mismatch", __func__));
1566 		flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
1567 		rxr = &rxq->vxrxq_cmd_ring[flid];
1568 		rxd = &rxr->vxrxr_rxd[rxcd->rxd_idx];
1569 
1570 		frag = &ri->iri_frags[nfrags];
1571 		frag->irf_flid = flid;
1572 		frag->irf_idx = rxcd->rxd_idx;
1573 		frag->irf_len = rxcd->len;
1574 		total_len += rxcd->len;
1575 		nfrags++;
1576 		if (++cqidx == rxc->vxcr_ndesc) {
1577 			cqidx = 0;
1578 			rxc->vxcr_gen ^= 1;
1579 		}
1580 	} while (!rxcd->eop);
1581 
1582 	ri->iri_cidx = cqidx;
1583 	ri->iri_nfrags = nfrags;
1584 	ri->iri_len = total_len;
1585 
1586 	return (0);
1587 }
1588 
1589 static void
1590 vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
1591 {
1592 	struct vmxnet3_softc *sc;
1593 	struct vmxnet3_rxqueue *rxq;
1594 	struct vmxnet3_rxring *rxr;
1595 	struct vmxnet3_rxdesc *rxd;
1596 	uint64_t *paddrs;
1597 	int count;
1598 	int len;
1599 	int pidx;
1600 	int i;
1601 	uint8_t flid;
1602 	uint8_t btype;
1603 
1604 	count = iru->iru_count;
1605 	len = iru->iru_buf_size;
1606 	pidx = iru->iru_pidx;
1607 	flid = iru->iru_flidx;
1608 	paddrs = iru->iru_paddrs;
1609 
1610 	sc = vsc;
1611 	rxq = &sc->vmx_rxq[iru->iru_qsidx];
1612 	rxr = &rxq->vxrxq_cmd_ring[flid];
1613 	rxd = rxr->vxrxr_rxd;
1614 
1615 	/*
1616 	 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
1617 	 * command ring 1 is filled with BTYPE_BODY descriptors.
1618 	 */
1619 	btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
1620 	for (i = 0; i < count; i++) {
1621 		rxd[pidx].addr = paddrs[i];
1622 		rxd[pidx].len = len;
1623 		rxd[pidx].btype = btype;
1624 		rxd[pidx].gen = rxr->vxrxr_gen;
1625 
1626 		if (++pidx == rxr->vxrxr_ndesc) {
1627 			pidx = 0;
1628 			rxr->vxrxr_gen ^= 1;
1629 		}
1630 	}
1631 }
1632 
1633 static void
1634 vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
1635 {
1636 	struct vmxnet3_softc *sc;
1637 	struct vmxnet3_rxqueue *rxq;
1638 	struct vmxnet3_rxring *rxr;
1639 	bus_size_t r;
1640 
1641 	sc = vsc;
1642 	rxq = &sc->vmx_rxq[rxqid];
1643 	rxr = &rxq->vxrxq_cmd_ring[flid];
1644 
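	/* Each free list has its own ring head register in BAR0. */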
1645 	if (flid == 0)
1646 		r = VMXNET3_BAR0_RXH1(rxqid);
1647 	else
1648 		r = VMXNET3_BAR0_RXH2(rxqid);
1649 
1650 	/*
1651 	 * pidx is the index of the last descriptor with a buffer the device
1652 	 * can use, and the device needs to be told which index is one past
1653 	 * that.
1654 	 */
1655 	if (++pidx == rxr->vxrxr_ndesc)
1656 		pidx = 0;
1657 	vmxnet3_write_bar0(sc, r, pidx);
1658 }
1659 
1660 static int
1661 vmxnet3_legacy_intr(void *xsc)
1662 {
1663 	struct vmxnet3_softc *sc;
1664 	if_softc_ctx_t scctx;
1665 	if_ctx_t ctx;
1666 
1667 	sc = xsc;
1668 	scctx = sc->vmx_scctx;
1669 	ctx = sc->vmx_ctx;
1670 
1671 	/*
1672 	 * When there is only a single interrupt configured, this routine
1673 	 * runs in fast interrupt context, following which the rxq 0 task
1674 	 * will be enqueued.
1675 	 */
1676 	if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
1677 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
1678 			return (FILTER_HANDLED);
1679 	}
1680 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1681 		vmxnet3_intr_disable_all(ctx);
1682 
1683 	if (sc->vmx_ds->event != 0)
1684 		iflib_admin_intr_deferred(ctx);
1685 
1686 	/*
1687 	 * XXX - When there is both rxq and event activity, do we care
1688 	 * whether the rxq 0 task or the admin task re-enables the interrupt
1689 	 * first?
1690 	 */
1691 	return (FILTER_SCHEDULE_THREAD);
1692 }
1693 
1694 static int
1695 vmxnet3_rxq_intr(void *vrxq)
1696 {
1697 	struct vmxnet3_softc *sc;
1698 	struct vmxnet3_rxqueue *rxq;
1699 
1700 	rxq = vrxq;
1701 	sc = rxq->vxrxq_sc;
1702 
1703 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1704 		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
1705 
1706 	return (FILTER_SCHEDULE_THREAD);
1707 }
1708 
1709 static int
1710 vmxnet3_event_intr(void *vsc)
1711 {
1712 	struct vmxnet3_softc *sc;
1713 
1714 	sc = vsc;
1715 
1716 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1717 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
1718 
1719 	/*
1720 	 * The work will be done via vmxnet3_update_admin_status(), and the
1721 	 * interrupt will be re-enabled in vmxnet3_link_intr_enable().
1724 	 */
1725 	return (FILTER_SCHEDULE_THREAD);
1726 }
1727 
1728 static void
1729 vmxnet3_stop(if_ctx_t ctx)
1730 {
1731 	struct vmxnet3_softc *sc;
1732 
1733 	sc = iflib_get_softc(ctx);
1734 
1735 	sc->vmx_link_active = 0;
1736 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
1737 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
1738 }
1739 
1740 static void
1741 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
1742 {
1743 	struct vmxnet3_txring *txr;
1744 	struct vmxnet3_comp_ring *txc;
1745 
1746 	txq->vxtxq_last_flush = -1;
1747 
1748 	txr = &txq->vxtxq_cmd_ring;
1749 	txr->vxtxr_next = 0;
1750 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
1751 	/*
1752 	 * iflib has zeroed out the descriptor array during the prior attach
1753 	 * or stop
1754 	 */
1755 
1756 	txc = &txq->vxtxq_comp_ring;
1757 	txc->vxcr_next = 0;
1758 	txc->vxcr_gen = VMXNET3_INIT_GEN;
1759 	/*
1760 	 * iflib has zeroed out the descriptor array during the prior attach
1761 	 * or stop
1762 	 */
1763 }
1764 
1765 static void
1766 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
1767 {
1768 	struct vmxnet3_rxring *rxr;
1769 	struct vmxnet3_comp_ring *rxc;
1770 	int i;
1771 
1772 	/*
1773 	 * The descriptors will be populated with buffers during a
1774 	 * subsequent invocation of vmxnet3_isc_rxd_refill()
1775 	 */
1776 	for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
1777 		rxr = &rxq->vxrxq_cmd_ring[i];
1778 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
1779 		/*
1780 		 * iflib has zeroed out the descriptor array during the
1781 		 * prior attach or stop
1782 		 */
1783 	}
1784 
1785 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
1786 		rxr = &rxq->vxrxq_cmd_ring[i];
1787 		rxr->vxrxr_gen = 0;
1788 		bzero(rxr->vxrxr_rxd,
1789 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
1790 	}
1791 
1792 	rxc = &rxq->vxrxq_comp_ring;
1793 	rxc->vxcr_next = 0;
1794 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
1795 	/*
1796 	 * iflib has zeroed out the descriptor array during the prior attach
1797 	 * or stop
1798 	 */
1799 }
1800 
1801 static void
1802 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
1803 {
1804 	if_softc_ctx_t scctx;
1805 	int q;
1806 
1807 	scctx = sc->vmx_scctx;
1808 
1809 	for (q = 0; q < scctx->isc_ntxqsets; q++)
1810 		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
1811 
1812 	for (q = 0; q < scctx->isc_nrxqsets; q++)
1813 		vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
1814 }
1815 
1816 static int
1817 vmxnet3_enable_device(struct vmxnet3_softc *sc)
1818 {
1819 	if_softc_ctx_t scctx;
1820 	int q;
1821 
1822 	scctx = sc->vmx_scctx;
1823 
1824 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
1825 		device_printf(sc->vmx_dev, "device enable command failed!\n");
1826 		return (1);
1827 	}
1828 
1829 	/* Reset the Rx queue heads. */
1830 	for (q = 0; q < scctx->isc_nrxqsets; q++) {
1831 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
1832 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
1833 	}
1834 
1835 	return (0);
1836 }
1837 
1838 static void
1839 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
1840 {
1841 	struct ifnet *ifp;
1842 
1843 	ifp = sc->vmx_ifp;
1844 
1845 	vmxnet3_set_rxfilter(sc, if_getflags(ifp));
1846 
1847 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1848 		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
1849 		    sizeof(sc->vmx_ds->vlan_filter));
1850 	else
1851 		bzero(sc->vmx_ds->vlan_filter,
1852 		    sizeof(sc->vmx_ds->vlan_filter));
1853 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
1854 }
1855 
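/*
 * Bring the device up: record the maximum frame size, program the current
 * MAC address, reinitialize the shared data area and the queues, enable the
 * device, restore the Rx filters, and report the link state to iflib.
 */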
1856 static void
1857 vmxnet3_init(if_ctx_t ctx)
1858 {
1859 	struct vmxnet3_softc *sc;
1860 	if_softc_ctx_t scctx;
1861 
1862 	sc = iflib_get_softc(ctx);
1863 	scctx = sc->vmx_scctx;
1864 
1865 	scctx->isc_max_frame_size = if_getmtu(iflib_get_ifp(ctx)) +
1866 	    ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
1867 
1868 	/* Use the current MAC address. */
1869 	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1870 	vmxnet3_set_lladdr(sc);
1871 
1872 	vmxnet3_reinit_shared_data(sc);
1873 	vmxnet3_reinit_queues(sc);
1874 
1875 	vmxnet3_enable_device(sc);
1876 
1877 	vmxnet3_reinit_rxfilters(sc);
1878 	vmxnet3_link_status(sc);
1879 }
1880 
1881 static void
1882 vmxnet3_multi_set(if_ctx_t ctx)
1883 {
1884 
1885 	vmxnet3_set_rxfilter(iflib_get_softc(ctx),
1886 	    if_getflags(iflib_get_ifp(ctx)));
1887 }
1888 
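/*
 * Reject any MTU for which a maximal frame (MTU plus Ethernet, VLAN, and
 * CRC overhead) would exceed VMXNET3_TX_MAXSIZE.
 */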
1889 static int
1890 vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
1891 {
1892 
1893 	if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
1894 		ETHER_CRC_LEN))
1895 		return (EINVAL);
1896 
1897 	return (0);
1898 }
1899 
1900 static void
1901 vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1902 {
1903 	struct vmxnet3_softc *sc;
1904 
1905 	sc = iflib_get_softc(ctx);
1906 
1907 	ifmr->ifm_status = IFM_AVALID;
1908 	ifmr->ifm_active = IFM_ETHER;
1909 
1910 	if (vmxnet3_link_is_up(sc) != 0) {
1911 		ifmr->ifm_status |= IFM_ACTIVE;
1912 		ifmr->ifm_active |= IFM_AUTO;
1913 	} else
1914 		ifmr->ifm_active |= IFM_NONE;
1915 }
1916 
1917 static int
1918 vmxnet3_media_change(if_ctx_t ctx)
1919 {
1920 
1921 	/* Ignore. */
1922 	return (0);
1923 }
1924 
1925 static int
1926 vmxnet3_promisc_set(if_ctx_t ctx, int flags)
1927 {
1928 
1929 	vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
1930 
1931 	return (0);
1932 }
1933 
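/*
 * iflib maintains the standard interface counters, so defer every known
 * counter to the default handler.
 */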
1934 static uint64_t
1935 vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
1936 {
1937 	if_t ifp = iflib_get_ifp(ctx);
1938 
1939 	if (cnt < IFCOUNTERS)
1940 		return (if_get_counter_default(ifp, cnt));
1941 
1942 	return (0);
1943 }
1944 
1945 static void
1946 vmxnet3_update_admin_status(if_ctx_t ctx)
1947 {
1948 	struct vmxnet3_softc *sc;
1949 
1950 	sc = iflib_get_softc(ctx);
1951 	if (sc->vmx_ds->event != 0)
1952 		vmxnet3_evintr(sc);
1953 
1954 	vmxnet3_refresh_host_stats(sc);
1955 }
1956 
1957 static void
1958 vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
1959 {
1960 	/* Host stats refresh is global, so just trigger it on txq 0 */
1961 	if (qid == 0)
1962 		vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
1963 }
1964 
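/*
 * The VLAN filter is kept as a private 4096-bit bitmap of 32-bit words;
 * vmxnet3_reinit_rxfilters() copies it into the shared data area when VLAN
 * hardware filtering is enabled.
 */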
1965 static void
1966 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
1967 {
1968 	int idx, bit;
1969 
1970 	if (tag == 0 || tag > 4095)
1971 		return;
1972 
1973 	idx = (tag >> 5) & 0x7F;
1974 	bit = tag & 0x1F;
1975 
1976 	/* Update our private VLAN bitvector. */
1977 	if (add)
1978 		sc->vmx_vlan_filter[idx] |= (1 << bit);
1979 	else
1980 		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
1981 }
1982 
1983 static void
1984 vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
1985 {
1986 
1987 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
1988 }
1989 
1990 static void
1991 vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
1992 {
1993 
1994 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
1995 }
1996 
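/*
 * Program the receive filter: derive the Rx mode from the interface flags
 * and, unless all-multicast is requested, copy up to VMXNET3_MULTICAST_MAX
 * link-level multicast addresses into the shared multicast table, falling
 * back to all-multicast if the table overflows.
 */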
1997 static void
1998 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
1999 {
2000 	struct ifnet *ifp;
2001 	struct vmxnet3_driver_shared *ds;
2002 	struct ifmultiaddr *ifma;
2003 	u_int mode;
2004 
2005 	ifp = sc->vmx_ifp;
2006 	ds = sc->vmx_ds;
2007 
2008 	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
2009 	if (flags & IFF_PROMISC)
2010 		mode |= VMXNET3_RXMODE_PROMISC;
2011 	if (flags & IFF_ALLMULTI)
2012 		mode |= VMXNET3_RXMODE_ALLMULTI;
2013 	else {
2014 		int cnt = 0, overflow = 0;
2015 
2016 		if_maddr_rlock(ifp);
2017 		CK_STAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) {
2018 			if (ifma->ifma_addr->sa_family != AF_LINK)
2019 				continue;
2020 			else if (cnt == VMXNET3_MULTICAST_MAX) {
2021 				overflow = 1;
2022 				break;
2023 			}
2024 
2025 			bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr),
2026 			    &sc->vmx_mcast[cnt * ETHER_ADDR_LEN], ETHER_ADDR_LEN);
2027 			cnt++;
2028 		}
2029 		if_maddr_runlock(ifp);
2030 
2031 		if (overflow != 0) {
2032 			cnt = 0;
2033 			mode |= VMXNET3_RXMODE_ALLMULTI;
2034 		} else if (cnt > 0)
2035 			mode |= VMXNET3_RXMODE_MCAST;
2036 		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
2037 	}
2038 
2039 	ds->rxmode = mode;
2040 
2041 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
2042 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
2043 }
2044 
2045 static void
2046 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
2047 {
2048 
2049 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
2050 }
2051 
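/* The low bit of the GET_LINK command result reflects the link state. */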
2052 static int
2053 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
2054 {
2055 	uint32_t status;
2056 
2057 	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
2058 	return (!!(status & 0x1));
2059 }
2060 
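/*
 * Push link state changes to iflib.  The paravirtual link is always
 * reported at 10 Gbps.
 */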
2061 static void
2062 vmxnet3_link_status(struct vmxnet3_softc *sc)
2063 {
2064 	if_ctx_t ctx;
2065 	uint64_t speed;
2066 	int link;
2067 
2068 	ctx = sc->vmx_ctx;
2069 	link = vmxnet3_link_is_up(sc);
2070 	speed = IF_Gbps(10);
2071 
2072 	if (link != 0 && sc->vmx_link_active == 0) {
2073 		sc->vmx_link_active = 1;
2074 		iflib_link_state_change(ctx, LINK_STATE_UP, speed);
2075 	} else if (link == 0 && sc->vmx_link_active != 0) {
2076 		sc->vmx_link_active = 0;
2077 		iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
2078 	}
2079 }
2080 
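/*
 * The MAC address is exposed through two BAR1 registers: MACL holds bytes
 * 0-3 and MACH bytes 4-5.  vmxnet3_get_lladdr() reads it back via the
 * GET_MACL/GET_MACH commands.
 */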
2081 static void
2082 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
2083 {
2084 	uint32_t ml, mh;
2085 
2086 	ml  = sc->vmx_lladdr[0];
2087 	ml |= sc->vmx_lladdr[1] << 8;
2088 	ml |= sc->vmx_lladdr[2] << 16;
2089 	ml |= sc->vmx_lladdr[3] << 24;
2090 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
2091 
2092 	mh  = sc->vmx_lladdr[4];
2093 	mh |= sc->vmx_lladdr[5] << 8;
2094 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
2095 }
2096 
2097 static void
2098 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
2099 {
2100 	uint32_t ml, mh;
2101 
2102 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
2103 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
2104 
2105 	sc->vmx_lladdr[0] = ml;
2106 	sc->vmx_lladdr[1] = ml >> 8;
2107 	sc->vmx_lladdr[2] = ml >> 16;
2108 	sc->vmx_lladdr[3] = ml >> 24;
2109 	sc->vmx_lladdr[4] = mh;
2110 	sc->vmx_lladdr[5] = mh >> 8;
2111 }
2112 
2113 static void
2114 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
2115     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2116 {
2117 	struct sysctl_oid *node, *txsnode;
2118 	struct sysctl_oid_list *list, *txslist;
2119 	struct UPT1_TxStats *txstats;
2120 	char namebuf[16];
2121 
2122 	txstats = &txq->vxtxq_ts->stats;
2123 
2124 	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
2125 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
2126 	    NULL, "Transmit Queue");
2127 	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
2128 
2129 	/*
2130 	 * Add statistics reported by the host. These are updated by the
2131 	 * iflib txq timer on txq 0.
2132 	 */
2133 	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
2134 	    NULL, "Host Statistics");
2135 	txslist = SYSCTL_CHILDREN(txsnode);
2136 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
2137 	    &txstats->TSO_packets, "TSO packets");
2138 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
2139 	    &txstats->TSO_bytes, "TSO bytes");
2140 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2141 	    &txstats->ucast_packets, "Unicast packets");
2142 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2143 	    &txstats->ucast_bytes, "Unicast bytes");
2144 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2145 	    &txstats->mcast_packets, "Multicast packets");
2146 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2147 	    &txstats->mcast_bytes, "Multicast bytes");
2148 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
2149 	    &txstats->error, "Errors");
2150 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
2151 	    &txstats->discard, "Discards");
2152 }
2153 
2154 static void
2155 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
2156     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2157 {
2158 	struct sysctl_oid *node, *rxsnode;
2159 	struct sysctl_oid_list *list, *rxslist;
2160 	struct UPT1_RxStats *rxstats;
2161 	char namebuf[16];
2162 
2163 	rxstats = &rxq->vxrxq_rs->stats;
2164 
2165 	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
2166 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
2167 	    NULL, "Receive Queue");
2168 	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
2169 
2170 	/*
2171 	 * Add statistics reported by the host. These are updated by the
2172 	 * iflib txq timer on txq 0.
2173 	 */
2174 	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
2175 	    NULL, "Host Statistics");
2176 	rxslist = SYSCTL_CHILDREN(rxsnode);
2177 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
2178 	    &rxstats->LRO_packets, "LRO packets");
2179 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
2180 	    &rxstats->LRO_bytes, "LRO bytes");
2181 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2182 	    &rxstats->ucast_packets, "Unicast packets");
2183 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2184 	    &rxstats->ucast_bytes, "Unicast bytes");
2185 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2186 	    &rxstats->mcast_packets, "Multicast packets");
2187 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2188 	    &rxstats->mcast_bytes, "Multicast bytes");
2189 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
2190 	    &rxstats->bcast_packets, "Broadcast packets");
2191 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
2192 	    &rxstats->bcast_bytes, "Broadcast bytes");
2193 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
2194 	    &rxstats->nobuffer, "No buffer");
2195 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
2196 	    &rxstats->error, "Errors");
2197 }
2198 
2199 static void
2200 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
2201     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2202 {
2203 	if_softc_ctx_t scctx;
2204 	struct sysctl_oid *node;
2205 	struct sysctl_oid_list *list;
2206 	int i;
2207 
2208 	scctx = sc->vmx_scctx;
2209 
2210 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
2211 		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
2212 
2213 		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
2214 		    "debug", CTLFLAG_RD, NULL, "");
2215 		list = SYSCTL_CHILDREN(node);
2216 
2217 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
2218 		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
2219 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
2220 		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
2221 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
2222 		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
2223 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
2224 		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
2225 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2226 		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
2227 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2228 		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
2229 	}
2230 
2231 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
2232 		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
2233 
2234 		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
2235 		    "debug", CTLFLAG_RD, NULL, "");
2236 		list = SYSCTL_CHILDREN(node);
2237 
2238 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
2239 		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
2240 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
2241 		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
2242 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
2243 		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
2244 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
2245 		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
2246 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2247 		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
2248 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2249 		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
2250 	}
2251 }
2252 
2253 static void
2254 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
2255     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2256 {
2257 	if_softc_ctx_t scctx;
2258 	int i;
2259 
2260 	scctx = sc->vmx_scctx;
2261 
2262 	for (i = 0; i < scctx->isc_ntxqsets; i++)
2263 		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
2264 	for (i = 0; i < scctx->isc_nrxqsets; i++)
2265 		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
2266 
2267 	vmxnet3_setup_debug_sysctl(sc, ctx, child);
2268 }
2269 
2270 static void
2271 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
2272 {
2273 	device_t dev;
2274 	struct sysctl_ctx_list *ctx;
2275 	struct sysctl_oid *tree;
2276 	struct sysctl_oid_list *child;
2277 
2278 	dev = sc->vmx_dev;
2279 	ctx = device_get_sysctl_ctx(dev);
2280 	tree = device_get_sysctl_tree(dev);
2281 	child = SYSCTL_CHILDREN(tree);
2282 
2283 	vmxnet3_setup_queue_sysctl(sc, ctx, child);
2284 }
2285 
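/*
 * Register access helpers.  In this driver BAR0 is used for the Rx ring
 * head and interrupt mask registers, while BAR1 carries the command
 * register and the MAC address registers.
 */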
2286 static void
2287 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2288 {
2289 
2290 	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
2291 }
2292 
2293 static uint32_t
2294 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
2295 {
2296 
2297 	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
2298 }
2299 
2300 static void
2301 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2302 {
2303 
2304 	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
2305 }
2306 
2307 static void
2308 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2309 {
2310 
2311 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
2312 }
2313 
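/*
 * Commands that return a value are issued by writing the command to the
 * BAR1 command register and reading the result back from the same register;
 * the barrier keeps the write ordered before the read.
 */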
2314 static uint32_t
2315 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2316 {
2317 
2318 	vmxnet3_write_cmd(sc, cmd);
2319 	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
2320 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2321 	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
2322 }
2323 
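/*
 * Interrupts are masked per vector through the BAR0 IMASK registers:
 * writing 0 unmasks a vector, writing 1 masks it.
 */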
2324 static void
2325 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
2326 {
2327 
2328 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
2329 }
2330 
2331 static void
2332 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
2333 {
2334 
2335 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
2336 }
2337 
2338 static int
2339 vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2340 {
2341 	/* Not using interrupts for TX */
2342 	return (0);
2343 }
2344 
2345 static int
2346 vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2347 {
2348 	struct vmxnet3_softc *sc;
2349 
2350 	sc = iflib_get_softc(ctx);
2351 	vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
2352 	return (0);
2353 }
2354 
2355 static void
2356 vmxnet3_link_intr_enable(if_ctx_t ctx)
2357 {
2358 	struct vmxnet3_softc *sc;
2359 
2360 	sc = iflib_get_softc(ctx);
2361 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2362 }
2363 
2364 static void
2365 vmxnet3_intr_enable_all(if_ctx_t ctx)
2366 {
2367 	struct vmxnet3_softc *sc;
2368 	if_softc_ctx_t scctx;
2369 	int i;
2370 
2371 	sc = iflib_get_softc(ctx);
2372 	scctx = sc->vmx_scctx;
2373 	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
2374 	for (i = 0; i < scctx->isc_vectors; i++)
2375 		vmxnet3_enable_intr(sc, i);
2376 }
2377 
2378 static void
2379 vmxnet3_intr_disable_all(if_ctx_t ctx)
2380 {
2381 	struct vmxnet3_softc *sc;
2382 	int i;
2383 
2384 	sc = iflib_get_softc(ctx);
2385 	/*
2386 	 * iflib may invoke this routine before vmxnet3_attach_post() has
2387 	 * run, which is before the top level shared data area is
2388 	 * initialized and the device made aware of it.
2389 	 */
2390 	if (sc->vmx_ds != NULL)
2391 		sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
2392 	for (i = 0; i < VMXNET3_MAX_INTRS; i++)
2393 		vmxnet3_disable_intr(sc, i);
2394 }
2395 
2396 /*
2397  * Since this is a purely paravirtualized device, we do not have
2398  * to worry about DMA coherency. But at times, we must make sure
2399  * both the compiler and CPU do not reorder memory operations.
2400  */
2401 static inline void
2402 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
2403 {
2404 
2405 	switch (type) {
2406 	case VMXNET3_BARRIER_RD:
2407 		rmb();
2408 		break;
2409 	case VMXNET3_BARRIER_WR:
2410 		wmb();
2411 		break;
2412 	case VMXNET3_BARRIER_RDWR:
2413 		mb();
2414 		break;
2415 	default:
2416 		panic("%s: bad barrier type %d", __func__, type);
2417 	}
2418 }
2419