1 /*-
2  * Copyright (c) 2013 Tsubai Masanari
3  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4  * Copyright (c) 2018 Patrick Kelsey
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
19  */
20 
21 /* Driver for VMware vmxnet3 virtual ethernet devices. */
22 
23 #include <sys/cdefs.h>
24 __FBSDID("$FreeBSD$");
25 
26 #include <sys/param.h>
27 #include <sys/systm.h>
28 #include <sys/kernel.h>
29 #include <sys/endian.h>
30 #include <sys/sockio.h>
31 #include <sys/mbuf.h>
32 #include <sys/malloc.h>
33 #include <sys/module.h>
34 #include <sys/socket.h>
35 #include <sys/sysctl.h>
36 #include <sys/smp.h>
37 #include <vm/vm.h>
38 #include <vm/pmap.h>
39 
40 #include <net/ethernet.h>
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/if_arp.h>
44 #include <net/if_dl.h>
45 #include <net/if_types.h>
46 #include <net/if_media.h>
47 #include <net/if_vlan_var.h>
48 #include <net/iflib.h>
49 
50 #include <netinet/in_systm.h>
51 #include <netinet/in.h>
52 #include <netinet/ip.h>
53 #include <netinet/ip6.h>
54 #include <netinet6/ip6_var.h>
55 #include <netinet/udp.h>
56 #include <netinet/tcp.h>
57 
58 #include <machine/bus.h>
59 #include <machine/resource.h>
60 #include <sys/bus.h>
61 #include <sys/rman.h>
62 
63 #include <dev/pci/pcireg.h>
64 #include <dev/pci/pcivar.h>
65 
66 #include "ifdi_if.h"
67 
68 #include "if_vmxreg.h"
69 #include "if_vmxvar.h"
70 
71 #include "opt_inet.h"
72 #include "opt_inet6.h"
73 
74 
75 #define VMXNET3_VMWARE_VENDOR_ID	0x15AD
76 #define VMXNET3_VMWARE_DEVICE_ID	0x07B0
77 
78 static pci_vendor_info_t vmxnet3_vendor_info_array[] =
79 {
80 	PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
81 	/* required last entry */
82 	PVID_END
83 };
84 
85 static void	*vmxnet3_register(device_t);
86 static int	vmxnet3_attach_pre(if_ctx_t);
87 static int	vmxnet3_msix_intr_assign(if_ctx_t, int);
88 static void	vmxnet3_free_irqs(struct vmxnet3_softc *);
89 static int	vmxnet3_attach_post(if_ctx_t);
90 static int	vmxnet3_detach(if_ctx_t);
91 static int	vmxnet3_shutdown(if_ctx_t);
92 static int	vmxnet3_suspend(if_ctx_t);
93 static int	vmxnet3_resume(if_ctx_t);
94 
95 static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
96 static void	vmxnet3_free_resources(struct vmxnet3_softc *);
97 static int	vmxnet3_check_version(struct vmxnet3_softc *);
98 static void	vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
99 
100 static int	vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
101 static void	vmxnet3_init_txq(struct vmxnet3_softc *, int);
102 static int	vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
103 static void	vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
104 static int	vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
105 static void	vmxnet3_queues_free(if_ctx_t);
106 
107 static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
108 static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
109 static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
110 static void	vmxnet3_free_mcast_table(struct vmxnet3_softc *);
111 static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
112 static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
113 static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
114 static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
115 static void	vmxnet3_free_data(struct vmxnet3_softc *);
116 
117 static void	vmxnet3_evintr(struct vmxnet3_softc *);
118 static int	vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
119 static void	vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
120 static int	vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
121 static int	vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
122 static int	vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
123 static void	vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
124 static void	vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
125 static int	vmxnet3_legacy_intr(void *);
126 static int	vmxnet3_rxq_intr(void *);
127 static int	vmxnet3_event_intr(void *);
128 
129 static void	vmxnet3_stop(if_ctx_t);
130 
131 static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
132 static void	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
133 static void	vmxnet3_reinit_queues(struct vmxnet3_softc *);
134 static int	vmxnet3_enable_device(struct vmxnet3_softc *);
135 static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
136 static void	vmxnet3_init(if_ctx_t);
137 static void	vmxnet3_multi_set(if_ctx_t);
138 static int	vmxnet3_mtu_set(if_ctx_t, uint32_t);
139 static void	vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
140 static int	vmxnet3_media_change(if_ctx_t);
141 static int	vmxnet3_promisc_set(if_ctx_t, int);
142 static uint64_t	vmxnet3_get_counter(if_ctx_t, ift_counter);
143 static void	vmxnet3_update_admin_status(if_ctx_t);
144 static void	vmxnet3_txq_timer(if_ctx_t, uint16_t);
145 
146 static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
147 		    uint16_t);
148 static void	vmxnet3_vlan_register(if_ctx_t, uint16_t);
149 static void	vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
150 static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);
151 
152 static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
153 static int	vmxnet3_link_is_up(struct vmxnet3_softc *);
154 static void	vmxnet3_link_status(struct vmxnet3_softc *);
155 static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
156 static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
157 
158 static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
159 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
160 static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
161 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
162 static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
163 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
164 static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
165 
166 static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
167 		    uint32_t);
168 static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
169 static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
170 		    uint32_t);
171 static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
172 static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
173 
174 static int	vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
175 static int	vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
176 static void	vmxnet3_link_intr_enable(if_ctx_t);
177 static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
178 static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
179 static void	vmxnet3_intr_enable_all(if_ctx_t);
180 static void	vmxnet3_intr_disable_all(if_ctx_t);
181 
182 typedef enum {
183 	VMXNET3_BARRIER_RD,
184 	VMXNET3_BARRIER_WR,
185 	VMXNET3_BARRIER_RDWR,
186 } vmxnet3_barrier_t;
187 
188 static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
189 
190 
191 static device_method_t vmxnet3_methods[] = {
192 	/* Device interface */
193 	DEVMETHOD(device_register, vmxnet3_register),
194 	DEVMETHOD(device_probe, iflib_device_probe),
195 	DEVMETHOD(device_attach, iflib_device_attach),
196 	DEVMETHOD(device_detach, iflib_device_detach),
197 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
198 	DEVMETHOD(device_suspend, iflib_device_suspend),
199 	DEVMETHOD(device_resume, iflib_device_resume),
200 	DEVMETHOD_END
201 };
202 
203 static driver_t vmxnet3_driver = {
204 	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
205 };
206 
207 static devclass_t vmxnet3_devclass;
208 DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
209 IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
210 MODULE_VERSION(vmx, 2);
211 
212 MODULE_DEPEND(vmx, pci, 1, 1, 1);
213 MODULE_DEPEND(vmx, ether, 1, 1, 1);
214 MODULE_DEPEND(vmx, iflib, 1, 1, 1);
215 
216 static device_method_t vmxnet3_iflib_methods[] = {
217 	DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
218 	DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
219 	DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
220 
221 	DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
222 	DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
223 	DEVMETHOD(ifdi_detach, vmxnet3_detach),
224 
225 	DEVMETHOD(ifdi_init, vmxnet3_init),
226 	DEVMETHOD(ifdi_stop, vmxnet3_stop),
227 	DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
228 	DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
229 	DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
230 	DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
231 	DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
232 	DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
233 	DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
234 	DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
235 
236 	DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
237 	DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
238 	DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
239 	DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
240 	DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
241 	DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
242 
243 	DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
244 	DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
245 
246 	DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
247 	DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
248 	DEVMETHOD(ifdi_resume, vmxnet3_resume),
249 
250 	DEVMETHOD_END
251 };
252 
253 static driver_t vmxnet3_iflib_driver = {
254 	"vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
255 };
256 
257 struct if_txrx vmxnet3_txrx = {
258 	.ift_txd_encap = vmxnet3_isc_txd_encap,
259 	.ift_txd_flush = vmxnet3_isc_txd_flush,
260 	.ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
261 	.ift_rxd_available = vmxnet3_isc_rxd_available,
262 	.ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
263 	.ift_rxd_refill = vmxnet3_isc_rxd_refill,
264 	.ift_rxd_flush = vmxnet3_isc_rxd_flush,
265 	.ift_legacy_intr = vmxnet3_legacy_intr
266 };
267 
268 static struct if_shared_ctx vmxnet3_sctx_init = {
269 	.isc_magic = IFLIB_MAGIC,
270 	.isc_q_align = 512,
271 
272 	.isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
273 	.isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
274 	.isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
275 	.isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
276 
277 	/*
278 	 * These values are used to configure the busdma tag used for
279 	 * receive descriptors.  Each receive descriptor only points to one
280 	 * buffer.
281 	 */
282 	.isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
283 	.isc_rx_nsegments = 1,  /* One mapping per descriptor */
284 	.isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
285 
286 	.isc_admin_intrcnt = 1,
287 	.isc_vendor_info = vmxnet3_vendor_info_array,
288 	.isc_driver_version = "2",
289 	.isc_driver = &vmxnet3_iflib_driver,
290 	.isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY,
291 
292 	/*
293 	 * Number of receive queues per receive queue set, with associated
294 	 * descriptor settings for each.
295 	 */
296 	.isc_nrxqs = 3,
297 	.isc_nfl = 2, /* one free list for each receive command queue */
298 	.isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
299 	.isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
300 	.isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
301 
302 	/*
303 	 * Number of transmit queues per transmit queue set, with associated
304 	 * descriptor settings for each.
305 	 */
306 	.isc_ntxqs = 2,
307 	.isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
308 	.isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
309 	.isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
310 };
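/*
 * Per queue set, the three rx iflib queues declared above are one completion
 * ring followed by two command rings (one for each free list), and the two
 * tx iflib queues are one completion ring followed by one command ring; see
 * the queue allocation routines below for how these are wired up.
 */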
311 
312 static void *
313 vmxnet3_register(device_t dev)
314 {
315 	return (&vmxnet3_sctx_init);
316 }
317 
318 static int
319 vmxnet3_attach_pre(if_ctx_t ctx)
320 {
321 	device_t dev;
322 	if_softc_ctx_t scctx;
323 	struct vmxnet3_softc *sc;
324 	uint32_t intr_config;
325 	int error;
326 
327 	dev = iflib_get_dev(ctx);
328 	sc = iflib_get_softc(ctx);
329 	sc->vmx_dev = dev;
330 	sc->vmx_ctx = ctx;
331 	sc->vmx_sctx = iflib_get_sctx(ctx);
332 	sc->vmx_scctx = iflib_get_softc_ctx(ctx);
333 	sc->vmx_ifp = iflib_get_ifp(ctx);
334 	sc->vmx_media = iflib_get_media(ctx);
335 	scctx = sc->vmx_scctx;
336 
337 	scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
338 	scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
339 	/* isc_tx_tso_size_max doesn't include possible vlan header */
340 	scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
341 	scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
342 	scctx->isc_txrx = &vmxnet3_txrx;
343 
344 	/* If 0, the iflib tunable was not set, so set to the default */
345 	if (scctx->isc_nrxqsets == 0)
346 		scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
347 	scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
348 
349 	/* If 0, the iflib tunable was not set, so set to the default */
350 	if (scctx->isc_ntxqsets == 0)
351 		scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
352 	scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
353 
354 	/*
355 	 * Enforce that the transmit completion queue descriptor count is
356 	 * the same as the transmit command queue descriptor count.
357 	 */
358 	scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
359 	scctx->isc_txqsizes[0] =
360 	    sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
361 	scctx->isc_txqsizes[1] =
362 	    sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
363 
364 	/*
365 	 * Enforce that the receive completion queue descriptor count is the
366 	 * sum of the receive command queue descriptor counts, and that the
367 	 * second receive command queue descriptor count is the same as the
368 	 * first one.
369 	 */
370 	scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
371 	scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
372 	scctx->isc_rxqsizes[0] =
373 	    sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
374 	scctx->isc_rxqsizes[1] =
375 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
376 	scctx->isc_rxqsizes[2] =
377 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
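	/*
	 * For example, if each receive command ring is configured with N
	 * descriptors, the assignments above give both command rings N
	 * entries and the completion ring 2 * N entries.
	 */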
378 
379 	scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
380 
381 	/* Map PCI BARs */
382 	error = vmxnet3_alloc_resources(sc);
383 	if (error)
384 		goto fail;
385 
386 	/* Check device versions */
387 	error = vmxnet3_check_version(sc);
388 	if (error)
389 		goto fail;
390 
391 	/*
392 	 * The interrupt mode can be set in the hypervisor configuration via
393 	 * the parameter ethernet<N>.intrMode.
394 	 */
395 	intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
396 	sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
397 
398 	/*
399 	 * Configure the softc context to attempt to configure the interrupt
400 	 * mode now indicated by intr_config.  iflib will follow the usual
401 	 * fallback path MSI-X -> MSI -> LEGACY, starting at the configured
402 	 * starting mode.
403 	 */
404 	switch (intr_config & 0x03) {
405 	case VMXNET3_IT_AUTO:
406 	case VMXNET3_IT_MSIX:
407 		scctx->isc_msix_bar = pci_msix_table_bar(dev);
408 		break;
409 	case VMXNET3_IT_MSI:
410 		scctx->isc_msix_bar = -1;
411 		scctx->isc_disable_msix = 1;
412 		break;
413 	case VMXNET3_IT_LEGACY:
414 		scctx->isc_msix_bar = 0;
415 		break;
416 	}
417 
418 	scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
419 	scctx->isc_capabilities = scctx->isc_capenable =
420 	    IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
421 	    IFCAP_TSO4 | IFCAP_TSO6 |
422 	    IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
423 	    IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
424 	    IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
425 	    IFCAP_JUMBO_MTU;
426 
427 	/* These capabilities are not enabled by default. */
428 	scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
429 
430 	vmxnet3_get_lladdr(sc);
431 	iflib_set_mac(ctx, sc->vmx_lladdr);
432 
433 	return (0);
434 fail:
435 	/*
436 	 * We must completely clean up anything allocated above as iflib
437 	 * will not invoke any other driver entry points as a result of this
438 	 * failure.
439 	 */
440 	vmxnet3_free_resources(sc);
441 
442 	return (error);
443 }
444 
445 static int
446 vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
447 {
448 	struct vmxnet3_softc *sc;
449 	if_softc_ctx_t scctx;
450 	struct vmxnet3_rxqueue *rxq;
451 	int error;
452 	int i;
453 	char irq_name[16];
454 
455 	sc = iflib_get_softc(ctx);
456 	scctx = sc->vmx_scctx;
457 
458 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
459 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
460 
461 		rxq = &sc->vmx_rxq[i];
462 		error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
463 		    IFLIB_INTR_RX, vmxnet3_rxq_intr, rxq, i, irq_name);
464 		if (error) {
465 			device_printf(iflib_get_dev(ctx),
466 			    "Failed to register rxq %d interrupt handler\n", i);
467 			return (error);
468 		}
469 	}
470 
471 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
472 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
473 
474 		/*
475 		 * Don't provide the corresponding rxq irq for reference -
476 		 * we want the transmit task to be attached to a task queue
477 		 * that is different from the one used by the corresponding
478 		 * rxq irq.  That is because the TX doorbell writes are very
479 		 * expensive as virtualized MMIO operations, so we want to
480 		 * be able to defer them to another core when possible so
481 		 * that they don't steal receive processing cycles during
482 		 * stack turnarounds like TCP ACK generation.  The other
483 		 * piece to this approach is enabling the iflib abdicate
484 		 * option (currently via an interface-specific
485 		 * tunable/sysctl).
486 		 */
487 		iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
488 		    irq_name);
489 	}
490 
491 	error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
492 	    scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
493 	    "event");
494 	if (error) {
495 		device_printf(iflib_get_dev(ctx),
496 		    "Failed to register event interrupt handler\n");
497 		return (error);
498 	}
499 
500 	return (0);
501 }
502 
503 static void
504 vmxnet3_free_irqs(struct vmxnet3_softc *sc)
505 {
506 	if_softc_ctx_t scctx;
507 	struct vmxnet3_rxqueue *rxq;
508 	int i;
509 
510 	scctx = sc->vmx_scctx;
511 
512 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
513 		rxq = &sc->vmx_rxq[i];
514 		iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
515 	}
516 
517 	iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
518 }
519 
520 static int
521 vmxnet3_attach_post(if_ctx_t ctx)
522 {
523 	device_t dev;
524 	if_softc_ctx_t scctx;
525 	struct vmxnet3_softc *sc;
526 	int error;
527 
528 	dev = iflib_get_dev(ctx);
529 	scctx = iflib_get_softc_ctx(ctx);
530 	sc = iflib_get_softc(ctx);
531 
532 	if (scctx->isc_nrxqsets > 1)
533 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
534 
535 	error = vmxnet3_alloc_data(sc);
536 	if (error)
537 		goto fail;
538 
539 	vmxnet3_set_interrupt_idx(sc);
540 	vmxnet3_setup_sysctl(sc);
541 
542 	ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
543 	ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
544 
545 fail:
546 	return (error);
547 }
548 
549 static int
550 vmxnet3_detach(if_ctx_t ctx)
551 {
552 	struct vmxnet3_softc *sc;
553 
554 	sc = iflib_get_softc(ctx);
555 
556 	vmxnet3_free_irqs(sc);
557 	vmxnet3_free_data(sc);
558 	vmxnet3_free_resources(sc);
559 
560 	return (0);
561 }
562 
563 static int
564 vmxnet3_shutdown(if_ctx_t ctx)
565 {
566 
567 	return (0);
568 }
569 
570 static int
571 vmxnet3_suspend(if_ctx_t ctx)
572 {
573 
574 	return (0);
575 }
576 
577 static int
578 vmxnet3_resume(if_ctx_t ctx)
579 {
580 
581 	return (0);
582 }
583 
584 static int
585 vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
586 {
587 	device_t dev;
588 	int rid;
589 
590 	dev = sc->vmx_dev;
591 
592 	rid = PCIR_BAR(0);
593 	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
594 	    RF_ACTIVE);
595 	if (sc->vmx_res0 == NULL) {
596 		device_printf(dev,
597 		    "could not map BAR0 memory\n");
598 		return (ENXIO);
599 	}
600 
601 	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
602 	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
603 
604 	rid = PCIR_BAR(1);
605 	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
606 	    RF_ACTIVE);
607 	if (sc->vmx_res1 == NULL) {
608 		device_printf(dev,
609 		    "could not map BAR1 memory\n");
610 		return (ENXIO);
611 	}
612 
613 	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
614 	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
615 
616 	return (0);
617 }
618 
619 static void
620 vmxnet3_free_resources(struct vmxnet3_softc *sc)
621 {
622 	device_t dev;
623 
624 	dev = sc->vmx_dev;
625 
626 	if (sc->vmx_res0 != NULL) {
627 		bus_release_resource(dev, SYS_RES_MEMORY,
628 		    rman_get_rid(sc->vmx_res0), sc->vmx_res0);
629 		sc->vmx_res0 = NULL;
630 	}
631 
632 	if (sc->vmx_res1 != NULL) {
633 		bus_release_resource(dev, SYS_RES_MEMORY,
634 		    rman_get_rid(sc->vmx_res1), sc->vmx_res1);
635 		sc->vmx_res1 = NULL;
636 	}
637 }
638 
639 static int
640 vmxnet3_check_version(struct vmxnet3_softc *sc)
641 {
642 	device_t dev;
643 	uint32_t version;
644 
645 	dev = sc->vmx_dev;
646 
647 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
648 	if ((version & 0x01) == 0) {
649 		device_printf(dev, "unsupported hardware version %#x\n",
650 		    version);
651 		return (ENOTSUP);
652 	}
653 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
654 
655 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
656 	if ((version & 0x01) == 0) {
657 		device_printf(dev, "unsupported UPT version %#x\n", version);
658 		return (ENOTSUP);
659 	}
660 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
661 
662 	return (0);
663 }
664 
665 static void
666 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
667 {
668 	if_softc_ctx_t scctx;
669 	struct vmxnet3_txqueue *txq;
670 	struct vmxnet3_txq_shared *txs;
671 	struct vmxnet3_rxqueue *rxq;
672 	struct vmxnet3_rxq_shared *rxs;
673 	int intr_idx;
674 	int i;
675 
676 	scctx = sc->vmx_scctx;
677 
678 	/*
679 	 * There is always one interrupt per receive queue, assigned
680 	 * starting with the first interrupt.  When there is only one
681 	 * interrupt available, the event interrupt shares the receive queue
682 	 * interrupt, otherwise it uses the interrupt following the last
683 	 * receive queue interrupt.  Transmit queues are not assigned
684 	 * interrupts, so they are given indexes beyond the indexes that
685 	 * correspond to the real interrupts.
686 	 */
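	/*
	 * For example, with four receive queue sets under MSI-X (and
	 * assuming iflib grants every requested vector), vectors 0-3 serve
	 * the receive queues, vector 4 is the event interrupt, and the
	 * transmit queues are handed the unused indexes 5, 6, ...
	 */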
687 
688 	/* The event interrupt is always the last vector. */
689 	sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
690 
691 	intr_idx = 0;
692 	for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
693 		rxq = &sc->vmx_rxq[i];
694 		rxs = rxq->vxrxq_rs;
695 		rxq->vxrxq_intr_idx = intr_idx;
696 		rxs->intr_idx = rxq->vxrxq_intr_idx;
697 	}
698 
699 	/*
700 	 * Assign the tx queues' interrupt indexes above those we are actually
701 	 * using.  These interrupts will never be enabled.
702 	 */
703 	intr_idx = scctx->isc_vectors;
704 	for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
705 		txq = &sc->vmx_txq[i];
706 		txs = txq->vxtxq_ts;
707 		txq->vxtxq_intr_idx = intr_idx;
708 		txs->intr_idx = txq->vxtxq_intr_idx;
709 	}
710 }
711 
712 static int
713 vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
714 {
715 	if_softc_ctx_t scctx;
716 	int size;
717 	int error;
718 
719 	scctx = sc->vmx_scctx;
720 
721 	/*
722 	 * The txq and rxq shared data areas must be allocated contiguously
723 	 * as vmxnet3_driver_shared contains only a single address member
724 	 * for the shared queue data area.
725 	 */
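	/*
	 * The layout within that single area is all of the per-txq shared
	 * structures first, followed by all of the per-rxq shared
	 * structures; the tx and rx queue allocation routines below carve
	 * their pieces out of it in that order.
	 */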
726 	size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
727 	    scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
728 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
729 	if (error) {
730 		device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
731 		return (error);
732 	}
733 
734 	return (0);
735 }
736 
737 static void
738 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
739 {
740 	struct vmxnet3_txqueue *txq;
741 	struct vmxnet3_comp_ring *txc;
742 	struct vmxnet3_txring *txr;
743 	if_softc_ctx_t scctx;
744 
745 	txq = &sc->vmx_txq[q];
746 	txc = &txq->vxtxq_comp_ring;
747 	txr = &txq->vxtxq_cmd_ring;
748 	scctx = sc->vmx_scctx;
749 
750 	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
751 	    device_get_nameunit(sc->vmx_dev), q);
752 
753 	txq->vxtxq_sc = sc;
754 	txq->vxtxq_id = q;
755 	txc->vxcr_ndesc = scctx->isc_ntxd[0];
756 	txr->vxtxr_ndesc = scctx->isc_ntxd[1];
757 }
758 
759 static int
760 vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
761     int ntxqs, int ntxqsets)
762 {
763 	struct vmxnet3_softc *sc;
764 	int q;
765 	int error;
766 	caddr_t kva;
767 
768 	sc = iflib_get_softc(ctx);
769 
770 	/* Allocate the array of transmit queues */
771 	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
772 	    ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
773 	if (sc->vmx_txq == NULL)
774 		return (ENOMEM);
775 
776 	/* Initialize driver state for each transmit queue */
777 	for (q = 0; q < ntxqsets; q++)
778 		vmxnet3_init_txq(sc, q);
779 
780 	/*
781 	 * Allocate queue state that is shared with the device.  This check
782 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
783 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
784 	 * order iflib invokes those routines in.
785 	 */
786 	if (sc->vmx_qs_dma.idi_size == 0) {
787 		error = vmxnet3_queues_shared_alloc(sc);
788 		if (error)
789 			return (error);
790 	}
791 
792 	kva = sc->vmx_qs_dma.idi_vaddr;
793 	for (q = 0; q < ntxqsets; q++) {
794 		sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
795 		kva += sizeof(struct vmxnet3_txq_shared);
796 	}
797 
798 	/* Record descriptor ring vaddrs and paddrs */
799 	for (q = 0; q < ntxqsets; q++) {
800 		struct vmxnet3_txqueue *txq;
801 		struct vmxnet3_txring *txr;
802 		struct vmxnet3_comp_ring *txc;
803 
804 		txq = &sc->vmx_txq[q];
805 		txc = &txq->vxtxq_comp_ring;
806 		txr = &txq->vxtxq_cmd_ring;
807 
808 		/* Completion ring */
809 		txc->vxcr_u.txcd =
810 		    (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
811 		txc->vxcr_paddr = paddrs[q * ntxqs + 0];
812 
813 		/* Command ring */
814 		txr->vxtxr_txd =
815 		    (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
816 		txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
817 	}
818 
819 	return (0);
820 }
821 
822 static void
823 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
824 {
825 	struct vmxnet3_rxqueue *rxq;
826 	struct vmxnet3_comp_ring *rxc;
827 	struct vmxnet3_rxring *rxr;
828 	if_softc_ctx_t scctx;
829 	int i;
830 
831 	rxq = &sc->vmx_rxq[q];
832 	rxc = &rxq->vxrxq_comp_ring;
833 	scctx = sc->vmx_scctx;
834 
835 	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
836 	    device_get_nameunit(sc->vmx_dev), q);
837 
838 	rxq->vxrxq_sc = sc;
839 	rxq->vxrxq_id = q;
840 
841 	/*
842 	 * First rxq is the completion queue, so there are nrxqs - 1 command
843 	 * rings starting at iflib queue id 1.
844 	 */
845 	rxc->vxcr_ndesc = scctx->isc_nrxd[0];
846 	for (i = 0; i < nrxqs - 1; i++) {
847 		rxr = &rxq->vxrxq_cmd_ring[i];
848 		rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
849 	}
850 }
851 
852 static int
853 vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
854     int nrxqs, int nrxqsets)
855 {
856 	struct vmxnet3_softc *sc;
857 	if_softc_ctx_t scctx;
858 	int q;
859 	int i;
860 	int error;
861 	caddr_t kva;
862 
863 	sc = iflib_get_softc(ctx);
864 	scctx = sc->vmx_scctx;
865 
866 	/* Allocate the array of receive queues */
867 	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
868 	    nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
869 	if (sc->vmx_rxq == NULL)
870 		return (ENOMEM);
871 
872 	/* Initialize driver state for each receive queue */
873 	for (q = 0; q < nrxqsets; q++)
874 		vmxnet3_init_rxq(sc, q, nrxqs);
875 
876 	/*
877 	 * Allocate queue state that is shared with the device.  This check
878 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
879 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
880 	 * order iflib invokes those routines in.
881 	 */
882 	if (sc->vmx_qs_dma.idi_size == 0) {
883 		error = vmxnet3_queues_shared_alloc(sc);
884 		if (error)
885 			return (error);
886 	}
887 
888 	kva = sc->vmx_qs_dma.idi_vaddr +
889 	    scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
890 	for (q = 0; q < nrxqsets; q++) {
891 		sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
892 		kva += sizeof(struct vmxnet3_rxq_shared);
893 	}
894 
895 	/* Record descriptor ring vaddrs and paddrs */
896 	for (q = 0; q < nrxqsets; q++) {
897 		struct vmxnet3_rxqueue *rxq;
898 		struct vmxnet3_rxring *rxr;
899 		struct vmxnet3_comp_ring *rxc;
900 
901 		rxq = &sc->vmx_rxq[q];
902 		rxc = &rxq->vxrxq_comp_ring;
903 
904 		/* Completion ring */
905 		rxc->vxcr_u.rxcd =
906 		    (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
907 		rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
908 
909 		/* Command ring(s) */
910 		for (i = 0; i < nrxqs - 1; i++) {
911 			rxr = &rxq->vxrxq_cmd_ring[i];
912 
913 			rxr->vxrxr_rxd =
914 			    (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
915 			rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
916 		}
917 	}
918 
919 	return (0);
920 }
921 
922 static void
923 vmxnet3_queues_free(if_ctx_t ctx)
924 {
925 	struct vmxnet3_softc *sc;
926 
927 	sc = iflib_get_softc(ctx);
928 
929 	/* Free queue state area that is shared with the device */
930 	if (sc->vmx_qs_dma.idi_size != 0) {
931 		iflib_dma_free(&sc->vmx_qs_dma);
932 		sc->vmx_qs_dma.idi_size = 0;
933 	}
934 
935 	/* Free array of receive queues */
936 	if (sc->vmx_rxq != NULL) {
937 		free(sc->vmx_rxq, M_DEVBUF);
938 		sc->vmx_rxq = NULL;
939 	}
940 
941 	/* Free array of transmit queues */
942 	if (sc->vmx_txq != NULL) {
943 		free(sc->vmx_txq, M_DEVBUF);
944 		sc->vmx_txq = NULL;
945 	}
946 }
947 
948 static int
949 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
950 {
951 	device_t dev;
952 	size_t size;
953 	int error;
954 
955 	dev = sc->vmx_dev;
956 
957 	/* Top level state structure shared with the device */
958 	size = sizeof(struct vmxnet3_driver_shared);
959 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
960 	if (error) {
961 		device_printf(dev, "cannot alloc shared memory\n");
962 		return (error);
963 	}
964 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
965 
966 	/* RSS table state shared with the device */
967 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
968 		size = sizeof(struct vmxnet3_rss_shared);
969 		error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
970 		    &sc->vmx_rss_dma, 0);
971 		if (error) {
972 			device_printf(dev, "cannot alloc rss shared memory\n");
973 			return (error);
974 		}
975 		sc->vmx_rss =
976 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
977 	}
978 
979 	return (0);
980 }
981 
982 static void
983 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
984 {
985 
986 	/* Free RSS table state shared with the device */
987 	if (sc->vmx_rss != NULL) {
988 		iflib_dma_free(&sc->vmx_rss_dma);
989 		sc->vmx_rss = NULL;
990 	}
991 
992 	/* Free top level state structure shared with the device */
993 	if (sc->vmx_ds != NULL) {
994 		iflib_dma_free(&sc->vmx_ds_dma);
995 		sc->vmx_ds = NULL;
996 	}
997 }
998 
999 static int
1000 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1001 {
1002 	int error;
1003 
1004 	/* Multicast table state shared with the device */
1005 	error = iflib_dma_alloc_align(sc->vmx_ctx,
1006 	    VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
1007 	if (error)
1008 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1009 	else
1010 		sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
1011 
1012 	return (error);
1013 }
1014 
1015 static void
1016 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1017 {
1018 
1019 	/* Free multicast table state shared with the device */
1020 	if (sc->vmx_mcast != NULL) {
1021 		iflib_dma_free(&sc->vmx_mcast_dma);
1022 		sc->vmx_mcast = NULL;
1023 	}
1024 }
1025 
1026 static void
1027 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1028 {
1029 	struct vmxnet3_driver_shared *ds;
1030 	if_shared_ctx_t sctx;
1031 	if_softc_ctx_t scctx;
1032 	struct vmxnet3_txqueue *txq;
1033 	struct vmxnet3_txq_shared *txs;
1034 	struct vmxnet3_rxqueue *rxq;
1035 	struct vmxnet3_rxq_shared *rxs;
1036 	int i;
1037 
1038 	ds = sc->vmx_ds;
1039 	sctx = sc->vmx_sctx;
1040 	scctx = sc->vmx_scctx;
1041 
1042 	/*
1043 	 * Initialize fields of the shared data that remain the same across
1044 	 * reinits.  Note that the shared data is zeroed when allocated.
1045 	 */
1046 
1047 	ds->magic = VMXNET3_REV1_MAGIC;
1048 
1049 	/* DriverInfo */
1050 	ds->version = VMXNET3_DRIVER_VERSION;
1051 	ds->guest = VMXNET3_GOS_FREEBSD |
1052 #ifdef __LP64__
1053 	    VMXNET3_GOS_64BIT;
1054 #else
1055 	    VMXNET3_GOS_32BIT;
1056 #endif
1057 	ds->vmxnet3_revision = 1;
1058 	ds->upt_version = 1;
1059 
1060 	/* Misc. conf */
1061 	ds->driver_data = vtophys(sc);
1062 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1063 	ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
1064 	ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
1065 	ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
1066 
1067 	/* RSS conf */
1068 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1069 		ds->rss.version = 1;
1070 		ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
1071 		ds->rss.len = sc->vmx_rss_dma.idi_size;
1072 	}
1073 
1074 	/* Interrupt control. */
1075 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1076 	/*
1077 	 * Total number of interrupt indexes we are using in the shared
1078 	 * config data, even though we don't actually allocate interrupt
1079 	 * resources for the tx queues.  Some versions of the device will
1080 	 * fail to initialize successfully if interrupt indexes are used in
1081 	 * the shared config that exceed the number of interrupts configured
1082 	 * here.
1083 	 */
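	/*
	 * For example, 4 rx and 4 tx queue sets under MSI-X report 9
	 * interrupt indexes here even though the tx queues have no real
	 * vectors behind them; with a single shared vector the value is
	 * simply 2.
	 */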
1084 	ds->nintr = (scctx->isc_vectors == 1) ?
1085 	    2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
1086 	ds->evintr = sc->vmx_event_intr_idx;
1087 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1088 
1089 	for (i = 0; i < ds->nintr; i++)
1090 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1091 
1092 	/* Receive filter. */
1093 	ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
1094 	ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
1095 
1096 	/* Tx queues */
1097 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
1098 		txq = &sc->vmx_txq[i];
1099 		txs = txq->vxtxq_ts;
1100 
1101 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
1102 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1103 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
1104 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1105 		txs->driver_data = vtophys(txq);
1106 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1107 	}
1108 
1109 	/* Rx queues */
1110 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
1111 		rxq = &sc->vmx_rxq[i];
1112 		rxs = rxq->vxrxq_rs;
1113 
1114 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
1115 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1116 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
1117 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1118 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
1119 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1120 		rxs->driver_data = vtophys(rxq);
1121 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1122 	}
1123 }
1124 
1125 static void
1126 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1127 {
1128 	/*
1129 	 * Use the same key as the Linux driver until FreeBSD can do
1130 	 * RSS (presumably Toeplitz) in software.
1131 	 */
1132 	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1133 	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1134 	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1135 	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1136 	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1137 	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1138 	};
1139 
1140 	struct vmxnet3_driver_shared *ds;
1141 	if_softc_ctx_t scctx;
1142 	struct vmxnet3_rss_shared *rss;
1143 	int i;
1144 
1145 	ds = sc->vmx_ds;
1146 	scctx = sc->vmx_scctx;
1147 	rss = sc->vmx_rss;
1148 
1149 	rss->hash_type =
1150 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1151 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1152 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1153 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1154 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1155 	memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1156 
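	/*
	 * The loop below fills the indirection table round-robin; for
	 * example, with four receive queue sets the entries cycle
	 * 0, 1, 2, 3, 0, 1, ... so hash values spread packets evenly
	 * across the queue sets.
	 */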
1157 	for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1158 		rss->ind_table[i] = i % scctx->isc_nrxqsets;
1159 }
1160 
1161 static void
1162 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1163 {
1164 	struct ifnet *ifp;
1165 	struct vmxnet3_driver_shared *ds;
1166 	if_softc_ctx_t scctx;
1167 
1168 	ifp = sc->vmx_ifp;
1169 	ds = sc->vmx_ds;
1170 	scctx = sc->vmx_scctx;
1171 
1172 	ds->mtu = ifp->if_mtu;
1173 	ds->ntxqueue = scctx->isc_ntxqsets;
1174 	ds->nrxqueue = scctx->isc_nrxqsets;
1175 
1176 	ds->upt_features = 0;
1177 	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1178 		ds->upt_features |= UPT1_F_CSUM;
1179 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1180 		ds->upt_features |= UPT1_F_VLAN;
1181 	if (ifp->if_capenable & IFCAP_LRO)
1182 		ds->upt_features |= UPT1_F_LRO;
1183 
1184 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1185 		ds->upt_features |= UPT1_F_RSS;
1186 		vmxnet3_reinit_rss_shared_data(sc);
1187 	}
1188 
1189 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
1190 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1191 	    (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
1192 }
1193 
1194 static int
1195 vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1196 {
1197 	int error;
1198 
1199 	error = vmxnet3_alloc_shared_data(sc);
1200 	if (error)
1201 		return (error);
1202 
1203 	error = vmxnet3_alloc_mcast_table(sc);
1204 	if (error)
1205 		return (error);
1206 
1207 	vmxnet3_init_shared_data(sc);
1208 
1209 	return (0);
1210 }
1211 
1212 static void
1213 vmxnet3_free_data(struct vmxnet3_softc *sc)
1214 {
1215 
1216 	vmxnet3_free_mcast_table(sc);
1217 	vmxnet3_free_shared_data(sc);
1218 }
1219 
1220 static void
1221 vmxnet3_evintr(struct vmxnet3_softc *sc)
1222 {
1223 	device_t dev;
1224 	struct vmxnet3_txq_shared *ts;
1225 	struct vmxnet3_rxq_shared *rs;
1226 	uint32_t event;
1227 
1228 	dev = sc->vmx_dev;
1229 
1230 	/* Clear events. */
1231 	event = sc->vmx_ds->event;
1232 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1233 
1234 	if (event & VMXNET3_EVENT_LINK)
1235 		vmxnet3_link_status(sc);
1236 
1237 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1238 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1239 		ts = sc->vmx_txq[0].vxtxq_ts;
1240 		if (ts->stopped != 0)
1241 			device_printf(dev, "Tx queue error %#x\n", ts->error);
1242 		rs = sc->vmx_rxq[0].vxrxq_rs;
1243 		if (rs->stopped != 0)
1244 			device_printf(dev, "Rx queue error %#x\n", rs->error);
1245 
1246 		/* XXX - rely on iflib watchdog to reset us? */
1247 		device_printf(dev, "Rx/Tx queue error event ... "
1248 		    "waiting for iflib watchdog reset\n");
1249 	}
1250 
1251 	if (event & VMXNET3_EVENT_DIC)
1252 		device_printf(dev, "device implementation change event\n");
1253 	if (event & VMXNET3_EVENT_DEBUG)
1254 		device_printf(dev, "debug event\n");
1255 }
1256 
1257 static int
1258 vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
1259 {
1260 	struct vmxnet3_softc *sc;
1261 	struct vmxnet3_txqueue *txq;
1262 	struct vmxnet3_txring *txr;
1263 	struct vmxnet3_txdesc *txd, *sop;
1264 	bus_dma_segment_t *segs;
1265 	int nsegs;
1266 	int pidx;
1267 	int hdrlen;
1268 	int i;
1269 	int gen;
1270 
1271 	sc = vsc;
1272 	txq = &sc->vmx_txq[pi->ipi_qsidx];
1273 	txr = &txq->vxtxq_cmd_ring;
1274 	segs = pi->ipi_segs;
1275 	nsegs = pi->ipi_nsegs;
1276 	pidx = pi->ipi_pidx;
1277 
1278 	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
1279 	    ("%s: packet with too many segments %d", __func__, nsegs));
1280 
1281 	sop = &txr->vxtxr_txd[pidx];
1282 	gen = txr->vxtxr_gen ^ 1;	/* Still owned by the cpu */
1283 
1284 	for (i = 0; i < nsegs; i++) {
1285 		txd = &txr->vxtxr_txd[pidx];
1286 
1287 		txd->addr = segs[i].ds_addr;
1288 		txd->len = segs[i].ds_len;
1289 		txd->gen = gen;
1290 		txd->dtype = 0;
1291 		txd->offload_mode = VMXNET3_OM_NONE;
1292 		txd->offload_pos = 0;
1293 		txd->hlen = 0;
1294 		txd->eop = 0;
1295 		txd->compreq = 0;
1296 		txd->vtag_mode = 0;
1297 		txd->vtag = 0;
1298 
1299 		if (++pidx == txr->vxtxr_ndesc) {
1300 			pidx = 0;
1301 			txr->vxtxr_gen ^= 1;
1302 		}
1303 		gen = txr->vxtxr_gen;
1304 	}
1305 	txd->eop = 1;
1306 	txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
1307 	pi->ipi_new_pidx = pidx;
1308 
1309 	/*
1310 	 * VLAN
1311 	 */
1312 	if (pi->ipi_mflags & M_VLANTAG) {
1313 		sop->vtag_mode = 1;
1314 		sop->vtag = pi->ipi_vtag;
1315 	}
1316 
1317 	/*
1318 	 * TSO and checksum offloads
1319 	 */
1320 	hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
1321 	if (pi->ipi_csum_flags & CSUM_TSO) {
1322 		sop->offload_mode = VMXNET3_OM_TSO;
1323 		sop->hlen = hdrlen + pi->ipi_tcp_hlen;
1324 		sop->offload_pos = pi->ipi_tso_segsz;
1325 	} else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
1326 	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
1327 		sop->offload_mode = VMXNET3_OM_CSUM;
1328 		sop->hlen = hdrlen;
1329 		sop->offload_pos = hdrlen +
1330 		    ((pi->ipi_ipproto == IPPROTO_TCP) ?
1331 			offsetof(struct tcphdr, th_sum) :
1332 			offsetof(struct udphdr, uh_sum));
1333 	}
1334 
1335 	/* Finally, change the ownership. */
1336 	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
1337 	sop->gen ^= 1;
1338 
1339 	return (0);
1340 }
1341 
1342 static void
1343 vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
1344 {
1345 	struct vmxnet3_softc *sc;
1346 	struct vmxnet3_txqueue *txq;
1347 
1348 	sc = vsc;
1349 	txq = &sc->vmx_txq[txqid];
1350 
1351 	/*
1352 	 * pidx is what we last set ipi_new_pidx to in
1353 	 * vmxnet3_isc_txd_encap()
1354 	 */
1355 
1356 	/*
1357 	 * Avoid expensive register updates if the flush request is
1358 	 * redundant.
1359 	 */
1360 	if (txq->vxtxq_last_flush == pidx)
1361 		return;
1362 	txq->vxtxq_last_flush = pidx;
1363 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
1364 }
1365 
1366 static int
1367 vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
1368 {
1369 	struct vmxnet3_softc *sc;
1370 	struct vmxnet3_txqueue *txq;
1371 	struct vmxnet3_comp_ring *txc;
1372 	struct vmxnet3_txcompdesc *txcd;
1373 	struct vmxnet3_txring *txr;
1374 	int processed;
1375 
1376 	sc = vsc;
1377 	txq = &sc->vmx_txq[txqid];
1378 	txc = &txq->vxtxq_comp_ring;
1379 	txr = &txq->vxtxq_cmd_ring;
1380 
1381 	/*
1382 	 * If clear is true, we need to report the number of TX command ring
1383 	 * descriptors that have been processed by the device.  If clear is
1384 	 * false, we just need to report whether or not at least one TX
1385 	 * command ring descriptor has been processed by the device.
1386 	 */
1387 	processed = 0;
1388 	for (;;) {
1389 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1390 		if (txcd->gen != txc->vxcr_gen)
1391 			break;
1392 		else if (!clear)
1393 			return (1);
1394 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1395 
1396 		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1397 			txc->vxcr_next = 0;
1398 			txc->vxcr_gen ^= 1;
1399 		}
1400 
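		/*
		 * Count the command ring descriptors covered by this
		 * completion, handling ring wraparound.  For example, with
		 * a 512-entry ring, vxtxr_next == 500 and eop_idx == 3,
		 * this completion covers 512 - (500 - 3) + 1 = 16
		 * descriptors.
		 */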
1401 		if (txcd->eop_idx < txr->vxtxr_next)
1402 			processed += txr->vxtxr_ndesc -
1403 			    (txr->vxtxr_next - txcd->eop_idx) + 1;
1404 		else
1405 			processed += txcd->eop_idx - txr->vxtxr_next + 1;
1406 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1407 	}
1408 
1409 	return (processed);
1410 }
1411 
1412 static int
1413 vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
1414 {
1415 	struct vmxnet3_softc *sc;
1416 	struct vmxnet3_rxqueue *rxq;
1417 	struct vmxnet3_comp_ring *rxc;
1418 	struct vmxnet3_rxcompdesc *rxcd;
1419 	int avail;
1420 	int completed_gen;
1421 #ifdef INVARIANTS
1422 	int expect_sop = 1;
1423 #endif
1424 	sc = vsc;
1425 	rxq = &sc->vmx_rxq[rxqid];
1426 	rxc = &rxq->vxrxq_comp_ring;
1427 
1428 	avail = 0;
1429 	completed_gen = rxc->vxcr_gen;
1430 	for (;;) {
1431 		rxcd = &rxc->vxcr_u.rxcd[idx];
1432 		if (rxcd->gen != completed_gen)
1433 			break;
1434 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1435 
1436 #ifdef INVARIANTS
1437 		if (expect_sop)
1438 			KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1439 		else
1440 			KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
1441 		expect_sop = rxcd->eop;
1442 #endif
1443 		if (rxcd->eop && (rxcd->len != 0))
1444 			avail++;
1445 		if (avail > budget)
1446 			break;
1447 		if (++idx == rxc->vxcr_ndesc) {
1448 			idx = 0;
1449 			completed_gen ^= 1;
1450 		}
1451 	}
1452 
1453 	return (avail);
1454 }
1455 
1456 static int
1457 vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
1458 {
1459 	struct vmxnet3_softc *sc;
1460 	if_softc_ctx_t scctx;
1461 	struct vmxnet3_rxqueue *rxq;
1462 	struct vmxnet3_comp_ring *rxc;
1463 	struct vmxnet3_rxcompdesc *rxcd;
1464 	struct vmxnet3_rxring *rxr;
1465 	struct vmxnet3_rxdesc *rxd;
1466 	if_rxd_frag_t frag;
1467 	int cqidx;
1468 	uint16_t total_len;
1469 	uint8_t nfrags;
1470 	uint8_t flid;
1471 
1472 	sc = vsc;
1473 	scctx = sc->vmx_scctx;
1474 	rxq = &sc->vmx_rxq[ri->iri_qsidx];
1475 	rxc = &rxq->vxrxq_comp_ring;
1476 
1477 	/*
1478 	 * Get a single packet starting at the given index in the completion
1479 	 * queue.  That we have been called indicates that
1480 	 * vmxnet3_isc_rxd_available() has already verified that either
1481 	 * there is a complete packet available starting at the given index,
1482 	 * or there are one or more zero length packets starting at the
1483 	 * given index followed by a complete packet, so no verification of
1484 	 * ownership of the descriptors (and no associated read barrier) is
1485 	 * required here.
1486 	 */
1487 	cqidx = ri->iri_cidx;
1488 	rxcd = &rxc->vxcr_u.rxcd[cqidx];
1489 	while (rxcd->len == 0) {
1490 		KASSERT(rxcd->sop && rxcd->eop,
1491 		    ("%s: zero-length packet without both sop and eop set",
1492 			__func__));
1493 		if (++cqidx == rxc->vxcr_ndesc) {
1494 			cqidx = 0;
1495 			rxc->vxcr_gen ^= 1;
1496 		}
1497 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1498 	}
1499 	KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1500 
1501 	/*
1502 	 * RSS and flow ID
1503 	 */
1504 	ri->iri_flowid = rxcd->rss_hash;
1505 	switch (rxcd->rss_type) {
1506 	case VMXNET3_RCD_RSS_TYPE_NONE:
1507 		ri->iri_flowid = ri->iri_qsidx;
1508 		ri->iri_rsstype = M_HASHTYPE_NONE;
1509 		break;
1510 	case VMXNET3_RCD_RSS_TYPE_IPV4:
1511 		ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
1512 		break;
1513 	case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
1514 		ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
1515 		break;
1516 	case VMXNET3_RCD_RSS_TYPE_IPV6:
1517 		ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
1518 		break;
1519 	case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
1520 		ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
1521 		break;
1522 	default:
1523 		ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1524 		break;
1525 	}
1526 
1527 	/* VLAN */
1528 	if (rxcd->vlan) {
1529 		ri->iri_flags |= M_VLANTAG;
1530 		ri->iri_vtag = rxcd->vtag;
1531 	}
1532 
1533 	/* Checksum offload */
1534 	if (!rxcd->no_csum) {
1535 		uint32_t csum_flags = 0;
1536 
1537 		if (rxcd->ipv4) {
1538 			csum_flags |= CSUM_IP_CHECKED;
1539 			if (rxcd->ipcsum_ok)
1540 				csum_flags |= CSUM_IP_VALID;
1541 		}
1542 		if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
1543 			csum_flags |= CSUM_L4_CALC;
1544 			if (rxcd->csum_ok) {
1545 				csum_flags |= CSUM_L4_VALID;
1546 				ri->iri_csum_data = 0xffff;
1547 			}
1548 		}
1549 		ri->iri_csum_flags = csum_flags;
1550 	}
1551 
1552 	/*
1553 	 * The queue numbering scheme used for rxcd->qid is as follows:
1554 	 *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
1555 	 *  - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
1556 	 *
1557 	 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
1558 	 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
1559 	 * indicates command ring (and flid) 1.
1560 	 */
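	/*
	 * For example, with two receive queue sets, qid 0 and 1 name
	 * command ring 0 of queue sets 0 and 1, while qid 2 and 3 name
	 * command ring 1 of those same queue sets.
	 */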
1561 	nfrags = 0;
1562 	total_len = 0;
1563 	do {
1564 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1565 		KASSERT(rxcd->gen == rxc->vxcr_gen,
1566 		    ("%s: generation mismatch", __func__));
1567 		flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
1568 		rxr = &rxq->vxrxq_cmd_ring[flid];
1569 		rxd = &rxr->vxrxr_rxd[rxcd->rxd_idx];
1570 
1571 		frag = &ri->iri_frags[nfrags];
1572 		frag->irf_flid = flid;
1573 		frag->irf_idx = rxcd->rxd_idx;
1574 		frag->irf_len = rxcd->len;
1575 		total_len += rxcd->len;
1576 		nfrags++;
1577 		if (++cqidx == rxc->vxcr_ndesc) {
1578 			cqidx = 0;
1579 			rxc->vxcr_gen ^= 1;
1580 		}
1581 	} while (!rxcd->eop);
1582 
1583 	ri->iri_cidx = cqidx;
1584 	ri->iri_nfrags = nfrags;
1585 	ri->iri_len = total_len;
1586 
1587 	return (0);
1588 }
1589 
1590 static void
1591 vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
1592 {
1593 	struct vmxnet3_softc *sc;
1594 	struct vmxnet3_rxqueue *rxq;
1595 	struct vmxnet3_rxring *rxr;
1596 	struct vmxnet3_rxdesc *rxd;
1597 	uint64_t *paddrs;
1598 	int count;
1599 	int len;
1600 	int pidx;
1601 	int i;
1602 	uint8_t flid;
1603 	uint8_t btype;
1604 
1605 	count = iru->iru_count;
1606 	len = iru->iru_buf_size;
1607 	pidx = iru->iru_pidx;
1608 	flid = iru->iru_flidx;
1609 	paddrs = iru->iru_paddrs;
1610 
1611 	sc = vsc;
1612 	rxq = &sc->vmx_rxq[iru->iru_qsidx];
1613 	rxr = &rxq->vxrxq_cmd_ring[flid];
1614 	rxd = rxr->vxrxr_rxd;
1615 
1616 	/*
1617 	 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
1618 	 * command ring 1 is filled with BTYPE_BODY descriptors.
1619 	 */
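	/*
	 * Head buffers take the start of a packet; body buffers hold any
	 * continuation segments the device needs for frames larger than a
	 * single buffer (e.g. jumbo frames or LRO aggregates).
	 */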
1620 	btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
1621 	for (i = 0; i < count; i++) {
1622 		rxd[pidx].addr = paddrs[i];
1623 		rxd[pidx].len = len;
1624 		rxd[pidx].btype = btype;
1625 		rxd[pidx].gen = rxr->vxrxr_gen;
1626 
1627 		if (++pidx == rxr->vxrxr_ndesc) {
1628 			pidx = 0;
1629 			rxr->vxrxr_gen ^= 1;
1630 		}
1631 	}
1632 }
1633 
1634 static void
1635 vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
1636 {
1637 	struct vmxnet3_softc *sc;
1638 	struct vmxnet3_rxqueue *rxq;
1639 	struct vmxnet3_rxring *rxr;
1640 	bus_size_t r;
1641 
1642 	sc = vsc;
1643 	rxq = &sc->vmx_rxq[rxqid];
1644 	rxr = &rxq->vxrxq_cmd_ring[flid];
1645 
1646 	if (flid == 0)
1647 		r = VMXNET3_BAR0_RXH1(rxqid);
1648 	else
1649 		r = VMXNET3_BAR0_RXH2(rxqid);
1650 
1651 	/*
1652 	 * pidx is the index of the last descriptor with a buffer the device
1653 	 * can use, and the device needs to be told which index is one past
1654 	 * that.
1655 	 */
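	/*
	 * For example, if the last filled slot is the final descriptor in
	 * the ring, the increment below wraps and the device is handed
	 * index 0.
	 */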
1656 	if (++pidx == rxr->vxrxr_ndesc)
1657 		pidx = 0;
1658 	vmxnet3_write_bar0(sc, r, pidx);
1659 }
1660 
1661 static int
1662 vmxnet3_legacy_intr(void *xsc)
1663 {
1664 	struct vmxnet3_softc *sc;
1665 	if_softc_ctx_t scctx;
1666 	if_ctx_t ctx;
1667 
1668 	sc = xsc;
1669 	scctx = sc->vmx_scctx;
1670 	ctx = sc->vmx_ctx;
1671 
1672 	/*
1673 	 * When there is only a single interrupt configured, this routine
1674 	 * runs in fast interrupt context, following which the rxq 0 task
1675 	 * will be enqueued.
1676 	 */
1677 	if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
1678 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
1679 			return (FILTER_HANDLED);
1680 	}
1681 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1682 		vmxnet3_intr_disable_all(ctx);
1683 
1684 	if (sc->vmx_ds->event != 0)
1685 		iflib_admin_intr_deferred(ctx);
1686 
1687 	/*
1688 	 * XXX - When there is both rxq and event activity, do we care
1689 	 * whether the rxq 0 task or the admin task re-enables the interrupt
1690 	 * first?
1691 	 */
1692 	return (FILTER_SCHEDULE_THREAD);
1693 }
1694 
1695 static int
1696 vmxnet3_rxq_intr(void *vrxq)
1697 {
1698 	struct vmxnet3_softc *sc;
1699 	struct vmxnet3_rxqueue *rxq;
1700 
1701 	rxq = vrxq;
1702 	sc = rxq->vxrxq_sc;
1703 
1704 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1705 		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
1706 
1707 	return (FILTER_SCHEDULE_THREAD);
1708 }
1709 
1710 static int
1711 vmxnet3_event_intr(void *vsc)
1712 {
1713 	struct vmxnet3_softc *sc;
1714 
1715 	sc = vsc;
1716 
1717 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1718 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
1719 
1720 	/*
1721 	 * The work will be done via vmxnet3_update_admin_status(), and the
1722 	 * interrupt will be re-enabled in vmxnet3_link_intr_enable().
1725 	 */
1726 	return (FILTER_SCHEDULE_THREAD);
1727 }
1728 
1729 static void
1730 vmxnet3_stop(if_ctx_t ctx)
1731 {
1732 	struct vmxnet3_softc *sc;
1733 
1734 	sc = iflib_get_softc(ctx);
1735 
1736 	sc->vmx_link_active = 0;
1737 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
1738 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
1739 }
1740 
1741 static void
1742 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
1743 {
1744 	struct vmxnet3_txring *txr;
1745 	struct vmxnet3_comp_ring *txc;
1746 
1747 	txq->vxtxq_last_flush = -1;
1748 
1749 	txr = &txq->vxtxq_cmd_ring;
1750 	txr->vxtxr_next = 0;
1751 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
1752 	/*
1753 	 * iflib has zeroed out the descriptor array during the prior attach
1754 	 * or stop
1755 	 */
1756 
1757 	txc = &txq->vxtxq_comp_ring;
1758 	txc->vxcr_next = 0;
1759 	txc->vxcr_gen = VMXNET3_INIT_GEN;
1760 	/*
1761 	 * iflib has zeroed out the descriptor array during the prior attach
1762 	 * or stop
1763 	 */
1764 }
1765 
1766 static void
1767 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
1768 {
1769 	struct vmxnet3_rxring *rxr;
1770 	struct vmxnet3_comp_ring *rxc;
1771 	int i;
1772 
1773 	/*
1774 	 * The descriptors will be populated with buffers during a
1775 	 * subsequent invocation of vmxnet3_isc_rxd_refill()
1776 	 */
1777 	for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
1778 		rxr = &rxq->vxrxq_cmd_ring[i];
1779 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
1780 		/*
1781 		 * iflib has zeroed out the descriptor array during the
1782 		 * prior attach or stop
1783 		 */
1784 	}
1785 
1786 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
1787 		rxr = &rxq->vxrxq_cmd_ring[i];
1788 		rxr->vxrxr_gen = 0;
1789 		bzero(rxr->vxrxr_rxd,
1790 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
1791 	}
1792 
1793 	rxc = &rxq->vxrxq_comp_ring;
1794 	rxc->vxcr_next = 0;
1795 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
1796 	/*
1797 	 * iflib has zeroed out the descriptor array during the prior attach
1798 	 * or stop
1799 	 */
1800 }
1801 
1802 static void
1803 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
1804 {
1805 	if_softc_ctx_t scctx;
1806 	int q;
1807 
1808 	scctx = sc->vmx_scctx;
1809 
1810 	for (q = 0; q < scctx->isc_ntxqsets; q++)
1811 		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
1812 
1813 	for (q = 0; q < scctx->isc_nrxqsets; q++)
1814 		vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
1815 }
1816 
1817 static int
1818 vmxnet3_enable_device(struct vmxnet3_softc *sc)
1819 {
1820 	if_softc_ctx_t scctx;
1821 	int q;
1822 
1823 	scctx = sc->vmx_scctx;
1824 
1825 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
1826 		device_printf(sc->vmx_dev, "device enable command failed!\n");
1827 		return (1);
1828 	}
1829 
1830 	/* Reset the Rx queue heads. */
1831 	for (q = 0; q < scctx->isc_nrxqsets; q++) {
1832 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
1833 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
1834 	}
1835 
1836 	return (0);
1837 }
1838 
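/*
 * Restore the receive filter, and either reload the VLAN filter table from
 * the driver's private copy (when hardware VLAN filtering is enabled) or
 * clear it, before notifying the device of the new VLAN filter contents.
 */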
1839 static void
1840 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
1841 {
1842 	struct ifnet *ifp;
1843 
1844 	ifp = sc->vmx_ifp;
1845 
1846 	vmxnet3_set_rxfilter(sc, if_getflags(ifp));
1847 
1848 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1849 		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
1850 		    sizeof(sc->vmx_ds->vlan_filter));
1851 	else
1852 		bzero(sc->vmx_ds->vlan_filter,
1853 		    sizeof(sc->vmx_ds->vlan_filter));
1854 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
1855 }
1856 
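/*
 * Bring the interface up: recompute the maximum frame size from the current
 * MTU, reload the station address, reinitialize the shared data and queue
 * state, enable the device, and finally restore the receive and VLAN
 * filters before reporting the link state.
 */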
1857 static void
1858 vmxnet3_init(if_ctx_t ctx)
1859 {
1860 	struct vmxnet3_softc *sc;
1861 	if_softc_ctx_t scctx;
1862 
1863 	sc = iflib_get_softc(ctx);
1864 	scctx = sc->vmx_scctx;
1865 
1866 	scctx->isc_max_frame_size = if_getmtu(iflib_get_ifp(ctx)) +
1867 	    ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
1868 
1869 	/* Use the current MAC address. */
1870 	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1871 	vmxnet3_set_lladdr(sc);
1872 
1873 	vmxnet3_reinit_shared_data(sc);
1874 	vmxnet3_reinit_queues(sc);
1875 
1876 	vmxnet3_enable_device(sc);
1877 
1878 	vmxnet3_reinit_rxfilters(sc);
1879 	vmxnet3_link_status(sc);
1880 }
1881 
1882 static void
1883 vmxnet3_multi_set(if_ctx_t ctx)
1884 {
1885 
1886 	vmxnet3_set_rxfilter(iflib_get_softc(ctx),
1887 	    if_getflags(iflib_get_ifp(ctx)));
1888 }
1889 
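/*
 * Accept only MTUs for which the resulting frame (Ethernet header, optional
 * VLAN tag, and CRC included) still fits within the device's maximum
 * transmit size; the MTU itself is applied by the iflib framework when this
 * returns success.
 */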
1890 static int
1891 vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
1892 {
1893 
1894 	if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
1895 		ETHER_CRC_LEN))
1896 		return (EINVAL);
1897 
1898 	return (0);
1899 }
1900 
1901 static void
1902 vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1903 {
1904 	struct vmxnet3_softc *sc;
1905 
1906 	sc = iflib_get_softc(ctx);
1907 
1908 	ifmr->ifm_status = IFM_AVALID;
1909 	ifmr->ifm_active = IFM_ETHER;
1910 
1911 	if (vmxnet3_link_is_up(sc) != 0) {
1912 		ifmr->ifm_status |= IFM_ACTIVE;
1913 		ifmr->ifm_active |= IFM_AUTO;
1914 	} else
1915 		ifmr->ifm_active |= IFM_NONE;
1916 }
1917 
1918 static int
1919 vmxnet3_media_change(if_ctx_t ctx)
1920 {
1921 
1922 	/* Ignore. */
1923 	return (0);
1924 }
1925 
1926 static int
1927 vmxnet3_promisc_set(if_ctx_t ctx, int flags)
1928 {
1929 
1930 	vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
1931 
1932 	return (0);
1933 }
1934 
1935 static uint64_t
1936 vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
1937 {
1938 	if_t ifp = iflib_get_ifp(ctx);
1939 
1940 	if (cnt < IFCOUNTERS)
1941 		return if_get_counter_default(ifp, cnt);
1942 
1943 	return (0);
1944 }
1945 
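/*
 * Process any event the device has posted in the shared data area (see
 * vmxnet3_event_intr()) and request a refresh of the host-maintained
 * statistics.
 */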
1946 static void
1947 vmxnet3_update_admin_status(if_ctx_t ctx)
1948 {
1949 	struct vmxnet3_softc *sc;
1950 
1951 	sc = iflib_get_softc(ctx);
1952 	if (sc->vmx_ds->event != 0)
1953 		vmxnet3_evintr(sc);
1954 
1955 	vmxnet3_refresh_host_stats(sc);
1956 }
1957 
1958 static void
1959 vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
1960 {
1961 	/* Host stats refresh is global, so just trigger it on txq 0 */
1962 	if (qid == 0)
1963 		vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
1964 }
1965 
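/*
 * The VLAN filter is a bitmap of permitted tags stored as 32-bit words:
 * "tag >> 5" selects the word and "tag & 0x1f" the bit within it, so, for
 * example, tag 100 lands in word 3, bit 4.  Only the driver's private copy
 * is updated here; vmxnet3_reinit_rxfilters() pushes it to the device.
 */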
1966 static void
1967 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
1968 {
1969 	int idx, bit;
1970 
1971 	if (tag == 0 || tag > 4095)
1972 		return;
1973 
1974 	idx = (tag >> 5) & 0x7F;
1975 	bit = tag & 0x1F;
1976 
1977 	/* Update our private VLAN bitvector. */
1978 	if (add)
1979 		sc->vmx_vlan_filter[idx] |= (1 << bit);
1980 	else
1981 		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
1982 }
1983 
1984 static void
1985 vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
1986 {
1987 
1988 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
1989 }
1990 
1991 static void
1992 vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
1993 {
1994 
1995 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
1996 }
1997 
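/*
 * if_foreach_llmaddr() callback used by vmxnet3_set_rxfilter(): copy each
 * link-level multicast address into the device's multicast table until the
 * table is full, returning 1 so the caller can count the addresses.
 */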
1998 static u_int
1999 vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count)
2000 {
2001 	struct vmxnet3_softc *sc = arg;
2002 
2003 	if (count < VMXNET3_MULTICAST_MAX)
2004 		bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN],
2005 		    ETHER_ADDR_LEN);
2006 
2007 	return (1);
2008 }
2009 
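/*
 * Program the receive filter.  Unicast and broadcast frames are always
 * accepted; promiscuous and all-multicast reception follow the interface
 * flags.  Otherwise the multicast table is rebuilt from the current address
 * list, falling back to all-multicast mode when the list does not fit in
 * the table.
 */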
2010 static void
2011 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
2012 {
2013 	struct ifnet *ifp;
2014 	struct vmxnet3_driver_shared *ds;
2015 	u_int mode;
2016 
2017 	ifp = sc->vmx_ifp;
2018 	ds = sc->vmx_ds;
2019 
2020 	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
2021 	if (flags & IFF_PROMISC)
2022 		mode |= VMXNET3_RXMODE_PROMISC;
2023 	if (flags & IFF_ALLMULTI)
2024 		mode |= VMXNET3_RXMODE_ALLMULTI;
2025 	else {
2026 		int cnt;
2027 
2028 		cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc);
2029 		if (cnt >= VMXNET3_MULTICAST_MAX) {
2030 			cnt = 0;
2031 			mode |= VMXNET3_RXMODE_ALLMULTI;
2032 		} else if (cnt > 0)
2033 			mode |= VMXNET3_RXMODE_MCAST;
2034 		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
2035 	}
2036 
2037 	ds->rxmode = mode;
2038 
2039 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
2040 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
2041 }
2042 
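/*
 * GET_STATS asks the device to refresh the per-queue UPT1 statistics blocks
 * that the sysctl handlers below read; the exact refresh behaviour is up to
 * the hypervisor.
 */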
2043 static void
2044 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
2045 {
2046 
2047 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
2048 }
2049 
2050 static int
2051 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
2052 {
2053 	uint32_t status;
2054 
2055 	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
2056 	return !!(status & 0x1);
2057 }
2058 
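/*
 * Report link-state transitions to iflib, using a nominal 10Gbps for the
 * speed of the virtual link.
 */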
2059 static void
2060 vmxnet3_link_status(struct vmxnet3_softc *sc)
2061 {
2062 	if_ctx_t ctx;
2063 	uint64_t speed;
2064 	int link;
2065 
2066 	ctx = sc->vmx_ctx;
2067 	link = vmxnet3_link_is_up(sc);
2068 	speed = IF_Gbps(10);
2069 
2070 	if (link != 0 && sc->vmx_link_active == 0) {
2071 		sc->vmx_link_active = 1;
2072 		iflib_link_state_change(ctx, LINK_STATE_UP, speed);
2073 	} else if (link == 0 && sc->vmx_link_active != 0) {
2074 		sc->vmx_link_active = 0;
2075 		iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
2076 	}
2077 }
2078 
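/*
 * The MAC address is exchanged with the device as two registers: MACL
 * carries bytes 0-3 and MACH carries bytes 4-5, least-significant byte
 * first.  vmxnet3_get_lladdr() below performs the reverse unpacking using
 * the GET_MACL/GET_MACH commands.
 */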
2079 static void
2080 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
2081 {
2082 	uint32_t ml, mh;
2083 
2084 	ml  = sc->vmx_lladdr[0];
2085 	ml |= sc->vmx_lladdr[1] << 8;
2086 	ml |= sc->vmx_lladdr[2] << 16;
2087 	ml |= sc->vmx_lladdr[3] << 24;
2088 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
2089 
2090 	mh  = sc->vmx_lladdr[4];
2091 	mh |= sc->vmx_lladdr[5] << 8;
2092 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
2093 }
2094 
2095 static void
2096 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
2097 {
2098 	uint32_t ml, mh;
2099 
2100 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
2101 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
2102 
2103 	sc->vmx_lladdr[0] = ml;
2104 	sc->vmx_lladdr[1] = ml >> 8;
2105 	sc->vmx_lladdr[2] = ml >> 16;
2106 	sc->vmx_lladdr[3] = ml >> 24;
2107 	sc->vmx_lladdr[4] = mh;
2108 	sc->vmx_lladdr[5] = mh >> 8;
2109 }
2110 
2111 static void
2112 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
2113     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2114 {
2115 	struct sysctl_oid *node, *txsnode;
2116 	struct sysctl_oid_list *list, *txslist;
2117 	struct UPT1_TxStats *txstats;
2118 	char namebuf[16];
2119 
2120 	txstats = &txq->vxtxq_ts->stats;
2121 
2122 	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
2123 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
2124 	    NULL, "Transmit Queue");
2125 	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
2126 
2127 	/*
2128 	 * Add statistics reported by the host. These are updated by the
2129 	 * iflib txq timer on txq 0.
2130 	 */
2131 	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
2132 	    NULL, "Host Statistics");
2133 	txslist = SYSCTL_CHILDREN(txsnode);
2134 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
2135 	    &txstats->TSO_packets, "TSO packets");
2136 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
2137 	    &txstats->TSO_bytes, "TSO bytes");
2138 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2139 	    &txstats->ucast_packets, "Unicast packets");
2140 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2141 	    &txstats->ucast_bytes, "Unicast bytes");
2142 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2143 	    &txstats->mcast_packets, "Multicast packets");
2144 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2145 	    &txstats->mcast_bytes, "Multicast bytes");
2146 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
2147 	    &txstats->error, "Errors");
2148 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
2149 	    &txstats->discard, "Discards");
2150 }
2151 
2152 static void
2153 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
2154     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2155 {
2156 	struct sysctl_oid *node, *rxsnode;
2157 	struct sysctl_oid_list *list, *rxslist;
2158 	struct UPT1_RxStats *rxstats;
2159 	char namebuf[16];
2160 
2161 	rxstats = &rxq->vxrxq_rs->stats;
2162 
2163 	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
2164 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, CTLFLAG_RD,
2165 	    NULL, "Receive Queue");
2166 	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
2167 
2168 	/*
2169 	 * Add statistics reported by the host. These are updated by the
2170 	 * iflib txq timer on txq 0.
2171 	 */
2172 	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats", CTLFLAG_RD,
2173 	    NULL, "Host Statistics");
2174 	rxslist = SYSCTL_CHILDREN(rxsnode);
2175 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
2176 	    &rxstats->LRO_packets, "LRO packets");
2177 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
2178 	    &rxstats->LRO_bytes, "LRO bytes");
2179 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2180 	    &rxstats->ucast_packets, "Unicast packets");
2181 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2182 	    &rxstats->ucast_bytes, "Unicast bytes");
2183 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2184 	    &rxstats->mcast_packets, "Multicast packets");
2185 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2186 	    &rxstats->mcast_bytes, "Multicast bytes");
2187 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
2188 	    &rxstats->bcast_packets, "Broadcast packets");
2189 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
2190 	    &rxstats->bcast_bytes, "Broadcast bytes");
2191 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
2192 	    &rxstats->nobuffer, "No buffer");
2193 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
2194 	    &rxstats->error, "Errors");
2195 }
2196 
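/*
 * Add per-queue "debug" sysctl nodes exposing the driver's ring sizes,
 * next-index values, and generation bits as read-only snapshots.
 */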
2197 static void
2198 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
2199     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2200 {
2201 	if_softc_ctx_t scctx;
2202 	struct sysctl_oid *node;
2203 	struct sysctl_oid_list *list;
2204 	int i;
2205 
2206 	scctx = sc->vmx_scctx;
2207 
2208 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
2209 		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
2210 
2211 		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
2212 		    "debug", CTLFLAG_RD, NULL, "");
2213 		list = SYSCTL_CHILDREN(node);
2214 
2215 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
2216 		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
2217 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
2218 		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
2219 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
2220 		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
2221 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
2222 		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
2223 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2224 		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
2225 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2226 		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
2227 	}
2228 
2229 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
2230 		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
2231 
2232 		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
2233 		    "debug", CTLFLAG_RD, NULL, "");
2234 		list = SYSCTL_CHILDREN(node);
2235 
2236 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
2237 		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
2238 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
2239 		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
2240 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
2241 		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
2242 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
2243 		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
2244 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2245 		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
2246 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2247 		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
2248 	}
2249 }
2250 
2251 static void
2252 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
2253     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2254 {
2255 	if_softc_ctx_t scctx;
2256 	int i;
2257 
2258 	scctx = sc->vmx_scctx;
2259 
2260 	for (i = 0; i < scctx->isc_ntxqsets; i++)
2261 		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
2262 	for (i = 0; i < scctx->isc_nrxqsets; i++)
2263 		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
2264 
2265 	vmxnet3_setup_debug_sysctl(sc, ctx, child);
2266 }
2267 
2268 static void
2269 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
2270 {
2271 	device_t dev;
2272 	struct sysctl_ctx_list *ctx;
2273 	struct sysctl_oid *tree;
2274 	struct sysctl_oid_list *child;
2275 
2276 	dev = sc->vmx_dev;
2277 	ctx = device_get_sysctl_ctx(dev);
2278 	tree = device_get_sysctl_tree(dev);
2279 	child = SYSCTL_CHILDREN(tree);
2280 
2281 	vmxnet3_setup_queue_sysctl(sc, ctx, child);
2282 }
2283 
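/*
 * Register access helpers.  In this driver, BAR0 is used for the queue head
 * (doorbell) and per-vector interrupt mask registers, and BAR1 for the
 * command and MAC address registers.
 */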
2284 static void
2285 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2286 {
2287 
2288 	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
2289 }
2290 
2291 static uint32_t
2292 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
2293 {
2294 
2295 	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
2296 }
2297 
2298 static void
2299 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2300 {
2301 
2302 	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
2303 }
2304 
2305 static void
2306 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2307 {
2308 
2309 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
2310 }
2311 
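/*
 * Commands that return a value do so through the same CMD register: the
 * command is written and the register immediately read back, with a bus
 * space barrier in between to keep the two accesses ordered.
 */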
2312 static uint32_t
2313 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2314 {
2315 
2316 	vmxnet3_write_cmd(sc, cmd);
2317 	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
2318 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2319 	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
2320 }
2321 
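/*
 * Interrupt vectors are masked individually through the BAR0 IMASK
 * registers: writing 0 unmasks a vector, writing 1 masks it.
 */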
2322 static void
2323 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
2324 {
2325 
2326 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
2327 }
2328 
2329 static void
2330 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
2331 {
2332 
2333 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
2334 }
2335 
2336 static int
2337 vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2338 {
2339 	/* Not using interrupts for TX */
2340 	return (0);
2341 }
2342 
2343 static int
2344 vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2345 {
2346 	struct vmxnet3_softc *sc;
2347 
2348 	sc = iflib_get_softc(ctx);
2349 	vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
2350 	return (0);
2351 }
2352 
2353 static void
2354 vmxnet3_link_intr_enable(if_ctx_t ctx)
2355 {
2356 	struct vmxnet3_softc *sc;
2357 
2358 	sc = iflib_get_softc(ctx);
2359 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2360 }
2361 
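/*
 * Clear the "disable all interrupts" bit in the shared data and unmask
 * every vector that was allocated for the device.
 */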
2362 static void
2363 vmxnet3_intr_enable_all(if_ctx_t ctx)
2364 {
2365 	struct vmxnet3_softc *sc;
2366 	if_softc_ctx_t scctx;
2367 	int i;
2368 
2369 	sc = iflib_get_softc(ctx);
2370 	scctx = sc->vmx_scctx;
2371 	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
2372 	for (i = 0; i < scctx->isc_vectors; i++)
2373 		vmxnet3_enable_intr(sc, i);
2374 }
2375 
2376 static void
2377 vmxnet3_intr_disable_all(if_ctx_t ctx)
2378 {
2379 	struct vmxnet3_softc *sc;
2380 	int i;
2381 
2382 	sc = iflib_get_softc(ctx);
2383 	/*
2384 	 * iflib may invoke this routine before vmxnet3_attach_post() has
2385 	 * run, which is before the top level shared data area is
2386 	 * initialized and the device made aware of it.
2387 	 */
2388 	if (sc->vmx_ds != NULL)
2389 		sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
2390 	for (i = 0; i < VMXNET3_MAX_INTRS; i++)
2391 		vmxnet3_disable_intr(sc, i);
2392 }
2393 
2394 /*
2395  * Since this is a purely paravirtualized device, we do not have
2396  * to worry about DMA coherency. But at times, we must make sure
2397  * both the compiler and CPU do not reorder memory operations.
2398  */
2399 static inline void
2400 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
2401 {
2402 
2403 	switch (type) {
2404 	case VMXNET3_BARRIER_RD:
2405 		rmb();
2406 		break;
2407 	case VMXNET3_BARRIER_WR:
2408 		wmb();
2409 		break;
2410 	case VMXNET3_BARRIER_RDWR:
2411 		mb();
2412 		break;
2413 	default:
2414 		panic("%s: bad barrier type %d", __func__, type);
2415 	}
2416 }
2417