xref: /freebsd/sys/dev/vmware/vmxnet3/if_vmx.c (revision 829f0bcb5fe24bb523c5a9e7bd3bb79412e06906)
1 /*-
2  * Copyright (c) 2013 Tsubai Masanari
3  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4  * Copyright (c) 2018 Patrick Kelsey
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
19  */
20 
21 /* Driver for VMware vmxnet3 virtual ethernet devices. */
22 
23 #include <sys/cdefs.h>
24 __FBSDID("$FreeBSD$");
25 
26 #include "opt_rss.h"
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/endian.h>
32 #include <sys/sockio.h>
33 #include <sys/mbuf.h>
34 #include <sys/malloc.h>
35 #include <sys/module.h>
36 #include <sys/socket.h>
37 #include <sys/sysctl.h>
38 #include <sys/smp.h>
39 #include <vm/vm.h>
40 #include <vm/pmap.h>
41 
42 #include <net/ethernet.h>
43 #include <net/if.h>
44 #include <net/if_var.h>
45 #include <net/if_arp.h>
46 #include <net/if_dl.h>
47 #include <net/if_types.h>
48 #include <net/if_media.h>
49 #include <net/if_vlan_var.h>
50 #include <net/iflib.h>
51 #ifdef RSS
52 #include <net/rss_config.h>
53 #endif
54 
55 #include <netinet/in_systm.h>
56 #include <netinet/in.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip6.h>
59 #include <netinet6/ip6_var.h>
60 #include <netinet/udp.h>
61 #include <netinet/tcp.h>
62 
63 #include <machine/bus.h>
64 #include <machine/resource.h>
65 #include <sys/bus.h>
66 #include <sys/rman.h>
67 
68 #include <dev/pci/pcireg.h>
69 #include <dev/pci/pcivar.h>
70 
71 #include "ifdi_if.h"
72 
73 #include "if_vmxreg.h"
74 #include "if_vmxvar.h"
75 
76 #include "opt_inet.h"
77 #include "opt_inet6.h"
78 
79 #define VMXNET3_VMWARE_VENDOR_ID	0x15AD
80 #define VMXNET3_VMWARE_DEVICE_ID	0x07B0
81 
82 static pci_vendor_info_t vmxnet3_vendor_info_array[] =
83 {
84 	PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
85 	/* required last entry */
86 	PVID_END
87 };
88 
89 static void	*vmxnet3_register(device_t);
90 static int	vmxnet3_attach_pre(if_ctx_t);
91 static int	vmxnet3_msix_intr_assign(if_ctx_t, int);
92 static void	vmxnet3_free_irqs(struct vmxnet3_softc *);
93 static int	vmxnet3_attach_post(if_ctx_t);
94 static int	vmxnet3_detach(if_ctx_t);
95 static int	vmxnet3_shutdown(if_ctx_t);
96 static int	vmxnet3_suspend(if_ctx_t);
97 static int	vmxnet3_resume(if_ctx_t);
98 
99 static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
100 static void	vmxnet3_free_resources(struct vmxnet3_softc *);
101 static int	vmxnet3_check_version(struct vmxnet3_softc *);
102 static void	vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
103 
104 static int	vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
105 static void	vmxnet3_init_txq(struct vmxnet3_softc *, int);
106 static int	vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
107 static void	vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
108 static int	vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
109 static void	vmxnet3_queues_free(if_ctx_t);
110 
111 static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
112 static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
113 static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
114 static void	vmxnet3_free_mcast_table(struct vmxnet3_softc *);
115 static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
116 static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
117 static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
118 static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
119 static void	vmxnet3_free_data(struct vmxnet3_softc *);
120 
121 static void	vmxnet3_evintr(struct vmxnet3_softc *);
122 static int	vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
123 static void	vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
124 static int	vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
125 static int	vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
126 static int	vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
127 static void	vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
128 static void	vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
129 static int	vmxnet3_legacy_intr(void *);
130 static int	vmxnet3_rxq_intr(void *);
131 static int	vmxnet3_event_intr(void *);
132 
133 static void	vmxnet3_stop(if_ctx_t);
134 
135 static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
136 static void	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
137 static void	vmxnet3_reinit_queues(struct vmxnet3_softc *);
138 static int	vmxnet3_enable_device(struct vmxnet3_softc *);
139 static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
140 static void	vmxnet3_init(if_ctx_t);
141 static void	vmxnet3_multi_set(if_ctx_t);
142 static int	vmxnet3_mtu_set(if_ctx_t, uint32_t);
143 static void	vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
144 static int	vmxnet3_media_change(if_ctx_t);
145 static int	vmxnet3_promisc_set(if_ctx_t, int);
146 static uint64_t	vmxnet3_get_counter(if_ctx_t, ift_counter);
147 static void	vmxnet3_update_admin_status(if_ctx_t);
148 static void	vmxnet3_txq_timer(if_ctx_t, uint16_t);
149 
150 static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
151 		    uint16_t);
152 static void	vmxnet3_vlan_register(if_ctx_t, uint16_t);
153 static void	vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
154 static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);
155 
156 static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
157 static int	vmxnet3_link_is_up(struct vmxnet3_softc *);
158 static void	vmxnet3_link_status(struct vmxnet3_softc *);
159 static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
160 static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
161 
162 static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
163 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
164 static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
165 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
166 static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
167 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
168 static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
169 
170 static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
171 		    uint32_t);
172 static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
173 static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
174 		    uint32_t);
175 static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
176 static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
177 
178 static int	vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
179 static int	vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
180 static void	vmxnet3_link_intr_enable(if_ctx_t);
181 static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
182 static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
183 static void	vmxnet3_intr_enable_all(if_ctx_t);
184 static void	vmxnet3_intr_disable_all(if_ctx_t);
185 
/* Barrier kind selector taken by vmxnet3_barrier(). */
typedef enum {
	VMXNET3_BARRIER_RD,
	VMXNET3_BARRIER_WR,
	VMXNET3_BARRIER_RDWR,
} vmxnet3_barrier_t;
191 
192 static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
193 
194 static device_method_t vmxnet3_methods[] = {
195 	/* Device interface */
196 	DEVMETHOD(device_register, vmxnet3_register),
197 	DEVMETHOD(device_probe, iflib_device_probe),
198 	DEVMETHOD(device_attach, iflib_device_attach),
199 	DEVMETHOD(device_detach, iflib_device_detach),
200 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
201 	DEVMETHOD(device_suspend, iflib_device_suspend),
202 	DEVMETHOD(device_resume, iflib_device_resume),
203 	DEVMETHOD_END
204 };
205 
206 static driver_t vmxnet3_driver = {
207 	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
208 };
209 
210 DRIVER_MODULE(vmx, pci, vmxnet3_driver, 0, 0);
211 IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
212 MODULE_VERSION(vmx, 2);
213 
214 MODULE_DEPEND(vmx, pci, 1, 1, 1);
215 MODULE_DEPEND(vmx, ether, 1, 1, 1);
216 MODULE_DEPEND(vmx, iflib, 1, 1, 1);
217 
218 static device_method_t vmxnet3_iflib_methods[] = {
219 	DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
220 	DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
221 	DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
222 
223 	DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
224 	DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
225 	DEVMETHOD(ifdi_detach, vmxnet3_detach),
226 
227 	DEVMETHOD(ifdi_init, vmxnet3_init),
228 	DEVMETHOD(ifdi_stop, vmxnet3_stop),
229 	DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
230 	DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
231 	DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
232 	DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
233 	DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
234 	DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
235 	DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
236 	DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
237 
238 	DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
239 	DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
240 	DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
241 	DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
242 	DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
243 	DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
244 
245 	DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
246 	DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
247 
248 	DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
249 	DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
250 	DEVMETHOD(ifdi_resume, vmxnet3_resume),
251 
252 	DEVMETHOD_END
253 };
254 
255 static driver_t vmxnet3_iflib_driver = {
256 	"vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
257 };
258 
259 struct if_txrx vmxnet3_txrx = {
260 	.ift_txd_encap = vmxnet3_isc_txd_encap,
261 	.ift_txd_flush = vmxnet3_isc_txd_flush,
262 	.ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
263 	.ift_rxd_available = vmxnet3_isc_rxd_available,
264 	.ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
265 	.ift_rxd_refill = vmxnet3_isc_rxd_refill,
266 	.ift_rxd_flush = vmxnet3_isc_rxd_flush,
267 	.ift_legacy_intr = vmxnet3_legacy_intr
268 };
269 
270 static struct if_shared_ctx vmxnet3_sctx_init = {
271 	.isc_magic = IFLIB_MAGIC,
272 	.isc_q_align = 512,
273 
274 	.isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
275 	.isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
276 	.isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
277 	.isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
278 
279 	/*
280 	 * These values are used to configure the busdma tag used for
281 	 * receive descriptors.  Each receive descriptor only points to one
282 	 * buffer.
283 	 */
284 	.isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
285 	.isc_rx_nsegments = 1,  /* One mapping per descriptor */
286 	.isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
287 
288 	.isc_admin_intrcnt = 1,
289 	.isc_vendor_info = vmxnet3_vendor_info_array,
290 	.isc_driver_version = "2",
291 	.isc_driver = &vmxnet3_iflib_driver,
292 	.isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY,
293 
294 	/*
295 	 * Number of receive queues per receive queue set, with associated
296 	 * descriptor settings for each.
297 	 */
298 	.isc_nrxqs = 3,
299 	.isc_nfl = 2, /* one free list for each receive command queue */
300 	.isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
301 	.isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
302 	.isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
303 
304 	/*
305 	 * Number of transmit queues per transmit queue set, with associated
306 	 * descriptor settings for each.
307 	 */
308 	.isc_ntxqs = 2,
309 	.isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
310 	.isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
311 	.isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
312 };
313 
314 static void *
315 vmxnet3_register(device_t dev)
316 {
317 	return (&vmxnet3_sctx_init);
318 }
319 
/*
 * Round val down to the nearest power of two.
 *
 * fls() yields the 1-based position of the most significant set bit, so
 * shifting 1 left by (fls(val) - 1) keeps only that bit.
 *
 * No power of two is less than or equal to a non-positive value, so 0 is
 * returned for val <= 0.  Without this guard val == 0 would shift by -1
 * (undefined behaviour) and a negative val would produce 1U << 31, which
 * does not fit in the int return type.
 */
static int
trunc_powerof2(int val)
{

	if (val <= 0)
		return (0);

	return (1U << (fls(val) - 1));
}
326 
327 static int
328 vmxnet3_attach_pre(if_ctx_t ctx)
329 {
330 	device_t dev;
331 	if_softc_ctx_t scctx;
332 	struct vmxnet3_softc *sc;
333 	uint32_t intr_config;
334 	int error;
335 
336 	dev = iflib_get_dev(ctx);
337 	sc = iflib_get_softc(ctx);
338 	sc->vmx_dev = dev;
339 	sc->vmx_ctx = ctx;
340 	sc->vmx_sctx = iflib_get_sctx(ctx);
341 	sc->vmx_scctx = iflib_get_softc_ctx(ctx);
342 	sc->vmx_ifp = iflib_get_ifp(ctx);
343 	sc->vmx_media = iflib_get_media(ctx);
344 	scctx = sc->vmx_scctx;
345 
346 	scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
347 	scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
348 	/* isc_tx_tso_size_max doesn't include possible vlan header */
349 	scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
350 	scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
351 	scctx->isc_txrx = &vmxnet3_txrx;
352 
353 	/* If 0, the iflib tunable was not set, so set to the default */
354 	if (scctx->isc_nrxqsets == 0)
355 		scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
356 	scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets);
357 	scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
358 	scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max);
359 
360 	/* If 0, the iflib tunable was not set, so set to the default */
361 	if (scctx->isc_ntxqsets == 0)
362 		scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
363 	scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets);
364 	scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
365 	scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max);
366 
367 	/*
368 	 * Enforce that the transmit completion queue descriptor count is
369 	 * the same as the transmit command queue descriptor count.
370 	 */
371 	scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
372 	scctx->isc_txqsizes[0] =
373 	    sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
374 	scctx->isc_txqsizes[1] =
375 	    sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
376 
377 	/*
378 	 * Enforce that the receive completion queue descriptor count is the
379 	 * sum of the receive command queue descriptor counts, and that the
380 	 * second receive command queue descriptor count is the same as the
381 	 * first one.
382 	 */
383 	scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
384 	scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
385 	scctx->isc_rxqsizes[0] =
386 	    sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
387 	scctx->isc_rxqsizes[1] =
388 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
389 	scctx->isc_rxqsizes[2] =
390 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
391 
392 	/*
393 	 * Initialize the max frame size and descriptor queue buffer
394 	 * sizes.
395 	 */
396 	vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp));
397 
398 	scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
399 
400 	/* Map PCI BARs */
401 	error = vmxnet3_alloc_resources(sc);
402 	if (error)
403 		goto fail;
404 
405 	/* Check device versions */
406 	error = vmxnet3_check_version(sc);
407 	if (error)
408 		goto fail;
409 
410 	/*
411 	 * The interrupt mode can be set in the hypervisor configuration via
412 	 * the parameter ethernet<N>.intrMode.
413 	 */
414 	intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
415 	sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
416 
417 	/*
418 	 * Configure the softc context to attempt to configure the interrupt
419 	 * mode now indicated by intr_config.  iflib will follow the usual
420 	 * fallback path MSI-X -> MSI -> LEGACY, starting at the configured
421 	 * starting mode.
422 	 */
423 	switch (intr_config & 0x03) {
424 	case VMXNET3_IT_AUTO:
425 	case VMXNET3_IT_MSIX:
426 		scctx->isc_msix_bar = pci_msix_table_bar(dev);
427 		break;
428 	case VMXNET3_IT_MSI:
429 		scctx->isc_msix_bar = -1;
430 		scctx->isc_disable_msix = 1;
431 		break;
432 	case VMXNET3_IT_LEGACY:
433 		scctx->isc_msix_bar = 0;
434 		break;
435 	}
436 
437 	scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
438 	scctx->isc_capabilities = scctx->isc_capenable =
439 	    IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
440 	    IFCAP_TSO4 | IFCAP_TSO6 |
441 	    IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
442 	    IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
443 	    IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
444 	    IFCAP_JUMBO_MTU;
445 
446 	/* These capabilities are not enabled by default. */
447 	scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
448 
449 	vmxnet3_get_lladdr(sc);
450 	iflib_set_mac(ctx, sc->vmx_lladdr);
451 
452 	return (0);
453 fail:
454 	/*
455 	 * We must completely clean up anything allocated above as iflib
456 	 * will not invoke any other driver entry points as a result of this
457 	 * failure.
458 	 */
459 	vmxnet3_free_resources(sc);
460 
461 	return (error);
462 }
463 
464 static int
465 vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
466 {
467 	struct vmxnet3_softc *sc;
468 	if_softc_ctx_t scctx;
469 	struct vmxnet3_rxqueue *rxq;
470 	int error;
471 	int i;
472 	char irq_name[16];
473 
474 	sc = iflib_get_softc(ctx);
475 	scctx = sc->vmx_scctx;
476 
477 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
478 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
479 
480 		rxq = &sc->vmx_rxq[i];
481 		error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
482 		    IFLIB_INTR_RXTX, vmxnet3_rxq_intr, rxq, i, irq_name);
483 		if (error) {
484 			device_printf(iflib_get_dev(ctx),
485 			    "Failed to register rxq %d interrupt handler\n", i);
486 			return (error);
487 		}
488 	}
489 
490 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
491 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
492 
493 		/*
494 		 * Don't provide the corresponding rxq irq for reference -
495 		 * we want the transmit task to be attached to a task queue
496 		 * that is different from the one used by the corresponding
497 		 * rxq irq.  That is because the TX doorbell writes are very
498 		 * expensive as virtualized MMIO operations, so we want to
499 		 * be able to defer them to another core when possible so
500 		 * that they don't steal receive processing cycles during
501 		 * stack turnarounds like TCP ACK generation.  The other
502 		 * piece to this approach is enabling the iflib abdicate
503 		 * option (currently via an interface-specific
504 		 * tunable/sysctl).
505 		 */
506 		iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
507 		    irq_name);
508 	}
509 
510 	error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
511 	    scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
512 	    "event");
513 	if (error) {
514 		device_printf(iflib_get_dev(ctx),
515 		    "Failed to register event interrupt handler\n");
516 		return (error);
517 	}
518 
519 	return (0);
520 }
521 
522 static void
523 vmxnet3_free_irqs(struct vmxnet3_softc *sc)
524 {
525 	if_softc_ctx_t scctx;
526 	struct vmxnet3_rxqueue *rxq;
527 	int i;
528 
529 	scctx = sc->vmx_scctx;
530 
531 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
532 		rxq = &sc->vmx_rxq[i];
533 		iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
534 	}
535 
536 	iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
537 }
538 
539 static int
540 vmxnet3_attach_post(if_ctx_t ctx)
541 {
542 	if_softc_ctx_t scctx;
543 	struct vmxnet3_softc *sc;
544 	int error;
545 
546 	scctx = iflib_get_softc_ctx(ctx);
547 	sc = iflib_get_softc(ctx);
548 
549 	if (scctx->isc_nrxqsets > 1)
550 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
551 
552 	error = vmxnet3_alloc_data(sc);
553 	if (error)
554 		goto fail;
555 
556 	vmxnet3_set_interrupt_idx(sc);
557 	vmxnet3_setup_sysctl(sc);
558 
559 	ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
560 	ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
561 
562 fail:
563 	return (error);
564 }
565 
566 static int
567 vmxnet3_detach(if_ctx_t ctx)
568 {
569 	struct vmxnet3_softc *sc;
570 
571 	sc = iflib_get_softc(ctx);
572 
573 	vmxnet3_free_irqs(sc);
574 	vmxnet3_free_data(sc);
575 	vmxnet3_free_resources(sc);
576 
577 	return (0);
578 }
579 
580 static int
581 vmxnet3_shutdown(if_ctx_t ctx)
582 {
583 
584 	return (0);
585 }
586 
587 static int
588 vmxnet3_suspend(if_ctx_t ctx)
589 {
590 
591 	return (0);
592 }
593 
594 static int
595 vmxnet3_resume(if_ctx_t ctx)
596 {
597 
598 	return (0);
599 }
600 
601 static int
602 vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
603 {
604 	device_t dev;
605 	int rid;
606 
607 	dev = sc->vmx_dev;
608 
609 	rid = PCIR_BAR(0);
610 	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
611 	    RF_ACTIVE);
612 	if (sc->vmx_res0 == NULL) {
613 		device_printf(dev,
614 		    "could not map BAR0 memory\n");
615 		return (ENXIO);
616 	}
617 
618 	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
619 	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
620 
621 	rid = PCIR_BAR(1);
622 	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
623 	    RF_ACTIVE);
624 	if (sc->vmx_res1 == NULL) {
625 		device_printf(dev,
626 		    "could not map BAR1 memory\n");
627 		return (ENXIO);
628 	}
629 
630 	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
631 	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
632 
633 	return (0);
634 }
635 
636 static void
637 vmxnet3_free_resources(struct vmxnet3_softc *sc)
638 {
639 	device_t dev;
640 
641 	dev = sc->vmx_dev;
642 
643 	if (sc->vmx_res0 != NULL) {
644 		bus_release_resource(dev, SYS_RES_MEMORY,
645 		    rman_get_rid(sc->vmx_res0), sc->vmx_res0);
646 		sc->vmx_res0 = NULL;
647 	}
648 
649 	if (sc->vmx_res1 != NULL) {
650 		bus_release_resource(dev, SYS_RES_MEMORY,
651 		    rman_get_rid(sc->vmx_res1), sc->vmx_res1);
652 		sc->vmx_res1 = NULL;
653 	}
654 }
655 
656 static int
657 vmxnet3_check_version(struct vmxnet3_softc *sc)
658 {
659 	device_t dev;
660 	uint32_t version;
661 
662 	dev = sc->vmx_dev;
663 
664 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
665 	if ((version & 0x01) == 0) {
666 		device_printf(dev, "unsupported hardware version %#x\n",
667 		    version);
668 		return (ENOTSUP);
669 	}
670 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
671 
672 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
673 	if ((version & 0x01) == 0) {
674 		device_printf(dev, "unsupported UPT version %#x\n", version);
675 		return (ENOTSUP);
676 	}
677 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
678 
679 	return (0);
680 }
681 
682 static void
683 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
684 {
685 	if_softc_ctx_t scctx;
686 	struct vmxnet3_txqueue *txq;
687 	struct vmxnet3_txq_shared *txs;
688 	struct vmxnet3_rxqueue *rxq;
689 	struct vmxnet3_rxq_shared *rxs;
690 	int intr_idx;
691 	int i;
692 
693 	scctx = sc->vmx_scctx;
694 
695 	/*
696 	 * There is always one interrupt per receive queue, assigned
697 	 * starting with the first interrupt.  When there is only one
698 	 * interrupt available, the event interrupt shares the receive queue
699 	 * interrupt, otherwise it uses the interrupt following the last
700 	 * receive queue interrupt.  Transmit queues are not assigned
701 	 * interrupts, so they are given indexes beyond the indexes that
702 	 * correspond to the real interrupts.
703 	 */
704 
705 	/* The event interrupt is always the last vector. */
706 	sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
707 
708 	intr_idx = 0;
709 	for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
710 		rxq = &sc->vmx_rxq[i];
711 		rxs = rxq->vxrxq_rs;
712 		rxq->vxrxq_intr_idx = intr_idx;
713 		rxs->intr_idx = rxq->vxrxq_intr_idx;
714 	}
715 
716 	/*
717 	 * Assign the tx queues interrupt indexes above what we are actually
718 	 * using.  These interrupts will never be enabled.
719 	 */
720 	intr_idx = scctx->isc_vectors;
721 	for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
722 		txq = &sc->vmx_txq[i];
723 		txs = txq->vxtxq_ts;
724 		txq->vxtxq_intr_idx = intr_idx;
725 		txs->intr_idx = txq->vxtxq_intr_idx;
726 	}
727 }
728 
729 static int
730 vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
731 {
732 	if_softc_ctx_t scctx;
733 	int size;
734 	int error;
735 
736 	scctx = sc->vmx_scctx;
737 
738 	/*
739 	 * The txq and rxq shared data areas must be allocated contiguously
740 	 * as vmxnet3_driver_shared contains only a single address member
741 	 * for the shared queue data area.
742 	 */
743 	size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
744 	    scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
745 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
746 	if (error) {
747 		device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
748 		return (error);
749 	}
750 
751 	return (0);
752 }
753 
754 static void
755 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
756 {
757 	struct vmxnet3_txqueue *txq;
758 	struct vmxnet3_comp_ring *txc;
759 	struct vmxnet3_txring *txr;
760 	if_softc_ctx_t scctx;
761 
762 	txq = &sc->vmx_txq[q];
763 	txc = &txq->vxtxq_comp_ring;
764 	txr = &txq->vxtxq_cmd_ring;
765 	scctx = sc->vmx_scctx;
766 
767 	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
768 	    device_get_nameunit(sc->vmx_dev), q);
769 
770 	txq->vxtxq_sc = sc;
771 	txq->vxtxq_id = q;
772 	txc->vxcr_ndesc = scctx->isc_ntxd[0];
773 	txr->vxtxr_ndesc = scctx->isc_ntxd[1];
774 }
775 
776 static int
777 vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
778     int ntxqs, int ntxqsets)
779 {
780 	struct vmxnet3_softc *sc;
781 	int q;
782 	int error;
783 	caddr_t kva;
784 
785 	sc = iflib_get_softc(ctx);
786 
787 	/* Allocate the array of transmit queues */
788 	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
789 	    ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
790 	if (sc->vmx_txq == NULL)
791 		return (ENOMEM);
792 
793 	/* Initialize driver state for each transmit queue */
794 	for (q = 0; q < ntxqsets; q++)
795 		vmxnet3_init_txq(sc, q);
796 
797 	/*
798 	 * Allocate queue state that is shared with the device.  This check
799 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
800 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
801 	 * order iflib invokes those routines in.
802 	 */
803 	if (sc->vmx_qs_dma.idi_size == 0) {
804 		error = vmxnet3_queues_shared_alloc(sc);
805 		if (error)
806 			return (error);
807 	}
808 
809 	kva = sc->vmx_qs_dma.idi_vaddr;
810 	for (q = 0; q < ntxqsets; q++) {
811 		sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
812 		kva += sizeof(struct vmxnet3_txq_shared);
813 	}
814 
815 	/* Record descriptor ring vaddrs and paddrs */
816 	for (q = 0; q < ntxqsets; q++) {
817 		struct vmxnet3_txqueue *txq;
818 		struct vmxnet3_txring *txr;
819 		struct vmxnet3_comp_ring *txc;
820 
821 		txq = &sc->vmx_txq[q];
822 		txc = &txq->vxtxq_comp_ring;
823 		txr = &txq->vxtxq_cmd_ring;
824 
825 		/* Completion ring */
826 		txc->vxcr_u.txcd =
827 		    (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
828 		txc->vxcr_paddr = paddrs[q * ntxqs + 0];
829 
830 		/* Command ring */
831 		txr->vxtxr_txd =
832 		    (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
833 		txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
834 	}
835 
836 	return (0);
837 }
838 
839 static void
840 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
841 {
842 	struct vmxnet3_rxqueue *rxq;
843 	struct vmxnet3_comp_ring *rxc;
844 	struct vmxnet3_rxring *rxr;
845 	if_softc_ctx_t scctx;
846 	int i;
847 
848 	rxq = &sc->vmx_rxq[q];
849 	rxc = &rxq->vxrxq_comp_ring;
850 	scctx = sc->vmx_scctx;
851 
852 	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
853 	    device_get_nameunit(sc->vmx_dev), q);
854 
855 	rxq->vxrxq_sc = sc;
856 	rxq->vxrxq_id = q;
857 
858 	/*
859 	 * First rxq is the completion queue, so there are nrxqs - 1 command
860 	 * rings starting at iflib queue id 1.
861 	 */
862 	rxc->vxcr_ndesc = scctx->isc_nrxd[0];
863 	for (i = 0; i < nrxqs - 1; i++) {
864 		rxr = &rxq->vxrxq_cmd_ring[i];
865 		rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
866 	}
867 }
868 
869 static int
870 vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
871     int nrxqs, int nrxqsets)
872 {
873 	struct vmxnet3_softc *sc;
874 	if_softc_ctx_t scctx;
875 	int q;
876 	int i;
877 	int error;
878 	caddr_t kva;
879 
880 	sc = iflib_get_softc(ctx);
881 	scctx = sc->vmx_scctx;
882 
883 	/* Allocate the array of receive queues */
884 	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
885 	    nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
886 	if (sc->vmx_rxq == NULL)
887 		return (ENOMEM);
888 
889 	/* Initialize driver state for each receive queue */
890 	for (q = 0; q < nrxqsets; q++)
891 		vmxnet3_init_rxq(sc, q, nrxqs);
892 
893 	/*
894 	 * Allocate queue state that is shared with the device.  This check
895 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
896 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
897 	 * order iflib invokes those routines in.
898 	 */
899 	if (sc->vmx_qs_dma.idi_size == 0) {
900 		error = vmxnet3_queues_shared_alloc(sc);
901 		if (error)
902 			return (error);
903 	}
904 
905 	kva = sc->vmx_qs_dma.idi_vaddr +
906 	    scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
907 	for (q = 0; q < nrxqsets; q++) {
908 		sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
909 		kva += sizeof(struct vmxnet3_rxq_shared);
910 	}
911 
912 	/* Record descriptor ring vaddrs and paddrs */
913 	for (q = 0; q < nrxqsets; q++) {
914 		struct vmxnet3_rxqueue *rxq;
915 		struct vmxnet3_rxring *rxr;
916 		struct vmxnet3_comp_ring *rxc;
917 
918 		rxq = &sc->vmx_rxq[q];
919 		rxc = &rxq->vxrxq_comp_ring;
920 
921 		/* Completion ring */
922 		rxc->vxcr_u.rxcd =
923 		    (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
924 		rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
925 
926 		/* Command ring(s) */
927 		for (i = 0; i < nrxqs - 1; i++) {
928 			rxr = &rxq->vxrxq_cmd_ring[i];
929 
930 			rxr->vxrxr_rxd =
931 			    (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
932 			rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
933 		}
934 	}
935 
936 	return (0);
937 }
938 
939 static void
940 vmxnet3_queues_free(if_ctx_t ctx)
941 {
942 	struct vmxnet3_softc *sc;
943 
944 	sc = iflib_get_softc(ctx);
945 
946 	/* Free queue state area that is shared with the device */
947 	if (sc->vmx_qs_dma.idi_size != 0) {
948 		iflib_dma_free(&sc->vmx_qs_dma);
949 		sc->vmx_qs_dma.idi_size = 0;
950 	}
951 
952 	/* Free array of receive queues */
953 	if (sc->vmx_rxq != NULL) {
954 		free(sc->vmx_rxq, M_DEVBUF);
955 		sc->vmx_rxq = NULL;
956 	}
957 
958 	/* Free array of transmit queues */
959 	if (sc->vmx_txq != NULL) {
960 		free(sc->vmx_txq, M_DEVBUF);
961 		sc->vmx_txq = NULL;
962 	}
963 }
964 
965 static int
966 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
967 {
968 	device_t dev;
969 	size_t size;
970 	int error;
971 
972 	dev = sc->vmx_dev;
973 
974 	/* Top level state structure shared with the device */
975 	size = sizeof(struct vmxnet3_driver_shared);
976 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
977 	if (error) {
978 		device_printf(dev, "cannot alloc shared memory\n");
979 		return (error);
980 	}
981 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
982 
983 	/* RSS table state shared with the device */
984 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
985 		size = sizeof(struct vmxnet3_rss_shared);
986 		error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
987 		    &sc->vmx_rss_dma, 0);
988 		if (error) {
989 			device_printf(dev, "cannot alloc rss shared memory\n");
990 			return (error);
991 		}
992 		sc->vmx_rss =
993 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
994 	}
995 
996 	return (0);
997 }
998 
999 static void
1000 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1001 {
1002 
1003 	/* Free RSS table state shared with the device */
1004 	if (sc->vmx_rss != NULL) {
1005 		iflib_dma_free(&sc->vmx_rss_dma);
1006 		sc->vmx_rss = NULL;
1007 	}
1008 
1009 	/* Free top level state structure shared with the device */
1010 	if (sc->vmx_ds != NULL) {
1011 		iflib_dma_free(&sc->vmx_ds_dma);
1012 		sc->vmx_ds = NULL;
1013 	}
1014 }
1015 
1016 static int
1017 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1018 {
1019 	int error;
1020 
1021 	/* Multicast table state shared with the device */
1022 	error = iflib_dma_alloc_align(sc->vmx_ctx,
1023 	    VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
1024 	if (error)
1025 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1026 	else
1027 		sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
1028 
1029 	return (error);
1030 }
1031 
1032 static void
1033 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1034 {
1035 
1036 	/* Free multicast table state shared with the device */
1037 	if (sc->vmx_mcast != NULL) {
1038 		iflib_dma_free(&sc->vmx_mcast_dma);
1039 		sc->vmx_mcast = NULL;
1040 	}
1041 }
1042 
1043 static void
1044 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1045 {
1046 	struct vmxnet3_driver_shared *ds;
1047 	if_softc_ctx_t scctx;
1048 	struct vmxnet3_txqueue *txq;
1049 	struct vmxnet3_txq_shared *txs;
1050 	struct vmxnet3_rxqueue *rxq;
1051 	struct vmxnet3_rxq_shared *rxs;
1052 	int i;
1053 
1054 	ds = sc->vmx_ds;
1055 	scctx = sc->vmx_scctx;
1056 
1057 	/*
1058 	 * Initialize fields of the shared data that remains the same across
1059 	 * reinits. Note the shared data is zero'd when allocated.
1060 	 */
1061 
1062 	ds->magic = VMXNET3_REV1_MAGIC;
1063 
1064 	/* DriverInfo */
1065 	ds->version = VMXNET3_DRIVER_VERSION;
1066 	ds->guest = VMXNET3_GOS_FREEBSD |
1067 #ifdef __LP64__
1068 	    VMXNET3_GOS_64BIT;
1069 #else
1070 	    VMXNET3_GOS_32BIT;
1071 #endif
1072 	ds->vmxnet3_revision = 1;
1073 	ds->upt_version = 1;
1074 
1075 	/* Misc. conf */
1076 	ds->driver_data = vtophys(sc);
1077 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1078 	ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
1079 	ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
1080 	ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
1081 
1082 	/* RSS conf */
1083 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1084 		ds->rss.version = 1;
1085 		ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
1086 		ds->rss.len = sc->vmx_rss_dma.idi_size;
1087 	}
1088 
1089 	/* Interrupt control. */
1090 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1091 	/*
1092 	 * Total number of interrupt indexes we are using in the shared
1093 	 * config data, even though we don't actually allocate interrupt
1094 	 * resources for the tx queues.  Some versions of the device will
1095 	 * fail to initialize successfully if interrupt indexes are used in
1096 	 * the shared config that exceed the number of interrupts configured
1097 	 * here.
1098 	 */
1099 	ds->nintr = (scctx->isc_vectors == 1) ?
1100 	    2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
1101 	ds->evintr = sc->vmx_event_intr_idx;
1102 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1103 
1104 	for (i = 0; i < ds->nintr; i++)
1105 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1106 
1107 	/* Receive filter. */
1108 	ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
1109 	ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
1110 
1111 	/* Tx queues */
1112 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
1113 		txq = &sc->vmx_txq[i];
1114 		txs = txq->vxtxq_ts;
1115 
1116 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
1117 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1118 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
1119 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1120 		txs->driver_data = vtophys(txq);
1121 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1122 	}
1123 
1124 	/* Rx queues */
1125 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
1126 		rxq = &sc->vmx_rxq[i];
1127 		rxs = rxq->vxrxq_rs;
1128 
1129 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
1130 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1131 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
1132 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1133 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
1134 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1135 		rxs->driver_data = vtophys(rxq);
1136 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1137 	}
1138 }
1139 
1140 static void
1141 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1142 {
1143 	/*
1144 	 * Use the same key as the Linux driver until FreeBSD can do
1145 	 * RSS (presumably Toeplitz) in software.
1146 	 */
1147 	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1148 	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1149 	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1150 	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1151 	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1152 	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1153 	};
1154 
1155 	if_softc_ctx_t scctx;
1156 	struct vmxnet3_rss_shared *rss;
1157 #ifdef RSS
1158 	uint8_t rss_algo;
1159 #endif
1160 	int i;
1161 
1162 	scctx = sc->vmx_scctx;
1163 	rss = sc->vmx_rss;
1164 
1165 	rss->hash_type =
1166 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1167 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1168 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1169 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1170 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1171 #ifdef RSS
1172 	/*
1173 	 * If the software RSS is configured to anything else other than
1174 	 * Toeplitz, then just do Toeplitz in "hardware" for the sake of
1175 	 * the packet distribution, but report the hash as opaque to
1176 	 * disengage from the software RSS.
1177 	 */
1178 	rss_algo = rss_gethashalgo();
1179 	if (rss_algo == RSS_HASH_TOEPLITZ) {
1180 		rss_getkey(rss->hash_key);
1181 		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) {
1182 			rss->ind_table[i] = rss_get_indirection_to_bucket(i) %
1183 			    scctx->isc_nrxqsets;
1184 		}
1185 		sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS;
1186 	} else
1187 #endif
1188 	{
1189 		memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1190 		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1191 			rss->ind_table[i] = i % scctx->isc_nrxqsets;
1192 		sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS;
1193 	}
1194 }
1195 
1196 static void
1197 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1198 {
1199 	if_t ifp;
1200 	struct vmxnet3_driver_shared *ds;
1201 	if_softc_ctx_t scctx;
1202 
1203 	ifp = sc->vmx_ifp;
1204 	ds = sc->vmx_ds;
1205 	scctx = sc->vmx_scctx;
1206 
1207 	ds->mtu = if_getmtu(ifp);
1208 	ds->ntxqueue = scctx->isc_ntxqsets;
1209 	ds->nrxqueue = scctx->isc_nrxqsets;
1210 
1211 	ds->upt_features = 0;
1212 	if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1213 		ds->upt_features |= UPT1_F_CSUM;
1214 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING)
1215 		ds->upt_features |= UPT1_F_VLAN;
1216 	if (if_getcapenable(ifp) & IFCAP_LRO)
1217 		ds->upt_features |= UPT1_F_LRO;
1218 
1219 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1220 		ds->upt_features |= UPT1_F_RSS;
1221 		vmxnet3_reinit_rss_shared_data(sc);
1222 	}
1223 
1224 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
1225 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1226 	    (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
1227 }
1228 
/*
 * Allocate the shared data structures and the multicast table, then
 * initialize the constant portion of the shared data.
 *
 * Returns 0 on success, or the error of the first allocation that
 * failed; nothing is released here on failure.
 */
static int
vmxnet3_alloc_data(struct vmxnet3_softc *sc)
{
	int rc;

	if ((rc = vmxnet3_alloc_shared_data(sc)) != 0)
		return (rc);
	if ((rc = vmxnet3_alloc_mcast_table(sc)) != 0)
		return (rc);

	vmxnet3_init_shared_data(sc);

	return (0);
}
1246 
/*
 * Release the multicast table first and then the shared data
 * structures.
 */
static void
vmxnet3_free_data(struct vmxnet3_softc *sc)
{

	/* Multicast table. */
	vmxnet3_free_mcast_table(sc);
	/* RSS and top level shared structures. */
	vmxnet3_free_shared_data(sc);
}
1254 
1255 static void
1256 vmxnet3_evintr(struct vmxnet3_softc *sc)
1257 {
1258 	device_t dev;
1259 	struct vmxnet3_txq_shared *ts;
1260 	struct vmxnet3_rxq_shared *rs;
1261 	uint32_t event;
1262 
1263 	dev = sc->vmx_dev;
1264 
1265 	/* Clear events. */
1266 	event = sc->vmx_ds->event;
1267 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1268 
1269 	if (event & VMXNET3_EVENT_LINK)
1270 		vmxnet3_link_status(sc);
1271 
1272 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1273 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1274 		ts = sc->vmx_txq[0].vxtxq_ts;
1275 		if (ts->stopped != 0)
1276 			device_printf(dev, "Tx queue error %#x\n", ts->error);
1277 		rs = sc->vmx_rxq[0].vxrxq_rs;
1278 		if (rs->stopped != 0)
1279 			device_printf(dev, "Rx queue error %#x\n", rs->error);
1280 
1281 		/* XXX - rely on liflib watchdog to reset us? */
1282 		device_printf(dev, "Rx/Tx queue error event ... "
1283 		    "waiting for iflib watchdog reset\n");
1284 	}
1285 
1286 	if (event & VMXNET3_EVENT_DIC)
1287 		device_printf(dev, "device implementation change event\n");
1288 	if (event & VMXNET3_EVENT_DEBUG)
1289 		device_printf(dev, "debug event\n");
1290 }
1291 
1292 static int
1293 vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
1294 {
1295 	struct vmxnet3_softc *sc;
1296 	struct vmxnet3_txqueue *txq;
1297 	struct vmxnet3_txring *txr;
1298 	struct vmxnet3_txdesc *txd, *sop;
1299 	bus_dma_segment_t *segs;
1300 	int nsegs;
1301 	int pidx;
1302 	int hdrlen;
1303 	int i;
1304 	int gen;
1305 
1306 	sc = vsc;
1307 	txq = &sc->vmx_txq[pi->ipi_qsidx];
1308 	txr = &txq->vxtxq_cmd_ring;
1309 	segs = pi->ipi_segs;
1310 	nsegs = pi->ipi_nsegs;
1311 	pidx = pi->ipi_pidx;
1312 
1313 	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
1314 	    ("%s: packet with too many segments %d", __func__, nsegs));
1315 
1316 	sop = &txr->vxtxr_txd[pidx];
1317 	gen = txr->vxtxr_gen ^ 1;	/* Owned by cpu (yet) */
1318 
1319 	for (i = 0; i < nsegs; i++) {
1320 		txd = &txr->vxtxr_txd[pidx];
1321 
1322 		txd->addr = segs[i].ds_addr;
1323 		txd->len = segs[i].ds_len;
1324 		txd->gen = gen;
1325 		txd->dtype = 0;
1326 		txd->offload_mode = VMXNET3_OM_NONE;
1327 		txd->offload_pos = 0;
1328 		txd->hlen = 0;
1329 		txd->eop = 0;
1330 		txd->compreq = 0;
1331 		txd->vtag_mode = 0;
1332 		txd->vtag = 0;
1333 
1334 		if (++pidx == txr->vxtxr_ndesc) {
1335 			pidx = 0;
1336 			txr->vxtxr_gen ^= 1;
1337 		}
1338 		gen = txr->vxtxr_gen;
1339 	}
1340 	txd->eop = 1;
1341 	txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
1342 	pi->ipi_new_pidx = pidx;
1343 
1344 	/*
1345 	 * VLAN
1346 	 */
1347 	if (pi->ipi_mflags & M_VLANTAG) {
1348 		sop->vtag_mode = 1;
1349 		sop->vtag = pi->ipi_vtag;
1350 	}
1351 
1352 	/*
1353 	 * TSO and checksum offloads
1354 	 */
1355 	hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
1356 	if (pi->ipi_csum_flags & CSUM_TSO) {
1357 		sop->offload_mode = VMXNET3_OM_TSO;
1358 		sop->hlen = hdrlen + pi->ipi_tcp_hlen;
1359 		sop->offload_pos = pi->ipi_tso_segsz;
1360 	} else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
1361 	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
1362 		sop->offload_mode = VMXNET3_OM_CSUM;
1363 		sop->hlen = hdrlen;
1364 		sop->offload_pos = hdrlen +
1365 		    ((pi->ipi_ipproto == IPPROTO_TCP) ?
1366 			offsetof(struct tcphdr, th_sum) :
1367 			offsetof(struct udphdr, uh_sum));
1368 	}
1369 
1370 	/* Finally, change the ownership. */
1371 	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
1372 	sop->gen ^= 1;
1373 
1374 	return (0);
1375 }
1376 
1377 static void
1378 vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
1379 {
1380 	struct vmxnet3_softc *sc;
1381 	struct vmxnet3_txqueue *txq;
1382 
1383 	sc = vsc;
1384 	txq = &sc->vmx_txq[txqid];
1385 
1386 	/*
1387 	 * pidx is what we last set ipi_new_pidx to in
1388 	 * vmxnet3_isc_txd_encap()
1389 	 */
1390 
1391 	/*
1392 	 * Avoid expensive register updates if the flush request is
1393 	 * redundant.
1394 	 */
1395 	if (txq->vxtxq_last_flush == pidx)
1396 		return;
1397 	txq->vxtxq_last_flush = pidx;
1398 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
1399 }
1400 
1401 static int
1402 vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
1403 {
1404 	struct vmxnet3_softc *sc;
1405 	struct vmxnet3_txqueue *txq;
1406 	struct vmxnet3_comp_ring *txc;
1407 	struct vmxnet3_txcompdesc *txcd;
1408 	struct vmxnet3_txring *txr;
1409 	int processed;
1410 
1411 	sc = vsc;
1412 	txq = &sc->vmx_txq[txqid];
1413 	txc = &txq->vxtxq_comp_ring;
1414 	txr = &txq->vxtxq_cmd_ring;
1415 
1416 	/*
1417 	 * If clear is true, we need to report the number of TX command ring
1418 	 * descriptors that have been processed by the device.  If clear is
1419 	 * false, we just need to report whether or not at least one TX
1420 	 * command ring descriptor has been processed by the device.
1421 	 */
1422 	processed = 0;
1423 	for (;;) {
1424 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1425 		if (txcd->gen != txc->vxcr_gen)
1426 			break;
1427 		else if (!clear)
1428 			return (1);
1429 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1430 
1431 		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1432 			txc->vxcr_next = 0;
1433 			txc->vxcr_gen ^= 1;
1434 		}
1435 
1436 		if (txcd->eop_idx < txr->vxtxr_next)
1437 			processed += txr->vxtxr_ndesc -
1438 			    (txr->vxtxr_next - txcd->eop_idx) + 1;
1439 		else
1440 			processed += txcd->eop_idx - txr->vxtxr_next + 1;
1441 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1442 	}
1443 
1444 	return (processed);
1445 }
1446 
1447 static int
1448 vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
1449 {
1450 	struct vmxnet3_softc *sc;
1451 	struct vmxnet3_rxqueue *rxq;
1452 	struct vmxnet3_comp_ring *rxc;
1453 	struct vmxnet3_rxcompdesc *rxcd;
1454 	int avail;
1455 	int completed_gen;
1456 #ifdef INVARIANTS
1457 	int expect_sop = 1;
1458 #endif
1459 	sc = vsc;
1460 	rxq = &sc->vmx_rxq[rxqid];
1461 	rxc = &rxq->vxrxq_comp_ring;
1462 
1463 	avail = 0;
1464 	completed_gen = rxc->vxcr_gen;
1465 	for (;;) {
1466 		rxcd = &rxc->vxcr_u.rxcd[idx];
1467 		if (rxcd->gen != completed_gen)
1468 			break;
1469 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1470 
1471 #ifdef INVARIANTS
1472 		if (expect_sop)
1473 			KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1474 		else
1475 			KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
1476 		expect_sop = rxcd->eop;
1477 #endif
1478 		if (rxcd->eop && (rxcd->len != 0))
1479 			avail++;
1480 		if (avail > budget)
1481 			break;
1482 		if (++idx == rxc->vxcr_ndesc) {
1483 			idx = 0;
1484 			completed_gen ^= 1;
1485 		}
1486 	}
1487 
1488 	return (avail);
1489 }
1490 
1491 static int
1492 vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
1493 {
1494 	struct vmxnet3_softc *sc;
1495 	if_softc_ctx_t scctx;
1496 	struct vmxnet3_rxqueue *rxq;
1497 	struct vmxnet3_comp_ring *rxc;
1498 	struct vmxnet3_rxcompdesc *rxcd;
1499 	if_rxd_frag_t frag;
1500 	int cqidx;
1501 	uint16_t total_len;
1502 	uint8_t nfrags;
1503 	uint8_t i;
1504 	uint8_t flid;
1505 
1506 	sc = vsc;
1507 	scctx = sc->vmx_scctx;
1508 	rxq = &sc->vmx_rxq[ri->iri_qsidx];
1509 	rxc = &rxq->vxrxq_comp_ring;
1510 
1511 	/*
1512 	 * Get a single packet starting at the given index in the completion
1513 	 * queue.  That we have been called indicates that
1514 	 * vmxnet3_isc_rxd_available() has already verified that either
1515 	 * there is a complete packet available starting at the given index,
1516 	 * or there are one or more zero length packets starting at the
1517 	 * given index followed by a complete packet, so no verification of
1518 	 * ownership of the descriptors (and no associated read barrier) is
1519 	 * required here.
1520 	 */
1521 	cqidx = ri->iri_cidx;
1522 	rxcd = &rxc->vxcr_u.rxcd[cqidx];
1523 	while (rxcd->len == 0) {
1524 		KASSERT(rxcd->sop && rxcd->eop,
1525 		    ("%s: zero-length packet without both sop and eop set",
1526 			__func__));
1527 		rxc->vxcr_zero_length++;
1528 		if (++cqidx == rxc->vxcr_ndesc) {
1529 			cqidx = 0;
1530 			rxc->vxcr_gen ^= 1;
1531 		}
1532 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1533 	}
1534 	KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1535 
1536 	/*
1537 	 * RSS and flow ID.
1538 	 * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should
1539 	 * be used only if the software RSS is enabled and it uses the same
1540 	 * algorithm and the hash key as the "hardware".  If the software RSS
1541 	 * is not enabled, then it's simply pointless to use those types.
1542 	 * If it's enabled but with different parameters, then hash values will
1543 	 * not match.
1544 	 */
1545 	ri->iri_flowid = rxcd->rss_hash;
1546 #ifdef RSS
1547 	if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) {
1548 		switch (rxcd->rss_type) {
1549 		case VMXNET3_RCD_RSS_TYPE_NONE:
1550 			ri->iri_flowid = ri->iri_qsidx;
1551 			ri->iri_rsstype = M_HASHTYPE_NONE;
1552 			break;
1553 		case VMXNET3_RCD_RSS_TYPE_IPV4:
1554 			ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
1555 			break;
1556 		case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
1557 			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
1558 			break;
1559 		case VMXNET3_RCD_RSS_TYPE_IPV6:
1560 			ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
1561 			break;
1562 		case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
1563 			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
1564 			break;
1565 		default:
1566 			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1567 			break;
1568 		}
1569 	} else
1570 #endif
1571 	{
1572 		switch (rxcd->rss_type) {
1573 		case VMXNET3_RCD_RSS_TYPE_NONE:
1574 			ri->iri_flowid = ri->iri_qsidx;
1575 			ri->iri_rsstype = M_HASHTYPE_NONE;
1576 			break;
1577 		default:
1578 			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1579 			break;
1580 		}
1581 	}
1582 
1583 	/*
1584 	 * The queue numbering scheme used for rxcd->qid is as follows:
1585 	 *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
1586 	 *  - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
1587 	 *
1588 	 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
1589 	 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
1590 	 * indicates command ring (and flid) 1.
1591 	 */
1592 	nfrags = 0;
1593 	total_len = 0;
1594 	do {
1595 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1596 		KASSERT(rxcd->gen == rxc->vxcr_gen,
1597 		    ("%s: generation mismatch", __func__));
1598 		KASSERT(nfrags < IFLIB_MAX_RX_SEGS,
1599 		    ("%s: too many fragments", __func__));
1600 		if (__predict_true(rxcd->len != 0)) {
1601 			frag = &ri->iri_frags[nfrags];
1602 			flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
1603 			frag->irf_flid = flid;
1604 			frag->irf_idx = rxcd->rxd_idx;
1605 			frag->irf_len = rxcd->len;
1606 			total_len += rxcd->len;
1607 			nfrags++;
1608 		} else {
1609 			rxc->vcxr_zero_length_frag++;
1610 		}
1611 		if (++cqidx == rxc->vxcr_ndesc) {
1612 			cqidx = 0;
1613 			rxc->vxcr_gen ^= 1;
1614 		}
1615 	} while (!rxcd->eop);
1616 
1617 	ri->iri_cidx = cqidx;
1618 	ri->iri_nfrags = nfrags;
1619 	ri->iri_len = total_len;
1620 
1621 	/*
1622 	 * If there's an error, the last descriptor in the packet will
1623 	 * have the error indicator set.  In this case, set all
1624 	 * fragment lengths to zero.  This will cause iflib to discard
1625 	 * the packet, but process all associated descriptors through
1626 	 * the refill mechanism.
1627 	 */
1628 	if (__predict_false(rxcd->error)) {
1629 		rxc->vxcr_pkt_errors++;
1630 		for (i = 0; i < nfrags; i++) {
1631 			frag = &ri->iri_frags[i];
1632 			frag->irf_len = 0;
1633 		}
1634 	} else {
1635 		/* Checksum offload information is in the last descriptor. */
1636 		if (!rxcd->no_csum) {
1637 			uint32_t csum_flags = 0;
1638 
1639 			if (rxcd->ipv4) {
1640 				csum_flags |= CSUM_IP_CHECKED;
1641 				if (rxcd->ipcsum_ok)
1642 					csum_flags |= CSUM_IP_VALID;
1643 			}
1644 			if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
1645 				csum_flags |= CSUM_L4_CALC;
1646 				if (rxcd->csum_ok) {
1647 					csum_flags |= CSUM_L4_VALID;
1648 					ri->iri_csum_data = 0xffff;
1649 				}
1650 			}
1651 			ri->iri_csum_flags = csum_flags;
1652 		}
1653 
1654 		/* VLAN information is in the last descriptor. */
1655 		if (rxcd->vlan) {
1656 			ri->iri_flags |= M_VLANTAG;
1657 			ri->iri_vtag = rxcd->vtag;
1658 		}
1659 	}
1660 
1661 	return (0);
1662 }
1663 
1664 static void
1665 vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
1666 {
1667 	struct vmxnet3_softc *sc;
1668 	struct vmxnet3_rxqueue *rxq;
1669 	struct vmxnet3_rxring *rxr;
1670 	struct vmxnet3_rxdesc *rxd;
1671 	uint64_t *paddrs;
1672 	int count;
1673 	int len;
1674 	int idx;
1675 	int i;
1676 	uint8_t flid;
1677 	uint8_t btype;
1678 
1679 	count = iru->iru_count;
1680 	len = iru->iru_buf_size;
1681 	flid = iru->iru_flidx;
1682 	paddrs = iru->iru_paddrs;
1683 
1684 	sc = vsc;
1685 	rxq = &sc->vmx_rxq[iru->iru_qsidx];
1686 	rxr = &rxq->vxrxq_cmd_ring[flid];
1687 	rxd = rxr->vxrxr_rxd;
1688 
1689 	/*
1690 	 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
1691 	 * command ring 1 is filled with BTYPE_BODY descriptors.
1692 	 */
1693 	btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
1694 	/*
1695 	 * The refill entries from iflib will advance monotonically,
1696 	 * but the refilled descriptors may not be contiguous due to
1697 	 * earlier skipping of descriptors by the device.  The refill
1698 	 * entries from iflib need an entire state update, while the
1699 	 * descriptors previously skipped by the device only need to
1700 	 * have their generation numbers updated.
1701 	 */
1702 	idx = rxr->vxrxr_refill_start;
1703 	i = 0;
1704 	do {
1705 		if (idx == iru->iru_idxs[i]) {
1706 			rxd[idx].addr = paddrs[i];
1707 			rxd[idx].len = len;
1708 			rxd[idx].btype = btype;
1709 			i++;
1710 		} else
1711 			rxr->vxrxr_desc_skips++;
1712 		rxd[idx].gen = rxr->vxrxr_gen;
1713 
1714 		if (++idx == rxr->vxrxr_ndesc) {
1715 			idx = 0;
1716 			rxr->vxrxr_gen ^= 1;
1717 		}
1718 	} while (i != count);
1719 	rxr->vxrxr_refill_start = idx;
1720 }
1721 
1722 static void
1723 vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
1724 {
1725 	struct vmxnet3_softc *sc;
1726 	bus_size_t r;
1727 
1728 	sc = vsc;
1729 
1730 	if (flid == 0)
1731 		r = VMXNET3_BAR0_RXH1(rxqid);
1732 	else
1733 		r = VMXNET3_BAR0_RXH2(rxqid);
1734 
1735 	vmxnet3_write_bar0(sc, r, pidx);
1736 }
1737 
1738 static int
1739 vmxnet3_legacy_intr(void *xsc)
1740 {
1741 	struct vmxnet3_softc *sc;
1742 	if_softc_ctx_t scctx;
1743 	if_ctx_t ctx;
1744 
1745 	sc = xsc;
1746 	scctx = sc->vmx_scctx;
1747 	ctx = sc->vmx_ctx;
1748 
1749 	/*
1750 	 * When there is only a single interrupt configured, this routine
1751 	 * runs in fast interrupt context, following which the rxq 0 task
1752 	 * will be enqueued.
1753 	 */
1754 	if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
1755 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
1756 			return (FILTER_HANDLED);
1757 	}
1758 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1759 		vmxnet3_intr_disable_all(ctx);
1760 
1761 	if (sc->vmx_ds->event != 0)
1762 		iflib_admin_intr_deferred(ctx);
1763 
1764 	/*
1765 	 * XXX - When there is both rxq and event activity, do we care
1766 	 * whether the rxq 0 task or the admin task re-enables the interrupt
1767 	 * first?
1768 	 */
1769 	return (FILTER_SCHEDULE_THREAD);
1770 }
1771 
1772 static int
1773 vmxnet3_rxq_intr(void *vrxq)
1774 {
1775 	struct vmxnet3_softc *sc;
1776 	struct vmxnet3_rxqueue *rxq;
1777 
1778 	rxq = vrxq;
1779 	sc = rxq->vxrxq_sc;
1780 
1781 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1782 		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
1783 
1784 	return (FILTER_SCHEDULE_THREAD);
1785 }
1786 
1787 static int
1788 vmxnet3_event_intr(void *vsc)
1789 {
1790 	struct vmxnet3_softc *sc;
1791 
1792 	sc = vsc;
1793 
1794 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1795 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
1796 
1797 	/*
1798 	 * The work will be done via vmxnet3_update_admin_status(), and the
1799 	 * interrupt will be re-enabled in vmxnet3_link_intr_enable().
1800 	 *
1801 	 * The interrupt will be re-enabled by vmxnet3_link_intr_enable().
1802 	 */
1803 	return (FILTER_SCHEDULE_THREAD);
1804 }
1805 
1806 static void
1807 vmxnet3_stop(if_ctx_t ctx)
1808 {
1809 	struct vmxnet3_softc *sc;
1810 
1811 	sc = iflib_get_softc(ctx);
1812 
1813 	sc->vmx_link_active = 0;
1814 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
1815 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
1816 }
1817 
1818 static void
1819 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
1820 {
1821 	struct vmxnet3_txring *txr;
1822 	struct vmxnet3_comp_ring *txc;
1823 
1824 	txq->vxtxq_last_flush = -1;
1825 
1826 	txr = &txq->vxtxq_cmd_ring;
1827 	txr->vxtxr_next = 0;
1828 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
1829 	/*
1830 	 * iflib has zeroed out the descriptor array during the prior attach
1831 	 * or stop
1832 	 */
1833 
1834 	txc = &txq->vxtxq_comp_ring;
1835 	txc->vxcr_next = 0;
1836 	txc->vxcr_gen = VMXNET3_INIT_GEN;
1837 	/*
1838 	 * iflib has zeroed out the descriptor array during the prior attach
1839 	 * or stop
1840 	 */
1841 }
1842 
1843 static void
1844 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
1845 {
1846 	struct vmxnet3_rxring *rxr;
1847 	struct vmxnet3_comp_ring *rxc;
1848 	int i;
1849 
1850 	/*
1851 	 * The descriptors will be populated with buffers during a
1852 	 * subsequent invocation of vmxnet3_isc_rxd_refill()
1853 	 */
1854 	for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
1855 		rxr = &rxq->vxrxq_cmd_ring[i];
1856 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
1857 		rxr->vxrxr_desc_skips = 0;
1858 		rxr->vxrxr_refill_start = 0;
1859 		/*
1860 		 * iflib has zeroed out the descriptor array during the
1861 		 * prior attach or stop
1862 		 */
1863 	}
1864 
1865 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
1866 		rxr = &rxq->vxrxq_cmd_ring[i];
1867 		rxr->vxrxr_gen = 0;
1868 		rxr->vxrxr_desc_skips = 0;
1869 		rxr->vxrxr_refill_start = 0;
1870 		bzero(rxr->vxrxr_rxd,
1871 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
1872 	}
1873 
1874 	rxc = &rxq->vxrxq_comp_ring;
1875 	rxc->vxcr_next = 0;
1876 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
1877 	rxc->vxcr_zero_length = 0;
1878 	rxc->vcxr_zero_length_frag = 0;
1879 	rxc->vxcr_pkt_errors = 0;
1880 	/*
1881 	 * iflib has zeroed out the descriptor array during the prior attach
1882 	 * or stop
1883 	 */
1884 }
1885 
1886 static void
1887 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
1888 {
1889 	if_softc_ctx_t scctx;
1890 	int q;
1891 
1892 	scctx = sc->vmx_scctx;
1893 
1894 	for (q = 0; q < scctx->isc_ntxqsets; q++)
1895 		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
1896 
1897 	for (q = 0; q < scctx->isc_nrxqsets; q++)
1898 		vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
1899 }
1900 
1901 static int
1902 vmxnet3_enable_device(struct vmxnet3_softc *sc)
1903 {
1904 	if_softc_ctx_t scctx;
1905 	int q;
1906 
1907 	scctx = sc->vmx_scctx;
1908 
1909 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
1910 		device_printf(sc->vmx_dev, "device enable command failed!\n");
1911 		return (1);
1912 	}
1913 
1914 	/* Reset the Rx queue heads. */
1915 	for (q = 0; q < scctx->isc_nrxqsets; q++) {
1916 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
1917 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
1918 	}
1919 
1920 	return (0);
1921 }
1922 
1923 static void
1924 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
1925 {
1926 	if_t ifp;
1927 
1928 	ifp = sc->vmx_ifp;
1929 
1930 	vmxnet3_set_rxfilter(sc, if_getflags(ifp));
1931 
1932 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1933 		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
1934 		    sizeof(sc->vmx_ds->vlan_filter));
1935 	else
1936 		bzero(sc->vmx_ds->vlan_filter,
1937 		    sizeof(sc->vmx_ds->vlan_filter));
1938 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
1939 }
1940 
1941 static void
1942 vmxnet3_init(if_ctx_t ctx)
1943 {
1944 	struct vmxnet3_softc *sc;
1945 
1946 	sc = iflib_get_softc(ctx);
1947 
1948 	/* Use the current MAC address. */
1949 	bcopy(if_getlladdr(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1950 	vmxnet3_set_lladdr(sc);
1951 
1952 	vmxnet3_reinit_shared_data(sc);
1953 	vmxnet3_reinit_queues(sc);
1954 
1955 	vmxnet3_enable_device(sc);
1956 
1957 	vmxnet3_reinit_rxfilters(sc);
1958 	vmxnet3_link_status(sc);
1959 }
1960 
1961 static void
1962 vmxnet3_multi_set(if_ctx_t ctx)
1963 {
1964 
1965 	vmxnet3_set_rxfilter(iflib_get_softc(ctx),
1966 	    if_getflags(iflib_get_ifp(ctx)));
1967 }
1968 
1969 static int
1970 vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
1971 {
1972 	struct vmxnet3_softc *sc;
1973 	if_softc_ctx_t scctx;
1974 
1975 	sc = iflib_get_softc(ctx);
1976 	scctx = sc->vmx_scctx;
1977 
1978 	if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
1979 		ETHER_CRC_LEN))
1980 		return (EINVAL);
1981 
1982 	/*
1983 	 * Update the max frame size so that the rx mbuf size is
1984 	 * chosen based on the new mtu during the interface init that
1985 	 * will occur after this routine returns.
1986 	 */
1987 	scctx->isc_max_frame_size = mtu +
1988 		ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
1989 	/* RX completion queue - n/a */
1990 	scctx->isc_rxd_buf_size[0] = 0;
1991 	/*
1992 	 * For header-type descriptors (used for first segment of
1993 	 * packet), let iflib determine the buffer size based on the
1994 	 * max frame size.
1995 	 */
1996 	scctx->isc_rxd_buf_size[1] = 0;
1997 	/*
1998 	 * For body-type descriptors (used for jumbo frames and LRO),
1999 	 * always use page-sized buffers.
2000 	 */
2001 	scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE;
2002 
2003 	return (0);
2004 }
2005 
2006 static void
2007 vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq * ifmr)
2008 {
2009 	struct vmxnet3_softc *sc;
2010 
2011 	sc = iflib_get_softc(ctx);
2012 
2013 	ifmr->ifm_status = IFM_AVALID;
2014 	ifmr->ifm_active = IFM_ETHER;
2015 
2016 	if (vmxnet3_link_is_up(sc) != 0) {
2017 		ifmr->ifm_status |= IFM_ACTIVE;
2018 		ifmr->ifm_active |= IFM_AUTO;
2019 	} else
2020 		ifmr->ifm_active |= IFM_NONE;
2021 }
2022 
2023 static int
2024 vmxnet3_media_change(if_ctx_t ctx)
2025 {
2026 
2027 	/* Ignore. */
2028 	return (0);
2029 }
2030 
2031 static int
2032 vmxnet3_promisc_set(if_ctx_t ctx, int flags)
2033 {
2034 
2035 	vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
2036 
2037 	return (0);
2038 }
2039 
2040 static uint64_t
2041 vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
2042 {
2043 	if_t ifp = iflib_get_ifp(ctx);
2044 
2045 	if (cnt < IFCOUNTERS)
2046 		return if_get_counter_default(ifp, cnt);
2047 
2048 	return (0);
2049 }
2050 
2051 static void
2052 vmxnet3_update_admin_status(if_ctx_t ctx)
2053 {
2054 	struct vmxnet3_softc *sc;
2055 
2056 	sc = iflib_get_softc(ctx);
2057 	if (sc->vmx_ds->event != 0)
2058 		vmxnet3_evintr(sc);
2059 
2060 	vmxnet3_refresh_host_stats(sc);
2061 }
2062 
2063 static void
2064 vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
2065 {
2066 	/* Host stats refresh is global, so just trigger it on txq 0 */
2067 	if (qid == 0)
2068 		vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
2069 }
2070 
2071 static void
2072 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
2073 {
2074 	int idx, bit;
2075 
2076 	if (tag == 0 || tag > 4095)
2077 		return;
2078 
2079 	idx = (tag >> 5) & 0x7F;
2080 	bit = tag & 0x1F;
2081 
2082 	/* Update our private VLAN bitvector. */
2083 	if (add)
2084 		sc->vmx_vlan_filter[idx] |= (1 << bit);
2085 	else
2086 		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
2087 }
2088 
2089 static void
2090 vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
2091 {
2092 
2093 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
2094 }
2095 
2096 static void
2097 vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
2098 {
2099 
2100 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
2101 }
2102 
2103 static u_int
2104 vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count)
2105 {
2106 	struct vmxnet3_softc *sc = arg;
2107 
2108 	if (count < VMXNET3_MULTICAST_MAX)
2109 		bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN],
2110 		    ETHER_ADDR_LEN);
2111 
2112 	return (1);
2113 }
2114 
2115 static void
2116 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
2117 {
2118 	if_t ifp;
2119 	struct vmxnet3_driver_shared *ds;
2120 	u_int mode;
2121 
2122 	ifp = sc->vmx_ifp;
2123 	ds = sc->vmx_ds;
2124 
2125 	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
2126 	if (flags & IFF_PROMISC)
2127 		mode |= VMXNET3_RXMODE_PROMISC;
2128 	if (flags & IFF_ALLMULTI)
2129 		mode |= VMXNET3_RXMODE_ALLMULTI;
2130 	else {
2131 		int cnt;
2132 
2133 		cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc);
2134 		if (cnt >= VMXNET3_MULTICAST_MAX) {
2135 			cnt = 0;
2136 			mode |= VMXNET3_RXMODE_ALLMULTI;
2137 		} else if (cnt > 0)
2138 			mode |= VMXNET3_RXMODE_MCAST;
2139 		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
2140 	}
2141 
2142 	ds->rxmode = mode;
2143 
2144 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
2145 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
2146 }
2147 
2148 static void
2149 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
2150 {
2151 
2152 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
2153 }
2154 
2155 static int
2156 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
2157 {
2158 	uint32_t status;
2159 
2160 	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
2161 	return !!(status & 0x1);
2162 }
2163 
2164 static void
2165 vmxnet3_link_status(struct vmxnet3_softc *sc)
2166 {
2167 	if_ctx_t ctx;
2168 	uint64_t speed;
2169 	int link;
2170 
2171 	ctx = sc->vmx_ctx;
2172 	link = vmxnet3_link_is_up(sc);
2173 	speed = IF_Gbps(10);
2174 
2175 	if (link != 0 && sc->vmx_link_active == 0) {
2176 		sc->vmx_link_active = 1;
2177 		iflib_link_state_change(ctx, LINK_STATE_UP, speed);
2178 	} else if (link == 0 && sc->vmx_link_active != 0) {
2179 		sc->vmx_link_active = 0;
2180 		iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
2181 	}
2182 }
2183 
2184 static void
2185 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
2186 {
2187 	uint32_t ml, mh;
2188 
2189 	ml  = sc->vmx_lladdr[0];
2190 	ml |= sc->vmx_lladdr[1] << 8;
2191 	ml |= sc->vmx_lladdr[2] << 16;
2192 	ml |= sc->vmx_lladdr[3] << 24;
2193 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
2194 
2195 	mh  = sc->vmx_lladdr[4];
2196 	mh |= sc->vmx_lladdr[5] << 8;
2197 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
2198 }
2199 
2200 static void
2201 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
2202 {
2203 	uint32_t ml, mh;
2204 
2205 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
2206 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
2207 
2208 	sc->vmx_lladdr[0] = ml;
2209 	sc->vmx_lladdr[1] = ml >> 8;
2210 	sc->vmx_lladdr[2] = ml >> 16;
2211 	sc->vmx_lladdr[3] = ml >> 24;
2212 	sc->vmx_lladdr[4] = mh;
2213 	sc->vmx_lladdr[5] = mh >> 8;
2214 }
2215 
2216 static void
2217 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
2218     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2219 {
2220 	struct sysctl_oid *node, *txsnode;
2221 	struct sysctl_oid_list *list, *txslist;
2222 	struct UPT1_TxStats *txstats;
2223 	char namebuf[16];
2224 
2225 	txstats = &txq->vxtxq_ts->stats;
2226 
2227 	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
2228 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2229 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
2230 	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
2231 
2232 	/*
2233 	 * Add statistics reported by the host. These are updated by the
2234 	 * iflib txq timer on txq 0.
2235 	 */
2236 	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2237 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2238 	txslist = SYSCTL_CHILDREN(txsnode);
2239 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
2240 	    &txstats->TSO_packets, "TSO packets");
2241 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
2242 	    &txstats->TSO_bytes, "TSO bytes");
2243 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2244 	    &txstats->ucast_packets, "Unicast packets");
2245 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2246 	    &txstats->ucast_bytes, "Unicast bytes");
2247 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2248 	    &txstats->mcast_packets, "Multicast packets");
2249 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2250 	    &txstats->mcast_bytes, "Multicast bytes");
2251 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
2252 	    &txstats->error, "Errors");
2253 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
2254 	    &txstats->discard, "Discards");
2255 }
2256 
2257 static void
2258 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
2259     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2260 {
2261 	struct sysctl_oid *node, *rxsnode;
2262 	struct sysctl_oid_list *list, *rxslist;
2263 	struct UPT1_RxStats *rxstats;
2264 	char namebuf[16];
2265 
2266 	rxstats = &rxq->vxrxq_rs->stats;
2267 
2268 	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
2269 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2270 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
2271 	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
2272 
2273 	/*
2274 	 * Add statistics reported by the host. These are updated by the
2275 	 * iflib txq timer on txq 0.
2276 	 */
2277 	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2278 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2279 	rxslist = SYSCTL_CHILDREN(rxsnode);
2280 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
2281 	    &rxstats->LRO_packets, "LRO packets");
2282 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
2283 	    &rxstats->LRO_bytes, "LRO bytes");
2284 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2285 	    &rxstats->ucast_packets, "Unicast packets");
2286 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2287 	    &rxstats->ucast_bytes, "Unicast bytes");
2288 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2289 	    &rxstats->mcast_packets, "Multicast packets");
2290 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2291 	    &rxstats->mcast_bytes, "Multicast bytes");
2292 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
2293 	    &rxstats->bcast_packets, "Broadcast packets");
2294 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
2295 	    &rxstats->bcast_bytes, "Broadcast bytes");
2296 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
2297 	    &rxstats->nobuffer, "No buffer");
2298 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
2299 	    &rxstats->error, "Errors");
2300 }
2301 
2302 static void
2303 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
2304     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2305 {
2306 	if_softc_ctx_t scctx;
2307 	struct sysctl_oid *node;
2308 	struct sysctl_oid_list *list;
2309 	int i;
2310 
2311 	scctx = sc->vmx_scctx;
2312 
2313 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
2314 		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
2315 
2316 		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
2317 		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2318 		list = SYSCTL_CHILDREN(node);
2319 
2320 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
2321 		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
2322 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
2323 		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
2324 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
2325 		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
2326 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
2327 		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
2328 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2329 		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0,"");
2330 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2331 		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
2332 	}
2333 
2334 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
2335 		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
2336 
2337 		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
2338 		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2339 		list = SYSCTL_CHILDREN(node);
2340 
2341 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
2342 		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
2343 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
2344 		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
2345 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD,
2346 		    &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, "");
2347 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
2348 		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
2349 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
2350 		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
2351 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD,
2352 		    &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, "");
2353 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2354 		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,"");
2355 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2356 		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
2357 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD,
2358 		    &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, "");
2359 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length_frag",
2360 		    CTLFLAG_RD, &rxq->vxrxq_comp_ring.vcxr_zero_length_frag,
2361 		    0, "");
2362 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD,
2363 		    &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, "");
2364 	}
2365 }
2366 
2367 static void
2368 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
2369     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2370 {
2371 	if_softc_ctx_t scctx;
2372 	int i;
2373 
2374 	scctx = sc->vmx_scctx;
2375 
2376 	for (i = 0; i < scctx->isc_ntxqsets; i++)
2377 		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
2378 	for (i = 0; i < scctx->isc_nrxqsets; i++)
2379 		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
2380 
2381 	vmxnet3_setup_debug_sysctl(sc, ctx, child);
2382 }
2383 
2384 static void
2385 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
2386 {
2387 	device_t dev;
2388 	struct sysctl_ctx_list *ctx;
2389 	struct sysctl_oid *tree;
2390 	struct sysctl_oid_list *child;
2391 
2392 	dev = sc->vmx_dev;
2393 	ctx = device_get_sysctl_ctx(dev);
2394 	tree = device_get_sysctl_tree(dev);
2395 	child = SYSCTL_CHILDREN(tree);
2396 
2397 	vmxnet3_setup_queue_sysctl(sc, ctx, child);
2398 }
2399 
2400 static void
2401 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2402 {
2403 
2404 	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
2405 }
2406 
2407 static uint32_t
2408 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
2409 {
2410 
2411 	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
2412 }
2413 
2414 static void
2415 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2416 {
2417 
2418 	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
2419 }
2420 
2421 static void
2422 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2423 {
2424 
2425 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
2426 }
2427 
2428 static uint32_t
2429 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2430 {
2431 
2432 	vmxnet3_write_cmd(sc, cmd);
2433 	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
2434 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2435 	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
2436 }
2437 
/*
 * Unmask interrupt 'irq' by writing 0 to its BAR0 mask register.
 */
static void
vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
{
	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
}
2444 
/*
 * Mask interrupt 'irq' by writing 1 to its BAR0 mask register.
 */
static void
vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
{
	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
}
2451 
2452 static int
2453 vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2454 {
2455 	/* Not using interrupts for TX */
2456 	return (0);
2457 }
2458 
2459 static int
2460 vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2461 {
2462 	struct vmxnet3_softc *sc;
2463 
2464 	sc = iflib_get_softc(ctx);
2465 	vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
2466 	return (0);
2467 }
2468 
2469 static void
2470 vmxnet3_link_intr_enable(if_ctx_t ctx)
2471 {
2472 	struct vmxnet3_softc *sc;
2473 
2474 	sc = iflib_get_softc(ctx);
2475 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2476 }
2477 
2478 static void
2479 vmxnet3_intr_enable_all(if_ctx_t ctx)
2480 {
2481 	struct vmxnet3_softc *sc;
2482 	if_softc_ctx_t scctx;
2483 	int i;
2484 
2485 	sc = iflib_get_softc(ctx);
2486 	scctx = sc->vmx_scctx;
2487 	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
2488 	for (i = 0; i < scctx->isc_vectors; i++)
2489 		vmxnet3_enable_intr(sc, i);
2490 }
2491 
2492 static void
2493 vmxnet3_intr_disable_all(if_ctx_t ctx)
2494 {
2495 	struct vmxnet3_softc *sc;
2496 	int i;
2497 
2498 	sc = iflib_get_softc(ctx);
2499 	/*
2500 	 * iflib may invoke this routine before vmxnet3_attach_post() has
2501 	 * run, which is before the top level shared data area is
2502 	 * initialized and the device made aware of it.
2503 	 */
2504 	if (sc->vmx_ds != NULL)
2505 		sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
2506 	for (i = 0; i < VMXNET3_MAX_INTRS; i++)
2507 		vmxnet3_disable_intr(sc, i);
2508 }
2509 
2510 /*
2511  * Since this is a purely paravirtualized device, we do not have
2512  * to worry about DMA coherency. But at times, we must make sure
2513  * both the compiler and CPU do not reorder memory operations.
2514  */
2515 static inline void
2516 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
2517 {
2518 
2519 	switch (type) {
2520 	case VMXNET3_BARRIER_RD:
2521 		rmb();
2522 		break;
2523 	case VMXNET3_BARRIER_WR:
2524 		wmb();
2525 		break;
2526 	case VMXNET3_BARRIER_RDWR:
2527 		mb();
2528 		break;
2529 	default:
2530 		panic("%s: bad barrier type %d", __func__, type);
2531 	}
2532 }
2533