xref: /freebsd/sys/dev/vmware/vmxnet3/if_vmx.c (revision bc5304a006238115291e7568583632889dffbab9)
1 /*-
2  * Copyright (c) 2013 Tsubai Masanari
3  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4  * Copyright (c) 2018 Patrick Kelsey
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
19  */
20 
21 /* Driver for VMware vmxnet3 virtual ethernet devices. */
22 
23 #include <sys/cdefs.h>
24 __FBSDID("$FreeBSD$");
25 
26 #include "opt_rss.h"
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/endian.h>
32 #include <sys/sockio.h>
33 #include <sys/mbuf.h>
34 #include <sys/malloc.h>
35 #include <sys/module.h>
36 #include <sys/socket.h>
37 #include <sys/sysctl.h>
38 #include <sys/smp.h>
39 #include <vm/vm.h>
40 #include <vm/pmap.h>
41 
42 #include <net/ethernet.h>
43 #include <net/if.h>
44 #include <net/if_var.h>
45 #include <net/if_arp.h>
46 #include <net/if_dl.h>
47 #include <net/if_types.h>
48 #include <net/if_media.h>
49 #include <net/if_vlan_var.h>
50 #include <net/iflib.h>
51 #ifdef RSS
52 #include <net/rss_config.h>
53 #endif
54 
55 #include <netinet/in_systm.h>
56 #include <netinet/in.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip6.h>
59 #include <netinet6/ip6_var.h>
60 #include <netinet/udp.h>
61 #include <netinet/tcp.h>
62 
63 #include <machine/bus.h>
64 #include <machine/resource.h>
65 #include <sys/bus.h>
66 #include <sys/rman.h>
67 
68 #include <dev/pci/pcireg.h>
69 #include <dev/pci/pcivar.h>
70 
71 #include "ifdi_if.h"
72 
73 #include "if_vmxreg.h"
74 #include "if_vmxvar.h"
75 
76 #include "opt_inet.h"
77 #include "opt_inet6.h"
78 
79 #define VMXNET3_VMWARE_VENDOR_ID	0x15AD
80 #define VMXNET3_VMWARE_DEVICE_ID	0x07B0
81 
82 static pci_vendor_info_t vmxnet3_vendor_info_array[] =
83 {
84 	PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
85 	/* required last entry */
86 	PVID_END
87 };
88 
89 static void	*vmxnet3_register(device_t);
90 static int	vmxnet3_attach_pre(if_ctx_t);
91 static int	vmxnet3_msix_intr_assign(if_ctx_t, int);
92 static void	vmxnet3_free_irqs(struct vmxnet3_softc *);
93 static int	vmxnet3_attach_post(if_ctx_t);
94 static int	vmxnet3_detach(if_ctx_t);
95 static int	vmxnet3_shutdown(if_ctx_t);
96 static int	vmxnet3_suspend(if_ctx_t);
97 static int	vmxnet3_resume(if_ctx_t);
98 
99 static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
100 static void	vmxnet3_free_resources(struct vmxnet3_softc *);
101 static int	vmxnet3_check_version(struct vmxnet3_softc *);
102 static void	vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
103 
104 static int	vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
105 static void	vmxnet3_init_txq(struct vmxnet3_softc *, int);
106 static int	vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
107 static void	vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
108 static int	vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
109 static void	vmxnet3_queues_free(if_ctx_t);
110 
111 static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
112 static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
113 static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
114 static void	vmxnet3_free_mcast_table(struct vmxnet3_softc *);
115 static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
116 static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
117 static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
118 static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
119 static void	vmxnet3_free_data(struct vmxnet3_softc *);
120 
121 static void	vmxnet3_evintr(struct vmxnet3_softc *);
122 static int	vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
123 static void	vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
124 static int	vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
125 static int	vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
126 static int	vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
127 static void	vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
128 static void	vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
129 static int	vmxnet3_legacy_intr(void *);
130 static int	vmxnet3_rxq_intr(void *);
131 static int	vmxnet3_event_intr(void *);
132 
133 static void	vmxnet3_stop(if_ctx_t);
134 
135 static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
136 static void	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
137 static void	vmxnet3_reinit_queues(struct vmxnet3_softc *);
138 static int	vmxnet3_enable_device(struct vmxnet3_softc *);
139 static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
140 static void	vmxnet3_init(if_ctx_t);
141 static void	vmxnet3_multi_set(if_ctx_t);
142 static int	vmxnet3_mtu_set(if_ctx_t, uint32_t);
143 static void	vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
144 static int	vmxnet3_media_change(if_ctx_t);
145 static int	vmxnet3_promisc_set(if_ctx_t, int);
146 static uint64_t	vmxnet3_get_counter(if_ctx_t, ift_counter);
147 static void	vmxnet3_update_admin_status(if_ctx_t);
148 static void	vmxnet3_txq_timer(if_ctx_t, uint16_t);
149 
150 static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
151 		    uint16_t);
152 static void	vmxnet3_vlan_register(if_ctx_t, uint16_t);
153 static void	vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
154 static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);
155 
156 static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
157 static int	vmxnet3_link_is_up(struct vmxnet3_softc *);
158 static void	vmxnet3_link_status(struct vmxnet3_softc *);
159 static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
160 static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
161 
162 static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
163 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
164 static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
165 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
166 static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
167 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
168 static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
169 
170 static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
171 		    uint32_t);
172 static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
173 static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
174 		    uint32_t);
175 static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
176 static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
177 
178 static int	vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
179 static int	vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
180 static void	vmxnet3_link_intr_enable(if_ctx_t);
181 static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
182 static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
183 static void	vmxnet3_intr_enable_all(if_ctx_t);
184 static void	vmxnet3_intr_disable_all(if_ctx_t);
185 
/*
 * Kinds of barrier that can be requested from vmxnet3_barrier().  Judging
 * by the names these select a read, a write, or a combined read/write
 * barrier.
 */
typedef enum {
	VMXNET3_BARRIER_RD,
	VMXNET3_BARRIER_WR,
	VMXNET3_BARRIER_RDWR,
} vmxnet3_barrier_t;

static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
193 
194 static device_method_t vmxnet3_methods[] = {
195 	/* Device interface */
196 	DEVMETHOD(device_register, vmxnet3_register),
197 	DEVMETHOD(device_probe, iflib_device_probe),
198 	DEVMETHOD(device_attach, iflib_device_attach),
199 	DEVMETHOD(device_detach, iflib_device_detach),
200 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
201 	DEVMETHOD(device_suspend, iflib_device_suspend),
202 	DEVMETHOD(device_resume, iflib_device_resume),
203 	DEVMETHOD_END
204 };
205 
206 static driver_t vmxnet3_driver = {
207 	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
208 };
209 
210 static devclass_t vmxnet3_devclass;
211 DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
212 IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
213 MODULE_VERSION(vmx, 2);
214 
215 MODULE_DEPEND(vmx, pci, 1, 1, 1);
216 MODULE_DEPEND(vmx, ether, 1, 1, 1);
217 MODULE_DEPEND(vmx, iflib, 1, 1, 1);
218 
219 static device_method_t vmxnet3_iflib_methods[] = {
220 	DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
221 	DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
222 	DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
223 
224 	DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
225 	DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
226 	DEVMETHOD(ifdi_detach, vmxnet3_detach),
227 
228 	DEVMETHOD(ifdi_init, vmxnet3_init),
229 	DEVMETHOD(ifdi_stop, vmxnet3_stop),
230 	DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
231 	DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
232 	DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
233 	DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
234 	DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
235 	DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
236 	DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
237 	DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
238 
239 	DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
240 	DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
241 	DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
242 	DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
243 	DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
244 	DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
245 
246 	DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
247 	DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
248 
249 	DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
250 	DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
251 	DEVMETHOD(ifdi_resume, vmxnet3_resume),
252 
253 	DEVMETHOD_END
254 };
255 
256 static driver_t vmxnet3_iflib_driver = {
257 	"vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
258 };
259 
260 struct if_txrx vmxnet3_txrx = {
261 	.ift_txd_encap = vmxnet3_isc_txd_encap,
262 	.ift_txd_flush = vmxnet3_isc_txd_flush,
263 	.ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
264 	.ift_rxd_available = vmxnet3_isc_rxd_available,
265 	.ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
266 	.ift_rxd_refill = vmxnet3_isc_rxd_refill,
267 	.ift_rxd_flush = vmxnet3_isc_rxd_flush,
268 	.ift_legacy_intr = vmxnet3_legacy_intr
269 };
270 
271 static struct if_shared_ctx vmxnet3_sctx_init = {
272 	.isc_magic = IFLIB_MAGIC,
273 	.isc_q_align = 512,
274 
275 	.isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
276 	.isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
277 	.isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
278 	.isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
279 
280 	/*
281 	 * These values are used to configure the busdma tag used for
282 	 * receive descriptors.  Each receive descriptor only points to one
283 	 * buffer.
284 	 */
285 	.isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
286 	.isc_rx_nsegments = 1,  /* One mapping per descriptor */
287 	.isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
288 
289 	.isc_admin_intrcnt = 1,
290 	.isc_vendor_info = vmxnet3_vendor_info_array,
291 	.isc_driver_version = "2",
292 	.isc_driver = &vmxnet3_iflib_driver,
293 	.isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY,
294 
295 	/*
296 	 * Number of receive queues per receive queue set, with associated
297 	 * descriptor settings for each.
298 	 */
299 	.isc_nrxqs = 3,
300 	.isc_nfl = 2, /* one free list for each receive command queue */
301 	.isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
302 	.isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
303 	.isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
304 
305 	/*
306 	 * Number of transmit queues per transmit queue set, with associated
307 	 * descriptor settings for each.
308 	 */
309 	.isc_ntxqs = 2,
310 	.isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
311 	.isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
312 	.isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
313 };
314 
315 static void *
316 vmxnet3_register(device_t dev)
317 {
318 	return (&vmxnet3_sctx_init);
319 }
320 
/*
 * Round val down to the nearest power of two.
 *
 * Returns the largest power of two that is less than or equal to val, or
 * 0 when val is not positive.  (The previous form, 1U << (fls(val) - 1),
 * shifted by a negative count for val == 0, which is undefined behavior.)
 */
static int
trunc_powerof2(int val)
{
	unsigned int pow2;

	if (val <= 0)
		return (0);

	/* Double while the next power of two still fits within val. */
	pow2 = 1;
	while (pow2 <= (unsigned int)val / 2)
		pow2 <<= 1;

	return ((int)pow2);
}
327 
328 static int
329 vmxnet3_attach_pre(if_ctx_t ctx)
330 {
331 	device_t dev;
332 	if_softc_ctx_t scctx;
333 	struct vmxnet3_softc *sc;
334 	uint32_t intr_config;
335 	int error;
336 
337 	dev = iflib_get_dev(ctx);
338 	sc = iflib_get_softc(ctx);
339 	sc->vmx_dev = dev;
340 	sc->vmx_ctx = ctx;
341 	sc->vmx_sctx = iflib_get_sctx(ctx);
342 	sc->vmx_scctx = iflib_get_softc_ctx(ctx);
343 	sc->vmx_ifp = iflib_get_ifp(ctx);
344 	sc->vmx_media = iflib_get_media(ctx);
345 	scctx = sc->vmx_scctx;
346 
347 	scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
348 	scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
349 	/* isc_tx_tso_size_max doesn't include possible vlan header */
350 	scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
351 	scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
352 	scctx->isc_txrx = &vmxnet3_txrx;
353 
354 	/* If 0, the iflib tunable was not set, so set to the default */
355 	if (scctx->isc_nrxqsets == 0)
356 		scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
357 	scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets);
358 	scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
359 	scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max);
360 
361 	/* If 0, the iflib tunable was not set, so set to the default */
362 	if (scctx->isc_ntxqsets == 0)
363 		scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
364 	scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets);
365 	scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
366 	scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max);
367 
368 	/*
369 	 * Enforce that the transmit completion queue descriptor count is
370 	 * the same as the transmit command queue descriptor count.
371 	 */
372 	scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
373 	scctx->isc_txqsizes[0] =
374 	    sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
375 	scctx->isc_txqsizes[1] =
376 	    sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
377 
378 	/*
379 	 * Enforce that the receive completion queue descriptor count is the
380 	 * sum of the receive command queue descriptor counts, and that the
381 	 * second receive command queue descriptor count is the same as the
382 	 * first one.
383 	 */
384 	scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
385 	scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
386 	scctx->isc_rxqsizes[0] =
387 	    sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
388 	scctx->isc_rxqsizes[1] =
389 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
390 	scctx->isc_rxqsizes[2] =
391 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
392 
393 	/*
394 	 * Initialize the max frame size and descriptor queue buffer
395 	 * sizes.
396 	 */
397 	vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp));
398 
399 	scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
400 
401 	/* Map PCI BARs */
402 	error = vmxnet3_alloc_resources(sc);
403 	if (error)
404 		goto fail;
405 
406 	/* Check device versions */
407 	error = vmxnet3_check_version(sc);
408 	if (error)
409 		goto fail;
410 
411 	/*
412 	 * The interrupt mode can be set in the hypervisor configuration via
413 	 * the parameter ethernet<N>.intrMode.
414 	 */
415 	intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
416 	sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
417 
418 	/*
419 	 * Configure the softc context to attempt to configure the interrupt
420 	 * mode now indicated by intr_config.  iflib will follow the usual
421 	 * fallback path MSI-X -> MSI -> LEGACY, starting at the configured
422 	 * starting mode.
423 	 */
424 	switch (intr_config & 0x03) {
425 	case VMXNET3_IT_AUTO:
426 	case VMXNET3_IT_MSIX:
427 		scctx->isc_msix_bar = pci_msix_table_bar(dev);
428 		break;
429 	case VMXNET3_IT_MSI:
430 		scctx->isc_msix_bar = -1;
431 		scctx->isc_disable_msix = 1;
432 		break;
433 	case VMXNET3_IT_LEGACY:
434 		scctx->isc_msix_bar = 0;
435 		break;
436 	}
437 
438 	scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
439 	scctx->isc_capabilities = scctx->isc_capenable =
440 	    IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
441 	    IFCAP_TSO4 | IFCAP_TSO6 |
442 	    IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
443 	    IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
444 	    IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
445 	    IFCAP_JUMBO_MTU;
446 
447 	/* These capabilities are not enabled by default. */
448 	scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
449 
450 	vmxnet3_get_lladdr(sc);
451 	iflib_set_mac(ctx, sc->vmx_lladdr);
452 
453 	return (0);
454 fail:
455 	/*
456 	 * We must completely clean up anything allocated above as iflib
457 	 * will not invoke any other driver entry points as a result of this
458 	 * failure.
459 	 */
460 	vmxnet3_free_resources(sc);
461 
462 	return (error);
463 }
464 
465 static int
466 vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
467 {
468 	struct vmxnet3_softc *sc;
469 	if_softc_ctx_t scctx;
470 	struct vmxnet3_rxqueue *rxq;
471 	int error;
472 	int i;
473 	char irq_name[16];
474 
475 	sc = iflib_get_softc(ctx);
476 	scctx = sc->vmx_scctx;
477 
478 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
479 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
480 
481 		rxq = &sc->vmx_rxq[i];
482 		error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
483 		    IFLIB_INTR_RXTX, vmxnet3_rxq_intr, rxq, i, irq_name);
484 		if (error) {
485 			device_printf(iflib_get_dev(ctx),
486 			    "Failed to register rxq %d interrupt handler\n", i);
487 			return (error);
488 		}
489 	}
490 
491 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
492 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
493 
494 		/*
495 		 * Don't provide the corresponding rxq irq for reference -
496 		 * we want the transmit task to be attached to a task queue
497 		 * that is different from the one used by the corresponding
498 		 * rxq irq.  That is because the TX doorbell writes are very
499 		 * expensive as virtualized MMIO operations, so we want to
500 		 * be able to defer them to another core when possible so
501 		 * that they don't steal receive processing cycles during
502 		 * stack turnarounds like TCP ACK generation.  The other
503 		 * piece to this approach is enabling the iflib abdicate
504 		 * option (currently via an interface-specific
505 		 * tunable/sysctl).
506 		 */
507 		iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
508 		    irq_name);
509 	}
510 
511 	error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
512 	    scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
513 	    "event");
514 	if (error) {
515 		device_printf(iflib_get_dev(ctx),
516 		    "Failed to register event interrupt handler\n");
517 		return (error);
518 	}
519 
520 	return (0);
521 }
522 
523 static void
524 vmxnet3_free_irqs(struct vmxnet3_softc *sc)
525 {
526 	if_softc_ctx_t scctx;
527 	struct vmxnet3_rxqueue *rxq;
528 	int i;
529 
530 	scctx = sc->vmx_scctx;
531 
532 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
533 		rxq = &sc->vmx_rxq[i];
534 		iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
535 	}
536 
537 	iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
538 }
539 
540 static int
541 vmxnet3_attach_post(if_ctx_t ctx)
542 {
543 	device_t dev;
544 	if_softc_ctx_t scctx;
545 	struct vmxnet3_softc *sc;
546 	int error;
547 
548 	dev = iflib_get_dev(ctx);
549 	scctx = iflib_get_softc_ctx(ctx);
550 	sc = iflib_get_softc(ctx);
551 
552 	if (scctx->isc_nrxqsets > 1)
553 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
554 
555 	error = vmxnet3_alloc_data(sc);
556 	if (error)
557 		goto fail;
558 
559 	vmxnet3_set_interrupt_idx(sc);
560 	vmxnet3_setup_sysctl(sc);
561 
562 	ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
563 	ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
564 
565 fail:
566 	return (error);
567 }
568 
569 static int
570 vmxnet3_detach(if_ctx_t ctx)
571 {
572 	struct vmxnet3_softc *sc;
573 
574 	sc = iflib_get_softc(ctx);
575 
576 	vmxnet3_free_irqs(sc);
577 	vmxnet3_free_data(sc);
578 	vmxnet3_free_resources(sc);
579 
580 	return (0);
581 }
582 
583 static int
584 vmxnet3_shutdown(if_ctx_t ctx)
585 {
586 
587 	return (0);
588 }
589 
590 static int
591 vmxnet3_suspend(if_ctx_t ctx)
592 {
593 
594 	return (0);
595 }
596 
597 static int
598 vmxnet3_resume(if_ctx_t ctx)
599 {
600 
601 	return (0);
602 }
603 
604 static int
605 vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
606 {
607 	device_t dev;
608 	int rid;
609 
610 	dev = sc->vmx_dev;
611 
612 	rid = PCIR_BAR(0);
613 	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
614 	    RF_ACTIVE);
615 	if (sc->vmx_res0 == NULL) {
616 		device_printf(dev,
617 		    "could not map BAR0 memory\n");
618 		return (ENXIO);
619 	}
620 
621 	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
622 	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
623 
624 	rid = PCIR_BAR(1);
625 	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
626 	    RF_ACTIVE);
627 	if (sc->vmx_res1 == NULL) {
628 		device_printf(dev,
629 		    "could not map BAR1 memory\n");
630 		return (ENXIO);
631 	}
632 
633 	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
634 	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
635 
636 	return (0);
637 }
638 
639 static void
640 vmxnet3_free_resources(struct vmxnet3_softc *sc)
641 {
642 	device_t dev;
643 
644 	dev = sc->vmx_dev;
645 
646 	if (sc->vmx_res0 != NULL) {
647 		bus_release_resource(dev, SYS_RES_MEMORY,
648 		    rman_get_rid(sc->vmx_res0), sc->vmx_res0);
649 		sc->vmx_res0 = NULL;
650 	}
651 
652 	if (sc->vmx_res1 != NULL) {
653 		bus_release_resource(dev, SYS_RES_MEMORY,
654 		    rman_get_rid(sc->vmx_res1), sc->vmx_res1);
655 		sc->vmx_res1 = NULL;
656 	}
657 }
658 
659 static int
660 vmxnet3_check_version(struct vmxnet3_softc *sc)
661 {
662 	device_t dev;
663 	uint32_t version;
664 
665 	dev = sc->vmx_dev;
666 
667 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
668 	if ((version & 0x01) == 0) {
669 		device_printf(dev, "unsupported hardware version %#x\n",
670 		    version);
671 		return (ENOTSUP);
672 	}
673 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
674 
675 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
676 	if ((version & 0x01) == 0) {
677 		device_printf(dev, "unsupported UPT version %#x\n", version);
678 		return (ENOTSUP);
679 	}
680 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
681 
682 	return (0);
683 }
684 
685 static void
686 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
687 {
688 	if_softc_ctx_t scctx;
689 	struct vmxnet3_txqueue *txq;
690 	struct vmxnet3_txq_shared *txs;
691 	struct vmxnet3_rxqueue *rxq;
692 	struct vmxnet3_rxq_shared *rxs;
693 	int intr_idx;
694 	int i;
695 
696 	scctx = sc->vmx_scctx;
697 
698 	/*
699 	 * There is always one interrupt per receive queue, assigned
700 	 * starting with the first interrupt.  When there is only one
701 	 * interrupt available, the event interrupt shares the receive queue
702 	 * interrupt, otherwise it uses the interrupt following the last
703 	 * receive queue interrupt.  Transmit queues are not assigned
704 	 * interrupts, so they are given indexes beyond the indexes that
705 	 * correspond to the real interrupts.
706 	 */
707 
708 	/* The event interrupt is always the last vector. */
709 	sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
710 
711 	intr_idx = 0;
712 	for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
713 		rxq = &sc->vmx_rxq[i];
714 		rxs = rxq->vxrxq_rs;
715 		rxq->vxrxq_intr_idx = intr_idx;
716 		rxs->intr_idx = rxq->vxrxq_intr_idx;
717 	}
718 
719 	/*
720 	 * Assign the tx queues interrupt indexes above what we are actually
721 	 * using.  These interrupts will never be enabled.
722 	 */
723 	intr_idx = scctx->isc_vectors;
724 	for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
725 		txq = &sc->vmx_txq[i];
726 		txs = txq->vxtxq_ts;
727 		txq->vxtxq_intr_idx = intr_idx;
728 		txs->intr_idx = txq->vxtxq_intr_idx;
729 	}
730 }
731 
732 static int
733 vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
734 {
735 	if_softc_ctx_t scctx;
736 	int size;
737 	int error;
738 
739 	scctx = sc->vmx_scctx;
740 
741 	/*
742 	 * The txq and rxq shared data areas must be allocated contiguously
743 	 * as vmxnet3_driver_shared contains only a single address member
744 	 * for the shared queue data area.
745 	 */
746 	size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
747 	    scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
748 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
749 	if (error) {
750 		device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
751 		return (error);
752 	}
753 
754 	return (0);
755 }
756 
757 static void
758 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
759 {
760 	struct vmxnet3_txqueue *txq;
761 	struct vmxnet3_comp_ring *txc;
762 	struct vmxnet3_txring *txr;
763 	if_softc_ctx_t scctx;
764 
765 	txq = &sc->vmx_txq[q];
766 	txc = &txq->vxtxq_comp_ring;
767 	txr = &txq->vxtxq_cmd_ring;
768 	scctx = sc->vmx_scctx;
769 
770 	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
771 	    device_get_nameunit(sc->vmx_dev), q);
772 
773 	txq->vxtxq_sc = sc;
774 	txq->vxtxq_id = q;
775 	txc->vxcr_ndesc = scctx->isc_ntxd[0];
776 	txr->vxtxr_ndesc = scctx->isc_ntxd[1];
777 }
778 
779 static int
780 vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
781     int ntxqs, int ntxqsets)
782 {
783 	struct vmxnet3_softc *sc;
784 	int q;
785 	int error;
786 	caddr_t kva;
787 
788 	sc = iflib_get_softc(ctx);
789 
790 	/* Allocate the array of transmit queues */
791 	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
792 	    ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
793 	if (sc->vmx_txq == NULL)
794 		return (ENOMEM);
795 
796 	/* Initialize driver state for each transmit queue */
797 	for (q = 0; q < ntxqsets; q++)
798 		vmxnet3_init_txq(sc, q);
799 
800 	/*
801 	 * Allocate queue state that is shared with the device.  This check
802 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
803 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
804 	 * order iflib invokes those routines in.
805 	 */
806 	if (sc->vmx_qs_dma.idi_size == 0) {
807 		error = vmxnet3_queues_shared_alloc(sc);
808 		if (error)
809 			return (error);
810 	}
811 
812 	kva = sc->vmx_qs_dma.idi_vaddr;
813 	for (q = 0; q < ntxqsets; q++) {
814 		sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
815 		kva += sizeof(struct vmxnet3_txq_shared);
816 	}
817 
818 	/* Record descriptor ring vaddrs and paddrs */
819 	for (q = 0; q < ntxqsets; q++) {
820 		struct vmxnet3_txqueue *txq;
821 		struct vmxnet3_txring *txr;
822 		struct vmxnet3_comp_ring *txc;
823 
824 		txq = &sc->vmx_txq[q];
825 		txc = &txq->vxtxq_comp_ring;
826 		txr = &txq->vxtxq_cmd_ring;
827 
828 		/* Completion ring */
829 		txc->vxcr_u.txcd =
830 		    (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
831 		txc->vxcr_paddr = paddrs[q * ntxqs + 0];
832 
833 		/* Command ring */
834 		txr->vxtxr_txd =
835 		    (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
836 		txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
837 	}
838 
839 	return (0);
840 }
841 
842 static void
843 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
844 {
845 	struct vmxnet3_rxqueue *rxq;
846 	struct vmxnet3_comp_ring *rxc;
847 	struct vmxnet3_rxring *rxr;
848 	if_softc_ctx_t scctx;
849 	int i;
850 
851 	rxq = &sc->vmx_rxq[q];
852 	rxc = &rxq->vxrxq_comp_ring;
853 	scctx = sc->vmx_scctx;
854 
855 	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
856 	    device_get_nameunit(sc->vmx_dev), q);
857 
858 	rxq->vxrxq_sc = sc;
859 	rxq->vxrxq_id = q;
860 
861 	/*
862 	 * First rxq is the completion queue, so there are nrxqs - 1 command
863 	 * rings starting at iflib queue id 1.
864 	 */
865 	rxc->vxcr_ndesc = scctx->isc_nrxd[0];
866 	for (i = 0; i < nrxqs - 1; i++) {
867 		rxr = &rxq->vxrxq_cmd_ring[i];
868 		rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
869 	}
870 }
871 
872 static int
873 vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
874     int nrxqs, int nrxqsets)
875 {
876 	struct vmxnet3_softc *sc;
877 	if_softc_ctx_t scctx;
878 	int q;
879 	int i;
880 	int error;
881 	caddr_t kva;
882 
883 	sc = iflib_get_softc(ctx);
884 	scctx = sc->vmx_scctx;
885 
886 	/* Allocate the array of receive queues */
887 	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
888 	    nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
889 	if (sc->vmx_rxq == NULL)
890 		return (ENOMEM);
891 
892 	/* Initialize driver state for each receive queue */
893 	for (q = 0; q < nrxqsets; q++)
894 		vmxnet3_init_rxq(sc, q, nrxqs);
895 
896 	/*
897 	 * Allocate queue state that is shared with the device.  This check
898 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
899 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
900 	 * order iflib invokes those routines in.
901 	 */
902 	if (sc->vmx_qs_dma.idi_size == 0) {
903 		error = vmxnet3_queues_shared_alloc(sc);
904 		if (error)
905 			return (error);
906 	}
907 
908 	kva = sc->vmx_qs_dma.idi_vaddr +
909 	    scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
910 	for (q = 0; q < nrxqsets; q++) {
911 		sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
912 		kva += sizeof(struct vmxnet3_rxq_shared);
913 	}
914 
915 	/* Record descriptor ring vaddrs and paddrs */
916 	for (q = 0; q < nrxqsets; q++) {
917 		struct vmxnet3_rxqueue *rxq;
918 		struct vmxnet3_rxring *rxr;
919 		struct vmxnet3_comp_ring *rxc;
920 
921 		rxq = &sc->vmx_rxq[q];
922 		rxc = &rxq->vxrxq_comp_ring;
923 
924 		/* Completion ring */
925 		rxc->vxcr_u.rxcd =
926 		    (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
927 		rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
928 
929 		/* Command ring(s) */
930 		for (i = 0; i < nrxqs - 1; i++) {
931 			rxr = &rxq->vxrxq_cmd_ring[i];
932 
933 			rxr->vxrxr_rxd =
934 			    (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
935 			rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
936 		}
937 	}
938 
939 	return (0);
940 }
941 
942 static void
943 vmxnet3_queues_free(if_ctx_t ctx)
944 {
945 	struct vmxnet3_softc *sc;
946 
947 	sc = iflib_get_softc(ctx);
948 
949 	/* Free queue state area that is shared with the device */
950 	if (sc->vmx_qs_dma.idi_size != 0) {
951 		iflib_dma_free(&sc->vmx_qs_dma);
952 		sc->vmx_qs_dma.idi_size = 0;
953 	}
954 
955 	/* Free array of receive queues */
956 	if (sc->vmx_rxq != NULL) {
957 		free(sc->vmx_rxq, M_DEVBUF);
958 		sc->vmx_rxq = NULL;
959 	}
960 
961 	/* Free array of transmit queues */
962 	if (sc->vmx_txq != NULL) {
963 		free(sc->vmx_txq, M_DEVBUF);
964 		sc->vmx_txq = NULL;
965 	}
966 }
967 
968 static int
969 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
970 {
971 	device_t dev;
972 	size_t size;
973 	int error;
974 
975 	dev = sc->vmx_dev;
976 
977 	/* Top level state structure shared with the device */
978 	size = sizeof(struct vmxnet3_driver_shared);
979 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
980 	if (error) {
981 		device_printf(dev, "cannot alloc shared memory\n");
982 		return (error);
983 	}
984 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
985 
986 	/* RSS table state shared with the device */
987 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
988 		size = sizeof(struct vmxnet3_rss_shared);
989 		error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
990 		    &sc->vmx_rss_dma, 0);
991 		if (error) {
992 			device_printf(dev, "cannot alloc rss shared memory\n");
993 			return (error);
994 		}
995 		sc->vmx_rss =
996 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
997 	}
998 
999 	return (0);
1000 }
1001 
1002 static void
1003 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1004 {
1005 
1006 	/* Free RSS table state shared with the device */
1007 	if (sc->vmx_rss != NULL) {
1008 		iflib_dma_free(&sc->vmx_rss_dma);
1009 		sc->vmx_rss = NULL;
1010 	}
1011 
1012 	/* Free top level state structure shared with the device */
1013 	if (sc->vmx_ds != NULL) {
1014 		iflib_dma_free(&sc->vmx_ds_dma);
1015 		sc->vmx_ds = NULL;
1016 	}
1017 }
1018 
1019 static int
1020 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1021 {
1022 	int error;
1023 
1024 	/* Multicast table state shared with the device */
1025 	error = iflib_dma_alloc_align(sc->vmx_ctx,
1026 	    VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
1027 	if (error)
1028 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1029 	else
1030 		sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
1031 
1032 	return (error);
1033 }
1034 
1035 static void
1036 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1037 {
1038 
1039 	/* Free multicast table state shared with the device */
1040 	if (sc->vmx_mcast != NULL) {
1041 		iflib_dma_free(&sc->vmx_mcast_dma);
1042 		sc->vmx_mcast = NULL;
1043 	}
1044 }
1045 
1046 static void
1047 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1048 {
1049 	struct vmxnet3_driver_shared *ds;
1050 	if_shared_ctx_t sctx;
1051 	if_softc_ctx_t scctx;
1052 	struct vmxnet3_txqueue *txq;
1053 	struct vmxnet3_txq_shared *txs;
1054 	struct vmxnet3_rxqueue *rxq;
1055 	struct vmxnet3_rxq_shared *rxs;
1056 	int i;
1057 
1058 	ds = sc->vmx_ds;
1059 	sctx = sc->vmx_sctx;
1060 	scctx = sc->vmx_scctx;
1061 
1062 	/*
1063 	 * Initialize fields of the shared data that remains the same across
1064 	 * reinits. Note the shared data is zero'd when allocated.
1065 	 */
1066 
1067 	ds->magic = VMXNET3_REV1_MAGIC;
1068 
1069 	/* DriverInfo */
1070 	ds->version = VMXNET3_DRIVER_VERSION;
1071 	ds->guest = VMXNET3_GOS_FREEBSD |
1072 #ifdef __LP64__
1073 	    VMXNET3_GOS_64BIT;
1074 #else
1075 	    VMXNET3_GOS_32BIT;
1076 #endif
1077 	ds->vmxnet3_revision = 1;
1078 	ds->upt_version = 1;
1079 
1080 	/* Misc. conf */
1081 	ds->driver_data = vtophys(sc);
1082 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1083 	ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
1084 	ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
1085 	ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
1086 
1087 	/* RSS conf */
1088 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1089 		ds->rss.version = 1;
1090 		ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
1091 		ds->rss.len = sc->vmx_rss_dma.idi_size;
1092 	}
1093 
1094 	/* Interrupt control. */
1095 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1096 	/*
1097 	 * Total number of interrupt indexes we are using in the shared
1098 	 * config data, even though we don't actually allocate interrupt
1099 	 * resources for the tx queues.  Some versions of the device will
1100 	 * fail to initialize successfully if interrupt indexes are used in
1101 	 * the shared config that exceed the number of interrupts configured
1102 	 * here.
1103 	 */
1104 	ds->nintr = (scctx->isc_vectors == 1) ?
1105 	    2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
1106 	ds->evintr = sc->vmx_event_intr_idx;
1107 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1108 
1109 	for (i = 0; i < ds->nintr; i++)
1110 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1111 
1112 	/* Receive filter. */
1113 	ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
1114 	ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
1115 
1116 	/* Tx queues */
1117 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
1118 		txq = &sc->vmx_txq[i];
1119 		txs = txq->vxtxq_ts;
1120 
1121 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
1122 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1123 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
1124 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1125 		txs->driver_data = vtophys(txq);
1126 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1127 	}
1128 
1129 	/* Rx queues */
1130 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
1131 		rxq = &sc->vmx_rxq[i];
1132 		rxs = rxq->vxrxq_rs;
1133 
1134 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
1135 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1136 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
1137 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1138 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
1139 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1140 		rxs->driver_data = vtophys(rxq);
1141 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1142 	}
1143 }
1144 
1145 static void
1146 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1147 {
1148 	/*
1149 	 * Use the same key as the Linux driver until FreeBSD can do
1150 	 * RSS (presumably Toeplitz) in software.
1151 	 */
1152 	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1153 	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1154 	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1155 	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1156 	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1157 	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1158 	};
1159 
1160 	struct vmxnet3_driver_shared *ds;
1161 	if_softc_ctx_t scctx;
1162 	struct vmxnet3_rss_shared *rss;
1163 #ifdef RSS
1164 	uint8_t rss_algo;
1165 #endif
1166 	int i;
1167 
1168 	ds = sc->vmx_ds;
1169 	scctx = sc->vmx_scctx;
1170 	rss = sc->vmx_rss;
1171 
1172 	rss->hash_type =
1173 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1174 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1175 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1176 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1177 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1178 #ifdef RSS
1179 	/*
1180 	 * If the software RSS is configured to anything else other than
1181 	 * Toeplitz, then just do Toeplitz in "hardware" for the sake of
1182 	 * the packet distribution, but report the hash as opaque to
1183 	 * disengage from the software RSS.
1184 	 */
1185 	rss_algo = rss_gethashalgo();
1186 	if (rss_algo == RSS_HASH_TOEPLITZ) {
1187 		rss_getkey(rss->hash_key);
1188 		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) {
1189 			rss->ind_table[i] = rss_get_indirection_to_bucket(i) %
1190 			    scctx->isc_nrxqsets;
1191 		}
1192 		sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS;
1193 	} else
1194 #endif
1195 	{
1196 		memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1197 		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1198 			rss->ind_table[i] = i % scctx->isc_nrxqsets;
1199 		sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS;
1200 	}
1201 }
1202 
1203 static void
1204 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1205 {
1206 	struct ifnet *ifp;
1207 	struct vmxnet3_driver_shared *ds;
1208 	if_softc_ctx_t scctx;
1209 
1210 	ifp = sc->vmx_ifp;
1211 	ds = sc->vmx_ds;
1212 	scctx = sc->vmx_scctx;
1213 
1214 	ds->mtu = ifp->if_mtu;
1215 	ds->ntxqueue = scctx->isc_ntxqsets;
1216 	ds->nrxqueue = scctx->isc_nrxqsets;
1217 
1218 	ds->upt_features = 0;
1219 	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1220 		ds->upt_features |= UPT1_F_CSUM;
1221 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1222 		ds->upt_features |= UPT1_F_VLAN;
1223 	if (ifp->if_capenable & IFCAP_LRO)
1224 		ds->upt_features |= UPT1_F_LRO;
1225 
1226 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1227 		ds->upt_features |= UPT1_F_RSS;
1228 		vmxnet3_reinit_rss_shared_data(sc);
1229 	}
1230 
1231 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
1232 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1233 	    (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
1234 }
1235 
/*
 * Allocate the shared data and the multicast table, then initialize the
 * shared data.  The first failing step stops the sequence and its error
 * is returned; 0 is returned when every step succeeds.
 */
static int
vmxnet3_alloc_data(struct vmxnet3_softc *sc)
{
	int rc;

	rc = vmxnet3_alloc_shared_data(sc);
	if (rc == 0)
		rc = vmxnet3_alloc_mcast_table(sc);
	if (rc != 0)
		return (rc);

	vmxnet3_init_shared_data(sc);
	return (0);
}
1253 
/*
 * Counterpart of vmxnet3_alloc_data(): release the multicast table first,
 * then the shared data.
 */
static void
vmxnet3_free_data(struct vmxnet3_softc *sc)
{

	vmxnet3_free_mcast_table(sc);
	vmxnet3_free_shared_data(sc);
}
1261 
1262 static void
1263 vmxnet3_evintr(struct vmxnet3_softc *sc)
1264 {
1265 	device_t dev;
1266 	struct vmxnet3_txq_shared *ts;
1267 	struct vmxnet3_rxq_shared *rs;
1268 	uint32_t event;
1269 
1270 	dev = sc->vmx_dev;
1271 
1272 	/* Clear events. */
1273 	event = sc->vmx_ds->event;
1274 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1275 
1276 	if (event & VMXNET3_EVENT_LINK)
1277 		vmxnet3_link_status(sc);
1278 
1279 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1280 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1281 		ts = sc->vmx_txq[0].vxtxq_ts;
1282 		if (ts->stopped != 0)
1283 			device_printf(dev, "Tx queue error %#x\n", ts->error);
1284 		rs = sc->vmx_rxq[0].vxrxq_rs;
1285 		if (rs->stopped != 0)
1286 			device_printf(dev, "Rx queue error %#x\n", rs->error);
1287 
1288 		/* XXX - rely on liflib watchdog to reset us? */
1289 		device_printf(dev, "Rx/Tx queue error event ... "
1290 		    "waiting for iflib watchdog reset\n");
1291 	}
1292 
1293 	if (event & VMXNET3_EVENT_DIC)
1294 		device_printf(dev, "device implementation change event\n");
1295 	if (event & VMXNET3_EVENT_DEBUG)
1296 		device_printf(dev, "debug event\n");
1297 }
1298 
/*
 * Write the transmit descriptors for one packet into the command ring of
 * the tx queue selected by pi->ipi_qsidx, starting at pi->ipi_pidx.
 *
 * One descriptor is written per DMA segment.  The first descriptor (sop)
 * is written with the inverse of the ring's generation and the remaining
 * ones with the ring's generation; only after all descriptors and the
 * offload fields are in place, and a write barrier has been issued, is
 * the generation bit of the first descriptor flipped.
 *
 * The index following the last descriptor used is reported through
 * pi->ipi_new_pidx.  Always returns 0.
 */
static int
vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
{
	struct vmxnet3_softc *sc;
	struct vmxnet3_txqueue *txq;
	struct vmxnet3_txring *txr;
	struct vmxnet3_txdesc *txd, *sop;
	bus_dma_segment_t *segs;
	int nsegs;
	int pidx;
	int hdrlen;
	int i;
	int gen;

	sc = vsc;
	txq = &sc->vmx_txq[pi->ipi_qsidx];
	txr = &txq->vxtxq_cmd_ring;
	segs = pi->ipi_segs;
	nsegs = pi->ipi_nsegs;
	pidx = pi->ipi_pidx;

	/*
	 * NOTE(review): only the upper bound is asserted; txd is left
	 * unassigned if nsegs is 0 - presumably iflib never passes an
	 * empty packet, confirm.
	 */
	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
	    ("%s: packet with too many segments %d", __func__, nsegs));

	sop = &txr->vxtxr_txd[pidx];
	gen = txr->vxtxr_gen ^ 1;	/* Owned by cpu (yet) */

	for (i = 0; i < nsegs; i++) {
		txd = &txr->vxtxr_txd[pidx];

		txd->addr = segs[i].ds_addr;
		txd->len = segs[i].ds_len;
		txd->gen = gen;
		txd->dtype = 0;
		txd->offload_mode = VMXNET3_OM_NONE;
		txd->offload_pos = 0;
		txd->hlen = 0;
		txd->eop = 0;
		txd->compreq = 0;
		txd->vtag_mode = 0;
		txd->vtag = 0;

		/* Wrapping the ring toggles the ring's generation. */
		if (++pidx == txr->vxtxr_ndesc) {
			pidx = 0;
			txr->vxtxr_gen ^= 1;
		}
		/* Descriptors after the first use the ring's generation. */
		gen = txr->vxtxr_gen;
	}
	/* txd now refers to the last descriptor of the packet. */
	txd->eop = 1;
	txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
	pi->ipi_new_pidx = pidx;

	/*
	 * VLAN
	 */
	if (pi->ipi_mflags & M_VLANTAG) {
		sop->vtag_mode = 1;
		sop->vtag = pi->ipi_vtag;
	}

	/*
	 * TSO and checksum offloads
	 */
	hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
	if (pi->ipi_csum_flags & CSUM_TSO) {
		/* For TSO, offload_pos carries the segment size. */
		sop->offload_mode = VMXNET3_OM_TSO;
		sop->hlen = hdrlen + pi->ipi_tcp_hlen;
		sop->offload_pos = pi->ipi_tso_segsz;
	} else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
		/*
		 * For checksum offload, offload_pos is the offset of the
		 * TCP or UDP checksum field from the start of the frame.
		 */
		sop->offload_mode = VMXNET3_OM_CSUM;
		sop->hlen = hdrlen;
		sop->offload_pos = hdrlen +
		    ((pi->ipi_ipproto == IPPROTO_TCP) ?
			offsetof(struct tcphdr, th_sum) :
			offsetof(struct udphdr, uh_sum));
	}

	/* Finally, change the ownership. */
	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
	sop->gen ^= 1;

	return (0);
}
1383 
1384 static void
1385 vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
1386 {
1387 	struct vmxnet3_softc *sc;
1388 	struct vmxnet3_txqueue *txq;
1389 
1390 	sc = vsc;
1391 	txq = &sc->vmx_txq[txqid];
1392 
1393 	/*
1394 	 * pidx is what we last set ipi_new_pidx to in
1395 	 * vmxnet3_isc_txd_encap()
1396 	 */
1397 
1398 	/*
1399 	 * Avoid expensive register updates if the flush request is
1400 	 * redundant.
1401 	 */
1402 	if (txq->vxtxq_last_flush == pidx)
1403 		return;
1404 	txq->vxtxq_last_flush = pidx;
1405 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
1406 }
1407 
1408 static int
1409 vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
1410 {
1411 	struct vmxnet3_softc *sc;
1412 	struct vmxnet3_txqueue *txq;
1413 	struct vmxnet3_comp_ring *txc;
1414 	struct vmxnet3_txcompdesc *txcd;
1415 	struct vmxnet3_txring *txr;
1416 	int processed;
1417 
1418 	sc = vsc;
1419 	txq = &sc->vmx_txq[txqid];
1420 	txc = &txq->vxtxq_comp_ring;
1421 	txr = &txq->vxtxq_cmd_ring;
1422 
1423 	/*
1424 	 * If clear is true, we need to report the number of TX command ring
1425 	 * descriptors that have been processed by the device.  If clear is
1426 	 * false, we just need to report whether or not at least one TX
1427 	 * command ring descriptor has been processed by the device.
1428 	 */
1429 	processed = 0;
1430 	for (;;) {
1431 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1432 		if (txcd->gen != txc->vxcr_gen)
1433 			break;
1434 		else if (!clear)
1435 			return (1);
1436 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1437 
1438 		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1439 			txc->vxcr_next = 0;
1440 			txc->vxcr_gen ^= 1;
1441 		}
1442 
1443 		if (txcd->eop_idx < txr->vxtxr_next)
1444 			processed += txr->vxtxr_ndesc -
1445 			    (txr->vxtxr_next - txcd->eop_idx) + 1;
1446 		else
1447 			processed += txcd->eop_idx - txr->vxtxr_next + 1;
1448 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1449 	}
1450 
1451 	return (processed);
1452 }
1453 
1454 static int
1455 vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
1456 {
1457 	struct vmxnet3_softc *sc;
1458 	struct vmxnet3_rxqueue *rxq;
1459 	struct vmxnet3_comp_ring *rxc;
1460 	struct vmxnet3_rxcompdesc *rxcd;
1461 	int avail;
1462 	int completed_gen;
1463 #ifdef INVARIANTS
1464 	int expect_sop = 1;
1465 #endif
1466 	sc = vsc;
1467 	rxq = &sc->vmx_rxq[rxqid];
1468 	rxc = &rxq->vxrxq_comp_ring;
1469 
1470 	avail = 0;
1471 	completed_gen = rxc->vxcr_gen;
1472 	for (;;) {
1473 		rxcd = &rxc->vxcr_u.rxcd[idx];
1474 		if (rxcd->gen != completed_gen)
1475 			break;
1476 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1477 
1478 #ifdef INVARIANTS
1479 		if (expect_sop)
1480 			KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1481 		else
1482 			KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
1483 		expect_sop = rxcd->eop;
1484 #endif
1485 		if (rxcd->eop && (rxcd->len != 0))
1486 			avail++;
1487 		if (avail > budget)
1488 			break;
1489 		if (++idx == rxc->vxcr_ndesc) {
1490 			idx = 0;
1491 			completed_gen ^= 1;
1492 		}
1493 	}
1494 
1495 	return (avail);
1496 }
1497 
/*
 * Extract one received packet from the rx completion ring of the queue
 * given by ri->iri_qsidx, starting at ri->iri_cidx.
 *
 * Leading zero-length packets are skipped and counted.  For the packet
 * found, the flow id and RSS type, the list of fragments (free list,
 * descriptor index, length), the total length, and - unless the packet
 * is flagged in error - the checksum and VLAN information are recorded
 * in ri.  ri->iri_cidx is advanced past the packet and the completion
 * ring generation is toggled whenever the ring wraps.
 *
 * Always returns 0.
 */
static int
vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
{
	struct vmxnet3_softc *sc;
	if_softc_ctx_t scctx;
	struct vmxnet3_rxqueue *rxq;
	struct vmxnet3_comp_ring *rxc;
	struct vmxnet3_rxcompdesc *rxcd;
	struct vmxnet3_rxring *rxr;
	struct vmxnet3_rxdesc *rxd;
	if_rxd_frag_t frag;
	int cqidx;
	uint16_t total_len;
	uint8_t nfrags;
	uint8_t i;
	uint8_t flid;

	sc = vsc;
	scctx = sc->vmx_scctx;
	rxq = &sc->vmx_rxq[ri->iri_qsidx];
	rxc = &rxq->vxrxq_comp_ring;

	/*
	 * Get a single packet starting at the given index in the completion
	 * queue.  That we have been called indicates that
	 * vmxnet3_isc_rxd_available() has already verified that either
	 * there is a complete packet available starting at the given index,
	 * or there are one or more zero length packets starting at the
	 * given index followed by a complete packet, so no verification of
	 * ownership of the descriptors (and no associated read barrier) is
	 * required here.
	 */
	cqidx = ri->iri_cidx;
	rxcd = &rxc->vxcr_u.rxcd[cqidx];
	while (rxcd->len == 0) {
		KASSERT(rxcd->sop && rxcd->eop,
		    ("%s: zero-length packet without both sop and eop set",
			__func__));
		rxc->vxcr_zero_length++;
		if (++cqidx == rxc->vxcr_ndesc) {
			cqidx = 0;
			rxc->vxcr_gen ^= 1;
		}
		rxcd = &rxc->vxcr_u.rxcd[cqidx];
	}
	KASSERT(rxcd->sop, ("%s: expected sop", __func__));

	/*
	 * RSS and flow ID.
	 * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should
	 * be used only if the software RSS is enabled and it uses the same
	 * algorithm and the hash key as the "hardware".  If the software RSS
	 * is not enabled, then it's simply pointless to use those types.
	 * If it's enabled but with different parameters, then hash values will
	 * not match.
	 */
	ri->iri_flowid = rxcd->rss_hash;
#ifdef RSS
	if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) {
		switch (rxcd->rss_type) {
		case VMXNET3_RCD_RSS_TYPE_NONE:
			/* No hash was computed: use the queue index. */
			ri->iri_flowid = ri->iri_qsidx;
			ri->iri_rsstype = M_HASHTYPE_NONE;
			break;
		case VMXNET3_RCD_RSS_TYPE_IPV4:
			ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
			break;
		case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
			break;
		case VMXNET3_RCD_RSS_TYPE_IPV6:
			ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
			break;
		case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
			break;
		default:
			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
			break;
		}
	} else
#endif
	{
		switch (rxcd->rss_type) {
		case VMXNET3_RCD_RSS_TYPE_NONE:
			/* No hash was computed: use the queue index. */
			ri->iri_flowid = ri->iri_qsidx;
			ri->iri_rsstype = M_HASHTYPE_NONE;
			break;
		default:
			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
			break;
		}
	}

	/*
	 * The queue numbering scheme used for rxcd->qid is as follows:
	 *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
	 *  - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
	 *
	 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
	 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
	 * indicates command ring (and flid) 1.
	 */
	nfrags = 0;
	total_len = 0;
	do {
		rxcd = &rxc->vxcr_u.rxcd[cqidx];
		KASSERT(rxcd->gen == rxc->vxcr_gen,
		    ("%s: generation mismatch", __func__));
		flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
		rxr = &rxq->vxrxq_cmd_ring[flid];
		/* NOTE(review): rxd is computed but never read below. */
		rxd = &rxr->vxrxr_rxd[rxcd->rxd_idx];

		frag = &ri->iri_frags[nfrags];
		frag->irf_flid = flid;
		frag->irf_idx = rxcd->rxd_idx;
		frag->irf_len = rxcd->len;
		total_len += rxcd->len;
		nfrags++;
		if (++cqidx == rxc->vxcr_ndesc) {
			cqidx = 0;
			rxc->vxcr_gen ^= 1;
		}
	} while (!rxcd->eop);

	/* rxcd still refers to the last (eop) descriptor of the packet. */
	ri->iri_cidx = cqidx;
	ri->iri_nfrags = nfrags;
	ri->iri_len = total_len;

	/*
	 * If there's an error, the last descriptor in the packet will
	 * have the error indicator set.  In this case, set all
	 * fragment lengths to zero.  This will cause iflib to discard
	 * the packet, but process all associated descriptors through
	 * the refill mechanism.
	 */
	if (__predict_false(rxcd->error)) {
		rxc->vxcr_pkt_errors++;
		for (i = 0; i < nfrags; i++) {
			frag = &ri->iri_frags[i];
			frag->irf_len = 0;
		}
	} else {
		/* Checksum offload information is in the last descriptor. */
		if (!rxcd->no_csum) {
			uint32_t csum_flags = 0;

			if (rxcd->ipv4) {
				csum_flags |= CSUM_IP_CHECKED;
				if (rxcd->ipcsum_ok)
					csum_flags |= CSUM_IP_VALID;
			}
			/* L4 status is reported only for unfragmented TCP/UDP. */
			if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
				csum_flags |= CSUM_L4_CALC;
				if (rxcd->csum_ok) {
					csum_flags |= CSUM_L4_VALID;
					ri->iri_csum_data = 0xffff;
				}
			}
			ri->iri_csum_flags = csum_flags;
		}

		/* VLAN information is in the last descriptor. */
		if (rxcd->vlan) {
			ri->iri_flags |= M_VLANTAG;
			ri->iri_vtag = rxcd->vtag;
		}
	}

	return (0);
}
1669 
/*
 * Hand receive buffers to the device.  The rx command ring is selected
 * by iru->iru_qsidx and iru->iru_flidx; iru->iru_count buffers of
 * iru->iru_buf_size bytes are supplied, with their physical addresses in
 * iru->iru_paddrs and their target descriptor indexes in iru->iru_idxs.
 *
 * The ring is walked from the position where the previous refill ended.
 * Descriptors named in iru_idxs are fully rewritten; descriptors passed
 * over on the way are counted as skips.  Every descriptor visited gets
 * the ring's current generation.
 */
static void
vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
{
	struct vmxnet3_softc *sc;
	struct vmxnet3_rxqueue *rxq;
	struct vmxnet3_rxring *rxr;
	struct vmxnet3_rxdesc *rxd;
	uint64_t *paddrs;
	int count;
	int len;
	int idx;
	int i;
	uint8_t flid;
	uint8_t btype;

	count = iru->iru_count;
	len = iru->iru_buf_size;
	flid = iru->iru_flidx;
	paddrs = iru->iru_paddrs;

	sc = vsc;
	rxq = &sc->vmx_rxq[iru->iru_qsidx];
	rxr = &rxq->vxrxq_cmd_ring[flid];
	rxd = rxr->vxrxr_rxd;

	/*
	 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
	 * command ring 1 is filled with BTYPE_BODY descriptors.
	 */
	btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
	/*
	 * The refill entries from iflib will advance monotonically,
	 * but the refilled descriptors may not be contiguous due to
	 * earlier skipping of descriptors by the device.  The refill
	 * entries from iflib need an entire state update, while the
	 * descriptors previously skipped by the device only need to
	 * have their generation numbers updated.
	 */
	idx = rxr->vxrxr_refill_start;
	i = 0;
	/*
	 * NOTE(review): the loop body runs at least once and reads
	 * iru_idxs[0], so count is presumably never 0 - confirm.
	 */
	do {
		if (idx == iru->iru_idxs[i]) {
			rxd[idx].addr = paddrs[i];
			rxd[idx].len = len;
			rxd[idx].btype = btype;
			i++;
		} else
			rxr->vxrxr_desc_skips++;
		rxd[idx].gen = rxr->vxrxr_gen;

		/* Wrapping the ring toggles the ring's generation. */
		if (++idx == rxr->vxrxr_ndesc) {
			idx = 0;
			rxr->vxrxr_gen ^= 1;
		}
	} while (i != count);
	/* Remember where the next refill has to resume. */
	rxr->vxrxr_refill_start = idx;
}
1727 
1728 static void
1729 vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
1730 {
1731 	struct vmxnet3_softc *sc;
1732 	struct vmxnet3_rxqueue *rxq;
1733 	struct vmxnet3_rxring *rxr;
1734 	bus_size_t r;
1735 
1736 	sc = vsc;
1737 	rxq = &sc->vmx_rxq[rxqid];
1738 	rxr = &rxq->vxrxq_cmd_ring[flid];
1739 
1740 	if (flid == 0)
1741 		r = VMXNET3_BAR0_RXH1(rxqid);
1742 	else
1743 		r = VMXNET3_BAR0_RXH2(rxqid);
1744 
1745 	vmxnet3_write_bar0(sc, r, pidx);
1746 }
1747 
1748 static int
1749 vmxnet3_legacy_intr(void *xsc)
1750 {
1751 	struct vmxnet3_softc *sc;
1752 	if_softc_ctx_t scctx;
1753 	if_ctx_t ctx;
1754 
1755 	sc = xsc;
1756 	scctx = sc->vmx_scctx;
1757 	ctx = sc->vmx_ctx;
1758 
1759 	/*
1760 	 * When there is only a single interrupt configured, this routine
1761 	 * runs in fast interrupt context, following which the rxq 0 task
1762 	 * will be enqueued.
1763 	 */
1764 	if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
1765 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
1766 			return (FILTER_HANDLED);
1767 	}
1768 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1769 		vmxnet3_intr_disable_all(ctx);
1770 
1771 	if (sc->vmx_ds->event != 0)
1772 		iflib_admin_intr_deferred(ctx);
1773 
1774 	/*
1775 	 * XXX - When there is both rxq and event activity, do we care
1776 	 * whether the rxq 0 task or the admin task re-enables the interrupt
1777 	 * first?
1778 	 */
1779 	return (FILTER_SCHEDULE_THREAD);
1780 }
1781 
1782 static int
1783 vmxnet3_rxq_intr(void *vrxq)
1784 {
1785 	struct vmxnet3_softc *sc;
1786 	struct vmxnet3_rxqueue *rxq;
1787 
1788 	rxq = vrxq;
1789 	sc = rxq->vxrxq_sc;
1790 
1791 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1792 		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
1793 
1794 	return (FILTER_SCHEDULE_THREAD);
1795 }
1796 
1797 static int
1798 vmxnet3_event_intr(void *vsc)
1799 {
1800 	struct vmxnet3_softc *sc;
1801 
1802 	sc = vsc;
1803 
1804 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1805 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
1806 
1807 	/*
1808 	 * The work will be done via vmxnet3_update_admin_status(), and the
1809 	 * interrupt will be re-enabled in vmxnet3_link_intr_enable().
1810 	 *
1811 	 * The interrupt will be re-enabled by vmxnet3_link_intr_enable().
1812 	 */
1813 	return (FILTER_SCHEDULE_THREAD);
1814 }
1815 
1816 static void
1817 vmxnet3_stop(if_ctx_t ctx)
1818 {
1819 	struct vmxnet3_softc *sc;
1820 
1821 	sc = iflib_get_softc(ctx);
1822 
1823 	sc->vmx_link_active = 0;
1824 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
1825 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
1826 }
1827 
1828 static void
1829 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
1830 {
1831 	struct vmxnet3_txring *txr;
1832 	struct vmxnet3_comp_ring *txc;
1833 
1834 	txq->vxtxq_last_flush = -1;
1835 
1836 	txr = &txq->vxtxq_cmd_ring;
1837 	txr->vxtxr_next = 0;
1838 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
1839 	/*
1840 	 * iflib has zeroed out the descriptor array during the prior attach
1841 	 * or stop
1842 	 */
1843 
1844 	txc = &txq->vxtxq_comp_ring;
1845 	txc->vxcr_next = 0;
1846 	txc->vxcr_gen = VMXNET3_INIT_GEN;
1847 	/*
1848 	 * iflib has zeroed out the descriptor array during the prior attach
1849 	 * or stop
1850 	 */
1851 }
1852 
1853 static void
1854 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
1855 {
1856 	struct vmxnet3_rxring *rxr;
1857 	struct vmxnet3_comp_ring *rxc;
1858 	int i;
1859 
1860 	/*
1861 	 * The descriptors will be populated with buffers during a
1862 	 * subsequent invocation of vmxnet3_isc_rxd_refill()
1863 	 */
1864 	for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
1865 		rxr = &rxq->vxrxq_cmd_ring[i];
1866 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
1867 		rxr->vxrxr_desc_skips = 0;
1868 		rxr->vxrxr_refill_start = 0;
1869 		/*
1870 		 * iflib has zeroed out the descriptor array during the
1871 		 * prior attach or stop
1872 		 */
1873 	}
1874 
1875 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
1876 		rxr = &rxq->vxrxq_cmd_ring[i];
1877 		rxr->vxrxr_gen = 0;
1878 		rxr->vxrxr_desc_skips = 0;
1879 		rxr->vxrxr_refill_start = 0;
1880 		bzero(rxr->vxrxr_rxd,
1881 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
1882 	}
1883 
1884 	rxc = &rxq->vxrxq_comp_ring;
1885 	rxc->vxcr_next = 0;
1886 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
1887 	rxc->vxcr_zero_length = 0;
1888 	rxc->vxcr_pkt_errors = 0;
1889 	/*
1890 	 * iflib has zeroed out the descriptor array during the prior attach
1891 	 * or stop
1892 	 */
1893 }
1894 
1895 static void
1896 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
1897 {
1898 	if_softc_ctx_t scctx;
1899 	int q;
1900 
1901 	scctx = sc->vmx_scctx;
1902 
1903 	for (q = 0; q < scctx->isc_ntxqsets; q++)
1904 		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
1905 
1906 	for (q = 0; q < scctx->isc_nrxqsets; q++)
1907 		vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
1908 }
1909 
1910 static int
1911 vmxnet3_enable_device(struct vmxnet3_softc *sc)
1912 {
1913 	if_softc_ctx_t scctx;
1914 	int q;
1915 
1916 	scctx = sc->vmx_scctx;
1917 
1918 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
1919 		device_printf(sc->vmx_dev, "device enable command failed!\n");
1920 		return (1);
1921 	}
1922 
1923 	/* Reset the Rx queue heads. */
1924 	for (q = 0; q < scctx->isc_nrxqsets; q++) {
1925 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
1926 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
1927 	}
1928 
1929 	return (0);
1930 }
1931 
1932 static void
1933 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
1934 {
1935 	struct ifnet *ifp;
1936 
1937 	ifp = sc->vmx_ifp;
1938 
1939 	vmxnet3_set_rxfilter(sc, if_getflags(ifp));
1940 
1941 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1942 		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
1943 		    sizeof(sc->vmx_ds->vlan_filter));
1944 	else
1945 		bzero(sc->vmx_ds->vlan_filter,
1946 		    sizeof(sc->vmx_ds->vlan_filter));
1947 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
1948 }
1949 
1950 static void
1951 vmxnet3_init(if_ctx_t ctx)
1952 {
1953 	struct vmxnet3_softc *sc;
1954 
1955 	sc = iflib_get_softc(ctx);
1956 
1957 	/* Use the current MAC address. */
1958 	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1959 	vmxnet3_set_lladdr(sc);
1960 
1961 	vmxnet3_reinit_shared_data(sc);
1962 	vmxnet3_reinit_queues(sc);
1963 
1964 	vmxnet3_enable_device(sc);
1965 
1966 	vmxnet3_reinit_rxfilters(sc);
1967 	vmxnet3_link_status(sc);
1968 }
1969 
1970 static void
1971 vmxnet3_multi_set(if_ctx_t ctx)
1972 {
1973 
1974 	vmxnet3_set_rxfilter(iflib_get_softc(ctx),
1975 	    if_getflags(iflib_get_ifp(ctx)));
1976 }
1977 
1978 static int
1979 vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
1980 {
1981 	struct vmxnet3_softc *sc;
1982 	if_softc_ctx_t scctx;
1983 
1984 	sc = iflib_get_softc(ctx);
1985 	scctx = sc->vmx_scctx;
1986 
1987 	if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
1988 		ETHER_CRC_LEN))
1989 		return (EINVAL);
1990 
1991 	/*
1992 	 * Update the max frame size so that the rx mbuf size is
1993 	 * chosen based on the new mtu during the interface init that
1994 	 * will occur after this routine returns.
1995 	 */
1996 	scctx->isc_max_frame_size = mtu +
1997 		ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
1998 	/* RX completion queue - n/a */
1999 	scctx->isc_rxd_buf_size[0] = 0;
2000 	/*
2001 	 * For header-type descriptors (used for first segment of
2002 	 * packet), let iflib determine the buffer size based on the
2003 	 * max frame size.
2004 	 */
2005 	scctx->isc_rxd_buf_size[1] = 0;
2006 	/*
2007 	 * For body-type descriptors (used for jumbo frames and LRO),
2008 	 * always use page-sized buffers.
2009 	 */
2010 	scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE;
2011 
2012 	return (0);
2013 }
2014 
2015 static void
2016 vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq * ifmr)
2017 {
2018 	struct vmxnet3_softc *sc;
2019 
2020 	sc = iflib_get_softc(ctx);
2021 
2022 	ifmr->ifm_status = IFM_AVALID;
2023 	ifmr->ifm_active = IFM_ETHER;
2024 
2025 	if (vmxnet3_link_is_up(sc) != 0) {
2026 		ifmr->ifm_status |= IFM_ACTIVE;
2027 		ifmr->ifm_active |= IFM_AUTO;
2028 	} else
2029 		ifmr->ifm_active |= IFM_NONE;
2030 }
2031 
2032 static int
2033 vmxnet3_media_change(if_ctx_t ctx)
2034 {
2035 
2036 	/* Ignore. */
2037 	return (0);
2038 }
2039 
2040 static int
2041 vmxnet3_promisc_set(if_ctx_t ctx, int flags)
2042 {
2043 
2044 	vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
2045 
2046 	return (0);
2047 }
2048 
2049 static uint64_t
2050 vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
2051 {
2052 	if_t ifp = iflib_get_ifp(ctx);
2053 
2054 	if (cnt < IFCOUNTERS)
2055 		return if_get_counter_default(ifp, cnt);
2056 
2057 	return (0);
2058 }
2059 
2060 static void
2061 vmxnet3_update_admin_status(if_ctx_t ctx)
2062 {
2063 	struct vmxnet3_softc *sc;
2064 
2065 	sc = iflib_get_softc(ctx);
2066 	if (sc->vmx_ds->event != 0)
2067 		vmxnet3_evintr(sc);
2068 
2069 	vmxnet3_refresh_host_stats(sc);
2070 }
2071 
2072 static void
2073 vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
2074 {
2075 	/* Host stats refresh is global, so just trigger it on txq 0 */
2076 	if (qid == 0)
2077 		vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
2078 }
2079 
2080 static void
2081 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
2082 {
2083 	int idx, bit;
2084 
2085 	if (tag == 0 || tag > 4095)
2086 		return;
2087 
2088 	idx = (tag >> 5) & 0x7F;
2089 	bit = tag & 0x1F;
2090 
2091 	/* Update our private VLAN bitvector. */
2092 	if (add)
2093 		sc->vmx_vlan_filter[idx] |= (1 << bit);
2094 	else
2095 		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
2096 }
2097 
2098 static void
2099 vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
2100 {
2101 
2102 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
2103 }
2104 
2105 static void
2106 vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
2107 {
2108 
2109 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
2110 }
2111 
2112 static u_int
2113 vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count)
2114 {
2115 	struct vmxnet3_softc *sc = arg;
2116 
2117 	if (count < VMXNET3_MULTICAST_MAX)
2118 		bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN],
2119 		    ETHER_ADDR_LEN);
2120 
2121 	return (1);
2122 }
2123 
2124 static void
2125 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
2126 {
2127 	struct ifnet *ifp;
2128 	struct vmxnet3_driver_shared *ds;
2129 	u_int mode;
2130 
2131 	ifp = sc->vmx_ifp;
2132 	ds = sc->vmx_ds;
2133 
2134 	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
2135 	if (flags & IFF_PROMISC)
2136 		mode |= VMXNET3_RXMODE_PROMISC;
2137 	if (flags & IFF_ALLMULTI)
2138 		mode |= VMXNET3_RXMODE_ALLMULTI;
2139 	else {
2140 		int cnt;
2141 
2142 		cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc);
2143 		if (cnt >= VMXNET3_MULTICAST_MAX) {
2144 			cnt = 0;
2145 			mode |= VMXNET3_RXMODE_ALLMULTI;
2146 		} else if (cnt > 0)
2147 			mode |= VMXNET3_RXMODE_MCAST;
2148 		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
2149 	}
2150 
2151 	ds->rxmode = mode;
2152 
2153 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
2154 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
2155 }
2156 
2157 static void
2158 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
2159 {
2160 
2161 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
2162 }
2163 
2164 static int
2165 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
2166 {
2167 	uint32_t status;
2168 
2169 	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
2170 	return !!(status & 0x1);
2171 }
2172 
2173 static void
2174 vmxnet3_link_status(struct vmxnet3_softc *sc)
2175 {
2176 	if_ctx_t ctx;
2177 	uint64_t speed;
2178 	int link;
2179 
2180 	ctx = sc->vmx_ctx;
2181 	link = vmxnet3_link_is_up(sc);
2182 	speed = IF_Gbps(10);
2183 
2184 	if (link != 0 && sc->vmx_link_active == 0) {
2185 		sc->vmx_link_active = 1;
2186 		iflib_link_state_change(ctx, LINK_STATE_UP, speed);
2187 	} else if (link == 0 && sc->vmx_link_active != 0) {
2188 		sc->vmx_link_active = 0;
2189 		iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
2190 	}
2191 }
2192 
2193 static void
2194 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
2195 {
2196 	uint32_t ml, mh;
2197 
2198 	ml  = sc->vmx_lladdr[0];
2199 	ml |= sc->vmx_lladdr[1] << 8;
2200 	ml |= sc->vmx_lladdr[2] << 16;
2201 	ml |= sc->vmx_lladdr[3] << 24;
2202 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
2203 
2204 	mh  = sc->vmx_lladdr[4];
2205 	mh |= sc->vmx_lladdr[5] << 8;
2206 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
2207 }
2208 
2209 static void
2210 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
2211 {
2212 	uint32_t ml, mh;
2213 
2214 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
2215 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
2216 
2217 	sc->vmx_lladdr[0] = ml;
2218 	sc->vmx_lladdr[1] = ml >> 8;
2219 	sc->vmx_lladdr[2] = ml >> 16;
2220 	sc->vmx_lladdr[3] = ml >> 24;
2221 	sc->vmx_lladdr[4] = mh;
2222 	sc->vmx_lladdr[5] = mh >> 8;
2223 }
2224 
2225 static void
2226 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
2227     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2228 {
2229 	struct sysctl_oid *node, *txsnode;
2230 	struct sysctl_oid_list *list, *txslist;
2231 	struct UPT1_TxStats *txstats;
2232 	char namebuf[16];
2233 
2234 	txstats = &txq->vxtxq_ts->stats;
2235 
2236 	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
2237 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2238 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
2239 	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
2240 
2241 	/*
2242 	 * Add statistics reported by the host. These are updated by the
2243 	 * iflib txq timer on txq 0.
2244 	 */
2245 	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2246 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2247 	txslist = SYSCTL_CHILDREN(txsnode);
2248 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
2249 	    &txstats->TSO_packets, "TSO packets");
2250 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
2251 	    &txstats->TSO_bytes, "TSO bytes");
2252 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2253 	    &txstats->ucast_packets, "Unicast packets");
2254 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2255 	    &txstats->ucast_bytes, "Unicast bytes");
2256 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2257 	    &txstats->mcast_packets, "Multicast packets");
2258 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2259 	    &txstats->mcast_bytes, "Multicast bytes");
2260 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
2261 	    &txstats->error, "Errors");
2262 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
2263 	    &txstats->discard, "Discards");
2264 }
2265 
2266 static void
2267 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
2268     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2269 {
2270 	struct sysctl_oid *node, *rxsnode;
2271 	struct sysctl_oid_list *list, *rxslist;
2272 	struct UPT1_RxStats *rxstats;
2273 	char namebuf[16];
2274 
2275 	rxstats = &rxq->vxrxq_rs->stats;
2276 
2277 	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
2278 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2279 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
2280 	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
2281 
2282 	/*
2283 	 * Add statistics reported by the host. These are updated by the
2284 	 * iflib txq timer on txq 0.
2285 	 */
2286 	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2287 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2288 	rxslist = SYSCTL_CHILDREN(rxsnode);
2289 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
2290 	    &rxstats->LRO_packets, "LRO packets");
2291 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
2292 	    &rxstats->LRO_bytes, "LRO bytes");
2293 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2294 	    &rxstats->ucast_packets, "Unicast packets");
2295 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2296 	    &rxstats->ucast_bytes, "Unicast bytes");
2297 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2298 	    &rxstats->mcast_packets, "Multicast packets");
2299 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2300 	    &rxstats->mcast_bytes, "Multicast bytes");
2301 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
2302 	    &rxstats->bcast_packets, "Broadcast packets");
2303 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
2304 	    &rxstats->bcast_bytes, "Broadcast bytes");
2305 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
2306 	    &rxstats->nobuffer, "No buffer");
2307 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
2308 	    &rxstats->error, "Errors");
2309 }
2310 
2311 static void
2312 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
2313     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2314 {
2315 	if_softc_ctx_t scctx;
2316 	struct sysctl_oid *node;
2317 	struct sysctl_oid_list *list;
2318 	int i;
2319 
2320 	scctx = sc->vmx_scctx;
2321 
2322 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
2323 		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
2324 
2325 		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
2326 		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2327 		list = SYSCTL_CHILDREN(node);
2328 
2329 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
2330 		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
2331 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
2332 		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
2333 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
2334 		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
2335 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
2336 		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
2337 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2338 		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0,"");
2339 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2340 		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
2341 	}
2342 
2343 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
2344 		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
2345 
2346 		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
2347 		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2348 		list = SYSCTL_CHILDREN(node);
2349 
2350 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
2351 		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
2352 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
2353 		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
2354 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD,
2355 		    &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, "");
2356 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
2357 		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
2358 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
2359 		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
2360 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD,
2361 		    &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, "");
2362 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2363 		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,"");
2364 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2365 		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
2366 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD,
2367 		    &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, "");
2368 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD,
2369 		    &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, "");
2370 	}
2371 }
2372 
2373 static void
2374 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
2375     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2376 {
2377 	if_softc_ctx_t scctx;
2378 	int i;
2379 
2380 	scctx = sc->vmx_scctx;
2381 
2382 	for (i = 0; i < scctx->isc_ntxqsets; i++)
2383 		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
2384 	for (i = 0; i < scctx->isc_nrxqsets; i++)
2385 		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
2386 
2387 	vmxnet3_setup_debug_sysctl(sc, ctx, child);
2388 }
2389 
2390 static void
2391 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
2392 {
2393 	device_t dev;
2394 	struct sysctl_ctx_list *ctx;
2395 	struct sysctl_oid *tree;
2396 	struct sysctl_oid_list *child;
2397 
2398 	dev = sc->vmx_dev;
2399 	ctx = device_get_sysctl_ctx(dev);
2400 	tree = device_get_sysctl_tree(dev);
2401 	child = SYSCTL_CHILDREN(tree);
2402 
2403 	vmxnet3_setup_queue_sysctl(sc, ctx, child);
2404 }
2405 
2406 static void
2407 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2408 {
2409 
2410 	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
2411 }
2412 
2413 static uint32_t
2414 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
2415 {
2416 
2417 	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
2418 }
2419 
2420 static void
2421 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2422 {
2423 
2424 	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
2425 }
2426 
2427 static void
2428 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2429 {
2430 
2431 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
2432 }
2433 
2434 static uint32_t
2435 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2436 {
2437 
2438 	vmxnet3_write_cmd(sc, cmd);
2439 	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
2440 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2441 	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
2442 }
2443 
/* Enable interrupt irq by writing 0 to its BAR0 IMASK register. */
static void
vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
{
	const uint32_t unmasked = 0;

	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), unmasked);
}
2450 
/* Disable interrupt irq by writing 1 to its BAR0 IMASK register. */
static void
vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
{
	const uint32_t masked = 1;

	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), masked);
}
2457 
2458 static int
2459 vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2460 {
2461 	/* Not using interrupts for TX */
2462 	return (0);
2463 }
2464 
2465 static int
2466 vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2467 {
2468 	struct vmxnet3_softc *sc;
2469 
2470 	sc = iflib_get_softc(ctx);
2471 	vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
2472 	return (0);
2473 }
2474 
2475 static void
2476 vmxnet3_link_intr_enable(if_ctx_t ctx)
2477 {
2478 	struct vmxnet3_softc *sc;
2479 
2480 	sc = iflib_get_softc(ctx);
2481 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2482 }
2483 
2484 static void
2485 vmxnet3_intr_enable_all(if_ctx_t ctx)
2486 {
2487 	struct vmxnet3_softc *sc;
2488 	if_softc_ctx_t scctx;
2489 	int i;
2490 
2491 	sc = iflib_get_softc(ctx);
2492 	scctx = sc->vmx_scctx;
2493 	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
2494 	for (i = 0; i < scctx->isc_vectors; i++)
2495 		vmxnet3_enable_intr(sc, i);
2496 }
2497 
2498 static void
2499 vmxnet3_intr_disable_all(if_ctx_t ctx)
2500 {
2501 	struct vmxnet3_softc *sc;
2502 	int i;
2503 
2504 	sc = iflib_get_softc(ctx);
2505 	/*
2506 	 * iflib may invoke this routine before vmxnet3_attach_post() has
2507 	 * run, which is before the top level shared data area is
2508 	 * initialized and the device made aware of it.
2509 	 */
2510 	if (sc->vmx_ds != NULL)
2511 		sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
2512 	for (i = 0; i < VMXNET3_MAX_INTRS; i++)
2513 		vmxnet3_disable_intr(sc, i);
2514 }
2515 
2516 /*
2517  * Since this is a purely paravirtualized device, we do not have
2518  * to worry about DMA coherency. But at times, we must make sure
2519  * both the compiler and CPU do not reorder memory operations.
2520  */
2521 static inline void
2522 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
2523 {
2524 
2525 	switch (type) {
2526 	case VMXNET3_BARRIER_RD:
2527 		rmb();
2528 		break;
2529 	case VMXNET3_BARRIER_WR:
2530 		wmb();
2531 		break;
2532 	case VMXNET3_BARRIER_RDWR:
2533 		mb();
2534 		break;
2535 	default:
2536 		panic("%s: bad barrier type %d", __func__, type);
2537 	}
2538 }
2539