xref: /freebsd/sys/dev/vmware/vmxnet3/if_vmx.c (revision da759cfa320d5076b075d15ff3f00ab3ba5634fd)
1 /*-
2  * Copyright (c) 2013 Tsubai Masanari
3  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4  * Copyright (c) 2018 Patrick Kelsey
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
19  */
20 
21 /* Driver for VMware vmxnet3 virtual ethernet devices. */
22 
23 #include <sys/cdefs.h>
24 __FBSDID("$FreeBSD$");
25 
26 #include "opt_rss.h"
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/kernel.h>
31 #include <sys/endian.h>
32 #include <sys/sockio.h>
33 #include <sys/mbuf.h>
34 #include <sys/malloc.h>
35 #include <sys/module.h>
36 #include <sys/socket.h>
37 #include <sys/sysctl.h>
38 #include <sys/smp.h>
39 #include <vm/vm.h>
40 #include <vm/pmap.h>
41 
42 #include <net/ethernet.h>
43 #include <net/if.h>
44 #include <net/if_var.h>
45 #include <net/if_arp.h>
46 #include <net/if_dl.h>
47 #include <net/if_types.h>
48 #include <net/if_media.h>
49 #include <net/if_vlan_var.h>
50 #include <net/iflib.h>
51 #ifdef RSS
52 #include <net/rss_config.h>
53 #endif
54 
55 #include <netinet/in_systm.h>
56 #include <netinet/in.h>
57 #include <netinet/ip.h>
58 #include <netinet/ip6.h>
59 #include <netinet6/ip6_var.h>
60 #include <netinet/udp.h>
61 #include <netinet/tcp.h>
62 
63 #include <machine/bus.h>
64 #include <machine/resource.h>
65 #include <sys/bus.h>
66 #include <sys/rman.h>
67 
68 #include <dev/pci/pcireg.h>
69 #include <dev/pci/pcivar.h>
70 
71 #include "ifdi_if.h"
72 
73 #include "if_vmxreg.h"
74 #include "if_vmxvar.h"
75 
76 #include "opt_inet.h"
77 #include "opt_inet6.h"
78 
79 
80 #define VMXNET3_VMWARE_VENDOR_ID	0x15AD
81 #define VMXNET3_VMWARE_DEVICE_ID	0x07B0
82 
83 static pci_vendor_info_t vmxnet3_vendor_info_array[] =
84 {
85 	PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
86 	/* required last entry */
87 	PVID_END
88 };
89 
90 static void	*vmxnet3_register(device_t);
91 static int	vmxnet3_attach_pre(if_ctx_t);
92 static int	vmxnet3_msix_intr_assign(if_ctx_t, int);
93 static void	vmxnet3_free_irqs(struct vmxnet3_softc *);
94 static int	vmxnet3_attach_post(if_ctx_t);
95 static int	vmxnet3_detach(if_ctx_t);
96 static int	vmxnet3_shutdown(if_ctx_t);
97 static int	vmxnet3_suspend(if_ctx_t);
98 static int	vmxnet3_resume(if_ctx_t);
99 
100 static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
101 static void	vmxnet3_free_resources(struct vmxnet3_softc *);
102 static int	vmxnet3_check_version(struct vmxnet3_softc *);
103 static void	vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
104 
105 static int	vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
106 static void	vmxnet3_init_txq(struct vmxnet3_softc *, int);
107 static int	vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
108 static void	vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
109 static int	vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
110 static void	vmxnet3_queues_free(if_ctx_t);
111 
112 static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
113 static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
114 static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
115 static void	vmxnet3_free_mcast_table(struct vmxnet3_softc *);
116 static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
117 static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
118 static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
119 static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
120 static void	vmxnet3_free_data(struct vmxnet3_softc *);
121 
122 static void	vmxnet3_evintr(struct vmxnet3_softc *);
123 static int	vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
124 static void	vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
125 static int	vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
126 static int	vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
127 static int	vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
128 static void	vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
129 static void	vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
130 static int	vmxnet3_legacy_intr(void *);
131 static int	vmxnet3_rxq_intr(void *);
132 static int	vmxnet3_event_intr(void *);
133 
134 static void	vmxnet3_stop(if_ctx_t);
135 
136 static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
137 static void	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
138 static void	vmxnet3_reinit_queues(struct vmxnet3_softc *);
139 static int	vmxnet3_enable_device(struct vmxnet3_softc *);
140 static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
141 static void	vmxnet3_init(if_ctx_t);
142 static void	vmxnet3_multi_set(if_ctx_t);
143 static int	vmxnet3_mtu_set(if_ctx_t, uint32_t);
144 static void	vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
145 static int	vmxnet3_media_change(if_ctx_t);
146 static int	vmxnet3_promisc_set(if_ctx_t, int);
147 static uint64_t	vmxnet3_get_counter(if_ctx_t, ift_counter);
148 static void	vmxnet3_update_admin_status(if_ctx_t);
149 static void	vmxnet3_txq_timer(if_ctx_t, uint16_t);
150 
151 static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
152 		    uint16_t);
153 static void	vmxnet3_vlan_register(if_ctx_t, uint16_t);
154 static void	vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
155 static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);
156 
157 static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
158 static int	vmxnet3_link_is_up(struct vmxnet3_softc *);
159 static void	vmxnet3_link_status(struct vmxnet3_softc *);
160 static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
161 static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
162 
163 static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
164 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
165 static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
166 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
167 static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
168 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
169 static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
170 
171 static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
172 		    uint32_t);
173 static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
174 static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
175 		    uint32_t);
176 static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
177 static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
178 
179 static int	vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
180 static int	vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
181 static void	vmxnet3_link_intr_enable(if_ctx_t);
182 static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
183 static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
184 static void	vmxnet3_intr_enable_all(if_ctx_t);
185 static void	vmxnet3_intr_disable_all(if_ctx_t);
186 
/*
 * Barrier selector accepted by vmxnet3_barrier().  The RD/WR/RDWR
 * suffixes presumably stand for read, write and read+write ordering;
 * confirm against the vmxnet3_barrier() implementation.
 */
typedef enum {
	VMXNET3_BARRIER_RD,
	VMXNET3_BARRIER_WR,
	VMXNET3_BARRIER_RDWR,
} vmxnet3_barrier_t;

static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
194 
195 
196 static device_method_t vmxnet3_methods[] = {
197 	/* Device interface */
198 	DEVMETHOD(device_register, vmxnet3_register),
199 	DEVMETHOD(device_probe, iflib_device_probe),
200 	DEVMETHOD(device_attach, iflib_device_attach),
201 	DEVMETHOD(device_detach, iflib_device_detach),
202 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
203 	DEVMETHOD(device_suspend, iflib_device_suspend),
204 	DEVMETHOD(device_resume, iflib_device_resume),
205 	DEVMETHOD_END
206 };
207 
208 static driver_t vmxnet3_driver = {
209 	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
210 };
211 
212 static devclass_t vmxnet3_devclass;
213 DRIVER_MODULE(vmx, pci, vmxnet3_driver, vmxnet3_devclass, 0, 0);
214 IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
215 MODULE_VERSION(vmx, 2);
216 
217 MODULE_DEPEND(vmx, pci, 1, 1, 1);
218 MODULE_DEPEND(vmx, ether, 1, 1, 1);
219 MODULE_DEPEND(vmx, iflib, 1, 1, 1);
220 
221 static device_method_t vmxnet3_iflib_methods[] = {
222 	DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
223 	DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
224 	DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
225 
226 	DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
227 	DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
228 	DEVMETHOD(ifdi_detach, vmxnet3_detach),
229 
230 	DEVMETHOD(ifdi_init, vmxnet3_init),
231 	DEVMETHOD(ifdi_stop, vmxnet3_stop),
232 	DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
233 	DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
234 	DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
235 	DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
236 	DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
237 	DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
238 	DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
239 	DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
240 
241 	DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
242 	DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
243 	DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
244 	DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
245 	DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
246 	DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
247 
248 	DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
249 	DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
250 
251 	DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
252 	DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
253 	DEVMETHOD(ifdi_resume, vmxnet3_resume),
254 
255 	DEVMETHOD_END
256 };
257 
258 static driver_t vmxnet3_iflib_driver = {
259 	"vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
260 };
261 
262 struct if_txrx vmxnet3_txrx = {
263 	.ift_txd_encap = vmxnet3_isc_txd_encap,
264 	.ift_txd_flush = vmxnet3_isc_txd_flush,
265 	.ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
266 	.ift_rxd_available = vmxnet3_isc_rxd_available,
267 	.ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
268 	.ift_rxd_refill = vmxnet3_isc_rxd_refill,
269 	.ift_rxd_flush = vmxnet3_isc_rxd_flush,
270 	.ift_legacy_intr = vmxnet3_legacy_intr
271 };
272 
273 static struct if_shared_ctx vmxnet3_sctx_init = {
274 	.isc_magic = IFLIB_MAGIC,
275 	.isc_q_align = 512,
276 
277 	.isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
278 	.isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
279 	.isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
280 	.isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
281 
282 	/*
283 	 * These values are used to configure the busdma tag used for
284 	 * receive descriptors.  Each receive descriptor only points to one
285 	 * buffer.
286 	 */
287 	.isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
288 	.isc_rx_nsegments = 1,  /* One mapping per descriptor */
289 	.isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
290 
291 	.isc_admin_intrcnt = 1,
292 	.isc_vendor_info = vmxnet3_vendor_info_array,
293 	.isc_driver_version = "2",
294 	.isc_driver = &vmxnet3_iflib_driver,
295 	.isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY,
296 
297 	/*
298 	 * Number of receive queues per receive queue set, with associated
299 	 * descriptor settings for each.
300 	 */
301 	.isc_nrxqs = 3,
302 	.isc_nfl = 2, /* one free list for each receive command queue */
303 	.isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
304 	.isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
305 	.isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
306 
307 	/*
308 	 * Number of transmit queues per transmit queue set, with associated
309 	 * descriptor settings for each.
310 	 */
311 	.isc_ntxqs = 2,
312 	.isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
313 	.isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
314 	.isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
315 };
316 
317 static void *
318 vmxnet3_register(device_t dev)
319 {
320 	return (&vmxnet3_sctx_init);
321 }
322 
323 static int
324 vmxnet3_attach_pre(if_ctx_t ctx)
325 {
326 	device_t dev;
327 	if_softc_ctx_t scctx;
328 	struct vmxnet3_softc *sc;
329 	uint32_t intr_config;
330 	int error;
331 
332 	dev = iflib_get_dev(ctx);
333 	sc = iflib_get_softc(ctx);
334 	sc->vmx_dev = dev;
335 	sc->vmx_ctx = ctx;
336 	sc->vmx_sctx = iflib_get_sctx(ctx);
337 	sc->vmx_scctx = iflib_get_softc_ctx(ctx);
338 	sc->vmx_ifp = iflib_get_ifp(ctx);
339 	sc->vmx_media = iflib_get_media(ctx);
340 	scctx = sc->vmx_scctx;
341 
342 	scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
343 	scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
344 	/* isc_tx_tso_size_max doesn't include possible vlan header */
345 	scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
346 	scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
347 	scctx->isc_txrx = &vmxnet3_txrx;
348 
349 	/* If 0, the iflib tunable was not set, so set to the default */
350 	if (scctx->isc_nrxqsets == 0)
351 		scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
352 	scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
353 
354 	/* If 0, the iflib tunable was not set, so set to the default */
355 	if (scctx->isc_ntxqsets == 0)
356 		scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
357 	scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
358 
359 	/*
360 	 * Enforce that the transmit completion queue descriptor count is
361 	 * the same as the transmit command queue descriptor count.
362 	 */
363 	scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
364 	scctx->isc_txqsizes[0] =
365 	    sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
366 	scctx->isc_txqsizes[1] =
367 	    sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
368 
369 	/*
370 	 * Enforce that the receive completion queue descriptor count is the
371 	 * sum of the receive command queue descriptor counts, and that the
372 	 * second receive command queue descriptor count is the same as the
373 	 * first one.
374 	 */
375 	scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
376 	scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
377 	scctx->isc_rxqsizes[0] =
378 	    sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
379 	scctx->isc_rxqsizes[1] =
380 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
381 	scctx->isc_rxqsizes[2] =
382 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
383 
384 	scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
385 
386 	/* Map PCI BARs */
387 	error = vmxnet3_alloc_resources(sc);
388 	if (error)
389 		goto fail;
390 
391 	/* Check device versions */
392 	error = vmxnet3_check_version(sc);
393 	if (error)
394 		goto fail;
395 
396 	/*
397 	 * The interrupt mode can be set in the hypervisor configuration via
398 	 * the parameter ethernet<N>.intrMode.
399 	 */
400 	intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
401 	sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
402 
403 	/*
404 	 * Configure the softc context to attempt to configure the interrupt
405 	 * mode now indicated by intr_config.  iflib will follow the usual
406 	 * fallback path MSI-X -> MSI -> LEGACY, starting at the configured
407 	 * starting mode.
408 	 */
409 	switch (intr_config & 0x03) {
410 	case VMXNET3_IT_AUTO:
411 	case VMXNET3_IT_MSIX:
412 		scctx->isc_msix_bar = pci_msix_table_bar(dev);
413 		break;
414 	case VMXNET3_IT_MSI:
415 		scctx->isc_msix_bar = -1;
416 		scctx->isc_disable_msix = 1;
417 		break;
418 	case VMXNET3_IT_LEGACY:
419 		scctx->isc_msix_bar = 0;
420 		break;
421 	}
422 
423 	scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
424 	scctx->isc_capabilities = scctx->isc_capenable =
425 	    IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
426 	    IFCAP_TSO4 | IFCAP_TSO6 |
427 	    IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
428 	    IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
429 	    IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
430 	    IFCAP_JUMBO_MTU;
431 
432 	/* These capabilities are not enabled by default. */
433 	scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
434 
435 	vmxnet3_get_lladdr(sc);
436 	iflib_set_mac(ctx, sc->vmx_lladdr);
437 
438 	return (0);
439 fail:
440 	/*
441 	 * We must completely clean up anything allocated above as iflib
442 	 * will not invoke any other driver entry points as a result of this
443 	 * failure.
444 	 */
445 	vmxnet3_free_resources(sc);
446 
447 	return (error);
448 }
449 
450 static int
451 vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
452 {
453 	struct vmxnet3_softc *sc;
454 	if_softc_ctx_t scctx;
455 	struct vmxnet3_rxqueue *rxq;
456 	int error;
457 	int i;
458 	char irq_name[16];
459 
460 	sc = iflib_get_softc(ctx);
461 	scctx = sc->vmx_scctx;
462 
463 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
464 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
465 
466 		rxq = &sc->vmx_rxq[i];
467 		error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
468 		    IFLIB_INTR_RX, vmxnet3_rxq_intr, rxq, i, irq_name);
469 		if (error) {
470 			device_printf(iflib_get_dev(ctx),
471 			    "Failed to register rxq %d interrupt handler\n", i);
472 			return (error);
473 		}
474 	}
475 
476 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
477 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
478 
479 		/*
480 		 * Don't provide the corresponding rxq irq for reference -
481 		 * we want the transmit task to be attached to a task queue
482 		 * that is different from the one used by the corresponding
483 		 * rxq irq.  That is because the TX doorbell writes are very
484 		 * expensive as virtualized MMIO operations, so we want to
485 		 * be able to defer them to another core when possible so
486 		 * that they don't steal receive processing cycles during
487 		 * stack turnarounds like TCP ACK generation.  The other
488 		 * piece to this approach is enabling the iflib abdicate
489 		 * option (currently via an interface-specific
490 		 * tunable/sysctl).
491 		 */
492 		iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
493 		    irq_name);
494 	}
495 
496 	error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
497 	    scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
498 	    "event");
499 	if (error) {
500 		device_printf(iflib_get_dev(ctx),
501 		    "Failed to register event interrupt handler\n");
502 		return (error);
503 	}
504 
505 	return (0);
506 }
507 
508 static void
509 vmxnet3_free_irqs(struct vmxnet3_softc *sc)
510 {
511 	if_softc_ctx_t scctx;
512 	struct vmxnet3_rxqueue *rxq;
513 	int i;
514 
515 	scctx = sc->vmx_scctx;
516 
517 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
518 		rxq = &sc->vmx_rxq[i];
519 		iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
520 	}
521 
522 	iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
523 }
524 
525 static int
526 vmxnet3_attach_post(if_ctx_t ctx)
527 {
528 	device_t dev;
529 	if_softc_ctx_t scctx;
530 	struct vmxnet3_softc *sc;
531 	int error;
532 
533 	dev = iflib_get_dev(ctx);
534 	scctx = iflib_get_softc_ctx(ctx);
535 	sc = iflib_get_softc(ctx);
536 
537 	if (scctx->isc_nrxqsets > 1)
538 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
539 
540 	error = vmxnet3_alloc_data(sc);
541 	if (error)
542 		goto fail;
543 
544 	vmxnet3_set_interrupt_idx(sc);
545 	vmxnet3_setup_sysctl(sc);
546 
547 	ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
548 	ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
549 
550 fail:
551 	return (error);
552 }
553 
554 static int
555 vmxnet3_detach(if_ctx_t ctx)
556 {
557 	struct vmxnet3_softc *sc;
558 
559 	sc = iflib_get_softc(ctx);
560 
561 	vmxnet3_free_irqs(sc);
562 	vmxnet3_free_data(sc);
563 	vmxnet3_free_resources(sc);
564 
565 	return (0);
566 }
567 
568 static int
569 vmxnet3_shutdown(if_ctx_t ctx)
570 {
571 
572 	return (0);
573 }
574 
575 static int
576 vmxnet3_suspend(if_ctx_t ctx)
577 {
578 
579 	return (0);
580 }
581 
582 static int
583 vmxnet3_resume(if_ctx_t ctx)
584 {
585 
586 	return (0);
587 }
588 
589 static int
590 vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
591 {
592 	device_t dev;
593 	int rid;
594 
595 	dev = sc->vmx_dev;
596 
597 	rid = PCIR_BAR(0);
598 	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
599 	    RF_ACTIVE);
600 	if (sc->vmx_res0 == NULL) {
601 		device_printf(dev,
602 		    "could not map BAR0 memory\n");
603 		return (ENXIO);
604 	}
605 
606 	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
607 	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
608 
609 	rid = PCIR_BAR(1);
610 	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
611 	    RF_ACTIVE);
612 	if (sc->vmx_res1 == NULL) {
613 		device_printf(dev,
614 		    "could not map BAR1 memory\n");
615 		return (ENXIO);
616 	}
617 
618 	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
619 	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
620 
621 	return (0);
622 }
623 
624 static void
625 vmxnet3_free_resources(struct vmxnet3_softc *sc)
626 {
627 	device_t dev;
628 
629 	dev = sc->vmx_dev;
630 
631 	if (sc->vmx_res0 != NULL) {
632 		bus_release_resource(dev, SYS_RES_MEMORY,
633 		    rman_get_rid(sc->vmx_res0), sc->vmx_res0);
634 		sc->vmx_res0 = NULL;
635 	}
636 
637 	if (sc->vmx_res1 != NULL) {
638 		bus_release_resource(dev, SYS_RES_MEMORY,
639 		    rman_get_rid(sc->vmx_res1), sc->vmx_res1);
640 		sc->vmx_res1 = NULL;
641 	}
642 }
643 
644 static int
645 vmxnet3_check_version(struct vmxnet3_softc *sc)
646 {
647 	device_t dev;
648 	uint32_t version;
649 
650 	dev = sc->vmx_dev;
651 
652 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
653 	if ((version & 0x01) == 0) {
654 		device_printf(dev, "unsupported hardware version %#x\n",
655 		    version);
656 		return (ENOTSUP);
657 	}
658 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
659 
660 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
661 	if ((version & 0x01) == 0) {
662 		device_printf(dev, "unsupported UPT version %#x\n", version);
663 		return (ENOTSUP);
664 	}
665 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
666 
667 	return (0);
668 }
669 
670 static void
671 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
672 {
673 	if_softc_ctx_t scctx;
674 	struct vmxnet3_txqueue *txq;
675 	struct vmxnet3_txq_shared *txs;
676 	struct vmxnet3_rxqueue *rxq;
677 	struct vmxnet3_rxq_shared *rxs;
678 	int intr_idx;
679 	int i;
680 
681 	scctx = sc->vmx_scctx;
682 
683 	/*
684 	 * There is always one interrupt per receive queue, assigned
685 	 * starting with the first interrupt.  When there is only one
686 	 * interrupt available, the event interrupt shares the receive queue
687 	 * interrupt, otherwise it uses the interrupt following the last
688 	 * receive queue interrupt.  Transmit queues are not assigned
689 	 * interrupts, so they are given indexes beyond the indexes that
690 	 * correspond to the real interrupts.
691 	 */
692 
693 	/* The event interrupt is always the last vector. */
694 	sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
695 
696 	intr_idx = 0;
697 	for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
698 		rxq = &sc->vmx_rxq[i];
699 		rxs = rxq->vxrxq_rs;
700 		rxq->vxrxq_intr_idx = intr_idx;
701 		rxs->intr_idx = rxq->vxrxq_intr_idx;
702 	}
703 
704 	/*
705 	 * Assign the tx queues interrupt indexes above what we are actually
706 	 * using.  These interrupts will never be enabled.
707 	 */
708 	intr_idx = scctx->isc_vectors;
709 	for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
710 		txq = &sc->vmx_txq[i];
711 		txs = txq->vxtxq_ts;
712 		txq->vxtxq_intr_idx = intr_idx;
713 		txs->intr_idx = txq->vxtxq_intr_idx;
714 	}
715 }
716 
717 static int
718 vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
719 {
720 	if_softc_ctx_t scctx;
721 	int size;
722 	int error;
723 
724 	scctx = sc->vmx_scctx;
725 
726 	/*
727 	 * The txq and rxq shared data areas must be allocated contiguously
728 	 * as vmxnet3_driver_shared contains only a single address member
729 	 * for the shared queue data area.
730 	 */
731 	size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
732 	    scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
733 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
734 	if (error) {
735 		device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
736 		return (error);
737 	}
738 
739 	return (0);
740 }
741 
742 static void
743 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
744 {
745 	struct vmxnet3_txqueue *txq;
746 	struct vmxnet3_comp_ring *txc;
747 	struct vmxnet3_txring *txr;
748 	if_softc_ctx_t scctx;
749 
750 	txq = &sc->vmx_txq[q];
751 	txc = &txq->vxtxq_comp_ring;
752 	txr = &txq->vxtxq_cmd_ring;
753 	scctx = sc->vmx_scctx;
754 
755 	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
756 	    device_get_nameunit(sc->vmx_dev), q);
757 
758 	txq->vxtxq_sc = sc;
759 	txq->vxtxq_id = q;
760 	txc->vxcr_ndesc = scctx->isc_ntxd[0];
761 	txr->vxtxr_ndesc = scctx->isc_ntxd[1];
762 }
763 
764 static int
765 vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
766     int ntxqs, int ntxqsets)
767 {
768 	struct vmxnet3_softc *sc;
769 	int q;
770 	int error;
771 	caddr_t kva;
772 
773 	sc = iflib_get_softc(ctx);
774 
775 	/* Allocate the array of transmit queues */
776 	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
777 	    ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
778 	if (sc->vmx_txq == NULL)
779 		return (ENOMEM);
780 
781 	/* Initialize driver state for each transmit queue */
782 	for (q = 0; q < ntxqsets; q++)
783 		vmxnet3_init_txq(sc, q);
784 
785 	/*
786 	 * Allocate queue state that is shared with the device.  This check
787 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
788 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
789 	 * order iflib invokes those routines in.
790 	 */
791 	if (sc->vmx_qs_dma.idi_size == 0) {
792 		error = vmxnet3_queues_shared_alloc(sc);
793 		if (error)
794 			return (error);
795 	}
796 
797 	kva = sc->vmx_qs_dma.idi_vaddr;
798 	for (q = 0; q < ntxqsets; q++) {
799 		sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
800 		kva += sizeof(struct vmxnet3_txq_shared);
801 	}
802 
803 	/* Record descriptor ring vaddrs and paddrs */
804 	for (q = 0; q < ntxqsets; q++) {
805 		struct vmxnet3_txqueue *txq;
806 		struct vmxnet3_txring *txr;
807 		struct vmxnet3_comp_ring *txc;
808 
809 		txq = &sc->vmx_txq[q];
810 		txc = &txq->vxtxq_comp_ring;
811 		txr = &txq->vxtxq_cmd_ring;
812 
813 		/* Completion ring */
814 		txc->vxcr_u.txcd =
815 		    (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
816 		txc->vxcr_paddr = paddrs[q * ntxqs + 0];
817 
818 		/* Command ring */
819 		txr->vxtxr_txd =
820 		    (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
821 		txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
822 	}
823 
824 	return (0);
825 }
826 
827 static void
828 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
829 {
830 	struct vmxnet3_rxqueue *rxq;
831 	struct vmxnet3_comp_ring *rxc;
832 	struct vmxnet3_rxring *rxr;
833 	if_softc_ctx_t scctx;
834 	int i;
835 
836 	rxq = &sc->vmx_rxq[q];
837 	rxc = &rxq->vxrxq_comp_ring;
838 	scctx = sc->vmx_scctx;
839 
840 	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
841 	    device_get_nameunit(sc->vmx_dev), q);
842 
843 	rxq->vxrxq_sc = sc;
844 	rxq->vxrxq_id = q;
845 
846 	/*
847 	 * First rxq is the completion queue, so there are nrxqs - 1 command
848 	 * rings starting at iflib queue id 1.
849 	 */
850 	rxc->vxcr_ndesc = scctx->isc_nrxd[0];
851 	for (i = 0; i < nrxqs - 1; i++) {
852 		rxr = &rxq->vxrxq_cmd_ring[i];
853 		rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
854 	}
855 }
856 
857 static int
858 vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
859     int nrxqs, int nrxqsets)
860 {
861 	struct vmxnet3_softc *sc;
862 	if_softc_ctx_t scctx;
863 	int q;
864 	int i;
865 	int error;
866 	caddr_t kva;
867 
868 	sc = iflib_get_softc(ctx);
869 	scctx = sc->vmx_scctx;
870 
871 	/* Allocate the array of receive queues */
872 	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
873 	    nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
874 	if (sc->vmx_rxq == NULL)
875 		return (ENOMEM);
876 
877 	/* Initialize driver state for each receive queue */
878 	for (q = 0; q < nrxqsets; q++)
879 		vmxnet3_init_rxq(sc, q, nrxqs);
880 
881 	/*
882 	 * Allocate queue state that is shared with the device.  This check
883 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
884 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
885 	 * order iflib invokes those routines in.
886 	 */
887 	if (sc->vmx_qs_dma.idi_size == 0) {
888 		error = vmxnet3_queues_shared_alloc(sc);
889 		if (error)
890 			return (error);
891 	}
892 
893 	kva = sc->vmx_qs_dma.idi_vaddr +
894 	    scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
895 	for (q = 0; q < nrxqsets; q++) {
896 		sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
897 		kva += sizeof(struct vmxnet3_rxq_shared);
898 	}
899 
900 	/* Record descriptor ring vaddrs and paddrs */
901 	for (q = 0; q < nrxqsets; q++) {
902 		struct vmxnet3_rxqueue *rxq;
903 		struct vmxnet3_rxring *rxr;
904 		struct vmxnet3_comp_ring *rxc;
905 
906 		rxq = &sc->vmx_rxq[q];
907 		rxc = &rxq->vxrxq_comp_ring;
908 
909 		/* Completion ring */
910 		rxc->vxcr_u.rxcd =
911 		    (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
912 		rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
913 
914 		/* Command ring(s) */
915 		for (i = 0; i < nrxqs - 1; i++) {
916 			rxr = &rxq->vxrxq_cmd_ring[i];
917 
918 			rxr->vxrxr_rxd =
919 			    (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
920 			rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
921 		}
922 	}
923 
924 	return (0);
925 }
926 
927 static void
928 vmxnet3_queues_free(if_ctx_t ctx)
929 {
930 	struct vmxnet3_softc *sc;
931 
932 	sc = iflib_get_softc(ctx);
933 
934 	/* Free queue state area that is shared with the device */
935 	if (sc->vmx_qs_dma.idi_size != 0) {
936 		iflib_dma_free(&sc->vmx_qs_dma);
937 		sc->vmx_qs_dma.idi_size = 0;
938 	}
939 
940 	/* Free array of receive queues */
941 	if (sc->vmx_rxq != NULL) {
942 		free(sc->vmx_rxq, M_DEVBUF);
943 		sc->vmx_rxq = NULL;
944 	}
945 
946 	/* Free array of transmit queues */
947 	if (sc->vmx_txq != NULL) {
948 		free(sc->vmx_txq, M_DEVBUF);
949 		sc->vmx_txq = NULL;
950 	}
951 }
952 
953 static int
954 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
955 {
956 	device_t dev;
957 	size_t size;
958 	int error;
959 
960 	dev = sc->vmx_dev;
961 
962 	/* Top level state structure shared with the device */
963 	size = sizeof(struct vmxnet3_driver_shared);
964 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
965 	if (error) {
966 		device_printf(dev, "cannot alloc shared memory\n");
967 		return (error);
968 	}
969 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
970 
971 	/* RSS table state shared with the device */
972 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
973 		size = sizeof(struct vmxnet3_rss_shared);
974 		error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
975 		    &sc->vmx_rss_dma, 0);
976 		if (error) {
977 			device_printf(dev, "cannot alloc rss shared memory\n");
978 			return (error);
979 		}
980 		sc->vmx_rss =
981 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
982 	}
983 
984 	return (0);
985 }
986 
987 static void
988 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
989 {
990 
991 	/* Free RSS table state shared with the device */
992 	if (sc->vmx_rss != NULL) {
993 		iflib_dma_free(&sc->vmx_rss_dma);
994 		sc->vmx_rss = NULL;
995 	}
996 
997 	/* Free top level state structure shared with the device */
998 	if (sc->vmx_ds != NULL) {
999 		iflib_dma_free(&sc->vmx_ds_dma);
1000 		sc->vmx_ds = NULL;
1001 	}
1002 }
1003 
1004 static int
1005 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1006 {
1007 	int error;
1008 
1009 	/* Multicast table state shared with the device */
1010 	error = iflib_dma_alloc_align(sc->vmx_ctx,
1011 	    VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
1012 	if (error)
1013 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1014 	else
1015 		sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
1016 
1017 	return (error);
1018 }
1019 
1020 static void
1021 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1022 {
1023 
1024 	/* Free multicast table state shared with the device */
1025 	if (sc->vmx_mcast != NULL) {
1026 		iflib_dma_free(&sc->vmx_mcast_dma);
1027 		sc->vmx_mcast = NULL;
1028 	}
1029 }
1030 
1031 static void
1032 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1033 {
1034 	struct vmxnet3_driver_shared *ds;
1035 	if_shared_ctx_t sctx;
1036 	if_softc_ctx_t scctx;
1037 	struct vmxnet3_txqueue *txq;
1038 	struct vmxnet3_txq_shared *txs;
1039 	struct vmxnet3_rxqueue *rxq;
1040 	struct vmxnet3_rxq_shared *rxs;
1041 	int i;
1042 
1043 	ds = sc->vmx_ds;
1044 	sctx = sc->vmx_sctx;
1045 	scctx = sc->vmx_scctx;
1046 
1047 	/*
1048 	 * Initialize fields of the shared data that remains the same across
1049 	 * reinits. Note the shared data is zero'd when allocated.
1050 	 */
1051 
1052 	ds->magic = VMXNET3_REV1_MAGIC;
1053 
1054 	/* DriverInfo */
1055 	ds->version = VMXNET3_DRIVER_VERSION;
1056 	ds->guest = VMXNET3_GOS_FREEBSD |
1057 #ifdef __LP64__
1058 	    VMXNET3_GOS_64BIT;
1059 #else
1060 	    VMXNET3_GOS_32BIT;
1061 #endif
1062 	ds->vmxnet3_revision = 1;
1063 	ds->upt_version = 1;
1064 
1065 	/* Misc. conf */
1066 	ds->driver_data = vtophys(sc);
1067 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1068 	ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
1069 	ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
1070 	ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
1071 
1072 	/* RSS conf */
1073 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1074 		ds->rss.version = 1;
1075 		ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
1076 		ds->rss.len = sc->vmx_rss_dma.idi_size;
1077 	}
1078 
1079 	/* Interrupt control. */
1080 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1081 	/*
1082 	 * Total number of interrupt indexes we are using in the shared
1083 	 * config data, even though we don't actually allocate interrupt
1084 	 * resources for the tx queues.  Some versions of the device will
1085 	 * fail to initialize successfully if interrupt indexes are used in
1086 	 * the shared config that exceed the number of interrupts configured
1087 	 * here.
1088 	 */
1089 	ds->nintr = (scctx->isc_vectors == 1) ?
1090 	    2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
1091 	ds->evintr = sc->vmx_event_intr_idx;
1092 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1093 
1094 	for (i = 0; i < ds->nintr; i++)
1095 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1096 
1097 	/* Receive filter. */
1098 	ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
1099 	ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
1100 
1101 	/* Tx queues */
1102 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
1103 		txq = &sc->vmx_txq[i];
1104 		txs = txq->vxtxq_ts;
1105 
1106 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
1107 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1108 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
1109 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1110 		txs->driver_data = vtophys(txq);
1111 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1112 	}
1113 
1114 	/* Rx queues */
1115 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
1116 		rxq = &sc->vmx_rxq[i];
1117 		rxs = rxq->vxrxq_rs;
1118 
1119 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
1120 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1121 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
1122 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1123 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
1124 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1125 		rxs->driver_data = vtophys(rxq);
1126 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1127 	}
1128 }
1129 
1130 static void
1131 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1132 {
1133 	/*
1134 	 * Use the same key as the Linux driver until FreeBSD can do
1135 	 * RSS (presumably Toeplitz) in software.
1136 	 */
1137 	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1138 	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1139 	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1140 	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1141 	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1142 	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1143 	};
1144 
1145 	struct vmxnet3_driver_shared *ds;
1146 	if_softc_ctx_t scctx;
1147 	struct vmxnet3_rss_shared *rss;
1148 #ifdef RSS
1149 	uint8_t rss_algo;
1150 #endif
1151 	int i;
1152 
1153 	ds = sc->vmx_ds;
1154 	scctx = sc->vmx_scctx;
1155 	rss = sc->vmx_rss;
1156 
1157 	rss->hash_type =
1158 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1159 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1160 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1161 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1162 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1163 #ifdef RSS
1164 	/*
1165 	 * If the software RSS is configured to anything else other than
1166 	 * Toeplitz, then just do Toeplitz in "hardware" for the sake of
1167 	 * the packet distribution, but report the hash as opaque to
1168 	 * disengage from the software RSS.
1169 	 */
1170 	rss_algo = rss_gethashalgo();
1171 	if (rss_algo == RSS_HASH_TOEPLITZ) {
1172 		rss_getkey(rss->hash_key);
1173 		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) {
1174 			rss->ind_table[i] = rss_get_indirection_to_bucket(i) %
1175 			    scctx->isc_nrxqsets;
1176 		}
1177 		sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS;
1178 	} else
1179 #endif
1180 	{
1181 		memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1182 		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1183 			rss->ind_table[i] = i % scctx->isc_nrxqsets;
1184 		sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS;
1185 	}
1186 }
1187 
1188 static void
1189 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1190 {
1191 	struct ifnet *ifp;
1192 	struct vmxnet3_driver_shared *ds;
1193 	if_softc_ctx_t scctx;
1194 
1195 	ifp = sc->vmx_ifp;
1196 	ds = sc->vmx_ds;
1197 	scctx = sc->vmx_scctx;
1198 
1199 	ds->mtu = ifp->if_mtu;
1200 	ds->ntxqueue = scctx->isc_ntxqsets;
1201 	ds->nrxqueue = scctx->isc_nrxqsets;
1202 
1203 	ds->upt_features = 0;
1204 	if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1205 		ds->upt_features |= UPT1_F_CSUM;
1206 	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING)
1207 		ds->upt_features |= UPT1_F_VLAN;
1208 	if (ifp->if_capenable & IFCAP_LRO)
1209 		ds->upt_features |= UPT1_F_LRO;
1210 
1211 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1212 		ds->upt_features |= UPT1_F_RSS;
1213 		vmxnet3_reinit_rss_shared_data(sc);
1214 	}
1215 
1216 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
1217 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1218 	    (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
1219 }
1220 
/*
 * Allocate the driver-shared data and the multicast table, then initialize
 * the shared data.  Stops at the first failing step and returns its error;
 * returns 0 when every step succeeded.
 */
static int
vmxnet3_alloc_data(struct vmxnet3_softc *sc)
{
	int rc;

	rc = vmxnet3_alloc_shared_data(sc);
	if (rc == 0)
		rc = vmxnet3_alloc_mcast_table(sc);
	if (rc == 0)
		vmxnet3_init_shared_data(sc);

	return (rc);
}
1238 
/*
 * Counterpart of vmxnet3_alloc_data(): release the multicast table first,
 * then the driver-shared data.
 */
static void
vmxnet3_free_data(struct vmxnet3_softc *sc)
{

	vmxnet3_free_mcast_table(sc);
	vmxnet3_free_shared_data(sc);
}
1246 
1247 static void
1248 vmxnet3_evintr(struct vmxnet3_softc *sc)
1249 {
1250 	device_t dev;
1251 	struct vmxnet3_txq_shared *ts;
1252 	struct vmxnet3_rxq_shared *rs;
1253 	uint32_t event;
1254 
1255 	dev = sc->vmx_dev;
1256 
1257 	/* Clear events. */
1258 	event = sc->vmx_ds->event;
1259 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1260 
1261 	if (event & VMXNET3_EVENT_LINK)
1262 		vmxnet3_link_status(sc);
1263 
1264 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1265 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1266 		ts = sc->vmx_txq[0].vxtxq_ts;
1267 		if (ts->stopped != 0)
1268 			device_printf(dev, "Tx queue error %#x\n", ts->error);
1269 		rs = sc->vmx_rxq[0].vxrxq_rs;
1270 		if (rs->stopped != 0)
1271 			device_printf(dev, "Rx queue error %#x\n", rs->error);
1272 
1273 		/* XXX - rely on liflib watchdog to reset us? */
1274 		device_printf(dev, "Rx/Tx queue error event ... "
1275 		    "waiting for iflib watchdog reset\n");
1276 	}
1277 
1278 	if (event & VMXNET3_EVENT_DIC)
1279 		device_printf(dev, "device implementation change event\n");
1280 	if (event & VMXNET3_EVENT_DEBUG)
1281 		device_printf(dev, "debug event\n");
1282 }
1283 
1284 static int
1285 vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
1286 {
1287 	struct vmxnet3_softc *sc;
1288 	struct vmxnet3_txqueue *txq;
1289 	struct vmxnet3_txring *txr;
1290 	struct vmxnet3_txdesc *txd, *sop;
1291 	bus_dma_segment_t *segs;
1292 	int nsegs;
1293 	int pidx;
1294 	int hdrlen;
1295 	int i;
1296 	int gen;
1297 
1298 	sc = vsc;
1299 	txq = &sc->vmx_txq[pi->ipi_qsidx];
1300 	txr = &txq->vxtxq_cmd_ring;
1301 	segs = pi->ipi_segs;
1302 	nsegs = pi->ipi_nsegs;
1303 	pidx = pi->ipi_pidx;
1304 
1305 	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
1306 	    ("%s: packet with too many segments %d", __func__, nsegs));
1307 
1308 	sop = &txr->vxtxr_txd[pidx];
1309 	gen = txr->vxtxr_gen ^ 1;	/* Owned by cpu (yet) */
1310 
1311 	for (i = 0; i < nsegs; i++) {
1312 		txd = &txr->vxtxr_txd[pidx];
1313 
1314 		txd->addr = segs[i].ds_addr;
1315 		txd->len = segs[i].ds_len;
1316 		txd->gen = gen;
1317 		txd->dtype = 0;
1318 		txd->offload_mode = VMXNET3_OM_NONE;
1319 		txd->offload_pos = 0;
1320 		txd->hlen = 0;
1321 		txd->eop = 0;
1322 		txd->compreq = 0;
1323 		txd->vtag_mode = 0;
1324 		txd->vtag = 0;
1325 
1326 		if (++pidx == txr->vxtxr_ndesc) {
1327 			pidx = 0;
1328 			txr->vxtxr_gen ^= 1;
1329 		}
1330 		gen = txr->vxtxr_gen;
1331 	}
1332 	txd->eop = 1;
1333 	txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
1334 	pi->ipi_new_pidx = pidx;
1335 
1336 	/*
1337 	 * VLAN
1338 	 */
1339 	if (pi->ipi_mflags & M_VLANTAG) {
1340 		sop->vtag_mode = 1;
1341 		sop->vtag = pi->ipi_vtag;
1342 	}
1343 
1344 	/*
1345 	 * TSO and checksum offloads
1346 	 */
1347 	hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
1348 	if (pi->ipi_csum_flags & CSUM_TSO) {
1349 		sop->offload_mode = VMXNET3_OM_TSO;
1350 		sop->hlen = hdrlen + pi->ipi_tcp_hlen;
1351 		sop->offload_pos = pi->ipi_tso_segsz;
1352 	} else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
1353 	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
1354 		sop->offload_mode = VMXNET3_OM_CSUM;
1355 		sop->hlen = hdrlen;
1356 		sop->offload_pos = hdrlen +
1357 		    ((pi->ipi_ipproto == IPPROTO_TCP) ?
1358 			offsetof(struct tcphdr, th_sum) :
1359 			offsetof(struct udphdr, uh_sum));
1360 	}
1361 
1362 	/* Finally, change the ownership. */
1363 	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
1364 	sop->gen ^= 1;
1365 
1366 	return (0);
1367 }
1368 
1369 static void
1370 vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
1371 {
1372 	struct vmxnet3_softc *sc;
1373 	struct vmxnet3_txqueue *txq;
1374 
1375 	sc = vsc;
1376 	txq = &sc->vmx_txq[txqid];
1377 
1378 	/*
1379 	 * pidx is what we last set ipi_new_pidx to in
1380 	 * vmxnet3_isc_txd_encap()
1381 	 */
1382 
1383 	/*
1384 	 * Avoid expensive register updates if the flush request is
1385 	 * redundant.
1386 	 */
1387 	if (txq->vxtxq_last_flush == pidx)
1388 		return;
1389 	txq->vxtxq_last_flush = pidx;
1390 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
1391 }
1392 
1393 static int
1394 vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
1395 {
1396 	struct vmxnet3_softc *sc;
1397 	struct vmxnet3_txqueue *txq;
1398 	struct vmxnet3_comp_ring *txc;
1399 	struct vmxnet3_txcompdesc *txcd;
1400 	struct vmxnet3_txring *txr;
1401 	int processed;
1402 
1403 	sc = vsc;
1404 	txq = &sc->vmx_txq[txqid];
1405 	txc = &txq->vxtxq_comp_ring;
1406 	txr = &txq->vxtxq_cmd_ring;
1407 
1408 	/*
1409 	 * If clear is true, we need to report the number of TX command ring
1410 	 * descriptors that have been processed by the device.  If clear is
1411 	 * false, we just need to report whether or not at least one TX
1412 	 * command ring descriptor has been processed by the device.
1413 	 */
1414 	processed = 0;
1415 	for (;;) {
1416 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1417 		if (txcd->gen != txc->vxcr_gen)
1418 			break;
1419 		else if (!clear)
1420 			return (1);
1421 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1422 
1423 		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1424 			txc->vxcr_next = 0;
1425 			txc->vxcr_gen ^= 1;
1426 		}
1427 
1428 		if (txcd->eop_idx < txr->vxtxr_next)
1429 			processed += txr->vxtxr_ndesc -
1430 			    (txr->vxtxr_next - txcd->eop_idx) + 1;
1431 		else
1432 			processed += txcd->eop_idx - txr->vxtxr_next + 1;
1433 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1434 	}
1435 
1436 	return (processed);
1437 }
1438 
1439 static int
1440 vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
1441 {
1442 	struct vmxnet3_softc *sc;
1443 	struct vmxnet3_rxqueue *rxq;
1444 	struct vmxnet3_comp_ring *rxc;
1445 	struct vmxnet3_rxcompdesc *rxcd;
1446 	int avail;
1447 	int completed_gen;
1448 #ifdef INVARIANTS
1449 	int expect_sop = 1;
1450 #endif
1451 	sc = vsc;
1452 	rxq = &sc->vmx_rxq[rxqid];
1453 	rxc = &rxq->vxrxq_comp_ring;
1454 
1455 	avail = 0;
1456 	completed_gen = rxc->vxcr_gen;
1457 	for (;;) {
1458 		rxcd = &rxc->vxcr_u.rxcd[idx];
1459 		if (rxcd->gen != completed_gen)
1460 			break;
1461 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1462 
1463 #ifdef INVARIANTS
1464 		if (expect_sop)
1465 			KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1466 		else
1467 			KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
1468 		expect_sop = rxcd->eop;
1469 #endif
1470 		if (rxcd->eop && (rxcd->len != 0))
1471 			avail++;
1472 		if (avail > budget)
1473 			break;
1474 		if (++idx == rxc->vxcr_ndesc) {
1475 			idx = 0;
1476 			completed_gen ^= 1;
1477 		}
1478 	}
1479 
1480 	return (avail);
1481 }
1482 
1483 static int
1484 vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
1485 {
1486 	struct vmxnet3_softc *sc;
1487 	if_softc_ctx_t scctx;
1488 	struct vmxnet3_rxqueue *rxq;
1489 	struct vmxnet3_comp_ring *rxc;
1490 	struct vmxnet3_rxcompdesc *rxcd;
1491 	struct vmxnet3_rxring *rxr;
1492 	struct vmxnet3_rxdesc *rxd;
1493 	if_rxd_frag_t frag;
1494 	int cqidx;
1495 	uint16_t total_len;
1496 	uint8_t nfrags;
1497 	uint8_t flid;
1498 
1499 	sc = vsc;
1500 	scctx = sc->vmx_scctx;
1501 	rxq = &sc->vmx_rxq[ri->iri_qsidx];
1502 	rxc = &rxq->vxrxq_comp_ring;
1503 
1504 	/*
1505 	 * Get a single packet starting at the given index in the completion
1506 	 * queue.  That we have been called indicates that
1507 	 * vmxnet3_isc_rxd_available() has already verified that either
1508 	 * there is a complete packet available starting at the given index,
1509 	 * or there are one or more zero length packets starting at the
1510 	 * given index followed by a complete packet, so no verification of
1511 	 * ownership of the descriptors (and no associated read barrier) is
1512 	 * required here.
1513 	 */
1514 	cqidx = ri->iri_cidx;
1515 	rxcd = &rxc->vxcr_u.rxcd[cqidx];
1516 	while (rxcd->len == 0) {
1517 		KASSERT(rxcd->sop && rxcd->eop,
1518 		    ("%s: zero-length packet without both sop and eop set",
1519 			__func__));
1520 		if (++cqidx == rxc->vxcr_ndesc) {
1521 			cqidx = 0;
1522 			rxc->vxcr_gen ^= 1;
1523 		}
1524 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1525 	}
1526 	KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1527 
1528 	/*
1529 	 * RSS and flow ID.
1530 	 * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should
1531 	 * be used only if the software RSS is enabled and it uses the same
1532 	 * algorithm and the hash key as the "hardware".  If the software RSS
1533 	 * is not enabled, then it's simply pointless to use those types.
1534 	 * If it's enabled but with different parameters, then hash values will
1535 	 * not match.
1536 	 */
1537 	ri->iri_flowid = rxcd->rss_hash;
1538 #ifdef RSS
1539 	if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) {
1540 		switch (rxcd->rss_type) {
1541 		case VMXNET3_RCD_RSS_TYPE_NONE:
1542 			ri->iri_flowid = ri->iri_qsidx;
1543 			ri->iri_rsstype = M_HASHTYPE_NONE;
1544 			break;
1545 		case VMXNET3_RCD_RSS_TYPE_IPV4:
1546 			ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
1547 			break;
1548 		case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
1549 			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
1550 			break;
1551 		case VMXNET3_RCD_RSS_TYPE_IPV6:
1552 			ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
1553 			break;
1554 		case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
1555 			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
1556 			break;
1557 		default:
1558 			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1559 			break;
1560 		}
1561 	} else
1562 #endif
1563 	{
1564 		switch (rxcd->rss_type) {
1565 		case VMXNET3_RCD_RSS_TYPE_NONE:
1566 			ri->iri_flowid = ri->iri_qsidx;
1567 			ri->iri_rsstype = M_HASHTYPE_NONE;
1568 			break;
1569 		default:
1570 			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1571 			break;
1572 		}
1573 	}
1574 
1575 	/* VLAN */
1576 	if (rxcd->vlan) {
1577 		ri->iri_flags |= M_VLANTAG;
1578 		ri->iri_vtag = rxcd->vtag;
1579 	}
1580 
1581 	/* Checksum offload */
1582 	if (!rxcd->no_csum) {
1583 		uint32_t csum_flags = 0;
1584 
1585 		if (rxcd->ipv4) {
1586 			csum_flags |= CSUM_IP_CHECKED;
1587 			if (rxcd->ipcsum_ok)
1588 				csum_flags |= CSUM_IP_VALID;
1589 		}
1590 		if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
1591 			csum_flags |= CSUM_L4_CALC;
1592 			if (rxcd->csum_ok) {
1593 				csum_flags |= CSUM_L4_VALID;
1594 				ri->iri_csum_data = 0xffff;
1595 			}
1596 		}
1597 		ri->iri_csum_flags = csum_flags;
1598 	}
1599 
1600 	/*
1601 	 * The queue numbering scheme used for rxcd->qid is as follows:
1602 	 *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
1603 	 *  - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
1604 	 *
1605 	 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
1606 	 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
1607 	 * indicates command ring (and flid) 1.
1608 	 */
1609 	nfrags = 0;
1610 	total_len = 0;
1611 	do {
1612 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1613 		KASSERT(rxcd->gen == rxc->vxcr_gen,
1614 		    ("%s: generation mismatch", __func__));
1615 		flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
1616 		rxr = &rxq->vxrxq_cmd_ring[flid];
1617 		rxd = &rxr->vxrxr_rxd[rxcd->rxd_idx];
1618 
1619 		frag = &ri->iri_frags[nfrags];
1620 		frag->irf_flid = flid;
1621 		frag->irf_idx = rxcd->rxd_idx;
1622 		frag->irf_len = rxcd->len;
1623 		total_len += rxcd->len;
1624 		nfrags++;
1625 		if (++cqidx == rxc->vxcr_ndesc) {
1626 			cqidx = 0;
1627 			rxc->vxcr_gen ^= 1;
1628 		}
1629 	} while (!rxcd->eop);
1630 
1631 	ri->iri_cidx = cqidx;
1632 	ri->iri_nfrags = nfrags;
1633 	ri->iri_len = total_len;
1634 
1635 	return (0);
1636 }
1637 
1638 static void
1639 vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
1640 {
1641 	struct vmxnet3_softc *sc;
1642 	struct vmxnet3_rxqueue *rxq;
1643 	struct vmxnet3_rxring *rxr;
1644 	struct vmxnet3_rxdesc *rxd;
1645 	uint64_t *paddrs;
1646 	int count;
1647 	int len;
1648 	int pidx;
1649 	int i;
1650 	uint8_t flid;
1651 	uint8_t btype;
1652 
1653 	count = iru->iru_count;
1654 	len = iru->iru_buf_size;
1655 	pidx = iru->iru_pidx;
1656 	flid = iru->iru_flidx;
1657 	paddrs = iru->iru_paddrs;
1658 
1659 	sc = vsc;
1660 	rxq = &sc->vmx_rxq[iru->iru_qsidx];
1661 	rxr = &rxq->vxrxq_cmd_ring[flid];
1662 	rxd = rxr->vxrxr_rxd;
1663 
1664 	/*
1665 	 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
1666 	 * command ring 1 is filled with BTYPE_BODY descriptors.
1667 	 */
1668 	btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
1669 	for (i = 0; i < count; i++) {
1670 		rxd[pidx].addr = paddrs[i];
1671 		rxd[pidx].len = len;
1672 		rxd[pidx].btype = btype;
1673 		rxd[pidx].gen = rxr->vxrxr_gen;
1674 
1675 		if (++pidx == rxr->vxrxr_ndesc) {
1676 			pidx = 0;
1677 			rxr->vxrxr_gen ^= 1;
1678 		}
1679 	}
1680 }
1681 
1682 static void
1683 vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
1684 {
1685 	struct vmxnet3_softc *sc;
1686 	struct vmxnet3_rxqueue *rxq;
1687 	struct vmxnet3_rxring *rxr;
1688 	bus_size_t r;
1689 
1690 	sc = vsc;
1691 	rxq = &sc->vmx_rxq[rxqid];
1692 	rxr = &rxq->vxrxq_cmd_ring[flid];
1693 
1694 	if (flid == 0)
1695 		r = VMXNET3_BAR0_RXH1(rxqid);
1696 	else
1697 		r = VMXNET3_BAR0_RXH2(rxqid);
1698 
1699 	/*
1700 	 * pidx is the index of the last descriptor with a buffer the device
1701 	 * can use, and the device needs to be told which index is one past
1702 	 * that.
1703 	 */
1704 	if (++pidx == rxr->vxrxr_ndesc)
1705 		pidx = 0;
1706 	vmxnet3_write_bar0(sc, r, pidx);
1707 }
1708 
1709 static int
1710 vmxnet3_legacy_intr(void *xsc)
1711 {
1712 	struct vmxnet3_softc *sc;
1713 	if_softc_ctx_t scctx;
1714 	if_ctx_t ctx;
1715 
1716 	sc = xsc;
1717 	scctx = sc->vmx_scctx;
1718 	ctx = sc->vmx_ctx;
1719 
1720 	/*
1721 	 * When there is only a single interrupt configured, this routine
1722 	 * runs in fast interrupt context, following which the rxq 0 task
1723 	 * will be enqueued.
1724 	 */
1725 	if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
1726 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
1727 			return (FILTER_HANDLED);
1728 	}
1729 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1730 		vmxnet3_intr_disable_all(ctx);
1731 
1732 	if (sc->vmx_ds->event != 0)
1733 		iflib_admin_intr_deferred(ctx);
1734 
1735 	/*
1736 	 * XXX - When there is both rxq and event activity, do we care
1737 	 * whether the rxq 0 task or the admin task re-enables the interrupt
1738 	 * first?
1739 	 */
1740 	return (FILTER_SCHEDULE_THREAD);
1741 }
1742 
1743 static int
1744 vmxnet3_rxq_intr(void *vrxq)
1745 {
1746 	struct vmxnet3_softc *sc;
1747 	struct vmxnet3_rxqueue *rxq;
1748 
1749 	rxq = vrxq;
1750 	sc = rxq->vxrxq_sc;
1751 
1752 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1753 		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
1754 
1755 	return (FILTER_SCHEDULE_THREAD);
1756 }
1757 
1758 static int
1759 vmxnet3_event_intr(void *vsc)
1760 {
1761 	struct vmxnet3_softc *sc;
1762 
1763 	sc = vsc;
1764 
1765 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1766 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
1767 
1768 	/*
1769 	 * The work will be done via vmxnet3_update_admin_status(), and the
1770 	 * interrupt will be re-enabled in vmxnet3_link_intr_enable().
1771 	 *
1772 	 * The interrupt will be re-enabled by vmxnet3_link_intr_enable().
1773 	 */
1774 	return (FILTER_SCHEDULE_THREAD);
1775 }
1776 
1777 static void
1778 vmxnet3_stop(if_ctx_t ctx)
1779 {
1780 	struct vmxnet3_softc *sc;
1781 
1782 	sc = iflib_get_softc(ctx);
1783 
1784 	sc->vmx_link_active = 0;
1785 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
1786 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
1787 }
1788 
1789 static void
1790 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
1791 {
1792 	struct vmxnet3_txring *txr;
1793 	struct vmxnet3_comp_ring *txc;
1794 
1795 	txq->vxtxq_last_flush = -1;
1796 
1797 	txr = &txq->vxtxq_cmd_ring;
1798 	txr->vxtxr_next = 0;
1799 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
1800 	/*
1801 	 * iflib has zeroed out the descriptor array during the prior attach
1802 	 * or stop
1803 	 */
1804 
1805 	txc = &txq->vxtxq_comp_ring;
1806 	txc->vxcr_next = 0;
1807 	txc->vxcr_gen = VMXNET3_INIT_GEN;
1808 	/*
1809 	 * iflib has zeroed out the descriptor array during the prior attach
1810 	 * or stop
1811 	 */
1812 }
1813 
1814 static void
1815 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
1816 {
1817 	struct vmxnet3_rxring *rxr;
1818 	struct vmxnet3_comp_ring *rxc;
1819 	int i;
1820 
1821 	/*
1822 	 * The descriptors will be populated with buffers during a
1823 	 * subsequent invocation of vmxnet3_isc_rxd_refill()
1824 	 */
1825 	for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
1826 		rxr = &rxq->vxrxq_cmd_ring[i];
1827 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
1828 		/*
1829 		 * iflib has zeroed out the descriptor array during the
1830 		 * prior attach or stop
1831 		 */
1832 	}
1833 
1834 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
1835 		rxr = &rxq->vxrxq_cmd_ring[i];
1836 		rxr->vxrxr_gen = 0;
1837 		bzero(rxr->vxrxr_rxd,
1838 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
1839 	}
1840 
1841 	rxc = &rxq->vxrxq_comp_ring;
1842 	rxc->vxcr_next = 0;
1843 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
1844 	/*
1845 	 * iflib has zeroed out the descriptor array during the prior attach
1846 	 * or stop
1847 	 */
1848 }
1849 
1850 static void
1851 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
1852 {
1853 	if_softc_ctx_t scctx;
1854 	int q;
1855 
1856 	scctx = sc->vmx_scctx;
1857 
1858 	for (q = 0; q < scctx->isc_ntxqsets; q++)
1859 		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
1860 
1861 	for (q = 0; q < scctx->isc_nrxqsets; q++)
1862 		vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
1863 }
1864 
1865 static int
1866 vmxnet3_enable_device(struct vmxnet3_softc *sc)
1867 {
1868 	if_softc_ctx_t scctx;
1869 	int q;
1870 
1871 	scctx = sc->vmx_scctx;
1872 
1873 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
1874 		device_printf(sc->vmx_dev, "device enable command failed!\n");
1875 		return (1);
1876 	}
1877 
1878 	/* Reset the Rx queue heads. */
1879 	for (q = 0; q < scctx->isc_nrxqsets; q++) {
1880 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
1881 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
1882 	}
1883 
1884 	return (0);
1885 }
1886 
1887 static void
1888 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
1889 {
1890 	struct ifnet *ifp;
1891 
1892 	ifp = sc->vmx_ifp;
1893 
1894 	vmxnet3_set_rxfilter(sc, if_getflags(ifp));
1895 
1896 	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
1897 		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
1898 		    sizeof(sc->vmx_ds->vlan_filter));
1899 	else
1900 		bzero(sc->vmx_ds->vlan_filter,
1901 		    sizeof(sc->vmx_ds->vlan_filter));
1902 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
1903 }
1904 
1905 static void
1906 vmxnet3_init(if_ctx_t ctx)
1907 {
1908 	struct vmxnet3_softc *sc;
1909 	if_softc_ctx_t scctx;
1910 
1911 	sc = iflib_get_softc(ctx);
1912 	scctx = sc->vmx_scctx;
1913 
1914 	scctx->isc_max_frame_size = if_getmtu(iflib_get_ifp(ctx)) +
1915 	    ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
1916 
1917 	/* Use the current MAC address. */
1918 	bcopy(IF_LLADDR(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1919 	vmxnet3_set_lladdr(sc);
1920 
1921 	vmxnet3_reinit_shared_data(sc);
1922 	vmxnet3_reinit_queues(sc);
1923 
1924 	vmxnet3_enable_device(sc);
1925 
1926 	vmxnet3_reinit_rxfilters(sc);
1927 	vmxnet3_link_status(sc);
1928 }
1929 
1930 static void
1931 vmxnet3_multi_set(if_ctx_t ctx)
1932 {
1933 
1934 	vmxnet3_set_rxfilter(iflib_get_softc(ctx),
1935 	    if_getflags(iflib_get_ifp(ctx)));
1936 }
1937 
1938 static int
1939 vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
1940 {
1941 
1942 	if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
1943 		ETHER_CRC_LEN))
1944 		return (EINVAL);
1945 
1946 	return (0);
1947 }
1948 
1949 static void
1950 vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq * ifmr)
1951 {
1952 	struct vmxnet3_softc *sc;
1953 
1954 	sc = iflib_get_softc(ctx);
1955 
1956 	ifmr->ifm_status = IFM_AVALID;
1957 	ifmr->ifm_active = IFM_ETHER;
1958 
1959 	if (vmxnet3_link_is_up(sc) != 0) {
1960 		ifmr->ifm_status |= IFM_ACTIVE;
1961 		ifmr->ifm_active |= IFM_AUTO;
1962 	} else
1963 		ifmr->ifm_active |= IFM_NONE;
1964 }
1965 
1966 static int
1967 vmxnet3_media_change(if_ctx_t ctx)
1968 {
1969 
1970 	/* Ignore. */
1971 	return (0);
1972 }
1973 
1974 static int
1975 vmxnet3_promisc_set(if_ctx_t ctx, int flags)
1976 {
1977 
1978 	vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
1979 
1980 	return (0);
1981 }
1982 
1983 static uint64_t
1984 vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
1985 {
1986 	if_t ifp = iflib_get_ifp(ctx);
1987 
1988 	if (cnt < IFCOUNTERS)
1989 		return if_get_counter_default(ifp, cnt);
1990 
1991 	return (0);
1992 }
1993 
1994 static void
1995 vmxnet3_update_admin_status(if_ctx_t ctx)
1996 {
1997 	struct vmxnet3_softc *sc;
1998 
1999 	sc = iflib_get_softc(ctx);
2000 	if (sc->vmx_ds->event != 0)
2001 		vmxnet3_evintr(sc);
2002 
2003 	vmxnet3_refresh_host_stats(sc);
2004 }
2005 
2006 static void
2007 vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
2008 {
2009 	/* Host stats refresh is global, so just trigger it on txq 0 */
2010 	if (qid == 0)
2011 		vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
2012 }
2013 
2014 static void
2015 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
2016 {
2017 	int idx, bit;
2018 
2019 	if (tag == 0 || tag > 4095)
2020 		return;
2021 
2022 	idx = (tag >> 5) & 0x7F;
2023 	bit = tag & 0x1F;
2024 
2025 	/* Update our private VLAN bitvector. */
2026 	if (add)
2027 		sc->vmx_vlan_filter[idx] |= (1 << bit);
2028 	else
2029 		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
2030 }
2031 
2032 static void
2033 vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
2034 {
2035 
2036 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
2037 }
2038 
2039 static void
2040 vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
2041 {
2042 
2043 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
2044 }
2045 
2046 static u_int
2047 vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count)
2048 {
2049 	struct vmxnet3_softc *sc = arg;
2050 
2051 	if (count < VMXNET3_MULTICAST_MAX)
2052 		bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN],
2053 		    ETHER_ADDR_LEN);
2054 
2055 	return (1);
2056 }
2057 
2058 static void
2059 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
2060 {
2061 	struct ifnet *ifp;
2062 	struct vmxnet3_driver_shared *ds;
2063 	u_int mode;
2064 
2065 	ifp = sc->vmx_ifp;
2066 	ds = sc->vmx_ds;
2067 
2068 	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
2069 	if (flags & IFF_PROMISC)
2070 		mode |= VMXNET3_RXMODE_PROMISC;
2071 	if (flags & IFF_ALLMULTI)
2072 		mode |= VMXNET3_RXMODE_ALLMULTI;
2073 	else {
2074 		int cnt;
2075 
2076 		cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc);
2077 		if (cnt >= VMXNET3_MULTICAST_MAX) {
2078 			cnt = 0;
2079 			mode |= VMXNET3_RXMODE_ALLMULTI;
2080 		} else if (cnt > 0)
2081 			mode |= VMXNET3_RXMODE_MCAST;
2082 		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
2083 	}
2084 
2085 	ds->rxmode = mode;
2086 
2087 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
2088 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
2089 }
2090 
2091 static void
2092 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
2093 {
2094 
2095 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
2096 }
2097 
2098 static int
2099 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
2100 {
2101 	uint32_t status;
2102 
2103 	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
2104 	return !!(status & 0x1);
2105 }
2106 
2107 static void
2108 vmxnet3_link_status(struct vmxnet3_softc *sc)
2109 {
2110 	if_ctx_t ctx;
2111 	uint64_t speed;
2112 	int link;
2113 
2114 	ctx = sc->vmx_ctx;
2115 	link = vmxnet3_link_is_up(sc);
2116 	speed = IF_Gbps(10);
2117 
2118 	if (link != 0 && sc->vmx_link_active == 0) {
2119 		sc->vmx_link_active = 1;
2120 		iflib_link_state_change(ctx, LINK_STATE_UP, speed);
2121 	} else if (link == 0 && sc->vmx_link_active != 0) {
2122 		sc->vmx_link_active = 0;
2123 		iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
2124 	}
2125 }
2126 
2127 static void
2128 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
2129 {
2130 	uint32_t ml, mh;
2131 
2132 	ml  = sc->vmx_lladdr[0];
2133 	ml |= sc->vmx_lladdr[1] << 8;
2134 	ml |= sc->vmx_lladdr[2] << 16;
2135 	ml |= sc->vmx_lladdr[3] << 24;
2136 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
2137 
2138 	mh  = sc->vmx_lladdr[4];
2139 	mh |= sc->vmx_lladdr[5] << 8;
2140 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
2141 }
2142 
2143 static void
2144 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
2145 {
2146 	uint32_t ml, mh;
2147 
2148 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
2149 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
2150 
2151 	sc->vmx_lladdr[0] = ml;
2152 	sc->vmx_lladdr[1] = ml >> 8;
2153 	sc->vmx_lladdr[2] = ml >> 16;
2154 	sc->vmx_lladdr[3] = ml >> 24;
2155 	sc->vmx_lladdr[4] = mh;
2156 	sc->vmx_lladdr[5] = mh >> 8;
2157 }
2158 
2159 static void
2160 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
2161     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2162 {
2163 	struct sysctl_oid *node, *txsnode;
2164 	struct sysctl_oid_list *list, *txslist;
2165 	struct UPT1_TxStats *txstats;
2166 	char namebuf[16];
2167 
2168 	txstats = &txq->vxtxq_ts->stats;
2169 
2170 	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
2171 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2172 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
2173 	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
2174 
2175 	/*
2176 	 * Add statistics reported by the host. These are updated by the
2177 	 * iflib txq timer on txq 0.
2178 	 */
2179 	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2180 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2181 	txslist = SYSCTL_CHILDREN(txsnode);
2182 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
2183 	    &txstats->TSO_packets, "TSO packets");
2184 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
2185 	    &txstats->TSO_bytes, "TSO bytes");
2186 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2187 	    &txstats->ucast_packets, "Unicast packets");
2188 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2189 	    &txstats->ucast_bytes, "Unicast bytes");
2190 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2191 	    &txstats->mcast_packets, "Multicast packets");
2192 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2193 	    &txstats->mcast_bytes, "Multicast bytes");
2194 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
2195 	    &txstats->error, "Errors");
2196 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
2197 	    &txstats->discard, "Discards");
2198 }
2199 
2200 static void
2201 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
2202     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2203 {
2204 	struct sysctl_oid *node, *rxsnode;
2205 	struct sysctl_oid_list *list, *rxslist;
2206 	struct UPT1_RxStats *rxstats;
2207 	char namebuf[16];
2208 
2209 	rxstats = &rxq->vxrxq_rs->stats;
2210 
2211 	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
2212 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2213 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
2214 	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
2215 
2216 	/*
2217 	 * Add statistics reported by the host. These are updated by the
2218 	 * iflib txq timer on txq 0.
2219 	 */
2220 	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2221 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2222 	rxslist = SYSCTL_CHILDREN(rxsnode);
2223 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
2224 	    &rxstats->LRO_packets, "LRO packets");
2225 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
2226 	    &rxstats->LRO_bytes, "LRO bytes");
2227 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2228 	    &rxstats->ucast_packets, "Unicast packets");
2229 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2230 	    &rxstats->ucast_bytes, "Unicast bytes");
2231 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2232 	    &rxstats->mcast_packets, "Multicast packets");
2233 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2234 	    &rxstats->mcast_bytes, "Multicast bytes");
2235 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
2236 	    &rxstats->bcast_packets, "Broadcast packets");
2237 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
2238 	    &rxstats->bcast_bytes, "Broadcast bytes");
2239 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
2240 	    &rxstats->nobuffer, "No buffer");
2241 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
2242 	    &rxstats->error, "Errors");
2243 }
2244 
2245 static void
2246 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
2247     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2248 {
2249 	if_softc_ctx_t scctx;
2250 	struct sysctl_oid *node;
2251 	struct sysctl_oid_list *list;
2252 	int i;
2253 
2254 	scctx = sc->vmx_scctx;
2255 
2256 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
2257 		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
2258 
2259 		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
2260 		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2261 		list = SYSCTL_CHILDREN(node);
2262 
2263 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
2264 		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
2265 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
2266 		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
2267 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
2268 		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
2269 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
2270 		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
2271 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2272 		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0,"");
2273 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2274 		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
2275 	}
2276 
2277 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
2278 		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
2279 
2280 		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
2281 		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2282 		list = SYSCTL_CHILDREN(node);
2283 
2284 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
2285 		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
2286 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
2287 		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
2288 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
2289 		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
2290 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
2291 		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
2292 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2293 		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,"");
2294 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2295 		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
2296 	}
2297 }
2298 
2299 static void
2300 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
2301     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2302 {
2303 	if_softc_ctx_t scctx;
2304 	int i;
2305 
2306 	scctx = sc->vmx_scctx;
2307 
2308 	for (i = 0; i < scctx->isc_ntxqsets; i++)
2309 		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
2310 	for (i = 0; i < scctx->isc_nrxqsets; i++)
2311 		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
2312 
2313 	vmxnet3_setup_debug_sysctl(sc, ctx, child);
2314 }
2315 
2316 static void
2317 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
2318 {
2319 	device_t dev;
2320 	struct sysctl_ctx_list *ctx;
2321 	struct sysctl_oid *tree;
2322 	struct sysctl_oid_list *child;
2323 
2324 	dev = sc->vmx_dev;
2325 	ctx = device_get_sysctl_ctx(dev);
2326 	tree = device_get_sysctl_tree(dev);
2327 	child = SYSCTL_CHILDREN(tree);
2328 
2329 	vmxnet3_setup_queue_sysctl(sc, ctx, child);
2330 }
2331 
2332 static void
2333 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2334 {
2335 
2336 	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
2337 }
2338 
2339 static uint32_t
2340 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
2341 {
2342 
2343 	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
2344 }
2345 
2346 static void
2347 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2348 {
2349 
2350 	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
2351 }
2352 
2353 static void
2354 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2355 {
2356 
2357 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
2358 }
2359 
2360 static uint32_t
2361 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2362 {
2363 
2364 	vmxnet3_write_cmd(sc, cmd);
2365 	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
2366 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2367 	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
2368 }
2369 
/*
 * Enable interrupt 'irq' by writing 0 to its VMXNET3_BAR0_IMASK register.
 */
static void
vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
{
	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
}
2376 
/*
 * Disable interrupt 'irq' by writing 1 to its VMXNET3_BAR0_IMASK register.
 */
static void
vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
{
	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
}
2383 
2384 static int
2385 vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2386 {
2387 	/* Not using interrupts for TX */
2388 	return (0);
2389 }
2390 
2391 static int
2392 vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2393 {
2394 	struct vmxnet3_softc *sc;
2395 
2396 	sc = iflib_get_softc(ctx);
2397 	vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
2398 	return (0);
2399 }
2400 
2401 static void
2402 vmxnet3_link_intr_enable(if_ctx_t ctx)
2403 {
2404 	struct vmxnet3_softc *sc;
2405 
2406 	sc = iflib_get_softc(ctx);
2407 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2408 }
2409 
2410 static void
2411 vmxnet3_intr_enable_all(if_ctx_t ctx)
2412 {
2413 	struct vmxnet3_softc *sc;
2414 	if_softc_ctx_t scctx;
2415 	int i;
2416 
2417 	sc = iflib_get_softc(ctx);
2418 	scctx = sc->vmx_scctx;
2419 	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
2420 	for (i = 0; i < scctx->isc_vectors; i++)
2421 		vmxnet3_enable_intr(sc, i);
2422 }
2423 
2424 static void
2425 vmxnet3_intr_disable_all(if_ctx_t ctx)
2426 {
2427 	struct vmxnet3_softc *sc;
2428 	int i;
2429 
2430 	sc = iflib_get_softc(ctx);
2431 	/*
2432 	 * iflib may invoke this routine before vmxnet3_attach_post() has
2433 	 * run, which is before the top level shared data area is
2434 	 * initialized and the device made aware of it.
2435 	 */
2436 	if (sc->vmx_ds != NULL)
2437 		sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
2438 	for (i = 0; i < VMXNET3_MAX_INTRS; i++)
2439 		vmxnet3_disable_intr(sc, i);
2440 }
2441 
2442 /*
2443  * Since this is a purely paravirtualized device, we do not have
2444  * to worry about DMA coherency. But at times, we must make sure
2445  * both the compiler and CPU do not reorder memory operations.
2446  */
2447 static inline void
2448 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
2449 {
2450 
2451 	switch (type) {
2452 	case VMXNET3_BARRIER_RD:
2453 		rmb();
2454 		break;
2455 	case VMXNET3_BARRIER_WR:
2456 		wmb();
2457 		break;
2458 	case VMXNET3_BARRIER_RDWR:
2459 		mb();
2460 		break;
2461 	default:
2462 		panic("%s: bad barrier type %d", __func__, type);
2463 	}
2464 }
2465