xref: /freebsd/sys/dev/vmware/vmxnet3/if_vmx.c (revision 559a218c9b257775fb249b67945fe4a05b7a6b9f)
/*-
 * Copyright (c) 2013 Tsubai Masanari
 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
 * Copyright (c) 2018 Patrick Kelsey
 *
 * Permission to use, copy, modify, and distribute this software for any
 * purpose with or without fee is hereby granted, provided that the above
 * copyright notice and this permission notice appear in all copies.
 *
 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
 *
 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
 */

/* Driver for VMware vmxnet3 virtual ethernet devices. */
22 
23 #include <sys/cdefs.h>
24 #include "opt_rss.h"
25 
26 #include <sys/param.h>
27 #include <sys/systm.h>
28 #include <sys/kernel.h>
29 #include <sys/endian.h>
30 #include <sys/sockio.h>
31 #include <sys/mbuf.h>
32 #include <sys/malloc.h>
33 #include <sys/module.h>
34 #include <sys/socket.h>
35 #include <sys/sysctl.h>
36 #include <sys/smp.h>
37 #include <vm/vm.h>
38 #include <vm/pmap.h>
39 
40 #include <net/ethernet.h>
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/if_arp.h>
44 #include <net/if_dl.h>
45 #include <net/if_types.h>
46 #include <net/if_media.h>
47 #include <net/if_vlan_var.h>
48 #include <net/iflib.h>
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52 
53 #include <netinet/in_systm.h>
54 #include <netinet/in.h>
55 #include <netinet/ip.h>
56 #include <netinet/ip6.h>
57 #include <netinet6/ip6_var.h>
58 #include <netinet/udp.h>
59 #include <netinet/tcp.h>
60 
61 #include <machine/bus.h>
62 #include <machine/resource.h>
63 #include <sys/bus.h>
64 #include <sys/rman.h>
65 
66 #include <dev/pci/pcireg.h>
67 #include <dev/pci/pcivar.h>
68 
69 #include "ifdi_if.h"
70 
71 #include "if_vmxreg.h"
72 #include "if_vmxvar.h"
73 
74 #include "opt_inet.h"
75 #include "opt_inet6.h"
76 
77 #define VMXNET3_VMWARE_VENDOR_ID	0x15AD
78 #define VMXNET3_VMWARE_DEVICE_ID	0x07B0
79 
80 static const pci_vendor_info_t vmxnet3_vendor_info_array[] =
81 {
82 	PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
83 	/* required last entry */
84 	PVID_END
85 };
86 
/* Device registration and the iflib attach/detach/power-state callbacks. */
static void	*vmxnet3_register(device_t);
static int	vmxnet3_attach_pre(if_ctx_t);
static int	vmxnet3_msix_intr_assign(if_ctx_t, int);
static void	vmxnet3_free_irqs(struct vmxnet3_softc *);
static int	vmxnet3_attach_post(if_ctx_t);
static int	vmxnet3_detach(if_ctx_t);
static int	vmxnet3_shutdown(if_ctx_t);
static int	vmxnet3_suspend(if_ctx_t);
static int	vmxnet3_resume(if_ctx_t);

/* PCI BAR mapping, device version check and interrupt index assignment. */
static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
static void	vmxnet3_free_resources(struct vmxnet3_softc *);
static int	vmxnet3_check_version(struct vmxnet3_softc *);
static void	vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);

/* Driver queue arrays and the per-queue state shared with the device. */
static int	vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
static void	vmxnet3_init_txq(struct vmxnet3_softc *, int);
static int	vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
static void	vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
static int	vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
static void	vmxnet3_queues_free(if_ctx_t);

/* Management of the other data areas shared with the device. */
static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
static void	vmxnet3_free_mcast_table(struct vmxnet3_softc *);
static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
static void	vmxnet3_free_data(struct vmxnet3_softc *);

/* Event handling, the iflib txrx operations and interrupt handlers. */
static void	vmxnet3_evintr(struct vmxnet3_softc *);
static int	vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
static void	vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
static int	vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
static int	vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
static int	vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
static void	vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
static void	vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
static int	vmxnet3_legacy_intr(void *);
static int	vmxnet3_rxq_intr(void *);
static int	vmxnet3_event_intr(void *);

/* iflib stop callback. */
static void	vmxnet3_stop(if_ctx_t);

/* Queue/device (re)initialization and the remaining iflib callbacks. */
static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
static void	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
static void	vmxnet3_reinit_queues(struct vmxnet3_softc *);
static int	vmxnet3_enable_device(struct vmxnet3_softc *);
static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
static void	vmxnet3_init(if_ctx_t);
static void	vmxnet3_multi_set(if_ctx_t);
static int	vmxnet3_mtu_set(if_ctx_t, uint32_t);
static void	vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
static int	vmxnet3_media_change(if_ctx_t);
static int	vmxnet3_promisc_set(if_ctx_t, int);
static uint64_t	vmxnet3_get_counter(if_ctx_t, ift_counter);
static void	vmxnet3_update_admin_status(if_ctx_t);
static void	vmxnet3_txq_timer(if_ctx_t, uint16_t);

/* VLAN filter and receive filter handling. */
static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
		    uint16_t);
static void	vmxnet3_vlan_register(if_ctx_t, uint16_t);
static void	vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);

/* Host statistics, link state and link-layer address helpers. */
static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
static int	vmxnet3_link_is_up(struct vmxnet3_softc *);
static void	vmxnet3_link_status(struct vmxnet3_softc *);
static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);

/* sysctl tree setup. */
static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);

/* BAR0/BAR1 register accessors and device command helpers. */
static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
		    uint32_t);
static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
		    uint32_t);
static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);

/* Interrupt enable/disable callbacks and the iflib restart predicate. */
static int	vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
static int	vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
static void	vmxnet3_link_intr_enable(if_ctx_t);
static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
static void	vmxnet3_intr_enable_all(if_ctx_t);
static void	vmxnet3_intr_disable_all(if_ctx_t);
static bool	vmxnet3_if_needs_restart(if_ctx_t, enum iflib_restart_event);
184 
/*
 * Barrier kinds accepted by vmxnet3_barrier().
 * NOTE(review): the names suggest read, write and read+write ordering
 * respectively; confirm against the vmxnet3_barrier() implementation.
 */
typedef enum {
	VMXNET3_BARRIER_RD,
	VMXNET3_BARRIER_WR,
	VMXNET3_BARRIER_RDWR,
} vmxnet3_barrier_t;

static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
192 
193 static device_method_t vmxnet3_methods[] = {
194 	/* Device interface */
195 	DEVMETHOD(device_register, vmxnet3_register),
196 	DEVMETHOD(device_probe, iflib_device_probe),
197 	DEVMETHOD(device_attach, iflib_device_attach),
198 	DEVMETHOD(device_detach, iflib_device_detach),
199 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
200 	DEVMETHOD(device_suspend, iflib_device_suspend),
201 	DEVMETHOD(device_resume, iflib_device_resume),
202 	DEVMETHOD_END
203 };
204 
205 static driver_t vmxnet3_driver = {
206 	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
207 };
208 
/* Register the driver on the pci bus and publish its PCI match table. */
DRIVER_MODULE(vmx, pci, vmxnet3_driver, 0, 0);
IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
MODULE_VERSION(vmx, 2);

/* Modules this driver depends on. */
MODULE_DEPEND(vmx, pci, 1, 1, 1);
MODULE_DEPEND(vmx, ether, 1, 1, 1);
MODULE_DEPEND(vmx, iflib, 1, 1, 1);
216 
217 static device_method_t vmxnet3_iflib_methods[] = {
218 	DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
219 	DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
220 	DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
221 
222 	DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
223 	DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
224 	DEVMETHOD(ifdi_detach, vmxnet3_detach),
225 
226 	DEVMETHOD(ifdi_init, vmxnet3_init),
227 	DEVMETHOD(ifdi_stop, vmxnet3_stop),
228 	DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
229 	DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
230 	DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
231 	DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
232 	DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
233 	DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
234 	DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
235 	DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
236 
237 	DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
238 	DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
239 	DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
240 	DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
241 	DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
242 	DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
243 
244 	DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
245 	DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
246 
247 	DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
248 	DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
249 	DEVMETHOD(ifdi_resume, vmxnet3_resume),
250 
251 	DEVMETHOD(ifdi_needs_restart, vmxnet3_if_needs_restart),
252 
253 	DEVMETHOD_END
254 };
255 
256 static driver_t vmxnet3_iflib_driver = {
257 	"vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
258 };
259 
260 struct if_txrx vmxnet3_txrx = {
261 	.ift_txd_encap = vmxnet3_isc_txd_encap,
262 	.ift_txd_flush = vmxnet3_isc_txd_flush,
263 	.ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
264 	.ift_rxd_available = vmxnet3_isc_rxd_available,
265 	.ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
266 	.ift_rxd_refill = vmxnet3_isc_rxd_refill,
267 	.ift_rxd_flush = vmxnet3_isc_rxd_flush,
268 	.ift_legacy_intr = vmxnet3_legacy_intr
269 };
270 
271 static struct if_shared_ctx vmxnet3_sctx_init = {
272 	.isc_magic = IFLIB_MAGIC,
273 	.isc_q_align = 512,
274 
275 	.isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
276 	.isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
277 	.isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
278 	.isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
279 
280 	/*
281 	 * These values are used to configure the busdma tag used for
282 	 * receive descriptors.  Each receive descriptor only points to one
283 	 * buffer.
284 	 */
285 	.isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
286 	.isc_rx_nsegments = 1,  /* One mapping per descriptor */
287 	.isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
288 
289 	.isc_admin_intrcnt = 1,
290 	.isc_vendor_info = vmxnet3_vendor_info_array,
291 	.isc_driver_version = "2",
292 	.isc_driver = &vmxnet3_iflib_driver,
293 	.isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY,
294 
295 	/*
296 	 * Number of receive queues per receive queue set, with associated
297 	 * descriptor settings for each.
298 	 */
299 	.isc_nrxqs = 3,
300 	.isc_nfl = 2, /* one free list for each receive command queue */
301 	.isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
302 	.isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
303 	.isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
304 
305 	/*
306 	 * Number of transmit queues per transmit queue set, with associated
307 	 * descriptor settings for each.
308 	 */
309 	.isc_ntxqs = 2,
310 	.isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
311 	.isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
312 	.isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
313 };
314 
315 static void *
316 vmxnet3_register(device_t dev)
317 {
318 	return (&vmxnet3_sctx_init);
319 }
320 
/*
 * Round a count down to the nearest power of two.
 *
 * fls() yields the 1-based position of the most significant set bit, so
 * 1 << (fls(val) - 1) is the largest power of two that is <= val.
 *
 * There is no such power of two for val <= 0: fls(0) is 0, which would
 * make the shift count negative (undefined behavior), and a negative val
 * has bit 31 set, which would overflow the int result.  Such inputs are
 * clamped to 1, the smallest power of two, so that callers sizing queue
 * sets always get a usable count.
 */
static int
trunc_powerof2(int val)
{

	if (val <= 0)
		return (1);

	return (1U << (fls(val) - 1));
}
327 
328 static int
329 vmxnet3_attach_pre(if_ctx_t ctx)
330 {
331 	device_t dev;
332 	if_softc_ctx_t scctx;
333 	struct vmxnet3_softc *sc;
334 	uint32_t intr_config;
335 	int error;
336 
337 	dev = iflib_get_dev(ctx);
338 	sc = iflib_get_softc(ctx);
339 	sc->vmx_dev = dev;
340 	sc->vmx_ctx = ctx;
341 	sc->vmx_sctx = iflib_get_sctx(ctx);
342 	sc->vmx_scctx = iflib_get_softc_ctx(ctx);
343 	sc->vmx_ifp = iflib_get_ifp(ctx);
344 	sc->vmx_media = iflib_get_media(ctx);
345 	scctx = sc->vmx_scctx;
346 
347 	scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
348 	scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
349 	/* isc_tx_tso_size_max doesn't include possible vlan header */
350 	scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
351 	scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
352 	scctx->isc_txrx = &vmxnet3_txrx;
353 
354 	/* If 0, the iflib tunable was not set, so set to the default */
355 	if (scctx->isc_nrxqsets == 0)
356 		scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
357 	scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets);
358 	scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
359 	scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max);
360 
361 	/* If 0, the iflib tunable was not set, so set to the default */
362 	if (scctx->isc_ntxqsets == 0)
363 		scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
364 	scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets);
365 	scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
366 	scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max);
367 
368 	/*
369 	 * Enforce that the transmit completion queue descriptor count is
370 	 * the same as the transmit command queue descriptor count.
371 	 */
372 	scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
373 	scctx->isc_txqsizes[0] =
374 	    sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
375 	scctx->isc_txqsizes[1] =
376 	    sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
377 
378 	/*
379 	 * Enforce that the receive completion queue descriptor count is the
380 	 * sum of the receive command queue descriptor counts, and that the
381 	 * second receive command queue descriptor count is the same as the
382 	 * first one.
383 	 */
384 	scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
385 	scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
386 	scctx->isc_rxqsizes[0] =
387 	    sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
388 	scctx->isc_rxqsizes[1] =
389 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
390 	scctx->isc_rxqsizes[2] =
391 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
392 
393 	/*
394 	 * Initialize the max frame size and descriptor queue buffer
395 	 * sizes.
396 	 */
397 	vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp));
398 
399 	scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
400 
401 	/* Map PCI BARs */
402 	error = vmxnet3_alloc_resources(sc);
403 	if (error)
404 		goto fail;
405 
406 	/* Check device versions */
407 	error = vmxnet3_check_version(sc);
408 	if (error)
409 		goto fail;
410 
411 	/*
412 	 * The interrupt mode can be set in the hypervisor configuration via
413 	 * the parameter ethernet<N>.intrMode.
414 	 */
415 	intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
416 	sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
417 
418 	/*
419 	 * Configure the softc context to attempt to configure the interrupt
420 	 * mode now indicated by intr_config.  iflib will follow the usual
421 	 * fallback path MSI-X -> MSI -> LEGACY, starting at the configured
422 	 * starting mode.
423 	 */
424 	switch (intr_config & 0x03) {
425 	case VMXNET3_IT_AUTO:
426 	case VMXNET3_IT_MSIX:
427 		scctx->isc_msix_bar = pci_msix_table_bar(dev);
428 		break;
429 	case VMXNET3_IT_MSI:
430 		scctx->isc_msix_bar = -1;
431 		scctx->isc_disable_msix = 1;
432 		break;
433 	case VMXNET3_IT_LEGACY:
434 		scctx->isc_msix_bar = 0;
435 		break;
436 	}
437 
438 	scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
439 	scctx->isc_capabilities = scctx->isc_capenable =
440 	    IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
441 	    IFCAP_TSO4 | IFCAP_TSO6 |
442 	    IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
443 	    IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
444 	    IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
445 	    IFCAP_JUMBO_MTU;
446 
447 	/* These capabilities are not enabled by default. */
448 	scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
449 
450 	vmxnet3_get_lladdr(sc);
451 	iflib_set_mac(ctx, sc->vmx_lladdr);
452 
453 	return (0);
454 fail:
455 	/*
456 	 * We must completely clean up anything allocated above as iflib
457 	 * will not invoke any other driver entry points as a result of this
458 	 * failure.
459 	 */
460 	vmxnet3_free_resources(sc);
461 
462 	return (error);
463 }
464 
465 static int
466 vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
467 {
468 	struct vmxnet3_softc *sc;
469 	if_softc_ctx_t scctx;
470 	struct vmxnet3_rxqueue *rxq;
471 	int error;
472 	int i;
473 	char irq_name[16];
474 
475 	sc = iflib_get_softc(ctx);
476 	scctx = sc->vmx_scctx;
477 
478 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
479 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
480 
481 		rxq = &sc->vmx_rxq[i];
482 		error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
483 		    IFLIB_INTR_RXTX, vmxnet3_rxq_intr, rxq, i, irq_name);
484 		if (error) {
485 			device_printf(iflib_get_dev(ctx),
486 			    "Failed to register rxq %d interrupt handler\n", i);
487 			return (error);
488 		}
489 	}
490 
491 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
492 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
493 
494 		/*
495 		 * Don't provide the corresponding rxq irq for reference -
496 		 * we want the transmit task to be attached to a task queue
497 		 * that is different from the one used by the corresponding
498 		 * rxq irq.  That is because the TX doorbell writes are very
499 		 * expensive as virtualized MMIO operations, so we want to
500 		 * be able to defer them to another core when possible so
501 		 * that they don't steal receive processing cycles during
502 		 * stack turnarounds like TCP ACK generation.  The other
503 		 * piece to this approach is enabling the iflib abdicate
504 		 * option (currently via an interface-specific
505 		 * tunable/sysctl).
506 		 */
507 		iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
508 		    irq_name);
509 	}
510 
511 	error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
512 	    scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
513 	    "event");
514 	if (error) {
515 		device_printf(iflib_get_dev(ctx),
516 		    "Failed to register event interrupt handler\n");
517 		return (error);
518 	}
519 
520 	return (0);
521 }
522 
523 static void
524 vmxnet3_free_irqs(struct vmxnet3_softc *sc)
525 {
526 	if_softc_ctx_t scctx;
527 	struct vmxnet3_rxqueue *rxq;
528 	int i;
529 
530 	scctx = sc->vmx_scctx;
531 
532 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
533 		rxq = &sc->vmx_rxq[i];
534 		iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
535 	}
536 
537 	iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
538 }
539 
540 static int
541 vmxnet3_attach_post(if_ctx_t ctx)
542 {
543 	if_softc_ctx_t scctx;
544 	struct vmxnet3_softc *sc;
545 	int error;
546 
547 	scctx = iflib_get_softc_ctx(ctx);
548 	sc = iflib_get_softc(ctx);
549 
550 	if (scctx->isc_nrxqsets > 1)
551 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
552 
553 	error = vmxnet3_alloc_data(sc);
554 	if (error)
555 		goto fail;
556 
557 	vmxnet3_set_interrupt_idx(sc);
558 	vmxnet3_setup_sysctl(sc);
559 
560 	ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
561 	ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
562 
563 fail:
564 	return (error);
565 }
566 
567 static int
568 vmxnet3_detach(if_ctx_t ctx)
569 {
570 	struct vmxnet3_softc *sc;
571 
572 	sc = iflib_get_softc(ctx);
573 
574 	vmxnet3_free_irqs(sc);
575 	vmxnet3_free_data(sc);
576 	vmxnet3_free_resources(sc);
577 
578 	return (0);
579 }
580 
581 static int
582 vmxnet3_shutdown(if_ctx_t ctx)
583 {
584 
585 	return (0);
586 }
587 
588 static int
589 vmxnet3_suspend(if_ctx_t ctx)
590 {
591 
592 	return (0);
593 }
594 
595 static int
596 vmxnet3_resume(if_ctx_t ctx)
597 {
598 
599 	return (0);
600 }
601 
602 static int
603 vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
604 {
605 	device_t dev;
606 	int rid;
607 
608 	dev = sc->vmx_dev;
609 
610 	rid = PCIR_BAR(0);
611 	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
612 	    RF_ACTIVE);
613 	if (sc->vmx_res0 == NULL) {
614 		device_printf(dev,
615 		    "could not map BAR0 memory\n");
616 		return (ENXIO);
617 	}
618 
619 	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
620 	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
621 
622 	rid = PCIR_BAR(1);
623 	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
624 	    RF_ACTIVE);
625 	if (sc->vmx_res1 == NULL) {
626 		device_printf(dev,
627 		    "could not map BAR1 memory\n");
628 		return (ENXIO);
629 	}
630 
631 	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
632 	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
633 
634 	return (0);
635 }
636 
637 static void
638 vmxnet3_free_resources(struct vmxnet3_softc *sc)
639 {
640 	device_t dev;
641 
642 	dev = sc->vmx_dev;
643 
644 	if (sc->vmx_res0 != NULL) {
645 		bus_release_resource(dev, SYS_RES_MEMORY,
646 		    rman_get_rid(sc->vmx_res0), sc->vmx_res0);
647 		sc->vmx_res0 = NULL;
648 	}
649 
650 	if (sc->vmx_res1 != NULL) {
651 		bus_release_resource(dev, SYS_RES_MEMORY,
652 		    rman_get_rid(sc->vmx_res1), sc->vmx_res1);
653 		sc->vmx_res1 = NULL;
654 	}
655 }
656 
657 static int
658 vmxnet3_check_version(struct vmxnet3_softc *sc)
659 {
660 	device_t dev;
661 	uint32_t version;
662 
663 	dev = sc->vmx_dev;
664 
665 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
666 	if ((version & 0x01) == 0) {
667 		device_printf(dev, "unsupported hardware version %#x\n",
668 		    version);
669 		return (ENOTSUP);
670 	}
671 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
672 
673 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
674 	if ((version & 0x01) == 0) {
675 		device_printf(dev, "unsupported UPT version %#x\n", version);
676 		return (ENOTSUP);
677 	}
678 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
679 
680 	return (0);
681 }
682 
683 static void
684 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
685 {
686 	if_softc_ctx_t scctx;
687 	struct vmxnet3_txqueue *txq;
688 	struct vmxnet3_txq_shared *txs;
689 	struct vmxnet3_rxqueue *rxq;
690 	struct vmxnet3_rxq_shared *rxs;
691 	int intr_idx;
692 	int i;
693 
694 	scctx = sc->vmx_scctx;
695 
696 	/*
697 	 * There is always one interrupt per receive queue, assigned
698 	 * starting with the first interrupt.  When there is only one
699 	 * interrupt available, the event interrupt shares the receive queue
700 	 * interrupt, otherwise it uses the interrupt following the last
701 	 * receive queue interrupt.  Transmit queues are not assigned
702 	 * interrupts, so they are given indexes beyond the indexes that
703 	 * correspond to the real interrupts.
704 	 */
705 
706 	/* The event interrupt is always the last vector. */
707 	sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
708 
709 	intr_idx = 0;
710 	for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
711 		rxq = &sc->vmx_rxq[i];
712 		rxs = rxq->vxrxq_rs;
713 		rxq->vxrxq_intr_idx = intr_idx;
714 		rxs->intr_idx = rxq->vxrxq_intr_idx;
715 	}
716 
717 	/*
718 	 * Assign the tx queues interrupt indexes above what we are actually
719 	 * using.  These interrupts will never be enabled.
720 	 */
721 	intr_idx = scctx->isc_vectors;
722 	for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
723 		txq = &sc->vmx_txq[i];
724 		txs = txq->vxtxq_ts;
725 		txq->vxtxq_intr_idx = intr_idx;
726 		txs->intr_idx = txq->vxtxq_intr_idx;
727 	}
728 }
729 
730 static int
731 vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
732 {
733 	if_softc_ctx_t scctx;
734 	int size;
735 	int error;
736 
737 	scctx = sc->vmx_scctx;
738 
739 	/*
740 	 * The txq and rxq shared data areas must be allocated contiguously
741 	 * as vmxnet3_driver_shared contains only a single address member
742 	 * for the shared queue data area.
743 	 */
744 	size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
745 	    scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
746 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
747 	if (error) {
748 		device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
749 		return (error);
750 	}
751 
752 	return (0);
753 }
754 
755 static void
756 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
757 {
758 	struct vmxnet3_txqueue *txq;
759 	struct vmxnet3_comp_ring *txc;
760 	struct vmxnet3_txring *txr;
761 	if_softc_ctx_t scctx;
762 
763 	txq = &sc->vmx_txq[q];
764 	txc = &txq->vxtxq_comp_ring;
765 	txr = &txq->vxtxq_cmd_ring;
766 	scctx = sc->vmx_scctx;
767 
768 	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
769 	    device_get_nameunit(sc->vmx_dev), q);
770 
771 	txq->vxtxq_sc = sc;
772 	txq->vxtxq_id = q;
773 	txc->vxcr_ndesc = scctx->isc_ntxd[0];
774 	txr->vxtxr_ndesc = scctx->isc_ntxd[1];
775 }
776 
777 static int
778 vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
779     int ntxqs, int ntxqsets)
780 {
781 	struct vmxnet3_softc *sc;
782 	int q;
783 	int error;
784 	caddr_t kva;
785 
786 	sc = iflib_get_softc(ctx);
787 
788 	/* Allocate the array of transmit queues */
789 	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
790 	    ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
791 	if (sc->vmx_txq == NULL)
792 		return (ENOMEM);
793 
794 	/* Initialize driver state for each transmit queue */
795 	for (q = 0; q < ntxqsets; q++)
796 		vmxnet3_init_txq(sc, q);
797 
798 	/*
799 	 * Allocate queue state that is shared with the device.  This check
800 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
801 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
802 	 * order iflib invokes those routines in.
803 	 */
804 	if (sc->vmx_qs_dma.idi_size == 0) {
805 		error = vmxnet3_queues_shared_alloc(sc);
806 		if (error)
807 			return (error);
808 	}
809 
810 	kva = sc->vmx_qs_dma.idi_vaddr;
811 	for (q = 0; q < ntxqsets; q++) {
812 		sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
813 		kva += sizeof(struct vmxnet3_txq_shared);
814 	}
815 
816 	/* Record descriptor ring vaddrs and paddrs */
817 	for (q = 0; q < ntxqsets; q++) {
818 		struct vmxnet3_txqueue *txq;
819 		struct vmxnet3_txring *txr;
820 		struct vmxnet3_comp_ring *txc;
821 
822 		txq = &sc->vmx_txq[q];
823 		txc = &txq->vxtxq_comp_ring;
824 		txr = &txq->vxtxq_cmd_ring;
825 
826 		/* Completion ring */
827 		txc->vxcr_u.txcd =
828 		    (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
829 		txc->vxcr_paddr = paddrs[q * ntxqs + 0];
830 
831 		/* Command ring */
832 		txr->vxtxr_txd =
833 		    (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
834 		txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
835 	}
836 
837 	return (0);
838 }
839 
840 static void
841 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
842 {
843 	struct vmxnet3_rxqueue *rxq;
844 	struct vmxnet3_comp_ring *rxc;
845 	struct vmxnet3_rxring *rxr;
846 	if_softc_ctx_t scctx;
847 	int i;
848 
849 	rxq = &sc->vmx_rxq[q];
850 	rxc = &rxq->vxrxq_comp_ring;
851 	scctx = sc->vmx_scctx;
852 
853 	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
854 	    device_get_nameunit(sc->vmx_dev), q);
855 
856 	rxq->vxrxq_sc = sc;
857 	rxq->vxrxq_id = q;
858 
859 	/*
860 	 * First rxq is the completion queue, so there are nrxqs - 1 command
861 	 * rings starting at iflib queue id 1.
862 	 */
863 	rxc->vxcr_ndesc = scctx->isc_nrxd[0];
864 	for (i = 0; i < nrxqs - 1; i++) {
865 		rxr = &rxq->vxrxq_cmd_ring[i];
866 		rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
867 	}
868 }
869 
870 static int
871 vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
872     int nrxqs, int nrxqsets)
873 {
874 	struct vmxnet3_softc *sc;
875 	if_softc_ctx_t scctx;
876 	int q;
877 	int i;
878 	int error;
879 	caddr_t kva;
880 
881 	sc = iflib_get_softc(ctx);
882 	scctx = sc->vmx_scctx;
883 
884 	/* Allocate the array of receive queues */
885 	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
886 	    nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
887 	if (sc->vmx_rxq == NULL)
888 		return (ENOMEM);
889 
890 	/* Initialize driver state for each receive queue */
891 	for (q = 0; q < nrxqsets; q++)
892 		vmxnet3_init_rxq(sc, q, nrxqs);
893 
894 	/*
895 	 * Allocate queue state that is shared with the device.  This check
896 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
897 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
898 	 * order iflib invokes those routines in.
899 	 */
900 	if (sc->vmx_qs_dma.idi_size == 0) {
901 		error = vmxnet3_queues_shared_alloc(sc);
902 		if (error)
903 			return (error);
904 	}
905 
906 	kva = sc->vmx_qs_dma.idi_vaddr +
907 	    scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
908 	for (q = 0; q < nrxqsets; q++) {
909 		sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
910 		kva += sizeof(struct vmxnet3_rxq_shared);
911 	}
912 
913 	/* Record descriptor ring vaddrs and paddrs */
914 	for (q = 0; q < nrxqsets; q++) {
915 		struct vmxnet3_rxqueue *rxq;
916 		struct vmxnet3_rxring *rxr;
917 		struct vmxnet3_comp_ring *rxc;
918 
919 		rxq = &sc->vmx_rxq[q];
920 		rxc = &rxq->vxrxq_comp_ring;
921 
922 		/* Completion ring */
923 		rxc->vxcr_u.rxcd =
924 		    (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
925 		rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
926 
927 		/* Command ring(s) */
928 		for (i = 0; i < nrxqs - 1; i++) {
929 			rxr = &rxq->vxrxq_cmd_ring[i];
930 
931 			rxr->vxrxr_rxd =
932 			    (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
933 			rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
934 		}
935 	}
936 
937 	return (0);
938 }
939 
940 static void
941 vmxnet3_queues_free(if_ctx_t ctx)
942 {
943 	struct vmxnet3_softc *sc;
944 
945 	sc = iflib_get_softc(ctx);
946 
947 	/* Free queue state area that is shared with the device */
948 	if (sc->vmx_qs_dma.idi_size != 0) {
949 		iflib_dma_free(&sc->vmx_qs_dma);
950 		sc->vmx_qs_dma.idi_size = 0;
951 	}
952 
953 	/* Free array of receive queues */
954 	if (sc->vmx_rxq != NULL) {
955 		free(sc->vmx_rxq, M_DEVBUF);
956 		sc->vmx_rxq = NULL;
957 	}
958 
959 	/* Free array of transmit queues */
960 	if (sc->vmx_txq != NULL) {
961 		free(sc->vmx_txq, M_DEVBUF);
962 		sc->vmx_txq = NULL;
963 	}
964 }
965 
966 static int
967 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
968 {
969 	device_t dev;
970 	size_t size;
971 	int error;
972 
973 	dev = sc->vmx_dev;
974 
975 	/* Top level state structure shared with the device */
976 	size = sizeof(struct vmxnet3_driver_shared);
977 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
978 	if (error) {
979 		device_printf(dev, "cannot alloc shared memory\n");
980 		return (error);
981 	}
982 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
983 
984 	/* RSS table state shared with the device */
985 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
986 		size = sizeof(struct vmxnet3_rss_shared);
987 		error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
988 		    &sc->vmx_rss_dma, 0);
989 		if (error) {
990 			device_printf(dev, "cannot alloc rss shared memory\n");
991 			return (error);
992 		}
993 		sc->vmx_rss =
994 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
995 	}
996 
997 	return (0);
998 }
999 
1000 static void
1001 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1002 {
1003 
1004 	/* Free RSS table state shared with the device */
1005 	if (sc->vmx_rss != NULL) {
1006 		iflib_dma_free(&sc->vmx_rss_dma);
1007 		sc->vmx_rss = NULL;
1008 	}
1009 
1010 	/* Free top level state structure shared with the device */
1011 	if (sc->vmx_ds != NULL) {
1012 		iflib_dma_free(&sc->vmx_ds_dma);
1013 		sc->vmx_ds = NULL;
1014 	}
1015 }
1016 
1017 static int
1018 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1019 {
1020 	int error;
1021 
1022 	/* Multicast table state shared with the device */
1023 	error = iflib_dma_alloc_align(sc->vmx_ctx,
1024 	    VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
1025 	if (error)
1026 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1027 	else
1028 		sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
1029 
1030 	return (error);
1031 }
1032 
1033 static void
1034 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1035 {
1036 
1037 	/* Free multicast table state shared with the device */
1038 	if (sc->vmx_mcast != NULL) {
1039 		iflib_dma_free(&sc->vmx_mcast_dma);
1040 		sc->vmx_mcast = NULL;
1041 	}
1042 }
1043 
1044 static void
1045 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1046 {
1047 	struct vmxnet3_driver_shared *ds;
1048 	if_softc_ctx_t scctx;
1049 	struct vmxnet3_txqueue *txq;
1050 	struct vmxnet3_txq_shared *txs;
1051 	struct vmxnet3_rxqueue *rxq;
1052 	struct vmxnet3_rxq_shared *rxs;
1053 	int i;
1054 
1055 	ds = sc->vmx_ds;
1056 	scctx = sc->vmx_scctx;
1057 
1058 	/*
1059 	 * Initialize fields of the shared data that remains the same across
1060 	 * reinits. Note the shared data is zero'd when allocated.
1061 	 */
1062 
1063 	ds->magic = VMXNET3_REV1_MAGIC;
1064 
1065 	/* DriverInfo */
1066 	ds->version = VMXNET3_DRIVER_VERSION;
1067 	ds->guest = VMXNET3_GOS_FREEBSD |
1068 #ifdef __LP64__
1069 	    VMXNET3_GOS_64BIT;
1070 #else
1071 	    VMXNET3_GOS_32BIT;
1072 #endif
1073 	ds->vmxnet3_revision = 1;
1074 	ds->upt_version = 1;
1075 
1076 	/* Misc. conf */
1077 	ds->driver_data = vtophys(sc);
1078 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1079 	ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
1080 	ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
1081 	ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
1082 
1083 	/* RSS conf */
1084 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1085 		ds->rss.version = 1;
1086 		ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
1087 		ds->rss.len = sc->vmx_rss_dma.idi_size;
1088 	}
1089 
1090 	/* Interrupt control. */
1091 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1092 	/*
1093 	 * Total number of interrupt indexes we are using in the shared
1094 	 * config data, even though we don't actually allocate interrupt
1095 	 * resources for the tx queues.  Some versions of the device will
1096 	 * fail to initialize successfully if interrupt indexes are used in
1097 	 * the shared config that exceed the number of interrupts configured
1098 	 * here.
1099 	 */
1100 	ds->nintr = (scctx->isc_vectors == 1) ?
1101 	    2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
1102 	ds->evintr = sc->vmx_event_intr_idx;
1103 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1104 
1105 	for (i = 0; i < ds->nintr; i++)
1106 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1107 
1108 	/* Receive filter. */
1109 	ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
1110 	ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
1111 
1112 	/* Tx queues */
1113 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
1114 		txq = &sc->vmx_txq[i];
1115 		txs = txq->vxtxq_ts;
1116 
1117 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
1118 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1119 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
1120 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1121 		txs->driver_data = vtophys(txq);
1122 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1123 	}
1124 
1125 	/* Rx queues */
1126 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
1127 		rxq = &sc->vmx_rxq[i];
1128 		rxs = rxq->vxrxq_rs;
1129 
1130 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
1131 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1132 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
1133 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1134 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
1135 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1136 		rxs->driver_data = vtophys(rxq);
1137 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1138 	}
1139 }
1140 
1141 static void
1142 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1143 {
1144 	/*
1145 	 * Use the same key as the Linux driver until FreeBSD can do
1146 	 * RSS (presumably Toeplitz) in software.
1147 	 */
1148 	static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1149 	    0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1150 	    0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1151 	    0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1152 	    0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1153 	    0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1154 	};
1155 
1156 	if_softc_ctx_t scctx;
1157 	struct vmxnet3_rss_shared *rss;
1158 #ifdef RSS
1159 	uint8_t rss_algo;
1160 #endif
1161 	int i;
1162 
1163 	scctx = sc->vmx_scctx;
1164 	rss = sc->vmx_rss;
1165 
1166 	rss->hash_type =
1167 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1168 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1169 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1170 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1171 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1172 #ifdef RSS
1173 	/*
1174 	 * If the software RSS is configured to anything else other than
1175 	 * Toeplitz, then just do Toeplitz in "hardware" for the sake of
1176 	 * the packet distribution, but report the hash as opaque to
1177 	 * disengage from the software RSS.
1178 	 */
1179 	rss_algo = rss_gethashalgo();
1180 	if (rss_algo == RSS_HASH_TOEPLITZ) {
1181 		rss_getkey(rss->hash_key);
1182 		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) {
1183 			rss->ind_table[i] = rss_get_indirection_to_bucket(i) %
1184 			    scctx->isc_nrxqsets;
1185 		}
1186 		sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS;
1187 	} else
1188 #endif
1189 	{
1190 		memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1191 		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1192 			rss->ind_table[i] = i % scctx->isc_nrxqsets;
1193 		sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS;
1194 	}
1195 }
1196 
1197 static void
1198 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1199 {
1200 	if_t ifp;
1201 	struct vmxnet3_driver_shared *ds;
1202 	if_softc_ctx_t scctx;
1203 
1204 	ifp = sc->vmx_ifp;
1205 	ds = sc->vmx_ds;
1206 	scctx = sc->vmx_scctx;
1207 
1208 	ds->mtu = if_getmtu(ifp);
1209 	ds->ntxqueue = scctx->isc_ntxqsets;
1210 	ds->nrxqueue = scctx->isc_nrxqsets;
1211 
1212 	ds->upt_features = 0;
1213 	if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1214 		ds->upt_features |= UPT1_F_CSUM;
1215 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING)
1216 		ds->upt_features |= UPT1_F_VLAN;
1217 	if (if_getcapenable(ifp) & IFCAP_LRO)
1218 		ds->upt_features |= UPT1_F_LRO;
1219 
1220 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1221 		ds->upt_features |= UPT1_F_RSS;
1222 		vmxnet3_reinit_rss_shared_data(sc);
1223 	}
1224 
1225 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
1226 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1227 	    (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
1228 }
1229 
/*
 * Allocate the shared data and the multicast table, then initialize the
 * shared data.  The first allocation failure stops the sequence and its
 * error is returned; 0 is returned when everything succeeded.
 */
static int
vmxnet3_alloc_data(struct vmxnet3_softc *sc)
{
	int rc;

	rc = vmxnet3_alloc_shared_data(sc);
	if (rc == 0)
		rc = vmxnet3_alloc_mcast_table(sc);
	if (rc == 0)
		vmxnet3_init_shared_data(sc);

	return (rc);
}
1247 
/*
 * Release what vmxnet3_alloc_data() set up: the multicast table first,
 * then the shared data.
 */
static void
vmxnet3_free_data(struct vmxnet3_softc *sc)
{
	vmxnet3_free_mcast_table(sc);
	vmxnet3_free_shared_data(sc);
}
1255 
1256 static void
1257 vmxnet3_evintr(struct vmxnet3_softc *sc)
1258 {
1259 	device_t dev;
1260 	struct vmxnet3_txq_shared *ts;
1261 	struct vmxnet3_rxq_shared *rs;
1262 	uint32_t event;
1263 
1264 	dev = sc->vmx_dev;
1265 
1266 	/* Clear events. */
1267 	event = sc->vmx_ds->event;
1268 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1269 
1270 	if (event & VMXNET3_EVENT_LINK)
1271 		vmxnet3_link_status(sc);
1272 
1273 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1274 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1275 		ts = sc->vmx_txq[0].vxtxq_ts;
1276 		if (ts->stopped != 0)
1277 			device_printf(dev, "Tx queue error %#x\n", ts->error);
1278 		rs = sc->vmx_rxq[0].vxrxq_rs;
1279 		if (rs->stopped != 0)
1280 			device_printf(dev, "Rx queue error %#x\n", rs->error);
1281 
1282 		/* XXX - rely on liflib watchdog to reset us? */
1283 		device_printf(dev, "Rx/Tx queue error event ... "
1284 		    "waiting for iflib watchdog reset\n");
1285 	}
1286 
1287 	if (event & VMXNET3_EVENT_DIC)
1288 		device_printf(dev, "device implementation change event\n");
1289 	if (event & VMXNET3_EVENT_DEBUG)
1290 		device_printf(dev, "debug event\n");
1291 }
1292 
1293 static int
1294 vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
1295 {
1296 	struct vmxnet3_softc *sc;
1297 	struct vmxnet3_txqueue *txq;
1298 	struct vmxnet3_txring *txr;
1299 	struct vmxnet3_txdesc *txd, *sop;
1300 	bus_dma_segment_t *segs;
1301 	int nsegs;
1302 	int pidx;
1303 	int hdrlen;
1304 	int i;
1305 	int gen;
1306 
1307 	sc = vsc;
1308 	txq = &sc->vmx_txq[pi->ipi_qsidx];
1309 	txr = &txq->vxtxq_cmd_ring;
1310 	segs = pi->ipi_segs;
1311 	nsegs = pi->ipi_nsegs;
1312 	pidx = pi->ipi_pidx;
1313 
1314 	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
1315 	    ("%s: packet with too many segments %d", __func__, nsegs));
1316 
1317 	sop = &txr->vxtxr_txd[pidx];
1318 	gen = txr->vxtxr_gen ^ 1;	/* Owned by cpu (yet) */
1319 
1320 	for (i = 0; i < nsegs; i++) {
1321 		txd = &txr->vxtxr_txd[pidx];
1322 
1323 		txd->addr = segs[i].ds_addr;
1324 		txd->len = segs[i].ds_len;
1325 		txd->gen = gen;
1326 		txd->dtype = 0;
1327 		txd->offload_mode = VMXNET3_OM_NONE;
1328 		txd->offload_pos = 0;
1329 		txd->hlen = 0;
1330 		txd->eop = 0;
1331 		txd->compreq = 0;
1332 		txd->vtag_mode = 0;
1333 		txd->vtag = 0;
1334 
1335 		if (++pidx == txr->vxtxr_ndesc) {
1336 			pidx = 0;
1337 			txr->vxtxr_gen ^= 1;
1338 		}
1339 		gen = txr->vxtxr_gen;
1340 	}
1341 	txd->eop = 1;
1342 	txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
1343 	pi->ipi_new_pidx = pidx;
1344 
1345 	/*
1346 	 * VLAN
1347 	 */
1348 	if (pi->ipi_mflags & M_VLANTAG) {
1349 		sop->vtag_mode = 1;
1350 		sop->vtag = pi->ipi_vtag;
1351 	}
1352 
1353 	/*
1354 	 * TSO and checksum offloads
1355 	 */
1356 	hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
1357 	if (pi->ipi_csum_flags & CSUM_TSO) {
1358 		sop->offload_mode = VMXNET3_OM_TSO;
1359 		sop->hlen = hdrlen + pi->ipi_tcp_hlen;
1360 		sop->offload_pos = pi->ipi_tso_segsz;
1361 	} else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
1362 	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
1363 		sop->offload_mode = VMXNET3_OM_CSUM;
1364 		sop->hlen = hdrlen;
1365 		sop->offload_pos = hdrlen +
1366 		    ((pi->ipi_ipproto == IPPROTO_TCP) ?
1367 			offsetof(struct tcphdr, th_sum) :
1368 			offsetof(struct udphdr, uh_sum));
1369 	}
1370 
1371 	/* Finally, change the ownership. */
1372 	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
1373 	sop->gen ^= 1;
1374 
1375 	return (0);
1376 }
1377 
1378 static void
1379 vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
1380 {
1381 	struct vmxnet3_softc *sc;
1382 	struct vmxnet3_txqueue *txq;
1383 
1384 	sc = vsc;
1385 	txq = &sc->vmx_txq[txqid];
1386 
1387 	/*
1388 	 * pidx is what we last set ipi_new_pidx to in
1389 	 * vmxnet3_isc_txd_encap()
1390 	 */
1391 
1392 	/*
1393 	 * Avoid expensive register updates if the flush request is
1394 	 * redundant.
1395 	 */
1396 	if (txq->vxtxq_last_flush == pidx)
1397 		return;
1398 	txq->vxtxq_last_flush = pidx;
1399 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
1400 }
1401 
1402 static int
1403 vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
1404 {
1405 	struct vmxnet3_softc *sc;
1406 	struct vmxnet3_txqueue *txq;
1407 	struct vmxnet3_comp_ring *txc;
1408 	struct vmxnet3_txcompdesc *txcd;
1409 	struct vmxnet3_txring *txr;
1410 	int processed;
1411 
1412 	sc = vsc;
1413 	txq = &sc->vmx_txq[txqid];
1414 	txc = &txq->vxtxq_comp_ring;
1415 	txr = &txq->vxtxq_cmd_ring;
1416 
1417 	/*
1418 	 * If clear is true, we need to report the number of TX command ring
1419 	 * descriptors that have been processed by the device.  If clear is
1420 	 * false, we just need to report whether or not at least one TX
1421 	 * command ring descriptor has been processed by the device.
1422 	 */
1423 	processed = 0;
1424 	for (;;) {
1425 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1426 		if (txcd->gen != txc->vxcr_gen)
1427 			break;
1428 		else if (!clear)
1429 			return (1);
1430 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1431 
1432 		if (++txc->vxcr_next == txc->vxcr_ndesc) {
1433 			txc->vxcr_next = 0;
1434 			txc->vxcr_gen ^= 1;
1435 		}
1436 
1437 		if (txcd->eop_idx < txr->vxtxr_next)
1438 			processed += txr->vxtxr_ndesc -
1439 			    (txr->vxtxr_next - txcd->eop_idx) + 1;
1440 		else
1441 			processed += txcd->eop_idx - txr->vxtxr_next + 1;
1442 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1443 	}
1444 
1445 	return (processed);
1446 }
1447 
1448 static int
1449 vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
1450 {
1451 	struct vmxnet3_softc *sc;
1452 	struct vmxnet3_rxqueue *rxq;
1453 	struct vmxnet3_comp_ring *rxc;
1454 	struct vmxnet3_rxcompdesc *rxcd;
1455 	int avail;
1456 	int completed_gen;
1457 #ifdef INVARIANTS
1458 	int expect_sop = 1;
1459 #endif
1460 	sc = vsc;
1461 	rxq = &sc->vmx_rxq[rxqid];
1462 	rxc = &rxq->vxrxq_comp_ring;
1463 
1464 	avail = 0;
1465 	completed_gen = rxc->vxcr_gen;
1466 	for (;;) {
1467 		rxcd = &rxc->vxcr_u.rxcd[idx];
1468 		if (rxcd->gen != completed_gen)
1469 			break;
1470 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1471 
1472 #ifdef INVARIANTS
1473 		if (expect_sop)
1474 			KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1475 		else
1476 			KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
1477 		expect_sop = rxcd->eop;
1478 #endif
1479 		if (rxcd->eop && (rxcd->len != 0))
1480 			avail++;
1481 		if (avail > budget)
1482 			break;
1483 		if (++idx == rxc->vxcr_ndesc) {
1484 			idx = 0;
1485 			completed_gen ^= 1;
1486 		}
1487 	}
1488 
1489 	return (avail);
1490 }
1491 
1492 static int
1493 vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
1494 {
1495 	struct vmxnet3_softc *sc;
1496 	if_softc_ctx_t scctx;
1497 	struct vmxnet3_rxqueue *rxq;
1498 	struct vmxnet3_comp_ring *rxc;
1499 	struct vmxnet3_rxcompdesc *rxcd;
1500 	if_rxd_frag_t frag;
1501 	int cqidx;
1502 	uint16_t total_len;
1503 	uint8_t nfrags;
1504 	uint8_t i;
1505 	uint8_t flid;
1506 
1507 	sc = vsc;
1508 	scctx = sc->vmx_scctx;
1509 	rxq = &sc->vmx_rxq[ri->iri_qsidx];
1510 	rxc = &rxq->vxrxq_comp_ring;
1511 
1512 	/*
1513 	 * Get a single packet starting at the given index in the completion
1514 	 * queue.  That we have been called indicates that
1515 	 * vmxnet3_isc_rxd_available() has already verified that either
1516 	 * there is a complete packet available starting at the given index,
1517 	 * or there are one or more zero length packets starting at the
1518 	 * given index followed by a complete packet, so no verification of
1519 	 * ownership of the descriptors (and no associated read barrier) is
1520 	 * required here.
1521 	 */
1522 	cqidx = ri->iri_cidx;
1523 	rxcd = &rxc->vxcr_u.rxcd[cqidx];
1524 	while (rxcd->len == 0) {
1525 		KASSERT(rxcd->sop && rxcd->eop,
1526 		    ("%s: zero-length packet without both sop and eop set",
1527 			__func__));
1528 		rxc->vxcr_zero_length++;
1529 		if (++cqidx == rxc->vxcr_ndesc) {
1530 			cqidx = 0;
1531 			rxc->vxcr_gen ^= 1;
1532 		}
1533 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1534 	}
1535 	KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1536 
1537 	/*
1538 	 * RSS and flow ID.
1539 	 * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should
1540 	 * be used only if the software RSS is enabled and it uses the same
1541 	 * algorithm and the hash key as the "hardware".  If the software RSS
1542 	 * is not enabled, then it's simply pointless to use those types.
1543 	 * If it's enabled but with different parameters, then hash values will
1544 	 * not match.
1545 	 */
1546 	ri->iri_flowid = rxcd->rss_hash;
1547 #ifdef RSS
1548 	if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) {
1549 		switch (rxcd->rss_type) {
1550 		case VMXNET3_RCD_RSS_TYPE_NONE:
1551 			ri->iri_flowid = ri->iri_qsidx;
1552 			ri->iri_rsstype = M_HASHTYPE_NONE;
1553 			break;
1554 		case VMXNET3_RCD_RSS_TYPE_IPV4:
1555 			ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
1556 			break;
1557 		case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
1558 			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
1559 			break;
1560 		case VMXNET3_RCD_RSS_TYPE_IPV6:
1561 			ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
1562 			break;
1563 		case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
1564 			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
1565 			break;
1566 		default:
1567 			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1568 			break;
1569 		}
1570 	} else
1571 #endif
1572 	{
1573 		switch (rxcd->rss_type) {
1574 		case VMXNET3_RCD_RSS_TYPE_NONE:
1575 			ri->iri_flowid = ri->iri_qsidx;
1576 			ri->iri_rsstype = M_HASHTYPE_NONE;
1577 			break;
1578 		default:
1579 			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1580 			break;
1581 		}
1582 	}
1583 
1584 	/*
1585 	 * The queue numbering scheme used for rxcd->qid is as follows:
1586 	 *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
1587 	 *  - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
1588 	 *
1589 	 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
1590 	 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
1591 	 * indicates command ring (and flid) 1.
1592 	 */
1593 	nfrags = 0;
1594 	total_len = 0;
1595 	do {
1596 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1597 		KASSERT(rxcd->gen == rxc->vxcr_gen,
1598 		    ("%s: generation mismatch", __func__));
1599 		KASSERT(nfrags < IFLIB_MAX_RX_SEGS,
1600 		    ("%s: too many fragments", __func__));
1601 		if (__predict_true(rxcd->len != 0)) {
1602 			frag = &ri->iri_frags[nfrags];
1603 			flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
1604 			frag->irf_flid = flid;
1605 			frag->irf_idx = rxcd->rxd_idx;
1606 			frag->irf_len = rxcd->len;
1607 			total_len += rxcd->len;
1608 			nfrags++;
1609 		} else {
1610 			rxc->vcxr_zero_length_frag++;
1611 		}
1612 		if (++cqidx == rxc->vxcr_ndesc) {
1613 			cqidx = 0;
1614 			rxc->vxcr_gen ^= 1;
1615 		}
1616 	} while (!rxcd->eop);
1617 
1618 	ri->iri_cidx = cqidx;
1619 	ri->iri_nfrags = nfrags;
1620 	ri->iri_len = total_len;
1621 
1622 	/*
1623 	 * If there's an error, the last descriptor in the packet will
1624 	 * have the error indicator set.  In this case, set all
1625 	 * fragment lengths to zero.  This will cause iflib to discard
1626 	 * the packet, but process all associated descriptors through
1627 	 * the refill mechanism.
1628 	 */
1629 	if (__predict_false(rxcd->error)) {
1630 		rxc->vxcr_pkt_errors++;
1631 		for (i = 0; i < nfrags; i++) {
1632 			frag = &ri->iri_frags[i];
1633 			frag->irf_len = 0;
1634 		}
1635 	} else {
1636 		/* Checksum offload information is in the last descriptor. */
1637 		if (!rxcd->no_csum) {
1638 			uint32_t csum_flags = 0;
1639 
1640 			if (rxcd->ipv4) {
1641 				csum_flags |= CSUM_IP_CHECKED;
1642 				if (rxcd->ipcsum_ok)
1643 					csum_flags |= CSUM_IP_VALID;
1644 			}
1645 			if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
1646 				csum_flags |= CSUM_L4_CALC;
1647 				if (rxcd->csum_ok) {
1648 					csum_flags |= CSUM_L4_VALID;
1649 					ri->iri_csum_data = 0xffff;
1650 				}
1651 			}
1652 			ri->iri_csum_flags = csum_flags;
1653 		}
1654 
1655 		/* VLAN information is in the last descriptor. */
1656 		if (rxcd->vlan) {
1657 			ri->iri_flags |= M_VLANTAG;
1658 			ri->iri_vtag = rxcd->vtag;
1659 		}
1660 	}
1661 
1662 	return (0);
1663 }
1664 
1665 static void
1666 vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
1667 {
1668 	struct vmxnet3_softc *sc;
1669 	struct vmxnet3_rxqueue *rxq;
1670 	struct vmxnet3_rxring *rxr;
1671 	struct vmxnet3_rxdesc *rxd;
1672 	uint64_t *paddrs;
1673 	int count;
1674 	int len;
1675 	int idx;
1676 	int i;
1677 	uint8_t flid;
1678 	uint8_t btype;
1679 
1680 	count = iru->iru_count;
1681 	len = iru->iru_buf_size;
1682 	flid = iru->iru_flidx;
1683 	paddrs = iru->iru_paddrs;
1684 
1685 	sc = vsc;
1686 	rxq = &sc->vmx_rxq[iru->iru_qsidx];
1687 	rxr = &rxq->vxrxq_cmd_ring[flid];
1688 	rxd = rxr->vxrxr_rxd;
1689 
1690 	/*
1691 	 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
1692 	 * command ring 1 is filled with BTYPE_BODY descriptors.
1693 	 */
1694 	btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
1695 	/*
1696 	 * The refill entries from iflib will advance monotonically,
1697 	 * but the refilled descriptors may not be contiguous due to
1698 	 * earlier skipping of descriptors by the device.  The refill
1699 	 * entries from iflib need an entire state update, while the
1700 	 * descriptors previously skipped by the device only need to
1701 	 * have their generation numbers updated.
1702 	 */
1703 	idx = rxr->vxrxr_refill_start;
1704 	i = 0;
1705 	do {
1706 		if (idx == iru->iru_idxs[i]) {
1707 			rxd[idx].addr = paddrs[i];
1708 			rxd[idx].len = len;
1709 			rxd[idx].btype = btype;
1710 			i++;
1711 		} else
1712 			rxr->vxrxr_desc_skips++;
1713 		rxd[idx].gen = rxr->vxrxr_gen;
1714 
1715 		if (++idx == rxr->vxrxr_ndesc) {
1716 			idx = 0;
1717 			rxr->vxrxr_gen ^= 1;
1718 		}
1719 	} while (i != count);
1720 	rxr->vxrxr_refill_start = idx;
1721 }
1722 
1723 static void
1724 vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
1725 {
1726 	struct vmxnet3_softc *sc;
1727 	bus_size_t r;
1728 
1729 	sc = vsc;
1730 
1731 	if (flid == 0)
1732 		r = VMXNET3_BAR0_RXH1(rxqid);
1733 	else
1734 		r = VMXNET3_BAR0_RXH2(rxqid);
1735 
1736 	vmxnet3_write_bar0(sc, r, pidx);
1737 }
1738 
1739 static int
1740 vmxnet3_legacy_intr(void *xsc)
1741 {
1742 	struct vmxnet3_softc *sc;
1743 	if_softc_ctx_t scctx;
1744 	if_ctx_t ctx;
1745 
1746 	sc = xsc;
1747 	scctx = sc->vmx_scctx;
1748 	ctx = sc->vmx_ctx;
1749 
1750 	/*
1751 	 * When there is only a single interrupt configured, this routine
1752 	 * runs in fast interrupt context, following which the rxq 0 task
1753 	 * will be enqueued.
1754 	 */
1755 	if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
1756 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
1757 			return (FILTER_HANDLED);
1758 	}
1759 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1760 		vmxnet3_intr_disable_all(ctx);
1761 
1762 	if (sc->vmx_ds->event != 0)
1763 		iflib_admin_intr_deferred(ctx);
1764 
1765 	/*
1766 	 * XXX - When there is both rxq and event activity, do we care
1767 	 * whether the rxq 0 task or the admin task re-enables the interrupt
1768 	 * first?
1769 	 */
1770 	return (FILTER_SCHEDULE_THREAD);
1771 }
1772 
1773 static int
1774 vmxnet3_rxq_intr(void *vrxq)
1775 {
1776 	struct vmxnet3_softc *sc;
1777 	struct vmxnet3_rxqueue *rxq;
1778 
1779 	rxq = vrxq;
1780 	sc = rxq->vxrxq_sc;
1781 
1782 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1783 		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
1784 
1785 	return (FILTER_SCHEDULE_THREAD);
1786 }
1787 
1788 static int
1789 vmxnet3_event_intr(void *vsc)
1790 {
1791 	struct vmxnet3_softc *sc;
1792 
1793 	sc = vsc;
1794 
1795 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1796 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
1797 
1798 	/*
1799 	 * The work will be done via vmxnet3_update_admin_status(), and the
1800 	 * interrupt will be re-enabled in vmxnet3_link_intr_enable().
1801 	 *
1802 	 * The interrupt will be re-enabled by vmxnet3_link_intr_enable().
1803 	 */
1804 	return (FILTER_SCHEDULE_THREAD);
1805 }
1806 
1807 static void
1808 vmxnet3_stop(if_ctx_t ctx)
1809 {
1810 	struct vmxnet3_softc *sc;
1811 
1812 	sc = iflib_get_softc(ctx);
1813 
1814 	sc->vmx_link_active = 0;
1815 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
1816 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
1817 }
1818 
1819 static void
1820 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
1821 {
1822 	struct vmxnet3_txring *txr;
1823 	struct vmxnet3_comp_ring *txc;
1824 
1825 	txq->vxtxq_last_flush = -1;
1826 
1827 	txr = &txq->vxtxq_cmd_ring;
1828 	txr->vxtxr_next = 0;
1829 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
1830 	/*
1831 	 * iflib has zeroed out the descriptor array during the prior attach
1832 	 * or stop
1833 	 */
1834 
1835 	txc = &txq->vxtxq_comp_ring;
1836 	txc->vxcr_next = 0;
1837 	txc->vxcr_gen = VMXNET3_INIT_GEN;
1838 	/*
1839 	 * iflib has zeroed out the descriptor array during the prior attach
1840 	 * or stop
1841 	 */
1842 }
1843 
1844 static void
1845 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
1846 {
1847 	struct vmxnet3_rxring *rxr;
1848 	struct vmxnet3_comp_ring *rxc;
1849 	int i;
1850 
1851 	/*
1852 	 * The descriptors will be populated with buffers during a
1853 	 * subsequent invocation of vmxnet3_isc_rxd_refill()
1854 	 */
1855 	for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
1856 		rxr = &rxq->vxrxq_cmd_ring[i];
1857 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
1858 		rxr->vxrxr_desc_skips = 0;
1859 		rxr->vxrxr_refill_start = 0;
1860 		/*
1861 		 * iflib has zeroed out the descriptor array during the
1862 		 * prior attach or stop
1863 		 */
1864 	}
1865 
1866 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
1867 		rxr = &rxq->vxrxq_cmd_ring[i];
1868 		rxr->vxrxr_gen = 0;
1869 		rxr->vxrxr_desc_skips = 0;
1870 		rxr->vxrxr_refill_start = 0;
1871 		bzero(rxr->vxrxr_rxd,
1872 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
1873 	}
1874 
1875 	rxc = &rxq->vxrxq_comp_ring;
1876 	rxc->vxcr_next = 0;
1877 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
1878 	rxc->vxcr_zero_length = 0;
1879 	rxc->vcxr_zero_length_frag = 0;
1880 	rxc->vxcr_pkt_errors = 0;
1881 	/*
1882 	 * iflib has zeroed out the descriptor array during the prior attach
1883 	 * or stop
1884 	 */
1885 }
1886 
1887 static void
1888 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
1889 {
1890 	if_softc_ctx_t scctx;
1891 	int q;
1892 
1893 	scctx = sc->vmx_scctx;
1894 
1895 	for (q = 0; q < scctx->isc_ntxqsets; q++)
1896 		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
1897 
1898 	for (q = 0; q < scctx->isc_nrxqsets; q++)
1899 		vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
1900 }
1901 
1902 static int
1903 vmxnet3_enable_device(struct vmxnet3_softc *sc)
1904 {
1905 	if_softc_ctx_t scctx;
1906 	int q;
1907 
1908 	scctx = sc->vmx_scctx;
1909 
1910 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
1911 		device_printf(sc->vmx_dev, "device enable command failed!\n");
1912 		return (1);
1913 	}
1914 
1915 	/* Reset the Rx queue heads. */
1916 	for (q = 0; q < scctx->isc_nrxqsets; q++) {
1917 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
1918 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
1919 	}
1920 
1921 	return (0);
1922 }
1923 
1924 static void
1925 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
1926 {
1927 	if_t ifp;
1928 
1929 	ifp = sc->vmx_ifp;
1930 
1931 	vmxnet3_set_rxfilter(sc, if_getflags(ifp));
1932 
1933 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1934 		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
1935 		    sizeof(sc->vmx_ds->vlan_filter));
1936 	else
1937 		bzero(sc->vmx_ds->vlan_filter,
1938 		    sizeof(sc->vmx_ds->vlan_filter));
1939 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
1940 }
1941 
1942 static void
1943 vmxnet3_init(if_ctx_t ctx)
1944 {
1945 	struct vmxnet3_softc *sc;
1946 
1947 	sc = iflib_get_softc(ctx);
1948 
1949 	/* Use the current MAC address. */
1950 	bcopy(if_getlladdr(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1951 	vmxnet3_set_lladdr(sc);
1952 
1953 	vmxnet3_reinit_shared_data(sc);
1954 	vmxnet3_reinit_queues(sc);
1955 
1956 	vmxnet3_enable_device(sc);
1957 
1958 	vmxnet3_reinit_rxfilters(sc);
1959 	vmxnet3_link_status(sc);
1960 }
1961 
1962 static void
1963 vmxnet3_multi_set(if_ctx_t ctx)
1964 {
1965 
1966 	vmxnet3_set_rxfilter(iflib_get_softc(ctx),
1967 	    if_getflags(iflib_get_ifp(ctx)));
1968 }
1969 
1970 static int
1971 vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
1972 {
1973 	struct vmxnet3_softc *sc;
1974 	if_softc_ctx_t scctx;
1975 
1976 	sc = iflib_get_softc(ctx);
1977 	scctx = sc->vmx_scctx;
1978 
1979 	if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
1980 		ETHER_CRC_LEN))
1981 		return (EINVAL);
1982 
1983 	/*
1984 	 * Update the max frame size so that the rx mbuf size is
1985 	 * chosen based on the new mtu during the interface init that
1986 	 * will occur after this routine returns.
1987 	 */
1988 	scctx->isc_max_frame_size = mtu +
1989 		ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
1990 	/* RX completion queue - n/a */
1991 	scctx->isc_rxd_buf_size[0] = 0;
1992 	/*
1993 	 * For header-type descriptors (used for first segment of
1994 	 * packet), let iflib determine the buffer size based on the
1995 	 * max frame size.
1996 	 */
1997 	scctx->isc_rxd_buf_size[1] = 0;
1998 	/*
1999 	 * For body-type descriptors (used for jumbo frames and LRO),
2000 	 * always use page-sized buffers.
2001 	 */
2002 	scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE;
2003 
2004 	return (0);
2005 }
2006 
2007 static void
2008 vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq * ifmr)
2009 {
2010 	struct vmxnet3_softc *sc;
2011 
2012 	sc = iflib_get_softc(ctx);
2013 
2014 	ifmr->ifm_status = IFM_AVALID;
2015 	ifmr->ifm_active = IFM_ETHER;
2016 
2017 	if (vmxnet3_link_is_up(sc) != 0) {
2018 		ifmr->ifm_status |= IFM_ACTIVE;
2019 		ifmr->ifm_active |= IFM_AUTO;
2020 	} else
2021 		ifmr->ifm_active |= IFM_NONE;
2022 }
2023 
2024 static int
2025 vmxnet3_media_change(if_ctx_t ctx)
2026 {
2027 
2028 	/* Ignore. */
2029 	return (0);
2030 }
2031 
2032 static int
2033 vmxnet3_promisc_set(if_ctx_t ctx, int flags)
2034 {
2035 
2036 	vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
2037 
2038 	return (0);
2039 }
2040 
2041 static uint64_t
2042 vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
2043 {
2044 	if_t ifp = iflib_get_ifp(ctx);
2045 
2046 	if (cnt < IFCOUNTERS)
2047 		return if_get_counter_default(ifp, cnt);
2048 
2049 	return (0);
2050 }
2051 
2052 static void
2053 vmxnet3_update_admin_status(if_ctx_t ctx)
2054 {
2055 	struct vmxnet3_softc *sc;
2056 
2057 	sc = iflib_get_softc(ctx);
2058 	if (sc->vmx_ds->event != 0)
2059 		vmxnet3_evintr(sc);
2060 
2061 	vmxnet3_refresh_host_stats(sc);
2062 }
2063 
2064 static void
2065 vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
2066 {
2067 	/* Host stats refresh is global, so just trigger it on txq 0 */
2068 	if (qid == 0)
2069 		vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
2070 }
2071 
2072 static void
2073 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
2074 {
2075 	int idx, bit;
2076 
2077 	if (tag == 0 || tag > 4095)
2078 		return;
2079 
2080 	idx = (tag >> 5) & 0x7F;
2081 	bit = tag & 0x1F;
2082 
2083 	/* Update our private VLAN bitvector. */
2084 	if (add)
2085 		sc->vmx_vlan_filter[idx] |= (1 << bit);
2086 	else
2087 		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
2088 }
2089 
2090 static void
2091 vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
2092 {
2093 
2094 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
2095 }
2096 
2097 static void
2098 vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
2099 {
2100 
2101 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
2102 }
2103 
2104 static u_int
2105 vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count)
2106 {
2107 	struct vmxnet3_softc *sc = arg;
2108 
2109 	if (count < VMXNET3_MULTICAST_MAX)
2110 		bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN],
2111 		    ETHER_ADDR_LEN);
2112 
2113 	return (1);
2114 }
2115 
2116 static void
2117 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
2118 {
2119 	if_t ifp;
2120 	struct vmxnet3_driver_shared *ds;
2121 	u_int mode;
2122 
2123 	ifp = sc->vmx_ifp;
2124 	ds = sc->vmx_ds;
2125 
2126 	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
2127 	if (flags & IFF_PROMISC)
2128 		mode |= VMXNET3_RXMODE_PROMISC;
2129 	if (flags & IFF_ALLMULTI)
2130 		mode |= VMXNET3_RXMODE_ALLMULTI;
2131 	else {
2132 		int cnt;
2133 
2134 		cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc);
2135 		if (cnt >= VMXNET3_MULTICAST_MAX) {
2136 			cnt = 0;
2137 			mode |= VMXNET3_RXMODE_ALLMULTI;
2138 		} else if (cnt > 0)
2139 			mode |= VMXNET3_RXMODE_MCAST;
2140 		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
2141 	}
2142 
2143 	ds->rxmode = mode;
2144 
2145 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
2146 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
2147 }
2148 
2149 static void
2150 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
2151 {
2152 
2153 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
2154 }
2155 
2156 static int
2157 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
2158 {
2159 	uint32_t status;
2160 
2161 	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
2162 	return !!(status & 0x1);
2163 }
2164 
2165 static void
2166 vmxnet3_link_status(struct vmxnet3_softc *sc)
2167 {
2168 	if_ctx_t ctx;
2169 	uint64_t speed;
2170 	int link;
2171 
2172 	ctx = sc->vmx_ctx;
2173 	link = vmxnet3_link_is_up(sc);
2174 	speed = IF_Gbps(10);
2175 
2176 	if (link != 0 && sc->vmx_link_active == 0) {
2177 		sc->vmx_link_active = 1;
2178 		iflib_link_state_change(ctx, LINK_STATE_UP, speed);
2179 	} else if (link == 0 && sc->vmx_link_active != 0) {
2180 		sc->vmx_link_active = 0;
2181 		iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
2182 	}
2183 }
2184 
2185 static void
2186 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
2187 {
2188 	uint32_t ml, mh;
2189 
2190 	ml  = sc->vmx_lladdr[0];
2191 	ml |= sc->vmx_lladdr[1] << 8;
2192 	ml |= sc->vmx_lladdr[2] << 16;
2193 	ml |= sc->vmx_lladdr[3] << 24;
2194 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
2195 
2196 	mh  = sc->vmx_lladdr[4];
2197 	mh |= sc->vmx_lladdr[5] << 8;
2198 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
2199 }
2200 
2201 static void
2202 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
2203 {
2204 	uint32_t ml, mh;
2205 
2206 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
2207 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
2208 
2209 	sc->vmx_lladdr[0] = ml;
2210 	sc->vmx_lladdr[1] = ml >> 8;
2211 	sc->vmx_lladdr[2] = ml >> 16;
2212 	sc->vmx_lladdr[3] = ml >> 24;
2213 	sc->vmx_lladdr[4] = mh;
2214 	sc->vmx_lladdr[5] = mh >> 8;
2215 }
2216 
2217 static void
2218 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
2219     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2220 {
2221 	struct sysctl_oid *node, *txsnode;
2222 	struct sysctl_oid_list *list, *txslist;
2223 	struct UPT1_TxStats *txstats;
2224 	char namebuf[16];
2225 
2226 	txstats = &txq->vxtxq_ts->stats;
2227 
2228 	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
2229 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2230 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
2231 	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
2232 
2233 	/*
2234 	 * Add statistics reported by the host. These are updated by the
2235 	 * iflib txq timer on txq 0.
2236 	 */
2237 	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2238 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2239 	txslist = SYSCTL_CHILDREN(txsnode);
2240 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
2241 	    &txstats->TSO_packets, "TSO packets");
2242 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
2243 	    &txstats->TSO_bytes, "TSO bytes");
2244 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2245 	    &txstats->ucast_packets, "Unicast packets");
2246 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2247 	    &txstats->ucast_bytes, "Unicast bytes");
2248 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2249 	    &txstats->mcast_packets, "Multicast packets");
2250 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2251 	    &txstats->mcast_bytes, "Multicast bytes");
2252 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
2253 	    &txstats->error, "Errors");
2254 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
2255 	    &txstats->discard, "Discards");
2256 }
2257 
2258 static void
2259 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
2260     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2261 {
2262 	struct sysctl_oid *node, *rxsnode;
2263 	struct sysctl_oid_list *list, *rxslist;
2264 	struct UPT1_RxStats *rxstats;
2265 	char namebuf[16];
2266 
2267 	rxstats = &rxq->vxrxq_rs->stats;
2268 
2269 	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
2270 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2271 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
2272 	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
2273 
2274 	/*
2275 	 * Add statistics reported by the host. These are updated by the
2276 	 * iflib txq timer on txq 0.
2277 	 */
2278 	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2279 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2280 	rxslist = SYSCTL_CHILDREN(rxsnode);
2281 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
2282 	    &rxstats->LRO_packets, "LRO packets");
2283 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
2284 	    &rxstats->LRO_bytes, "LRO bytes");
2285 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2286 	    &rxstats->ucast_packets, "Unicast packets");
2287 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2288 	    &rxstats->ucast_bytes, "Unicast bytes");
2289 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2290 	    &rxstats->mcast_packets, "Multicast packets");
2291 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2292 	    &rxstats->mcast_bytes, "Multicast bytes");
2293 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
2294 	    &rxstats->bcast_packets, "Broadcast packets");
2295 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
2296 	    &rxstats->bcast_bytes, "Broadcast bytes");
2297 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
2298 	    &rxstats->nobuffer, "No buffer");
2299 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
2300 	    &rxstats->error, "Errors");
2301 }
2302 
2303 static void
2304 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
2305     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2306 {
2307 	if_softc_ctx_t scctx;
2308 	struct sysctl_oid *node;
2309 	struct sysctl_oid_list *list;
2310 	int i;
2311 
2312 	scctx = sc->vmx_scctx;
2313 
2314 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
2315 		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
2316 
2317 		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
2318 		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2319 		list = SYSCTL_CHILDREN(node);
2320 
2321 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
2322 		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
2323 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
2324 		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
2325 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
2326 		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
2327 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
2328 		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
2329 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2330 		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0,"");
2331 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2332 		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
2333 	}
2334 
2335 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
2336 		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
2337 
2338 		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
2339 		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2340 		list = SYSCTL_CHILDREN(node);
2341 
2342 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
2343 		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
2344 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
2345 		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
2346 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD,
2347 		    &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, "");
2348 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
2349 		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
2350 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
2351 		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
2352 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD,
2353 		    &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, "");
2354 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2355 		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,"");
2356 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2357 		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
2358 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD,
2359 		    &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, "");
2360 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length_frag",
2361 		    CTLFLAG_RD, &rxq->vxrxq_comp_ring.vcxr_zero_length_frag,
2362 		    0, "");
2363 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD,
2364 		    &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, "");
2365 	}
2366 }
2367 
2368 static void
2369 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
2370     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2371 {
2372 	if_softc_ctx_t scctx;
2373 	int i;
2374 
2375 	scctx = sc->vmx_scctx;
2376 
2377 	for (i = 0; i < scctx->isc_ntxqsets; i++)
2378 		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
2379 	for (i = 0; i < scctx->isc_nrxqsets; i++)
2380 		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
2381 
2382 	vmxnet3_setup_debug_sysctl(sc, ctx, child);
2383 }
2384 
2385 static void
2386 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
2387 {
2388 	device_t dev;
2389 	struct sysctl_ctx_list *ctx;
2390 	struct sysctl_oid *tree;
2391 	struct sysctl_oid_list *child;
2392 
2393 	dev = sc->vmx_dev;
2394 	ctx = device_get_sysctl_ctx(dev);
2395 	tree = device_get_sysctl_tree(dev);
2396 	child = SYSCTL_CHILDREN(tree);
2397 
2398 	vmxnet3_setup_queue_sysctl(sc, ctx, child);
2399 }
2400 
2401 static void
2402 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2403 {
2404 
2405 	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
2406 }
2407 
2408 static uint32_t
2409 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
2410 {
2411 
2412 	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
2413 }
2414 
2415 static void
2416 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2417 {
2418 
2419 	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
2420 }
2421 
2422 static void
2423 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2424 {
2425 
2426 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
2427 }
2428 
2429 static uint32_t
2430 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2431 {
2432 
2433 	vmxnet3_write_cmd(sc, cmd);
2434 	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
2435 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2436 	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
2437 }
2438 
/*
 * Enable interrupt `irq' by writing 0 to its BAR0 interrupt mask
 * register.
 */
static void
vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
{
	const uint32_t unmasked = 0;

	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), unmasked);
}
2445 
/*
 * Disable interrupt `irq' by writing 1 to its BAR0 interrupt mask
 * register.
 */
static void
vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
{
	const uint32_t masked = 1;

	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), masked);
}
2452 
2453 static int
2454 vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2455 {
2456 	/* Not using interrupts for TX */
2457 	return (0);
2458 }
2459 
2460 static int
2461 vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2462 {
2463 	struct vmxnet3_softc *sc;
2464 
2465 	sc = iflib_get_softc(ctx);
2466 	vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
2467 	return (0);
2468 }
2469 
2470 static void
2471 vmxnet3_link_intr_enable(if_ctx_t ctx)
2472 {
2473 	struct vmxnet3_softc *sc;
2474 
2475 	sc = iflib_get_softc(ctx);
2476 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2477 }
2478 
2479 static void
2480 vmxnet3_intr_enable_all(if_ctx_t ctx)
2481 {
2482 	struct vmxnet3_softc *sc;
2483 	if_softc_ctx_t scctx;
2484 	int i;
2485 
2486 	sc = iflib_get_softc(ctx);
2487 	scctx = sc->vmx_scctx;
2488 	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
2489 	for (i = 0; i < scctx->isc_vectors; i++)
2490 		vmxnet3_enable_intr(sc, i);
2491 }
2492 
2493 static void
2494 vmxnet3_intr_disable_all(if_ctx_t ctx)
2495 {
2496 	struct vmxnet3_softc *sc;
2497 	int i;
2498 
2499 	sc = iflib_get_softc(ctx);
2500 	/*
2501 	 * iflib may invoke this routine before vmxnet3_attach_post() has
2502 	 * run, which is before the top level shared data area is
2503 	 * initialized and the device made aware of it.
2504 	 */
2505 	if (sc->vmx_ds != NULL)
2506 		sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
2507 	for (i = 0; i < VMXNET3_MAX_INTRS; i++)
2508 		vmxnet3_disable_intr(sc, i);
2509 }
2510 
2511 static bool
2512 vmxnet3_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event)
2513 {
2514 	switch (event) {
2515 	case IFLIB_RESTART_VLAN_CONFIG:
2516 		return (true);
2517 	default:
2518 		return (false);
2519 	}
2520 }
2521 
2522 /*
2523  * Since this is a purely paravirtualized device, we do not have
2524  * to worry about DMA coherency. But at times, we must make sure
2525  * both the compiler and CPU do not reorder memory operations.
2526  */
2527 static inline void
2528 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
2529 {
2530 
2531 	switch (type) {
2532 	case VMXNET3_BARRIER_RD:
2533 		rmb();
2534 		break;
2535 	case VMXNET3_BARRIER_WR:
2536 		wmb();
2537 		break;
2538 	case VMXNET3_BARRIER_RDWR:
2539 		mb();
2540 		break;
2541 	default:
2542 		panic("%s: bad barrier type %d", __func__, type);
2543 	}
2544 }
2545