1 /*-
2  * Copyright (c) 2013 Tsubai Masanari
3  * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4  * Copyright (c) 2018 Patrick Kelsey
5  *
6  * Permission to use, copy, modify, and distribute this software for any
7  * purpose with or without fee is hereby granted, provided that the above
8  * copyright notice and this permission notice appear in all copies.
9  *
10  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17  *
18  * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
19  */
20 
21 /* Driver for VMware vmxnet3 virtual ethernet devices. */
22 
23 #include <sys/cdefs.h>
24 #include "opt_rss.h"
25 
26 #include <sys/param.h>
27 #include <sys/systm.h>
28 #include <sys/kernel.h>
29 #include <sys/endian.h>
30 #include <sys/sockio.h>
31 #include <sys/mbuf.h>
32 #include <sys/malloc.h>
33 #include <sys/module.h>
34 #include <sys/socket.h>
35 #include <sys/sysctl.h>
36 #include <sys/smp.h>
37 #include <vm/vm.h>
38 #include <vm/pmap.h>
39 
40 #include <net/ethernet.h>
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/if_arp.h>
44 #include <net/if_dl.h>
45 #include <net/if_types.h>
46 #include <net/if_media.h>
47 #include <net/if_vlan_var.h>
48 #include <net/iflib.h>
49 #include <net/rss_config.h>
50 
51 #include <netinet/in_systm.h>
52 #include <netinet/in.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip6.h>
55 #include <netinet6/ip6_var.h>
56 #include <netinet/udp.h>
57 #include <netinet/tcp.h>
58 
59 #include <machine/bus.h>
60 #include <machine/resource.h>
61 #include <sys/bus.h>
62 #include <sys/rman.h>
63 
64 #include <dev/pci/pcireg.h>
65 #include <dev/pci/pcivar.h>
66 
67 #include "ifdi_if.h"
68 
69 #include "if_vmxreg.h"
70 #include "if_vmxvar.h"
71 
72 #include "opt_inet.h"
73 #include "opt_inet6.h"
74 
75 #define VMXNET3_VMWARE_VENDOR_ID	0x15AD
76 #define VMXNET3_VMWARE_DEVICE_ID	0x07B0
77 
78 static const pci_vendor_info_t vmxnet3_vendor_info_array[] =
79 {
80 	PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
81 	/* required last entry */
82 	PVID_END
83 };
84 
85 static void	*vmxnet3_register(device_t);
86 static int	vmxnet3_attach_pre(if_ctx_t);
87 static int	vmxnet3_msix_intr_assign(if_ctx_t, int);
88 static void	vmxnet3_free_irqs(struct vmxnet3_softc *);
89 static int	vmxnet3_attach_post(if_ctx_t);
90 static int	vmxnet3_detach(if_ctx_t);
91 static int	vmxnet3_shutdown(if_ctx_t);
92 static int	vmxnet3_suspend(if_ctx_t);
93 static int	vmxnet3_resume(if_ctx_t);
94 
95 static int	vmxnet3_alloc_resources(struct vmxnet3_softc *);
96 static void	vmxnet3_free_resources(struct vmxnet3_softc *);
97 static int	vmxnet3_check_version(struct vmxnet3_softc *);
98 static void	vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
99 
100 static int	vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
101 static void	vmxnet3_init_txq(struct vmxnet3_softc *, int);
102 static int	vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
103 static void	vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
104 static int	vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
105 static void	vmxnet3_queues_free(if_ctx_t);
106 
107 static int	vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
108 static void	vmxnet3_free_shared_data(struct vmxnet3_softc *);
109 static int	vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
110 static void	vmxnet3_free_mcast_table(struct vmxnet3_softc *);
111 static void	vmxnet3_init_shared_data(struct vmxnet3_softc *);
112 static void	vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
113 static void	vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
114 static int	vmxnet3_alloc_data(struct vmxnet3_softc *);
115 static void	vmxnet3_free_data(struct vmxnet3_softc *);
116 
117 static void	vmxnet3_evintr(struct vmxnet3_softc *);
118 static int	vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
119 static void	vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
120 static int	vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
121 static int	vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
122 static int	vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
123 static void	vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
124 static void	vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
125 static int	vmxnet3_legacy_intr(void *);
126 static int	vmxnet3_rxq_intr(void *);
127 static int	vmxnet3_event_intr(void *);
128 
129 static void	vmxnet3_stop(if_ctx_t);
130 
131 static void	vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
132 static void	vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
133 static void	vmxnet3_reinit_queues(struct vmxnet3_softc *);
134 static int	vmxnet3_enable_device(struct vmxnet3_softc *);
135 static void	vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
136 static void	vmxnet3_init(if_ctx_t);
137 static void	vmxnet3_multi_set(if_ctx_t);
138 static int	vmxnet3_mtu_set(if_ctx_t, uint32_t);
139 static void	vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
140 static int	vmxnet3_media_change(if_ctx_t);
141 static int	vmxnet3_promisc_set(if_ctx_t, int);
142 static uint64_t	vmxnet3_get_counter(if_ctx_t, ift_counter);
143 static void	vmxnet3_update_admin_status(if_ctx_t);
144 static void	vmxnet3_txq_timer(if_ctx_t, uint16_t);
145 
146 static void	vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
147 		    uint16_t);
148 static void	vmxnet3_vlan_register(if_ctx_t, uint16_t);
149 static void	vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
150 static void	vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);
151 
152 static void	vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
153 static int	vmxnet3_link_is_up(struct vmxnet3_softc *);
154 static void	vmxnet3_link_status(struct vmxnet3_softc *);
155 static void	vmxnet3_set_lladdr(struct vmxnet3_softc *);
156 static void	vmxnet3_get_lladdr(struct vmxnet3_softc *);
157 
158 static void	vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
159 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
160 static void	vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
161 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
162 static void	vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
163 		    struct sysctl_ctx_list *, struct sysctl_oid_list *);
164 static void	vmxnet3_setup_sysctl(struct vmxnet3_softc *);
165 
166 static void	vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
167 		    uint32_t);
168 static uint32_t	vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
169 static void	vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
170 		    uint32_t);
171 static void	vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
172 static uint32_t	vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
173 
174 static int	vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
175 static int	vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
176 static void	vmxnet3_link_intr_enable(if_ctx_t);
177 static void	vmxnet3_enable_intr(struct vmxnet3_softc *, int);
178 static void	vmxnet3_disable_intr(struct vmxnet3_softc *, int);
179 static void	vmxnet3_intr_enable_all(if_ctx_t);
180 static void	vmxnet3_intr_disable_all(if_ctx_t);
181 static bool	vmxnet3_if_needs_restart(if_ctx_t, enum iflib_restart_event);
182 
183 typedef enum {
184 	VMXNET3_BARRIER_RD,
185 	VMXNET3_BARRIER_WR,
186 	VMXNET3_BARRIER_RDWR,
187 } vmxnet3_barrier_t;
188 
189 static void	vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
190 
191 static device_method_t vmxnet3_methods[] = {
192 	/* Device interface */
193 	DEVMETHOD(device_register, vmxnet3_register),
194 	DEVMETHOD(device_probe, iflib_device_probe),
195 	DEVMETHOD(device_attach, iflib_device_attach),
196 	DEVMETHOD(device_detach, iflib_device_detach),
197 	DEVMETHOD(device_shutdown, iflib_device_shutdown),
198 	DEVMETHOD(device_suspend, iflib_device_suspend),
199 	DEVMETHOD(device_resume, iflib_device_resume),
200 	DEVMETHOD_END
201 };
202 
203 static driver_t vmxnet3_driver = {
204 	"vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
205 };
206 
207 DRIVER_MODULE(vmx, pci, vmxnet3_driver, 0, 0);
208 IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
209 MODULE_VERSION(vmx, 2);
210 
211 MODULE_DEPEND(vmx, pci, 1, 1, 1);
212 MODULE_DEPEND(vmx, ether, 1, 1, 1);
213 MODULE_DEPEND(vmx, iflib, 1, 1, 1);
214 
215 static device_method_t vmxnet3_iflib_methods[] = {
216 	DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
217 	DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
218 	DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
219 
220 	DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
221 	DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
222 	DEVMETHOD(ifdi_detach, vmxnet3_detach),
223 
224 	DEVMETHOD(ifdi_init, vmxnet3_init),
225 	DEVMETHOD(ifdi_stop, vmxnet3_stop),
226 	DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
227 	DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
228 	DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
229 	DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
230 	DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
231 	DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
232 	DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
233 	DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
234 
235 	DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
236 	DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
237 	DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
238 	DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
239 	DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
240 	DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
241 
242 	DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
243 	DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
244 
245 	DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
246 	DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
247 	DEVMETHOD(ifdi_resume, vmxnet3_resume),
248 
249 	DEVMETHOD(ifdi_needs_restart, vmxnet3_if_needs_restart),
250 
251 	DEVMETHOD_END
252 };
253 
254 static driver_t vmxnet3_iflib_driver = {
255 	"vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
256 };
257 
258 struct if_txrx vmxnet3_txrx = {
259 	.ift_txd_encap = vmxnet3_isc_txd_encap,
260 	.ift_txd_flush = vmxnet3_isc_txd_flush,
261 	.ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
262 	.ift_rxd_available = vmxnet3_isc_rxd_available,
263 	.ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
264 	.ift_rxd_refill = vmxnet3_isc_rxd_refill,
265 	.ift_rxd_flush = vmxnet3_isc_rxd_flush,
266 	.ift_legacy_intr = vmxnet3_legacy_intr
267 };
268 
269 static struct if_shared_ctx vmxnet3_sctx_init = {
270 	.isc_magic = IFLIB_MAGIC,
271 	.isc_q_align = 512,
272 
273 	.isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
274 	.isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
275 	.isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
276 	.isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
277 
278 	/*
279 	 * These values are used to configure the busdma tag used for
280 	 * receive descriptors.  Each receive descriptor only points to one
281 	 * buffer.
282 	 */
283 	.isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
284 	.isc_rx_nsegments = 1,  /* One mapping per descriptor */
285 	.isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
286 
287 	.isc_admin_intrcnt = 1,
288 	.isc_vendor_info = vmxnet3_vendor_info_array,
289 	.isc_driver_version = "2",
290 	.isc_driver = &vmxnet3_iflib_driver,
291 	.isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY,
292 
293 	/*
294 	 * Number of receive queues per receive queue set, with associated
295 	 * descriptor settings for each.
296 	 */
297 	.isc_nrxqs = 3,
298 	.isc_nfl = 2, /* one free list for each receive command queue */
299 	.isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
300 	.isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
301 	.isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
302 
303 	/*
304 	 * Number of transmit queues per transmit queue set, with associated
305 	 * descriptor settings for each.
306 	 */
307 	.isc_ntxqs = 2,
308 	.isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
309 	.isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
310 	.isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
311 };
312 
313 static void *
314 vmxnet3_register(device_t dev)
315 {
316 	return (&vmxnet3_sctx_init);
317 }
318 
319 static int
320 trunc_powerof2(int val)
321 {
322 
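	/* Round val down to the nearest power of two (e.g., 6 -> 4, 8 -> 8). */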
323 	return (1U << (fls(val) - 1));
324 }
325 
326 static int
327 vmxnet3_attach_pre(if_ctx_t ctx)
328 {
329 	device_t dev;
330 	if_softc_ctx_t scctx;
331 	struct vmxnet3_softc *sc;
332 	uint32_t intr_config;
333 	int error;
334 
335 	dev = iflib_get_dev(ctx);
336 	sc = iflib_get_softc(ctx);
337 	sc->vmx_dev = dev;
338 	sc->vmx_ctx = ctx;
339 	sc->vmx_sctx = iflib_get_sctx(ctx);
340 	sc->vmx_scctx = iflib_get_softc_ctx(ctx);
341 	sc->vmx_ifp = iflib_get_ifp(ctx);
342 	sc->vmx_media = iflib_get_media(ctx);
343 	scctx = sc->vmx_scctx;
344 
345 	scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
346 	scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
347 	/* isc_tx_tso_size_max doesn't include possible vlan header */
348 	scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
349 	scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
350 	scctx->isc_txrx = &vmxnet3_txrx;
351 
352 	/* If 0, the iflib tunable was not set, so set to the default */
353 	if (scctx->isc_nrxqsets == 0)
354 		scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
355 	scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets);
356 	scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
357 	scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max);
358 
359 	/* If 0, the iflib tunable was not set, so set to the default */
360 	if (scctx->isc_ntxqsets == 0)
361 		scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
362 	scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets);
363 	scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
364 	scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max);
365 
366 	/*
367 	 * Enforce that the transmit completion queue descriptor count is
368 	 * the same as the transmit command queue descriptor count.
369 	 */
370 	scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
371 	scctx->isc_txqsizes[0] =
372 	    sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
373 	scctx->isc_txqsizes[1] =
374 	    sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
375 
376 	/*
377 	 * Enforce that the receive completion queue descriptor count is the
378 	 * sum of the receive command queue descriptor counts, and that the
379 	 * second receive command queue descriptor count is the same as the
380 	 * first one.
381 	 */
382 	scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
383 	scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
384 	scctx->isc_rxqsizes[0] =
385 	    sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
386 	scctx->isc_rxqsizes[1] =
387 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
388 	scctx->isc_rxqsizes[2] =
389 	    sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
390 
391 	/*
392 	 * Initialize the max frame size and descriptor queue buffer
393 	 * sizes.
394 	 */
395 	vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp));
396 
397 	scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
398 
399 	/* Map PCI BARs */
400 	error = vmxnet3_alloc_resources(sc);
401 	if (error)
402 		goto fail;
403 
404 	/* Check device versions */
405 	error = vmxnet3_check_version(sc);
406 	if (error)
407 		goto fail;
408 
409 	/*
410 	 * The interrupt mode can be set in the hypervisor configuration via
411 	 * the parameter ethernet<N>.intrMode.
412 	 */
413 	intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
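	/* Bits 1:0 hold the requested interrupt type; bits 3:2 hold the mask mode. */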
414 	sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
415 
416 	/*
417 	 * Configure the softc context so that iflib attempts the interrupt
418 	 * mode indicated by intr_config.  iflib will follow its usual
419 	 * fallback path MSI-X -> MSI -> LEGACY, starting from that mode.
421 	 */
422 	switch (intr_config & 0x03) {
423 	case VMXNET3_IT_AUTO:
424 	case VMXNET3_IT_MSIX:
425 		scctx->isc_msix_bar = pci_msix_table_bar(dev);
426 		break;
427 	case VMXNET3_IT_MSI:
428 		scctx->isc_msix_bar = -1;
429 		scctx->isc_disable_msix = 1;
430 		break;
431 	case VMXNET3_IT_LEGACY:
432 		scctx->isc_msix_bar = 0;
433 		break;
434 	}
435 
436 	scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
437 	scctx->isc_capabilities = scctx->isc_capenable =
438 	    IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
439 	    IFCAP_TSO4 | IFCAP_TSO6 |
440 	    IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
441 	    IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
442 	    IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
443 	    IFCAP_JUMBO_MTU;
444 
445 	/* These capabilities are not enabled by default. */
446 	scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
447 
448 	vmxnet3_get_lladdr(sc);
449 	iflib_set_mac(ctx, sc->vmx_lladdr);
450 
451 	return (0);
452 fail:
453 	/*
454 	 * We must completely clean up anything allocated above as iflib
455 	 * will not invoke any other driver entry points as a result of this
456 	 * failure.
457 	 */
458 	vmxnet3_free_resources(sc);
459 
460 	return (error);
461 }
462 
463 static int
464 vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
465 {
466 	struct vmxnet3_softc *sc;
467 	if_softc_ctx_t scctx;
468 	struct vmxnet3_rxqueue *rxq;
469 	int error;
470 	int i;
471 	char irq_name[16];
472 
473 	sc = iflib_get_softc(ctx);
474 	scctx = sc->vmx_scctx;
475 
476 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
477 		snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
478 
479 		rxq = &sc->vmx_rxq[i];
480 		error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
481 		    IFLIB_INTR_RXTX, vmxnet3_rxq_intr, rxq, i, irq_name);
482 		if (error) {
483 			device_printf(iflib_get_dev(ctx),
484 			    "Failed to register rxq %d interrupt handler\n", i);
485 			return (error);
486 		}
487 	}
488 
489 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
490 		snprintf(irq_name, sizeof(irq_name), "txq%d", i);
491 
492 		/*
493 		 * Don't provide the corresponding rxq irq for reference -
494 		 * we want the transmit task to be attached to a task queue
495 		 * that is different from the one used by the corresponding
496 		 * rxq irq.  That is because the TX doorbell writes are very
497 		 * expensive as virtualized MMIO operations, so we want to
498 		 * be able to defer them to another core when possible so
499 		 * that they don't steal receive processing cycles during
500 		 * stack turnarounds like TCP ACK generation.  The other
501 		 * piece to this approach is enabling the iflib abdicate
502 		 * option (currently via an interface-specific
503 		 * tunable/sysctl).
504 		 */
505 		iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
506 		    irq_name);
507 	}
508 
509 	error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
510 	    scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
511 	    "event");
512 	if (error) {
513 		device_printf(iflib_get_dev(ctx),
514 		    "Failed to register event interrupt handler\n");
515 		return (error);
516 	}
517 
518 	return (0);
519 }
520 
521 static void
522 vmxnet3_free_irqs(struct vmxnet3_softc *sc)
523 {
524 	if_softc_ctx_t scctx;
525 	struct vmxnet3_rxqueue *rxq;
526 	int i;
527 
528 	scctx = sc->vmx_scctx;
529 
530 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
531 		rxq = &sc->vmx_rxq[i];
532 		iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
533 	}
534 
535 	iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
536 }
537 
538 static int
539 vmxnet3_attach_post(if_ctx_t ctx)
540 {
541 	if_softc_ctx_t scctx;
542 	struct vmxnet3_softc *sc;
543 	int error;
544 
545 	scctx = iflib_get_softc_ctx(ctx);
546 	sc = iflib_get_softc(ctx);
547 
548 	if (scctx->isc_nrxqsets > 1)
549 		sc->vmx_flags |= VMXNET3_FLAG_RSS;
550 
551 	error = vmxnet3_alloc_data(sc);
552 	if (error)
553 		goto fail;
554 
555 	vmxnet3_set_interrupt_idx(sc);
556 	vmxnet3_setup_sysctl(sc);
557 
558 	ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
559 	ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
560 
561 fail:
562 	return (error);
563 }
564 
565 static int
566 vmxnet3_detach(if_ctx_t ctx)
567 {
568 	struct vmxnet3_softc *sc;
569 
570 	sc = iflib_get_softc(ctx);
571 
572 	vmxnet3_free_irqs(sc);
573 	vmxnet3_free_data(sc);
574 	vmxnet3_free_resources(sc);
575 
576 	return (0);
577 }
578 
579 static int
580 vmxnet3_shutdown(if_ctx_t ctx)
581 {
582 
583 	return (0);
584 }
585 
586 static int
587 vmxnet3_suspend(if_ctx_t ctx)
588 {
589 
590 	return (0);
591 }
592 
593 static int
594 vmxnet3_resume(if_ctx_t ctx)
595 {
596 
597 	return (0);
598 }
599 
600 static int
601 vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
602 {
603 	device_t dev;
604 	int rid;
605 
606 	dev = sc->vmx_dev;
607 
608 	rid = PCIR_BAR(0);
609 	sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
610 	    RF_ACTIVE);
611 	if (sc->vmx_res0 == NULL) {
612 		device_printf(dev,
613 		    "could not map BAR0 memory\n");
614 		return (ENXIO);
615 	}
616 
617 	sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
618 	sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
619 
620 	rid = PCIR_BAR(1);
621 	sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
622 	    RF_ACTIVE);
623 	if (sc->vmx_res1 == NULL) {
624 		device_printf(dev,
625 		    "could not map BAR1 memory\n");
626 		return (ENXIO);
627 	}
628 
629 	sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
630 	sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
631 
632 	return (0);
633 }
634 
635 static void
636 vmxnet3_free_resources(struct vmxnet3_softc *sc)
637 {
638 	device_t dev;
639 
640 	dev = sc->vmx_dev;
641 
642 	if (sc->vmx_res0 != NULL) {
643 		bus_release_resource(dev, SYS_RES_MEMORY,
644 		    rman_get_rid(sc->vmx_res0), sc->vmx_res0);
645 		sc->vmx_res0 = NULL;
646 	}
647 
648 	if (sc->vmx_res1 != NULL) {
649 		bus_release_resource(dev, SYS_RES_MEMORY,
650 		    rman_get_rid(sc->vmx_res1), sc->vmx_res1);
651 		sc->vmx_res1 = NULL;
652 	}
653 }
654 
655 static int
656 vmxnet3_check_version(struct vmxnet3_softc *sc)
657 {
658 	device_t dev;
659 	uint32_t version;
660 
661 	dev = sc->vmx_dev;
662 
663 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
664 	if ((version & 0x01) == 0) {
665 		device_printf(dev, "unsupported hardware version %#x\n",
666 		    version);
667 		return (ENOTSUP);
668 	}
669 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
670 
671 	version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
672 	if ((version & 0x01) == 0) {
673 		device_printf(dev, "unsupported UPT version %#x\n", version);
674 		return (ENOTSUP);
675 	}
676 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
677 
678 	return (0);
679 }
680 
681 static void
682 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
683 {
684 	if_softc_ctx_t scctx;
685 	struct vmxnet3_txqueue *txq;
686 	struct vmxnet3_txq_shared *txs;
687 	struct vmxnet3_rxqueue *rxq;
688 	struct vmxnet3_rxq_shared *rxs;
689 	int intr_idx;
690 	int i;
691 
692 	scctx = sc->vmx_scctx;
693 
694 	/*
695 	 * There is always one interrupt per receive queue, assigned
696 	 * starting with the first interrupt.  When there is only one
697 	 * interrupt available, the event interrupt shares the receive queue
698 	 * interrupt, otherwise it uses the interrupt following the last
699 	 * receive queue interrupt.  Transmit queues are not assigned
700 	 * interrupts, so they are given indexes beyond the indexes that
701 	 * correspond to the real interrupts.
702 	 */
703 
704 	/* The event interrupt is always the last vector. */
705 	sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
706 
707 	intr_idx = 0;
708 	for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
709 		rxq = &sc->vmx_rxq[i];
710 		rxs = rxq->vxrxq_rs;
711 		rxq->vxrxq_intr_idx = intr_idx;
712 		rxs->intr_idx = rxq->vxrxq_intr_idx;
713 	}
714 
715 	/*
716 	 * Assign the tx queues interrupt indexes above what we are actually
717 	 * using.  These interrupts will never be enabled.
718 	 */
719 	intr_idx = scctx->isc_vectors;
720 	for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
721 		txq = &sc->vmx_txq[i];
722 		txs = txq->vxtxq_ts;
723 		txq->vxtxq_intr_idx = intr_idx;
724 		txs->intr_idx = txq->vxtxq_intr_idx;
725 	}
726 }
727 
728 static int
729 vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
730 {
731 	if_softc_ctx_t scctx;
732 	int size;
733 	int error;
734 
735 	scctx = sc->vmx_scctx;
736 
737 	/*
738 	 * The txq and rxq shared data areas must be allocated contiguously
739 	 * as vmxnet3_driver_shared contains only a single address member
740 	 * for the shared queue data area.
741 	 */
742 	size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
743 	    scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
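	/* The 128-byte alignment matches the queue descriptor alignment the device expects. */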
744 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
745 	if (error) {
746 		device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
747 		return (error);
748 	}
749 
750 	return (0);
751 }
752 
753 static void
754 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
755 {
756 	struct vmxnet3_txqueue *txq;
757 	struct vmxnet3_comp_ring *txc;
758 	struct vmxnet3_txring *txr;
759 	if_softc_ctx_t scctx;
760 
761 	txq = &sc->vmx_txq[q];
762 	txc = &txq->vxtxq_comp_ring;
763 	txr = &txq->vxtxq_cmd_ring;
764 	scctx = sc->vmx_scctx;
765 
766 	snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
767 	    device_get_nameunit(sc->vmx_dev), q);
768 
769 	txq->vxtxq_sc = sc;
770 	txq->vxtxq_id = q;
771 	txc->vxcr_ndesc = scctx->isc_ntxd[0];
772 	txr->vxtxr_ndesc = scctx->isc_ntxd[1];
773 }
774 
775 static int
776 vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
777     int ntxqs, int ntxqsets)
778 {
779 	struct vmxnet3_softc *sc;
780 	int q;
781 	int error;
782 	caddr_t kva;
783 
784 	sc = iflib_get_softc(ctx);
785 
786 	/* Allocate the array of transmit queues */
787 	sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
788 	    ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
789 	if (sc->vmx_txq == NULL)
790 		return (ENOMEM);
791 
792 	/* Initialize driver state for each transmit queue */
793 	for (q = 0; q < ntxqsets; q++)
794 		vmxnet3_init_txq(sc, q);
795 
796 	/*
797 	 * Allocate queue state that is shared with the device.  This check
798 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
799 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
800 	 * order iflib invokes those routines in.
801 	 */
802 	if (sc->vmx_qs_dma.idi_size == 0) {
803 		error = vmxnet3_queues_shared_alloc(sc);
804 		if (error)
805 			return (error);
806 	}
807 
808 	kva = sc->vmx_qs_dma.idi_vaddr;
809 	for (q = 0; q < ntxqsets; q++) {
810 		sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
811 		kva += sizeof(struct vmxnet3_txq_shared);
812 	}
813 
814 	/* Record descriptor ring vaddrs and paddrs */
815 	for (q = 0; q < ntxqsets; q++) {
816 		struct vmxnet3_txqueue *txq;
817 		struct vmxnet3_txring *txr;
818 		struct vmxnet3_comp_ring *txc;
819 
820 		txq = &sc->vmx_txq[q];
821 		txc = &txq->vxtxq_comp_ring;
822 		txr = &txq->vxtxq_cmd_ring;
823 
824 		/* Completion ring */
825 		txc->vxcr_u.txcd =
826 		    (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
827 		txc->vxcr_paddr = paddrs[q * ntxqs + 0];
828 
829 		/* Command ring */
830 		txr->vxtxr_txd =
831 		    (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
832 		txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
833 	}
834 
835 	return (0);
836 }
837 
838 static void
839 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
840 {
841 	struct vmxnet3_rxqueue *rxq;
842 	struct vmxnet3_comp_ring *rxc;
843 	struct vmxnet3_rxring *rxr;
844 	if_softc_ctx_t scctx;
845 	int i;
846 
847 	rxq = &sc->vmx_rxq[q];
848 	rxc = &rxq->vxrxq_comp_ring;
849 	scctx = sc->vmx_scctx;
850 
851 	snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
852 	    device_get_nameunit(sc->vmx_dev), q);
853 
854 	rxq->vxrxq_sc = sc;
855 	rxq->vxrxq_id = q;
856 
857 	/*
858 	 * First rxq is the completion queue, so there are nrxqs - 1 command
859 	 * rings starting at iflib queue id 1.
860 	 */
861 	rxc->vxcr_ndesc = scctx->isc_nrxd[0];
862 	for (i = 0; i < nrxqs - 1; i++) {
863 		rxr = &rxq->vxrxq_cmd_ring[i];
864 		rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
865 	}
866 }
867 
868 static int
869 vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
870     int nrxqs, int nrxqsets)
871 {
872 	struct vmxnet3_softc *sc;
873 	if_softc_ctx_t scctx;
874 	int q;
875 	int i;
876 	int error;
877 	caddr_t kva;
878 
879 	sc = iflib_get_softc(ctx);
880 	scctx = sc->vmx_scctx;
881 
882 	/* Allocate the array of receive queues */
883 	sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
884 	    nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
885 	if (sc->vmx_rxq == NULL)
886 		return (ENOMEM);
887 
888 	/* Initialize driver state for each receive queue */
889 	for (q = 0; q < nrxqsets; q++)
890 		vmxnet3_init_rxq(sc, q, nrxqs);
891 
892 	/*
893 	 * Allocate queue state that is shared with the device.  This check
894 	 * and call is performed in both vmxnet3_tx_queues_alloc() and
895 	 * vmxnet3_rx_queues_alloc() so that we don't have to care which
896 	 * order iflib invokes those routines in.
897 	 */
898 	if (sc->vmx_qs_dma.idi_size == 0) {
899 		error = vmxnet3_queues_shared_alloc(sc);
900 		if (error)
901 			return (error);
902 	}
903 
904 	kva = sc->vmx_qs_dma.idi_vaddr +
905 	    scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
906 	for (q = 0; q < nrxqsets; q++) {
907 		sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
908 		kva += sizeof(struct vmxnet3_rxq_shared);
909 	}
910 
911 	/* Record descriptor ring vaddrs and paddrs */
912 	for (q = 0; q < nrxqsets; q++) {
913 		struct vmxnet3_rxqueue *rxq;
914 		struct vmxnet3_rxring *rxr;
915 		struct vmxnet3_comp_ring *rxc;
916 
917 		rxq = &sc->vmx_rxq[q];
918 		rxc = &rxq->vxrxq_comp_ring;
919 
920 		/* Completion ring */
921 		rxc->vxcr_u.rxcd =
922 		    (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
923 		rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
924 
925 		/* Command ring(s) */
926 		for (i = 0; i < nrxqs - 1; i++) {
927 			rxr = &rxq->vxrxq_cmd_ring[i];
928 
929 			rxr->vxrxr_rxd =
930 			    (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
931 			rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
932 		}
933 	}
934 
935 	return (0);
936 }
937 
938 static void
939 vmxnet3_queues_free(if_ctx_t ctx)
940 {
941 	struct vmxnet3_softc *sc;
942 
943 	sc = iflib_get_softc(ctx);
944 
945 	/* Free queue state area that is shared with the device */
946 	if (sc->vmx_qs_dma.idi_size != 0) {
947 		iflib_dma_free(&sc->vmx_qs_dma);
948 		sc->vmx_qs_dma.idi_size = 0;
949 	}
950 
951 	/* Free array of receive queues */
952 	if (sc->vmx_rxq != NULL) {
953 		free(sc->vmx_rxq, M_DEVBUF);
954 		sc->vmx_rxq = NULL;
955 	}
956 
957 	/* Free array of transmit queues */
958 	if (sc->vmx_txq != NULL) {
959 		free(sc->vmx_txq, M_DEVBUF);
960 		sc->vmx_txq = NULL;
961 	}
962 }
963 
964 static int
965 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
966 {
967 	device_t dev;
968 	size_t size;
969 	int error;
970 
971 	dev = sc->vmx_dev;
972 
973 	/* Top level state structure shared with the device */
974 	size = sizeof(struct vmxnet3_driver_shared);
975 	error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
976 	if (error) {
977 		device_printf(dev, "cannot alloc shared memory\n");
978 		return (error);
979 	}
980 	sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
981 
982 	/* RSS table state shared with the device */
983 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
984 		size = sizeof(struct vmxnet3_rss_shared);
985 		error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
986 		    &sc->vmx_rss_dma, 0);
987 		if (error) {
988 			device_printf(dev, "cannot alloc rss shared memory\n");
989 			return (error);
990 		}
991 		sc->vmx_rss =
992 		    (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
993 	}
994 
995 	return (0);
996 }
997 
998 static void
999 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1000 {
1001 
1002 	/* Free RSS table state shared with the device */
1003 	if (sc->vmx_rss != NULL) {
1004 		iflib_dma_free(&sc->vmx_rss_dma);
1005 		sc->vmx_rss = NULL;
1006 	}
1007 
1008 	/* Free top level state structure shared with the device */
1009 	if (sc->vmx_ds != NULL) {
1010 		iflib_dma_free(&sc->vmx_ds_dma);
1011 		sc->vmx_ds = NULL;
1012 	}
1013 }
1014 
1015 static int
1016 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1017 {
1018 	int error;
1019 
1020 	/* Multicast table state shared with the device */
1021 	error = iflib_dma_alloc_align(sc->vmx_ctx,
1022 	    VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
1023 	if (error)
1024 		device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1025 	else
1026 		sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
1027 
1028 	return (error);
1029 }
1030 
1031 static void
1032 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1033 {
1034 
1035 	/* Free multicast table state shared with the device */
1036 	if (sc->vmx_mcast != NULL) {
1037 		iflib_dma_free(&sc->vmx_mcast_dma);
1038 		sc->vmx_mcast = NULL;
1039 	}
1040 }
1041 
1042 static void
1043 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1044 {
1045 	struct vmxnet3_driver_shared *ds;
1046 	if_softc_ctx_t scctx;
1047 	struct vmxnet3_txqueue *txq;
1048 	struct vmxnet3_txq_shared *txs;
1049 	struct vmxnet3_rxqueue *rxq;
1050 	struct vmxnet3_rxq_shared *rxs;
1051 	int i;
1052 
1053 	ds = sc->vmx_ds;
1054 	scctx = sc->vmx_scctx;
1055 
1056 	/*
1057 	 * Initialize the fields of the shared data that remain the same across
1058 	 * reinits.  Note the shared data is zeroed when allocated.
1059 	 */
1060 
1061 	ds->magic = VMXNET3_REV1_MAGIC;
1062 
1063 	/* DriverInfo */
1064 	ds->version = VMXNET3_DRIVER_VERSION;
1065 	ds->guest = VMXNET3_GOS_FREEBSD |
1066 #ifdef __LP64__
1067 	    VMXNET3_GOS_64BIT;
1068 #else
1069 	    VMXNET3_GOS_32BIT;
1070 #endif
1071 	ds->vmxnet3_revision = 1;
1072 	ds->upt_version = 1;
1073 
1074 	/* Misc. conf */
1075 	ds->driver_data = vtophys(sc);
1076 	ds->driver_data_len = sizeof(struct vmxnet3_softc);
1077 	ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
1078 	ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
1079 	ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
1080 
1081 	/* RSS conf */
1082 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1083 		ds->rss.version = 1;
1084 		ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
1085 		ds->rss.len = sc->vmx_rss_dma.idi_size;
1086 	}
1087 
1088 	/* Interrupt control. */
1089 	ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1090 	/*
1091 	 * Total number of interrupt indexes we are using in the shared
1092 	 * config data, even though we don't actually allocate interrupt
1093 	 * resources for the tx queues.  Some versions of the device will
1094 	 * fail to initialize successfully if interrupt indexes are used in
1095 	 * the shared config that exceed the number of interrupts configured
1096 	 * here.
1097 	 */
1098 	ds->nintr = (scctx->isc_vectors == 1) ?
1099 	    2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
1100 	ds->evintr = sc->vmx_event_intr_idx;
1101 	ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1102 
1103 	for (i = 0; i < ds->nintr; i++)
1104 		ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1105 
1106 	/* Receive filter. */
1107 	ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
1108 	ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
1109 
1110 	/* Tx queues */
1111 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
1112 		txq = &sc->vmx_txq[i];
1113 		txs = txq->vxtxq_ts;
1114 
1115 		txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
1116 		txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1117 		txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
1118 		txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1119 		txs->driver_data = vtophys(txq);
1120 		txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1121 	}
1122 
1123 	/* Rx queues */
1124 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
1125 		rxq = &sc->vmx_rxq[i];
1126 		rxs = rxq->vxrxq_rs;
1127 
1128 		rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
1129 		rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1130 		rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
1131 		rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1132 		rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
1133 		rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1134 		rxs->driver_data = vtophys(rxq);
1135 		rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1136 	}
1137 }
1138 
1139 static void
1140 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1141 {
1142 	if_softc_ctx_t scctx;
1143 	struct vmxnet3_rss_shared *rss;
1144 #ifdef RSS
1145 	uint8_t rss_algo;
1146 #endif
1147 	int i;
1148 
1149 	scctx = sc->vmx_scctx;
1150 	rss = sc->vmx_rss;
1151 
1152 	rss->hash_type =
1153 	    UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1154 	    UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1155 	rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1156 	rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1157 	rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1158 	/*
1159 	 * Always use the kernel RSS key so hashing stays consistent.  If
1160 	 * software RSS is configured to use Toeplitz hashing and RSS CPU
1161 	 * steering is available, mirror the software RSS indirection table.
1162 	 * Otherwise fall back to simple round-robin distribution, but still
1163 	 * report the hash as opaque so the stack disengages from software RSS.
1164 	 */
1165 	rss_getkey(rss->hash_key);
1166 
1167 #ifdef RSS
1168 	rss_algo = rss_gethashalgo();
1169 	if (rss_algo == RSS_HASH_TOEPLITZ) {
1170 		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) {
1171 			rss->ind_table[i] = rss_get_indirection_to_bucket(i) %
1172 			    scctx->isc_nrxqsets;
1173 		}
1174 		sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS;
1175 	} else
1176 #endif
1177 	{
1178 		for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1179 			rss->ind_table[i] = i % scctx->isc_nrxqsets;
1180 		sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS;
1181 	}
1182 }
1183 
1184 static void
1185 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1186 {
1187 	if_t ifp;
1188 	struct vmxnet3_driver_shared *ds;
1189 	if_softc_ctx_t scctx;
1190 
1191 	ifp = sc->vmx_ifp;
1192 	ds = sc->vmx_ds;
1193 	scctx = sc->vmx_scctx;
1194 
1195 	ds->mtu = if_getmtu(ifp);
1196 	ds->ntxqueue = scctx->isc_ntxqsets;
1197 	ds->nrxqueue = scctx->isc_nrxqsets;
1198 
1199 	ds->upt_features = 0;
1200 	if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1201 		ds->upt_features |= UPT1_F_CSUM;
1202 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING)
1203 		ds->upt_features |= UPT1_F_VLAN;
1204 	if (if_getcapenable(ifp) & IFCAP_LRO)
1205 		ds->upt_features |= UPT1_F_LRO;
1206 
1207 	if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1208 		ds->upt_features |= UPT1_F_RSS;
1209 		vmxnet3_reinit_rss_shared_data(sc);
1210 	}
1211 
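	/* Hand the device the driver shared area physical address (low, then high 32 bits). */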
1212 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
1213 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1214 	    (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
1215 }
1216 
1217 static int
1218 vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1219 {
1220 	int error;
1221 
1222 	error = vmxnet3_alloc_shared_data(sc);
1223 	if (error)
1224 		return (error);
1225 
1226 	error = vmxnet3_alloc_mcast_table(sc);
1227 	if (error)
1228 		return (error);
1229 
1230 	vmxnet3_init_shared_data(sc);
1231 
1232 	return (0);
1233 }
1234 
1235 static void
1236 vmxnet3_free_data(struct vmxnet3_softc *sc)
1237 {
1238 
1239 	vmxnet3_free_mcast_table(sc);
1240 	vmxnet3_free_shared_data(sc);
1241 }
1242 
1243 static void
1244 vmxnet3_evintr(struct vmxnet3_softc *sc)
1245 {
1246 	device_t dev;
1247 	struct vmxnet3_txq_shared *ts;
1248 	struct vmxnet3_rxq_shared *rs;
1249 	uint32_t event;
1250 
1251 	dev = sc->vmx_dev;
1252 
1253 	/* Clear events. */
1254 	event = sc->vmx_ds->event;
1255 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1256 
1257 	if (event & VMXNET3_EVENT_LINK)
1258 		vmxnet3_link_status(sc);
1259 
1260 	if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1261 		vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1262 		ts = sc->vmx_txq[0].vxtxq_ts;
1263 		if (ts->stopped != 0)
1264 			device_printf(dev, "Tx queue error %#x\n", ts->error);
1265 		rs = sc->vmx_rxq[0].vxrxq_rs;
1266 		if (rs->stopped != 0)
1267 			device_printf(dev, "Rx queue error %#x\n", rs->error);
1268 
1269 		/* XXX - rely on the iflib watchdog to reset us? */
1270 		device_printf(dev, "Rx/Tx queue error event ... "
1271 		    "waiting for iflib watchdog reset\n");
1272 	}
1273 
1274 	if (event & VMXNET3_EVENT_DIC)
1275 		device_printf(dev, "device implementation change event\n");
1276 	if (event & VMXNET3_EVENT_DEBUG)
1277 		device_printf(dev, "debug event\n");
1278 }
1279 
1280 static int
1281 vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
1282 {
1283 	struct vmxnet3_softc *sc;
1284 	struct vmxnet3_txqueue *txq;
1285 	struct vmxnet3_txring *txr;
1286 	struct vmxnet3_txdesc *txd, *sop;
1287 	bus_dma_segment_t *segs;
1288 	int nsegs;
1289 	int pidx;
1290 	int hdrlen;
1291 	int i;
1292 	int gen;
1293 
1294 	sc = vsc;
1295 	txq = &sc->vmx_txq[pi->ipi_qsidx];
1296 	txr = &txq->vxtxq_cmd_ring;
1297 	segs = pi->ipi_segs;
1298 	nsegs = pi->ipi_nsegs;
1299 	pidx = pi->ipi_pidx;
1300 
1301 	KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
1302 	    ("%s: packet with too many segments %d", __func__, nsegs));
1303 
1304 	sop = &txr->vxtxr_txd[pidx];
1305 	gen = txr->vxtxr_gen ^ 1;	/* Still owned by the CPU for now */
1306 
1307 	for (i = 0; i < nsegs; i++) {
1308 		txd = &txr->vxtxr_txd[pidx];
1309 
1310 		txd->addr = segs[i].ds_addr;
1311 		txd->len = segs[i].ds_len;
1312 		txd->gen = gen;
1313 		txd->dtype = 0;
1314 		txd->offload_mode = VMXNET3_OM_NONE;
1315 		txd->offload_pos = 0;
1316 		txd->hlen = 0;
1317 		txd->eop = 0;
1318 		txd->compreq = 0;
1319 		txd->vtag_mode = 0;
1320 		txd->vtag = 0;
1321 
1322 		if (++pidx == txr->vxtxr_ndesc) {
1323 			pidx = 0;
1324 			txr->vxtxr_gen ^= 1;
1325 		}
1326 		gen = txr->vxtxr_gen;
1327 	}
1328 	txd->eop = 1;
1329 	txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
1330 	pi->ipi_new_pidx = pidx;
1331 
1332 	/*
1333 	 * VLAN
1334 	 */
1335 	if (pi->ipi_mflags & M_VLANTAG) {
1336 		sop->vtag_mode = 1;
1337 		sop->vtag = pi->ipi_vtag;
1338 	}
1339 
1340 	/*
1341 	 * TSO and checksum offloads
1342 	 */
1343 	hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
1344 	if (pi->ipi_csum_flags & CSUM_TSO) {
1345 		sop->offload_mode = VMXNET3_OM_TSO;
1346 		sop->hlen = hdrlen + pi->ipi_tcp_hlen;
1347 		sop->offload_pos = pi->ipi_tso_segsz;
1348 	} else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
1349 	    VMXNET3_CSUM_OFFLOAD_IPV6)) {
1350 		sop->offload_mode = VMXNET3_OM_CSUM;
1351 		sop->hlen = hdrlen;
1352 		sop->offload_pos = hdrlen +
1353 		    ((pi->ipi_ipproto == IPPROTO_TCP) ?
1354 			offsetof(struct tcphdr, th_sum) :
1355 			offsetof(struct udphdr, uh_sum));
1356 	}
1357 
1358 	/* Finally, change the ownership. */
1359 	vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
1360 	sop->gen ^= 1;
1361 
1362 	return (0);
1363 }
1364 
1365 static void
1366 vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
1367 {
1368 	struct vmxnet3_softc *sc;
1369 	struct vmxnet3_txqueue *txq;
1370 
1371 	sc = vsc;
1372 	txq = &sc->vmx_txq[txqid];
1373 
1374 	/*
1375 	 * pidx is what we last set ipi_new_pidx to in
1376 	 * vmxnet3_isc_txd_encap()
1377 	 */
1378 
1379 	/*
1380 	 * Avoid expensive register updates if the flush request is
1381 	 * redundant.
1382 	 */
1383 	if (txq->vxtxq_last_flush == pidx)
1384 		return;
1385 	txq->vxtxq_last_flush = pidx;
1386 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
1387 }
1388 
1389 static int
1390 vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
1391 {
1392 	struct vmxnet3_softc *sc;
1393 	struct vmxnet3_txqueue *txq;
1394 	struct vmxnet3_comp_ring *txc;
1395 	struct vmxnet3_txcompdesc *txcd;
1396 	struct vmxnet3_txring *txr;
1397 	int processed;
1398 
1399 	sc = vsc;
1400 	txq = &sc->vmx_txq[txqid];
1401 	txc = &txq->vxtxq_comp_ring;
1402 	txr = &txq->vxtxq_cmd_ring;
1403 
1404 	/*
1405 	 * If clear is true, we need to report the number of TX command ring
1406 	 * descriptors that have been processed by the device.  If clear is
1407 	 * false, we just need to report whether or not at least one TX
1408 	 * command ring descriptor has been processed by the device.
1409 	 */
1410 	processed = 0;
1411 	for (;;) {
1412 		txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1413 		if (txcd->gen != txc->vxcr_gen)
1414 			break;
1415 		else if (!clear)
1416 			return (1);
1417 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1418 
1419 		MPASS(txc->vxcr_next < txc->vxcr_ndesc);
1420 		if (++txc->vxcr_next >= txc->vxcr_ndesc) {
1421 			txc->vxcr_next = 0;
1422 			txc->vxcr_gen ^= 1;
1423 		}
1424 
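		/*
		 * eop_idx is the command ring index of the last descriptor
		 * of the completed packet; account for the command ring
		 * wrapping around when counting processed descriptors.
		 */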
1425 		if (txcd->eop_idx < txr->vxtxr_next)
1426 			processed += txr->vxtxr_ndesc -
1427 			    (txr->vxtxr_next - txcd->eop_idx) + 1;
1428 		else
1429 			processed += txcd->eop_idx - txr->vxtxr_next + 1;
1430 		txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1431 	}
1432 
1433 	return (processed);
1434 }
1435 
1436 static int
1437 vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
1438 {
1439 	struct vmxnet3_softc *sc;
1440 	struct vmxnet3_rxqueue *rxq;
1441 	struct vmxnet3_comp_ring *rxc;
1442 	struct vmxnet3_rxcompdesc *rxcd;
1443 	int avail;
1444 	int completed_gen;
1445 #ifdef INVARIANTS
1446 	int expect_sop = 1;
1447 #endif
1448 	sc = vsc;
1449 	rxq = &sc->vmx_rxq[rxqid];
1450 	rxc = &rxq->vxrxq_comp_ring;
1451 
1452 	avail = 0;
1453 	completed_gen = rxc->vxcr_gen;
1454 	for (;;) {
1455 		rxcd = &rxc->vxcr_u.rxcd[idx];
1456 		if (rxcd->gen != completed_gen)
1457 			break;
1458 		vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1459 
1460 #ifdef INVARIANTS
1461 		if (expect_sop)
1462 			KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1463 		else
1464 			KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
1465 		expect_sop = rxcd->eop;
1466 #endif
1467 		if (rxcd->eop && (rxcd->len != 0))
1468 			avail++;
1469 		if (avail > budget)
1470 			break;
1471 		if (++idx == rxc->vxcr_ndesc) {
1472 			idx = 0;
1473 			completed_gen ^= 1;
1474 		}
1475 	}
1476 
1477 	return (avail);
1478 }
1479 
1480 static int
1481 vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
1482 {
1483 	struct vmxnet3_softc *sc;
1484 	if_softc_ctx_t scctx;
1485 	struct vmxnet3_rxqueue *rxq;
1486 	struct vmxnet3_comp_ring *rxc;
1487 	struct vmxnet3_rxcompdesc *rxcd;
1488 	if_rxd_frag_t frag;
1489 	int cqidx;
1490 	uint16_t total_len;
1491 	uint8_t nfrags;
1492 	uint8_t i;
1493 	uint8_t flid;
1494 
1495 	sc = vsc;
1496 	scctx = sc->vmx_scctx;
1497 	rxq = &sc->vmx_rxq[ri->iri_qsidx];
1498 	rxc = &rxq->vxrxq_comp_ring;
1499 
1500 	/*
1501 	 * Get a single packet starting at the given index in the completion
1502 	 * queue.  The fact that we have been called indicates that
1503 	 * vmxnet3_isc_rxd_available() has already verified that either
1504 	 * there is a complete packet available starting at the given index,
1505 	 * or there are one or more zero length packets starting at the
1506 	 * given index followed by a complete packet, so no verification of
1507 	 * ownership of the descriptors (and no associated read barrier) is
1508 	 * required here.
1509 	 */
1510 	cqidx = ri->iri_cidx;
1511 	rxcd = &rxc->vxcr_u.rxcd[cqidx];
1512 	while (rxcd->len == 0) {
1513 		KASSERT(rxcd->sop && rxcd->eop,
1514 		    ("%s: zero-length packet without both sop and eop set",
1515 			__func__));
1516 		rxc->vxcr_zero_length++;
1517 		if (++cqidx == rxc->vxcr_ndesc) {
1518 			cqidx = 0;
1519 			rxc->vxcr_gen ^= 1;
1520 		}
1521 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1522 	}
1523 	KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1524 
1525 	/*
1526 	 * RSS and flow ID.
1527 	 * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should
1528 	 * be used only if the software RSS is enabled and it uses the same
1529 	 * algorithm and the hash key as the "hardware".  If the software RSS
1530 	 * is not enabled, then it's simply pointless to use those types.
1531 	 * If it's enabled but with different parameters, then hash values will
1532 	 * not match.
1533 	 */
1534 	ri->iri_flowid = rxcd->rss_hash;
1535 #ifdef RSS
1536 	if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) {
1537 		switch (rxcd->rss_type) {
1538 		case VMXNET3_RCD_RSS_TYPE_NONE:
1539 			ri->iri_flowid = ri->iri_qsidx;
1540 			ri->iri_rsstype = M_HASHTYPE_NONE;
1541 			break;
1542 		case VMXNET3_RCD_RSS_TYPE_IPV4:
1543 			ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
1544 			break;
1545 		case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
1546 			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
1547 			break;
1548 		case VMXNET3_RCD_RSS_TYPE_IPV6:
1549 			ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
1550 			break;
1551 		case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
1552 			ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
1553 			break;
1554 		default:
1555 			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1556 			break;
1557 		}
1558 	} else
1559 #endif
1560 	{
1561 		switch (rxcd->rss_type) {
1562 		case VMXNET3_RCD_RSS_TYPE_NONE:
1563 			ri->iri_flowid = ri->iri_qsidx;
1564 			ri->iri_rsstype = M_HASHTYPE_NONE;
1565 			break;
1566 		default:
1567 			ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1568 			break;
1569 		}
1570 	}
1571 
1572 	/*
1573 	 * The queue numbering scheme used for rxcd->qid is as follows:
1574 	 *  - All of the command ring 0s are numbered [0, nrxqsets - 1]
1575 	 *  - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
1576 	 *
1577 	 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
1578 	 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
1579 	 * indicates command ring (and flid) 1.
1580 	 */
1581 	nfrags = 0;
1582 	total_len = 0;
1583 	do {
1584 		rxcd = &rxc->vxcr_u.rxcd[cqidx];
1585 		KASSERT(rxcd->gen == rxc->vxcr_gen,
1586 		    ("%s: generation mismatch", __func__));
1587 		KASSERT(nfrags < IFLIB_MAX_RX_SEGS,
1588 		    ("%s: too many fragments", __func__));
1589 		if (__predict_true(rxcd->len != 0)) {
1590 			frag = &ri->iri_frags[nfrags];
1591 			flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
1592 			frag->irf_flid = flid;
1593 			frag->irf_idx = rxcd->rxd_idx;
1594 			frag->irf_len = rxcd->len;
1595 			total_len += rxcd->len;
1596 			nfrags++;
1597 		} else {
1598 			rxc->vcxr_zero_length_frag++;
1599 		}
1600 		if (++cqidx == rxc->vxcr_ndesc) {
1601 			cqidx = 0;
1602 			rxc->vxcr_gen ^= 1;
1603 		}
1604 	} while (!rxcd->eop);
1605 
1606 	ri->iri_cidx = cqidx;
1607 	ri->iri_nfrags = nfrags;
1608 	ri->iri_len = total_len;
1609 
1610 	/*
1611 	 * If there's an error, the last descriptor in the packet will
1612 	 * have the error indicator set.  In this case, set all
1613 	 * fragment lengths to zero.  This will cause iflib to discard
1614 	 * the packet, but process all associated descriptors through
1615 	 * the refill mechanism.
1616 	 */
1617 	if (__predict_false(rxcd->error)) {
1618 		rxc->vxcr_pkt_errors++;
1619 		for (i = 0; i < nfrags; i++) {
1620 			frag = &ri->iri_frags[i];
1621 			frag->irf_len = 0;
1622 		}
1623 	} else {
1624 		/* Checksum offload information is in the last descriptor. */
1625 		if (!rxcd->no_csum) {
1626 			uint32_t csum_flags = 0;
1627 
1628 			if (rxcd->ipv4) {
1629 				csum_flags |= CSUM_IP_CHECKED;
1630 				if (rxcd->ipcsum_ok)
1631 					csum_flags |= CSUM_IP_VALID;
1632 			}
1633 			if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
1634 				csum_flags |= CSUM_L4_CALC;
1635 				if (rxcd->csum_ok) {
1636 					csum_flags |= CSUM_L4_VALID;
1637 					ri->iri_csum_data = 0xffff;
1638 				}
1639 			}
1640 			ri->iri_csum_flags = csum_flags;
1641 		}
1642 
1643 		/* VLAN information is in the last descriptor. */
1644 		if (rxcd->vlan) {
1645 			ri->iri_flags |= M_VLANTAG;
1646 			ri->iri_vtag = rxcd->vtag;
1647 		}
1648 	}
1649 
1650 	return (0);
1651 }
1652 
1653 static void
1654 vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
1655 {
1656 	struct vmxnet3_softc *sc;
1657 	struct vmxnet3_rxqueue *rxq;
1658 	struct vmxnet3_rxring *rxr;
1659 	struct vmxnet3_rxdesc *rxd;
1660 	uint64_t *paddrs;
1661 	int count;
1662 	int len;
1663 	int idx;
1664 	int i;
1665 	uint8_t flid;
1666 	uint8_t btype;
1667 
1668 	count = iru->iru_count;
1669 	len = iru->iru_buf_size;
1670 	flid = iru->iru_flidx;
1671 	paddrs = iru->iru_paddrs;
1672 
1673 	sc = vsc;
1674 	rxq = &sc->vmx_rxq[iru->iru_qsidx];
1675 	rxr = &rxq->vxrxq_cmd_ring[flid];
1676 	rxd = rxr->vxrxr_rxd;
1677 
1678 	/*
1679 	 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
1680 	 * command ring 1 is filled with BTYPE_BODY descriptors.
1681 	 */
1682 	btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
1683 	/*
1684 	 * The refill entries from iflib will advance monotonically,
1685 	 * but the refilled descriptors may not be contiguous due to
1686 	 * earlier skipping of descriptors by the device.  The refill
1687 	 * entries from iflib need an entire state update, while the
1688 	 * descriptors previously skipped by the device only need to
1689 	 * have their generation numbers updated.
1690 	 */
1691 	idx = rxr->vxrxr_refill_start;
1692 	i = 0;
1693 	do {
1694 		if (idx == iru->iru_idxs[i]) {
1695 			rxd[idx].addr = paddrs[i];
1696 			rxd[idx].len = len;
1697 			rxd[idx].btype = btype;
1698 			i++;
1699 		} else
1700 			rxr->vxrxr_desc_skips++;
1701 		rxd[idx].gen = rxr->vxrxr_gen;
1702 
1703 		if (++idx == rxr->vxrxr_ndesc) {
1704 			idx = 0;
1705 			rxr->vxrxr_gen ^= 1;
1706 		}
1707 	} while (i != count);
1708 	rxr->vxrxr_refill_start = idx;
1709 }
1710 
1711 static void
1712 vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
1713 {
1714 	struct vmxnet3_softc *sc;
1715 	bus_size_t r;
1716 
1717 	sc = vsc;
1718 
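	/* Free list 0 refills command ring 0 (doorbell RXH1); free list 1 refills ring 1 (RXH2). */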
1719 	if (flid == 0)
1720 		r = VMXNET3_BAR0_RXH1(rxqid);
1721 	else
1722 		r = VMXNET3_BAR0_RXH2(rxqid);
1723 
1724 	vmxnet3_write_bar0(sc, r, pidx);
1725 }
1726 
1727 static int
1728 vmxnet3_legacy_intr(void *xsc)
1729 {
1730 	struct vmxnet3_softc *sc;
1731 	if_softc_ctx_t scctx;
1732 	if_ctx_t ctx;
1733 
1734 	sc = xsc;
1735 	scctx = sc->vmx_scctx;
1736 	ctx = sc->vmx_ctx;
1737 
1738 	/*
1739 	 * When there is only a single interrupt configured, this routine
1740 	 * runs in fast interrupt context, following which the rxq 0 task
1741 	 * will be enqueued.
1742 	 */
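	/*
	 * A zero interrupt-cause reading on a (possibly shared) legacy
	 * interrupt line means this device did not raise the interrupt,
	 * so there is nothing to schedule.
	 */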
1743 	if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
1744 		if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
1745 			return (FILTER_HANDLED);
1746 	}
1747 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1748 		vmxnet3_intr_disable_all(ctx);
1749 
1750 	if (sc->vmx_ds->event != 0)
1751 		iflib_admin_intr_deferred(ctx);
1752 
1753 	/*
1754 	 * XXX - When there is both rxq and event activity, do we care
1755 	 * whether the rxq 0 task or the admin task re-enables the interrupt
1756 	 * first?
1757 	 */
1758 	return (FILTER_SCHEDULE_THREAD);
1759 }
1760 
1761 static int
1762 vmxnet3_rxq_intr(void *vrxq)
1763 {
1764 	struct vmxnet3_softc *sc;
1765 	struct vmxnet3_rxqueue *rxq;
1766 
1767 	rxq = vrxq;
1768 	sc = rxq->vxrxq_sc;
1769 
1770 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1771 		vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
1772 
1773 	return (FILTER_SCHEDULE_THREAD);
1774 }
1775 
1776 static int
1777 vmxnet3_event_intr(void *vsc)
1778 {
1779 	struct vmxnet3_softc *sc;
1780 
1781 	sc = vsc;
1782 
1783 	if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1784 		vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
1785 
1786 	/*
1787 	 * The work will be done via vmxnet3_update_admin_status(), and the
1788 	 * interrupt will be re-enabled by vmxnet3_link_intr_enable().
1791 	 */
1792 	return (FILTER_SCHEDULE_THREAD);
1793 }
1794 
1795 static void
1796 vmxnet3_stop(if_ctx_t ctx)
1797 {
1798 	struct vmxnet3_softc *sc;
1799 
1800 	sc = iflib_get_softc(ctx);
1801 
1802 	sc->vmx_link_active = 0;
1803 	vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
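	/* Quiesce the device: disable it, then reset it to a known state. */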
1804 	vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
1805 }
1806 
1807 static void
1808 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
1809 {
1810 	struct vmxnet3_txring *txr;
1811 	struct vmxnet3_comp_ring *txc;
1812 
1813 	txq->vxtxq_last_flush = -1;
1814 
1815 	txr = &txq->vxtxq_cmd_ring;
1816 	txr->vxtxr_next = 0;
1817 	txr->vxtxr_gen = VMXNET3_INIT_GEN;
1818 	/*
1819 	 * iflib has zeroed out the descriptor array during the prior attach
1820 	 * or stop
1821 	 */
1822 
1823 	txc = &txq->vxtxq_comp_ring;
1824 	txc->vxcr_next = 0;
1825 	txc->vxcr_gen = VMXNET3_INIT_GEN;
1826 	/*
1827 	 * iflib has zeroed out the descriptor array during the prior attach
1828 	 * or stop
1829 	 */
1830 }
1831 
1832 static void
1833 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
1834 {
1835 	struct vmxnet3_rxring *rxr;
1836 	struct vmxnet3_comp_ring *rxc;
1837 	int i;
1838 
1839 	/*
1840 	 * The descriptors will be populated with buffers during a
1841 	 * subsequent invocation of vmxnet3_isc_rxd_refill()
1842 	 */
1843 	for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
1844 		rxr = &rxq->vxrxq_cmd_ring[i];
1845 		rxr->vxrxr_gen = VMXNET3_INIT_GEN;
1846 		rxr->vxrxr_desc_skips = 0;
1847 		rxr->vxrxr_refill_start = 0;
1848 		/*
1849 		 * iflib has zeroed out the descriptor array during the
1850 		 * prior attach or stop
1851 		 */
1852 	}
1853 
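	/*
	 * Any command rings beyond those used by iflib are cleared so the
	 * device does not see stale descriptors.
	 */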
1854 	for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
1855 		rxr = &rxq->vxrxq_cmd_ring[i];
1856 		rxr->vxrxr_gen = 0;
1857 		rxr->vxrxr_desc_skips = 0;
1858 		rxr->vxrxr_refill_start = 0;
1859 		bzero(rxr->vxrxr_rxd,
1860 		    rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
1861 	}
1862 
1863 	rxc = &rxq->vxrxq_comp_ring;
1864 	rxc->vxcr_next = 0;
1865 	rxc->vxcr_gen = VMXNET3_INIT_GEN;
1866 	rxc->vxcr_zero_length = 0;
1867 	rxc->vcxr_zero_length_frag = 0;
1868 	rxc->vxcr_pkt_errors = 0;
1869 	/*
1870 	 * iflib has zeroed out the descriptor array during the prior attach
1871 	 * or stop
1872 	 */
1873 }
1874 
1875 static void
1876 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
1877 {
1878 	if_softc_ctx_t scctx;
1879 	int q;
1880 
1881 	scctx = sc->vmx_scctx;
1882 
1883 	for (q = 0; q < scctx->isc_ntxqsets; q++)
1884 		vmxnet3_txinit(sc, &sc->vmx_txq[q]);
1885 
1886 	for (q = 0; q < scctx->isc_nrxqsets; q++)
1887 		vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
1888 }
1889 
1890 static int
1891 vmxnet3_enable_device(struct vmxnet3_softc *sc)
1892 {
1893 	if_softc_ctx_t scctx;
1894 	int q;
1895 
1896 	scctx = sc->vmx_scctx;
1897 
1898 	if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
1899 		device_printf(sc->vmx_dev, "device enable command failed!\n");
1900 		return (1);
1901 	}
1902 
1903 	/* Reset the Rx queue heads. */
1904 	for (q = 0; q < scctx->isc_nrxqsets; q++) {
1905 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
1906 		vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
1907 	}
1908 
1909 	return (0);
1910 }
1911 
1912 static void
1913 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
1914 {
1915 	if_t ifp;
1916 
1917 	ifp = sc->vmx_ifp;
1918 
1919 	vmxnet3_set_rxfilter(sc, if_getflags(ifp));
1920 
1921 	if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1922 		bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
1923 		    sizeof(sc->vmx_ds->vlan_filter));
1924 	else
1925 		bzero(sc->vmx_ds->vlan_filter,
1926 		    sizeof(sc->vmx_ds->vlan_filter));
1927 	vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
1928 }
1929 
1930 static void
1931 vmxnet3_init(if_ctx_t ctx)
1932 {
1933 	struct vmxnet3_softc *sc;
1934 
1935 	sc = iflib_get_softc(ctx);
1936 
1937 	/* Use the current MAC address. */
1938 	bcopy(if_getlladdr(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1939 	vmxnet3_set_lladdr(sc);
1940 
1941 	vmxnet3_reinit_shared_data(sc);
1942 	vmxnet3_reinit_queues(sc);
1943 
1944 	vmxnet3_enable_device(sc);
1945 
1946 	vmxnet3_reinit_rxfilters(sc);
1947 	vmxnet3_link_status(sc);
1948 }
1949 
1950 static void
1951 vmxnet3_multi_set(if_ctx_t ctx)
1952 {
1953 
1954 	vmxnet3_set_rxfilter(iflib_get_softc(ctx),
1955 	    if_getflags(iflib_get_ifp(ctx)));
1956 }
1957 
1958 static int
1959 vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
1960 {
1961 	struct vmxnet3_softc *sc;
1962 	if_softc_ctx_t scctx;
1963 
1964 	sc = iflib_get_softc(ctx);
1965 	scctx = sc->vmx_scctx;
1966 
1967 	if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
1968 		ETHER_CRC_LEN))
1969 		return (EINVAL);
1970 
1971 	/*
1972 	 * Update the max frame size so that the rx mbuf size is
1973 	 * chosen based on the new mtu during the interface init that
1974 	 * will occur after this routine returns.
1975 	 */
1976 	scctx->isc_max_frame_size = mtu +
1977 		ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
1978 	/* RX completion queue - n/a */
1979 	scctx->isc_rxd_buf_size[0] = 0;
1980 	/*
1981 	 * For header-type descriptors (used for first segment of
1982 	 * packet), let iflib determine the buffer size based on the
1983 	 * max frame size.
1984 	 */
1985 	scctx->isc_rxd_buf_size[1] = 0;
1986 	/*
1987 	 * For body-type descriptors (used for jumbo frames and LRO),
1988 	 * always use page-sized buffers.
1989 	 */
1990 	scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE;
1991 
1992 	return (0);
1993 }
1994 
1995 static void
1996 vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq * ifmr)
1997 {
1998 	struct vmxnet3_softc *sc;
1999 
2000 	sc = iflib_get_softc(ctx);
2001 
2002 	ifmr->ifm_status = IFM_AVALID;
2003 	ifmr->ifm_active = IFM_ETHER;
2004 
2005 	if (vmxnet3_link_is_up(sc) != 0) {
2006 		ifmr->ifm_status |= IFM_ACTIVE;
2007 		ifmr->ifm_active |= IFM_AUTO;
2008 	} else
2009 		ifmr->ifm_active |= IFM_NONE;
2010 }
2011 
2012 static int
2013 vmxnet3_media_change(if_ctx_t ctx)
2014 {
2015 
2016 	/* Ignore. */
2017 	return (0);
2018 }
2019 
2020 static int
2021 vmxnet3_promisc_set(if_ctx_t ctx, int flags)
2022 {
2023 
2024 	vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
2025 
2026 	return (0);
2027 }
2028 
2029 static uint64_t
2030 vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
2031 {
2032 	if_t ifp = iflib_get_ifp(ctx);
2033 
2034 	if (cnt < IFCOUNTERS)
2035 		return (if_get_counter_default(ifp, cnt));
2036 
2037 	return (0);
2038 }
2039 
2040 static void
2041 vmxnet3_update_admin_status(if_ctx_t ctx)
2042 {
2043 	struct vmxnet3_softc *sc;
2044 
2045 	sc = iflib_get_softc(ctx);
2046 	/*
2047 	 * iflib may invoke this routine before vmxnet3_attach_post() has
2048 	 * run, which is before the top level shared data area is
2049 	 * initialized and the device made aware of it.
2050 	 */
2051 	if (sc->vmx_ds != NULL && sc->vmx_ds->event != 0)
2052 		vmxnet3_evintr(sc);
2053 
2054 	vmxnet3_refresh_host_stats(sc);
2055 }
2056 
2057 static void
2058 vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
2059 {
2060 	/* Host stats refresh is global, so just trigger it on txq 0 */
2061 	if (qid == 0)
2062 		vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
2063 }
2064 
2065 static void
2066 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
2067 {
2068 	int idx, bit;
2069 
2070 	if (tag == 0 || tag > 4095)
2071 		return;
2072 
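	/*
	 * The VLAN filter is a bit vector indexed by VLAN ID: each 32-bit
	 * word covers 32 tags (e.g. tag 100 maps to word 3, bit 4).
	 */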
2073 	idx = (tag >> 5) & 0x7F;
2074 	bit = tag & 0x1F;
2075 
2076 	/* Update our private VLAN bitvector. */
2077 	if (add)
2078 		sc->vmx_vlan_filter[idx] |= (1 << bit);
2079 	else
2080 		sc->vmx_vlan_filter[idx] &= ~(1 << bit);
2081 }
2082 
2083 static void
2084 vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
2085 {
2086 
2087 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
2088 }
2089 
2090 static void
2091 vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
2092 {
2093 
2094 	vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
2095 }
2096 
2097 static u_int
2098 vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count)
2099 {
2100 	struct vmxnet3_softc *sc = arg;
2101 
2102 	if (count < VMXNET3_MULTICAST_MAX)
2103 		bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN],
2104 		    ETHER_ADDR_LEN);
2105 
2106 	return (1);
2107 }
2108 
2109 static void
2110 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
2111 {
2112 	if_t ifp;
2113 	struct vmxnet3_driver_shared *ds;
2114 	u_int mode;
2115 
2116 	ifp = sc->vmx_ifp;
2117 	ds = sc->vmx_ds;
2118 
2119 	mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
2120 	if (flags & IFF_PROMISC)
2121 		mode |= VMXNET3_RXMODE_PROMISC;
2122 	if (flags & IFF_ALLMULTI)
2123 		mode |= VMXNET3_RXMODE_ALLMULTI;
2124 	else {
2125 		int cnt;
2126 
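		/*
		 * Harvest the multicast list into the shared table.  If it
		 * would overflow VMXNET3_MULTICAST_MAX entries, fall back to
		 * receiving all multicast traffic instead.
		 */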
2127 		cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc);
2128 		if (cnt >= VMXNET3_MULTICAST_MAX) {
2129 			cnt = 0;
2130 			mode |= VMXNET3_RXMODE_ALLMULTI;
2131 		} else if (cnt > 0)
2132 			mode |= VMXNET3_RXMODE_MCAST;
2133 		ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
2134 	}
2135 
2136 	ds->rxmode = mode;
2137 
2138 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
2139 	vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
2140 }
2141 
2142 static void
2143 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
2144 {
2145 
2146 	vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
2147 }
2148 
2149 static int
2150 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
2151 {
2152 	uint32_t status;
2153 
2154 	status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
2155 	return (!!(status & 0x1));
2156 }
2157 
2158 static void
2159 vmxnet3_link_status(struct vmxnet3_softc *sc)
2160 {
2161 	if_ctx_t ctx;
2162 	uint64_t speed;
2163 	int link;
2164 
2165 	ctx = sc->vmx_ctx;
2166 	link = vmxnet3_link_is_up(sc);
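	/* A nominal 10 Gbps link speed is always reported for this device. */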
2167 	speed = IF_Gbps(10);
2168 
2169 	if (link != 0 && sc->vmx_link_active == 0) {
2170 		sc->vmx_link_active = 1;
2171 		iflib_link_state_change(ctx, LINK_STATE_UP, speed);
2172 	} else if (link == 0 && sc->vmx_link_active != 0) {
2173 		sc->vmx_link_active = 0;
2174 		iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
2175 	}
2176 }
2177 
2178 static void
2179 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
2180 {
2181 	uint32_t ml, mh;
2182 
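	/*
	 * The MAC address is programmed as two little-endian words: bytes
	 * 0-3 go into the MACL register and bytes 4-5 into MACH.
	 */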
2183 	ml  = sc->vmx_lladdr[0];
2184 	ml |= sc->vmx_lladdr[1] << 8;
2185 	ml |= sc->vmx_lladdr[2] << 16;
2186 	ml |= sc->vmx_lladdr[3] << 24;
2187 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
2188 
2189 	mh  = sc->vmx_lladdr[4];
2190 	mh |= sc->vmx_lladdr[5] << 8;
2191 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
2192 }
2193 
2194 static void
2195 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
2196 {
2197 	uint32_t ml, mh;
2198 
2199 	ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
2200 	mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
2201 
2202 	sc->vmx_lladdr[0] = ml;
2203 	sc->vmx_lladdr[1] = ml >> 8;
2204 	sc->vmx_lladdr[2] = ml >> 16;
2205 	sc->vmx_lladdr[3] = ml >> 24;
2206 	sc->vmx_lladdr[4] = mh;
2207 	sc->vmx_lladdr[5] = mh >> 8;
2208 }
2209 
2210 static void
2211 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
2212     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2213 {
2214 	struct sysctl_oid *node, *txsnode;
2215 	struct sysctl_oid_list *list, *txslist;
2216 	struct UPT1_TxStats *txstats;
2217 	char namebuf[16];
2218 
2219 	txstats = &txq->vxtxq_ts->stats;
2220 
2221 	snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
2222 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2223 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
2224 	txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
2225 
2226 	/*
2227 	 * Add statistics reported by the host. These are updated by the
2228 	 * iflib txq timer on txq 0.
2229 	 */
2230 	txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2231 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2232 	txslist = SYSCTL_CHILDREN(txsnode);
2233 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
2234 	    &txstats->TSO_packets, "TSO packets");
2235 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
2236 	    &txstats->TSO_bytes, "TSO bytes");
2237 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2238 	    &txstats->ucast_packets, "Unicast packets");
2239 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2240 	    &txstats->ucast_bytes, "Unicast bytes");
2241 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2242 	    &txstats->mcast_packets, "Multicast packets");
2243 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2244 	    &txstats->mcast_bytes, "Multicast bytes");
2245 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
2246 	    &txstats->error, "Errors");
2247 	SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
2248 	    &txstats->discard, "Discards");
2249 }
2250 
2251 static void
2252 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
2253     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2254 {
2255 	struct sysctl_oid *node, *rxsnode;
2256 	struct sysctl_oid_list *list, *rxslist;
2257 	struct UPT1_RxStats *rxstats;
2258 	char namebuf[16];
2259 
2260 	rxstats = &rxq->vxrxq_rs->stats;
2261 
2262 	snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
2263 	node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2264 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
2265 	rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
2266 
2267 	/*
2268 	 * Add statistics reported by the host. These are updated by the
2269 	 * iflib txq timer on txq 0.
2270 	 */
2271 	rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2272 	    CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2273 	rxslist = SYSCTL_CHILDREN(rxsnode);
2274 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
2275 	    &rxstats->LRO_packets, "LRO packets");
2276 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
2277 	    &rxstats->LRO_bytes, "LRO bytes");
2278 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2279 	    &rxstats->ucast_packets, "Unicast packets");
2280 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2281 	    &rxstats->ucast_bytes, "Unicast bytes");
2282 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2283 	    &rxstats->mcast_packets, "Multicast packets");
2284 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2285 	    &rxstats->mcast_bytes, "Multicast bytes");
2286 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
2287 	    &rxstats->bcast_packets, "Broadcast packets");
2288 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
2289 	    &rxstats->bcast_bytes, "Broadcast bytes");
2290 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
2291 	    &rxstats->nobuffer, "No buffer");
2292 	SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
2293 	    &rxstats->error, "Errors");
2294 }
2295 
2296 static void
2297 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
2298     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2299 {
2300 	if_softc_ctx_t scctx;
2301 	struct sysctl_oid *node;
2302 	struct sysctl_oid_list *list;
2303 	int i;
2304 
2305 	scctx = sc->vmx_scctx;
2306 
2307 	for (i = 0; i < scctx->isc_ntxqsets; i++) {
2308 		struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
2309 
2310 		node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
2311 		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2312 		list = SYSCTL_CHILDREN(node);
2313 
2314 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
2315 		    &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
2316 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
2317 		    &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
2318 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
2319 		    &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
2320 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
2321 		    &txq->vxtxq_comp_ring.vxcr_next, 0, "");
2322 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2323 		    &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
2324 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2325 		    &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
2326 	}
2327 
2328 	for (i = 0; i < scctx->isc_nrxqsets; i++) {
2329 		struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
2330 
2331 		node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
2332 		    "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2333 		list = SYSCTL_CHILDREN(node);
2334 
2335 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
2336 		    &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
2337 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
2338 		    &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
2339 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD,
2340 		    &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, "");
2341 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
2342 		    &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
2343 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
2344 		    &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
2345 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD,
2346 		    &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, "");
2347 		SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2348 		    &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
2349 		SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2350 		    &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
2351 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD,
2352 		    &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, "");
2353 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length_frag",
2354 		    CTLFLAG_RD, &rxq->vxrxq_comp_ring.vcxr_zero_length_frag,
2355 		    0, "");
2356 		SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD,
2357 		    &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, "");
2358 	}
2359 }
2360 
2361 static void
2362 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
2363     struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2364 {
2365 	if_softc_ctx_t scctx;
2366 	int i;
2367 
2368 	scctx = sc->vmx_scctx;
2369 
2370 	for (i = 0; i < scctx->isc_ntxqsets; i++)
2371 		vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
2372 	for (i = 0; i < scctx->isc_nrxqsets; i++)
2373 		vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
2374 
2375 	vmxnet3_setup_debug_sysctl(sc, ctx, child);
2376 }
2377 
2378 static void
2379 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
2380 {
2381 	device_t dev;
2382 	struct sysctl_ctx_list *ctx;
2383 	struct sysctl_oid *tree;
2384 	struct sysctl_oid_list *child;
2385 
2386 	dev = sc->vmx_dev;
2387 	ctx = device_get_sysctl_ctx(dev);
2388 	tree = device_get_sysctl_tree(dev);
2389 	child = SYSCTL_CHILDREN(tree);
2390 
2391 	vmxnet3_setup_queue_sysctl(sc, ctx, child);
2392 }
2393 
2394 static void
2395 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2396 {
2397 
2398 	bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
2399 }
2400 
2401 static uint32_t
2402 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
2403 {
2404 
2405 	return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
2406 }
2407 
2408 static void
2409 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2410 {
2411 
2412 	bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
2413 }
2414 
2415 static void
2416 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2417 {
2418 
2419 	vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
2420 }
2421 
2422 static uint32_t
2423 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2424 {
2425 
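	/*
	 * Commands that return a value are issued by writing the command
	 * code to the CMD register and reading the result back from the
	 * same register; the barrier keeps the two accesses ordered.
	 */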
2426 	vmxnet3_write_cmd(sc, cmd);
2427 	bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
2428 	    BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2429 	return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
2430 }
2431 
2432 static void
2433 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
2434 {
2435 
2436 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
2437 }
2438 
2439 static void
2440 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
2441 {
2442 
2443 	vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
2444 }
2445 
2446 static int
2447 vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2448 {
2449 	/* Not using interrupts for TX */
2450 	return (0);
2451 }
2452 
2453 static int
2454 vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2455 {
2456 	struct vmxnet3_softc *sc;
2457 
2458 	sc = iflib_get_softc(ctx);
2459 	vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
2460 	return (0);
2461 }
2462 
2463 static void
2464 vmxnet3_link_intr_enable(if_ctx_t ctx)
2465 {
2466 	struct vmxnet3_softc *sc;
2467 
2468 	sc = iflib_get_softc(ctx);
2469 	vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2470 }
2471 
2472 static void
2473 vmxnet3_intr_enable_all(if_ctx_t ctx)
2474 {
2475 	struct vmxnet3_softc *sc;
2476 	if_softc_ctx_t scctx;
2477 	int i;
2478 
2479 	sc = iflib_get_softc(ctx);
2480 	scctx = sc->vmx_scctx;
2481 	sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
2482 	for (i = 0; i < scctx->isc_vectors; i++)
2483 		vmxnet3_enable_intr(sc, i);
2484 }
2485 
2486 static void
2487 vmxnet3_intr_disable_all(if_ctx_t ctx)
2488 {
2489 	struct vmxnet3_softc *sc;
2490 	int i;
2491 
2492 	sc = iflib_get_softc(ctx);
2493 	/*
2494 	 * iflib may invoke this routine before vmxnet3_attach_post() has
2495 	 * run, which is before the top level shared data area is
2496 	 * initialized and the device made aware of it.
2497 	 */
2498 	if (sc->vmx_ds != NULL)
2499 		sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
2500 	for (i = 0; i < VMXNET3_MAX_INTRS; i++)
2501 		vmxnet3_disable_intr(sc, i);
2502 }
2503 
2504 static bool
2505 vmxnet3_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event)
2506 {
2507 	switch (event) {
2508 	case IFLIB_RESTART_VLAN_CONFIG:
2509 		return (true);
2510 	default:
2511 		return (false);
2512 	}
2513 }
2514 
2515 /*
2516  * Since this is a purely paravirtualized device, we do not have
2517  * to worry about DMA coherency. But at times, we must make sure
2518  * both the compiler and CPU do not reorder memory operations.
2519  */
2520 static inline void
2521 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
2522 {
2523 
2524 	switch (type) {
2525 	case VMXNET3_BARRIER_RD:
2526 		rmb();
2527 		break;
2528 	case VMXNET3_BARRIER_WR:
2529 		wmb();
2530 		break;
2531 	case VMXNET3_BARRIER_RDWR:
2532 		mb();
2533 		break;
2534 	default:
2535 		panic("%s: bad barrier type %d", __func__, type);
2536 	}
2537 }
2538