1 /*-
2 * Copyright (c) 2013 Tsubai Masanari
3 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4 * Copyright (c) 2018 Patrick Kelsey
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
19 */
20
21 /* Driver for VMware vmxnet3 virtual ethernet devices. */
22
23 #include <sys/cdefs.h>
24 #include "opt_rss.h"
25
26 #include <sys/param.h>
27 #include <sys/systm.h>
28 #include <sys/kernel.h>
29 #include <sys/endian.h>
30 #include <sys/sockio.h>
31 #include <sys/mbuf.h>
32 #include <sys/malloc.h>
33 #include <sys/module.h>
34 #include <sys/socket.h>
35 #include <sys/sysctl.h>
36 #include <sys/smp.h>
37 #include <vm/vm.h>
38 #include <vm/pmap.h>
39
40 #include <net/ethernet.h>
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/if_arp.h>
44 #include <net/if_dl.h>
45 #include <net/if_types.h>
46 #include <net/if_media.h>
47 #include <net/if_vlan_var.h>
48 #include <net/iflib.h>
49 #include <net/rss_config.h>
50
51 #include <netinet/in_systm.h>
52 #include <netinet/in.h>
53 #include <netinet/ip.h>
54 #include <netinet/ip6.h>
55 #include <netinet6/ip6_var.h>
56 #include <netinet/udp.h>
57 #include <netinet/tcp.h>
58
59 #include <machine/bus.h>
60 #include <machine/resource.h>
61 #include <sys/bus.h>
62 #include <sys/rman.h>
63
64 #include <dev/pci/pcireg.h>
65 #include <dev/pci/pcivar.h>
66
67 #include "ifdi_if.h"
68
69 #include "if_vmxreg.h"
70 #include "if_vmxvar.h"
71
72 #include "opt_inet.h"
73 #include "opt_inet6.h"
74
75 #define VMXNET3_VMWARE_VENDOR_ID 0x15AD
76 #define VMXNET3_VMWARE_DEVICE_ID 0x07B0
77
78 static const pci_vendor_info_t vmxnet3_vendor_info_array[] =
79 {
80 PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
81 /* required last entry */
82 PVID_END
83 };
84
85 static void *vmxnet3_register(device_t);
86 static int vmxnet3_attach_pre(if_ctx_t);
87 static int vmxnet3_msix_intr_assign(if_ctx_t, int);
88 static void vmxnet3_free_irqs(struct vmxnet3_softc *);
89 static int vmxnet3_attach_post(if_ctx_t);
90 static int vmxnet3_detach(if_ctx_t);
91 static int vmxnet3_shutdown(if_ctx_t);
92 static int vmxnet3_suspend(if_ctx_t);
93 static int vmxnet3_resume(if_ctx_t);
94
95 static int vmxnet3_alloc_resources(struct vmxnet3_softc *);
96 static void vmxnet3_free_resources(struct vmxnet3_softc *);
97 static int vmxnet3_check_version(struct vmxnet3_softc *);
98 static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
99
100 static int vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
101 static void vmxnet3_init_txq(struct vmxnet3_softc *, int);
102 static int vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
103 static void vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
104 static int vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
105 static void vmxnet3_queues_free(if_ctx_t);
106
107 static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
108 static void vmxnet3_free_shared_data(struct vmxnet3_softc *);
109 static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
110 static void vmxnet3_free_mcast_table(struct vmxnet3_softc *);
111 static void vmxnet3_init_shared_data(struct vmxnet3_softc *);
112 static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
113 static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
114 static int vmxnet3_alloc_data(struct vmxnet3_softc *);
115 static void vmxnet3_free_data(struct vmxnet3_softc *);
116
117 static void vmxnet3_evintr(struct vmxnet3_softc *);
118 static int vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
119 static void vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
120 static int vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
121 static int vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
122 static int vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
123 static void vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
124 static void vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
125 static int vmxnet3_legacy_intr(void *);
126 static int vmxnet3_rxq_intr(void *);
127 static int vmxnet3_event_intr(void *);
128
129 static void vmxnet3_stop(if_ctx_t);
130
131 static void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
132 static void vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
133 static void vmxnet3_reinit_queues(struct vmxnet3_softc *);
134 static int vmxnet3_enable_device(struct vmxnet3_softc *);
135 static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
136 static void vmxnet3_init(if_ctx_t);
137 static void vmxnet3_multi_set(if_ctx_t);
138 static int vmxnet3_mtu_set(if_ctx_t, uint32_t);
139 static void vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
140 static int vmxnet3_media_change(if_ctx_t);
141 static int vmxnet3_promisc_set(if_ctx_t, int);
142 static uint64_t vmxnet3_get_counter(if_ctx_t, ift_counter);
143 static void vmxnet3_update_admin_status(if_ctx_t);
144 static void vmxnet3_txq_timer(if_ctx_t, uint16_t);
145
146 static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
147 uint16_t);
148 static void vmxnet3_vlan_register(if_ctx_t, uint16_t);
149 static void vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
150 static void vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);
151
152 static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
153 static int vmxnet3_link_is_up(struct vmxnet3_softc *);
154 static void vmxnet3_link_status(struct vmxnet3_softc *);
155 static void vmxnet3_set_lladdr(struct vmxnet3_softc *);
156 static void vmxnet3_get_lladdr(struct vmxnet3_softc *);
157
158 static void vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
159 struct sysctl_ctx_list *, struct sysctl_oid_list *);
160 static void vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
161 struct sysctl_ctx_list *, struct sysctl_oid_list *);
162 static void vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
163 struct sysctl_ctx_list *, struct sysctl_oid_list *);
164 static void vmxnet3_setup_sysctl(struct vmxnet3_softc *);
165
166 static void vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
167 uint32_t);
168 static uint32_t vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
169 static void vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
170 uint32_t);
171 static void vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
172 static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
173
174 static int vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
175 static int vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
176 static void vmxnet3_link_intr_enable(if_ctx_t);
177 static void vmxnet3_enable_intr(struct vmxnet3_softc *, int);
178 static void vmxnet3_disable_intr(struct vmxnet3_softc *, int);
179 static void vmxnet3_intr_enable_all(if_ctx_t);
180 static void vmxnet3_intr_disable_all(if_ctx_t);
181 static bool vmxnet3_if_needs_restart(if_ctx_t, enum iflib_restart_event);
182
183 typedef enum {
184 VMXNET3_BARRIER_RD,
185 VMXNET3_BARRIER_WR,
186 VMXNET3_BARRIER_RDWR,
187 } vmxnet3_barrier_t;
188
189 static void vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
190
191 static device_method_t vmxnet3_methods[] = {
192 /* Device interface */
193 DEVMETHOD(device_register, vmxnet3_register),
194 DEVMETHOD(device_probe, iflib_device_probe),
195 DEVMETHOD(device_attach, iflib_device_attach),
196 DEVMETHOD(device_detach, iflib_device_detach),
197 DEVMETHOD(device_shutdown, iflib_device_shutdown),
198 DEVMETHOD(device_suspend, iflib_device_suspend),
199 DEVMETHOD(device_resume, iflib_device_resume),
200 DEVMETHOD_END
201 };
202
203 static driver_t vmxnet3_driver = {
204 "vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
205 };
206
207 DRIVER_MODULE(vmx, pci, vmxnet3_driver, 0, 0);
208 IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
209 MODULE_VERSION(vmx, 2);
210
211 MODULE_DEPEND(vmx, pci, 1, 1, 1);
212 MODULE_DEPEND(vmx, ether, 1, 1, 1);
213 MODULE_DEPEND(vmx, iflib, 1, 1, 1);
214
215 static device_method_t vmxnet3_iflib_methods[] = {
216 DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
217 DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
218 DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
219
220 DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
221 DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
222 DEVMETHOD(ifdi_detach, vmxnet3_detach),
223
224 DEVMETHOD(ifdi_init, vmxnet3_init),
225 DEVMETHOD(ifdi_stop, vmxnet3_stop),
226 DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
227 DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
228 DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
229 DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
230 DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
231 DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
232 DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
233 DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
234
235 DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
236 DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
237 DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
238 DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
239 DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
240 DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
241
242 DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
243 DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
244
245 DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
246 DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
247 DEVMETHOD(ifdi_resume, vmxnet3_resume),
248
249 DEVMETHOD(ifdi_needs_restart, vmxnet3_if_needs_restart),
250
251 DEVMETHOD_END
252 };
253
254 static driver_t vmxnet3_iflib_driver = {
255 "vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
256 };
257
258 struct if_txrx vmxnet3_txrx = {
259 .ift_txd_encap = vmxnet3_isc_txd_encap,
260 .ift_txd_flush = vmxnet3_isc_txd_flush,
261 .ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
262 .ift_rxd_available = vmxnet3_isc_rxd_available,
263 .ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
264 .ift_rxd_refill = vmxnet3_isc_rxd_refill,
265 .ift_rxd_flush = vmxnet3_isc_rxd_flush,
266 .ift_legacy_intr = vmxnet3_legacy_intr
267 };
268
269 static struct if_shared_ctx vmxnet3_sctx_init = {
270 .isc_magic = IFLIB_MAGIC,
271 .isc_q_align = 512,
272
273 .isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
274 .isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
275 .isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
276 .isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
277
278 /*
279 * These values are used to configure the busdma tag used for
280 * receive descriptors. Each receive descriptor only points to one
281 * buffer.
282 */
283 .isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
284 .isc_rx_nsegments = 1, /* One mapping per descriptor */
285 .isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
286
287 .isc_admin_intrcnt = 1,
288 .isc_vendor_info = vmxnet3_vendor_info_array,
289 .isc_driver_version = "2",
290 .isc_driver = &vmxnet3_iflib_driver,
291 .isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY,
292
293 /*
294 * Number of receive queues per receive queue set, with associated
295 * descriptor settings for each.
296 */
297 .isc_nrxqs = 3,
298 .isc_nfl = 2, /* one free list for each receive command queue */
299 .isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
300 .isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
301 .isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
302
303 /*
304 * Number of transmit queues per transmit queue set, with associated
305 * descriptor settings for each.
306 */
307 .isc_ntxqs = 2,
308 .isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
309 .isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
310 .isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
311 };
312
313 static void *
vmxnet3_register(device_t dev)
315 {
316 return (&vmxnet3_sctx_init);
317 }
318
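/* Round val down to the nearest power of two (e.g., 6 becomes 4). */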
319 static int
trunc_powerof2(int val)
321 {
322
323 return (1U << (fls(val) - 1));
324 }
325
326 static int
vmxnet3_attach_pre(if_ctx_t ctx)
328 {
329 device_t dev;
330 if_softc_ctx_t scctx;
331 struct vmxnet3_softc *sc;
332 uint32_t intr_config;
333 int error;
334
335 dev = iflib_get_dev(ctx);
336 sc = iflib_get_softc(ctx);
337 sc->vmx_dev = dev;
338 sc->vmx_ctx = ctx;
339 sc->vmx_sctx = iflib_get_sctx(ctx);
340 sc->vmx_scctx = iflib_get_softc_ctx(ctx);
341 sc->vmx_ifp = iflib_get_ifp(ctx);
342 sc->vmx_media = iflib_get_media(ctx);
343 scctx = sc->vmx_scctx;
344
345 scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
346 scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
347 /* isc_tx_tso_size_max doesn't include possible vlan header */
348 scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
349 scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
350 scctx->isc_txrx = &vmxnet3_txrx;
351
352 /* If 0, the iflib tunable was not set, so set to the default */
353 if (scctx->isc_nrxqsets == 0)
354 scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
355 scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets);
356 scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
357 scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max);
358
359 /* If 0, the iflib tunable was not set, so set to the default */
360 if (scctx->isc_ntxqsets == 0)
361 scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
362 scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets);
363 scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
364 scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max);
365
366 /*
367 * Enforce that the transmit completion queue descriptor count is
368 * the same as the transmit command queue descriptor count.
369 */
370 scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
371 scctx->isc_txqsizes[0] =
372 sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
373 scctx->isc_txqsizes[1] =
374 sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
375
376 /*
377 * Enforce that the receive completion queue descriptor count is the
378 * sum of the receive command queue descriptor counts, and that the
379 * second receive command queue descriptor count is the same as the
380 * first one.
381 */
382 scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
383 scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
384 scctx->isc_rxqsizes[0] =
385 sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
386 scctx->isc_rxqsizes[1] =
387 sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
388 scctx->isc_rxqsizes[2] =
389 sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
390
391 /*
392 * Initialize the max frame size and descriptor queue buffer
393 * sizes.
394 */
395 vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp));
396
397 scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
398
399 /* Map PCI BARs */
400 error = vmxnet3_alloc_resources(sc);
401 if (error)
402 goto fail;
403
404 /* Check device versions */
405 error = vmxnet3_check_version(sc);
406 if (error)
407 goto fail;
408
409 /*
410 * The interrupt mode can be set in the hypervisor configuration via
411 * the parameter ethernet<N>.intrMode.
412 */
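/*
 * The low two bits of intr_config select the interrupt type and bits
 * 2-3 select the interrupt mask mode, as decoded below.
 */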
413 intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
414 sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
415
/*
 * Configure the softc context to request the interrupt mode now
 * indicated by intr_config. iflib will follow the usual fallback
 * path MSI-X -> MSI -> LEGACY, starting from the configured mode.
 */
422 switch (intr_config & 0x03) {
423 case VMXNET3_IT_AUTO:
424 case VMXNET3_IT_MSIX:
425 scctx->isc_msix_bar = pci_msix_table_bar(dev);
426 break;
427 case VMXNET3_IT_MSI:
428 scctx->isc_msix_bar = -1;
429 scctx->isc_disable_msix = 1;
430 break;
431 case VMXNET3_IT_LEGACY:
432 scctx->isc_msix_bar = 0;
433 break;
434 }
435
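/*
 * Checksum offload flags and interface capabilities advertised to the
 * stack; this initial set is enabled by default.
 */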
436 scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
437 scctx->isc_capabilities = scctx->isc_capenable =
438 IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
439 IFCAP_TSO4 | IFCAP_TSO6 |
440 IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
441 IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
442 IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
443 IFCAP_JUMBO_MTU;
444
445 /* These capabilities are not enabled by default. */
446 scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
447
448 vmxnet3_get_lladdr(sc);
449 iflib_set_mac(ctx, sc->vmx_lladdr);
450
451 return (0);
452 fail:
453 /*
454 * We must completely clean up anything allocated above as iflib
455 * will not invoke any other driver entry points as a result of this
456 * failure.
457 */
458 vmxnet3_free_resources(sc);
459
460 return (error);
461 }
462
463 static int
vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
465 {
466 struct vmxnet3_softc *sc;
467 if_softc_ctx_t scctx;
468 struct vmxnet3_rxqueue *rxq;
469 int error;
470 int i;
471 char irq_name[16];
472
473 sc = iflib_get_softc(ctx);
474 scctx = sc->vmx_scctx;
475
476 for (i = 0; i < scctx->isc_nrxqsets; i++) {
477 snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
478
479 rxq = &sc->vmx_rxq[i];
480 error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
481 IFLIB_INTR_RXTX, vmxnet3_rxq_intr, rxq, i, irq_name);
482 if (error) {
483 device_printf(iflib_get_dev(ctx),
484 "Failed to register rxq %d interrupt handler\n", i);
485 return (error);
486 }
487 }
488
489 for (i = 0; i < scctx->isc_ntxqsets; i++) {
490 snprintf(irq_name, sizeof(irq_name), "txq%d", i);
491
492 /*
493 * Don't provide the corresponding rxq irq for reference -
494 * we want the transmit task to be attached to a task queue
495 * that is different from the one used by the corresponding
496 * rxq irq. That is because the TX doorbell writes are very
497 * expensive as virtualized MMIO operations, so we want to
498 * be able to defer them to another core when possible so
499 * that they don't steal receive processing cycles during
500 * stack turnarounds like TCP ACK generation. The other
501 * piece to this approach is enabling the iflib abdicate
502 * option (currently via an interface-specific
503 * tunable/sysctl).
504 */
505 iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
506 irq_name);
507 }
508
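/*
 * The event (admin) interrupt is assigned the vector following the
 * last receive queue vector.
 */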
509 error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
510 scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
511 "event");
512 if (error) {
513 device_printf(iflib_get_dev(ctx),
514 "Failed to register event interrupt handler\n");
515 return (error);
516 }
517
518 return (0);
519 }
520
521 static void
vmxnet3_free_irqs(struct vmxnet3_softc *sc)
523 {
524 if_softc_ctx_t scctx;
525 struct vmxnet3_rxqueue *rxq;
526 int i;
527
528 scctx = sc->vmx_scctx;
529
530 for (i = 0; i < scctx->isc_nrxqsets; i++) {
531 rxq = &sc->vmx_rxq[i];
532 iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
533 }
534
535 iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
536 }
537
538 static int
vmxnet3_attach_post(if_ctx_t ctx)
540 {
541 if_softc_ctx_t scctx;
542 struct vmxnet3_softc *sc;
543 int error;
544
545 scctx = iflib_get_softc_ctx(ctx);
546 sc = iflib_get_softc(ctx);
547
548 if (scctx->isc_nrxqsets > 1)
549 sc->vmx_flags |= VMXNET3_FLAG_RSS;
550
551 error = vmxnet3_alloc_data(sc);
552 if (error)
553 goto fail;
554
555 vmxnet3_set_interrupt_idx(sc);
556 vmxnet3_setup_sysctl(sc);
557
558 ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
559 ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
560
561 fail:
562 return (error);
563 }
564
565 static int
vmxnet3_detach(if_ctx_t ctx)
567 {
568 struct vmxnet3_softc *sc;
569
570 sc = iflib_get_softc(ctx);
571
572 vmxnet3_free_irqs(sc);
573 vmxnet3_free_data(sc);
574 vmxnet3_free_resources(sc);
575
576 return (0);
577 }
578
579 static int
vmxnet3_shutdown(if_ctx_t ctx)
581 {
582
583 return (0);
584 }
585
586 static int
vmxnet3_suspend(if_ctx_t ctx)
588 {
589
590 return (0);
591 }
592
593 static int
vmxnet3_resume(if_ctx_t ctx)
595 {
596
597 return (0);
598 }
599
600 static int
vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
602 {
603 device_t dev;
604 int rid;
605
606 dev = sc->vmx_dev;
607
608 rid = PCIR_BAR(0);
609 sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
610 RF_ACTIVE);
611 if (sc->vmx_res0 == NULL) {
612 device_printf(dev,
613 "could not map BAR0 memory\n");
614 return (ENXIO);
615 }
616
617 sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
618 sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
619
620 rid = PCIR_BAR(1);
621 sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
622 RF_ACTIVE);
623 if (sc->vmx_res1 == NULL) {
624 device_printf(dev,
625 "could not map BAR1 memory\n");
626 return (ENXIO);
627 }
628
629 sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
630 sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
631
632 return (0);
633 }
634
635 static void
vmxnet3_free_resources(struct vmxnet3_softc *sc)
637 {
638 device_t dev;
639
640 dev = sc->vmx_dev;
641
642 if (sc->vmx_res0 != NULL) {
643 bus_release_resource(dev, SYS_RES_MEMORY,
644 rman_get_rid(sc->vmx_res0), sc->vmx_res0);
645 sc->vmx_res0 = NULL;
646 }
647
648 if (sc->vmx_res1 != NULL) {
649 bus_release_resource(dev, SYS_RES_MEMORY,
650 rman_get_rid(sc->vmx_res1), sc->vmx_res1);
651 sc->vmx_res1 = NULL;
652 }
653 }
654
655 static int
vmxnet3_check_version(struct vmxnet3_softc *sc)
657 {
658 device_t dev;
659 uint32_t version;
660
661 dev = sc->vmx_dev;
662
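/*
 * The version registers report supported revisions; bit 0 must be set
 * for revision 1 of the device and UPT protocols, which the driver
 * then selects by writing 1 back.
 */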
663 version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
664 if ((version & 0x01) == 0) {
665 device_printf(dev, "unsupported hardware version %#x\n",
666 version);
667 return (ENOTSUP);
668 }
669 vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
670
671 version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
672 if ((version & 0x01) == 0) {
673 device_printf(dev, "unsupported UPT version %#x\n", version);
674 return (ENOTSUP);
675 }
676 vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
677
678 return (0);
679 }
680
681 static void
vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
683 {
684 if_softc_ctx_t scctx;
685 struct vmxnet3_txqueue *txq;
686 struct vmxnet3_txq_shared *txs;
687 struct vmxnet3_rxqueue *rxq;
688 struct vmxnet3_rxq_shared *rxs;
689 int intr_idx;
690 int i;
691
692 scctx = sc->vmx_scctx;
693
694 /*
695 * There is always one interrupt per receive queue, assigned
696 * starting with the first interrupt. When there is only one
697 * interrupt available, the event interrupt shares the receive queue
698 * interrupt, otherwise it uses the interrupt following the last
699 * receive queue interrupt. Transmit queues are not assigned
700 * interrupts, so they are given indexes beyond the indexes that
701 * correspond to the real interrupts.
702 */
703
704 /* The event interrupt is always the last vector. */
705 sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
706
707 intr_idx = 0;
708 for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
709 rxq = &sc->vmx_rxq[i];
710 rxs = rxq->vxrxq_rs;
711 rxq->vxrxq_intr_idx = intr_idx;
712 rxs->intr_idx = rxq->vxrxq_intr_idx;
713 }
714
/*
 * Assign the TX queue interrupt indexes above the range we are
 * actually using. These interrupts will never be enabled.
 */
719 intr_idx = scctx->isc_vectors;
720 for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
721 txq = &sc->vmx_txq[i];
722 txs = txq->vxtxq_ts;
723 txq->vxtxq_intr_idx = intr_idx;
724 txs->intr_idx = txq->vxtxq_intr_idx;
725 }
726 }
727
728 static int
vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
730 {
731 if_softc_ctx_t scctx;
732 int size;
733 int error;
734
735 scctx = sc->vmx_scctx;
736
737 /*
738 * The txq and rxq shared data areas must be allocated contiguously
739 * as vmxnet3_driver_shared contains only a single address member
740 * for the shared queue data area.
741 */
742 size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
743 scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
744 error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
745 if (error) {
746 device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
747 return (error);
748 }
749
750 return (0);
751 }
752
753 static void
vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
755 {
756 struct vmxnet3_txqueue *txq;
757 struct vmxnet3_comp_ring *txc;
758 struct vmxnet3_txring *txr;
759 if_softc_ctx_t scctx;
760
761 txq = &sc->vmx_txq[q];
762 txc = &txq->vxtxq_comp_ring;
763 txr = &txq->vxtxq_cmd_ring;
764 scctx = sc->vmx_scctx;
765
766 snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
767 device_get_nameunit(sc->vmx_dev), q);
768
769 txq->vxtxq_sc = sc;
770 txq->vxtxq_id = q;
771 txc->vxcr_ndesc = scctx->isc_ntxd[0];
772 txr->vxtxr_ndesc = scctx->isc_ntxd[1];
773 }
774
775 static int
vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
777 int ntxqs, int ntxqsets)
778 {
779 struct vmxnet3_softc *sc;
780 int q;
781 int error;
782 caddr_t kva;
783
784 sc = iflib_get_softc(ctx);
785
786 /* Allocate the array of transmit queues */
787 sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
788 ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
789 if (sc->vmx_txq == NULL)
790 return (ENOMEM);
791
792 /* Initialize driver state for each transmit queue */
793 for (q = 0; q < ntxqsets; q++)
794 vmxnet3_init_txq(sc, q);
795
796 /*
797 * Allocate queue state that is shared with the device. This check
798 * and call is performed in both vmxnet3_tx_queues_alloc() and
799 * vmxnet3_rx_queues_alloc() so that we don't have to care which
800 * order iflib invokes those routines in.
801 */
802 if (sc->vmx_qs_dma.idi_size == 0) {
803 error = vmxnet3_queues_shared_alloc(sc);
804 if (error)
805 return (error);
806 }
807
808 kva = sc->vmx_qs_dma.idi_vaddr;
809 for (q = 0; q < ntxqsets; q++) {
810 sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
811 kva += sizeof(struct vmxnet3_txq_shared);
812 }
813
814 /* Record descriptor ring vaddrs and paddrs */
815 for (q = 0; q < ntxqsets; q++) {
816 struct vmxnet3_txqueue *txq;
817 struct vmxnet3_txring *txr;
818 struct vmxnet3_comp_ring *txc;
819
820 txq = &sc->vmx_txq[q];
821 txc = &txq->vxtxq_comp_ring;
822 txr = &txq->vxtxq_cmd_ring;
823
824 /* Completion ring */
825 txc->vxcr_u.txcd =
826 (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
827 txc->vxcr_paddr = paddrs[q * ntxqs + 0];
828
829 /* Command ring */
830 txr->vxtxr_txd =
831 (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
832 txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
833 }
834
835 return (0);
836 }
837
838 static void
vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
840 {
841 struct vmxnet3_rxqueue *rxq;
842 struct vmxnet3_comp_ring *rxc;
843 struct vmxnet3_rxring *rxr;
844 if_softc_ctx_t scctx;
845 int i;
846
847 rxq = &sc->vmx_rxq[q];
848 rxc = &rxq->vxrxq_comp_ring;
849 scctx = sc->vmx_scctx;
850
851 snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
852 device_get_nameunit(sc->vmx_dev), q);
853
854 rxq->vxrxq_sc = sc;
855 rxq->vxrxq_id = q;
856
857 /*
858 * First rxq is the completion queue, so there are nrxqs - 1 command
859 * rings starting at iflib queue id 1.
860 */
861 rxc->vxcr_ndesc = scctx->isc_nrxd[0];
862 for (i = 0; i < nrxqs - 1; i++) {
863 rxr = &rxq->vxrxq_cmd_ring[i];
864 rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
865 }
866 }
867
868 static int
vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
870 int nrxqs, int nrxqsets)
871 {
872 struct vmxnet3_softc *sc;
873 if_softc_ctx_t scctx;
874 int q;
875 int i;
876 int error;
877 caddr_t kva;
878
879 sc = iflib_get_softc(ctx);
880 scctx = sc->vmx_scctx;
881
882 /* Allocate the array of receive queues */
883 sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
884 nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
885 if (sc->vmx_rxq == NULL)
886 return (ENOMEM);
887
888 /* Initialize driver state for each receive queue */
889 for (q = 0; q < nrxqsets; q++)
890 vmxnet3_init_rxq(sc, q, nrxqs);
891
892 /*
893 * Allocate queue state that is shared with the device. This check
894 * and call is performed in both vmxnet3_tx_queues_alloc() and
895 * vmxnet3_rx_queues_alloc() so that we don't have to care which
896 * order iflib invokes those routines in.
897 */
898 if (sc->vmx_qs_dma.idi_size == 0) {
899 error = vmxnet3_queues_shared_alloc(sc);
900 if (error)
901 return (error);
902 }
903
904 kva = sc->vmx_qs_dma.idi_vaddr +
905 scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
906 for (q = 0; q < nrxqsets; q++) {
907 sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
908 kva += sizeof(struct vmxnet3_rxq_shared);
909 }
910
911 /* Record descriptor ring vaddrs and paddrs */
912 for (q = 0; q < nrxqsets; q++) {
913 struct vmxnet3_rxqueue *rxq;
914 struct vmxnet3_rxring *rxr;
915 struct vmxnet3_comp_ring *rxc;
916
917 rxq = &sc->vmx_rxq[q];
918 rxc = &rxq->vxrxq_comp_ring;
919
920 /* Completion ring */
921 rxc->vxcr_u.rxcd =
922 (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
923 rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
924
925 /* Command ring(s) */
926 for (i = 0; i < nrxqs - 1; i++) {
927 rxr = &rxq->vxrxq_cmd_ring[i];
928
929 rxr->vxrxr_rxd =
930 (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
931 rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
932 }
933 }
934
935 return (0);
936 }
937
938 static void
vmxnet3_queues_free(if_ctx_t ctx)
940 {
941 struct vmxnet3_softc *sc;
942
943 sc = iflib_get_softc(ctx);
944
945 /* Free queue state area that is shared with the device */
946 if (sc->vmx_qs_dma.idi_size != 0) {
947 iflib_dma_free(&sc->vmx_qs_dma);
948 sc->vmx_qs_dma.idi_size = 0;
949 }
950
951 /* Free array of receive queues */
952 if (sc->vmx_rxq != NULL) {
953 free(sc->vmx_rxq, M_DEVBUF);
954 sc->vmx_rxq = NULL;
955 }
956
957 /* Free array of transmit queues */
958 if (sc->vmx_txq != NULL) {
959 free(sc->vmx_txq, M_DEVBUF);
960 sc->vmx_txq = NULL;
961 }
962 }
963
964 static int
vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
966 {
967 device_t dev;
968 size_t size;
969 int error;
970
971 dev = sc->vmx_dev;
972
973 /* Top level state structure shared with the device */
974 size = sizeof(struct vmxnet3_driver_shared);
975 error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
976 if (error) {
977 device_printf(dev, "cannot alloc shared memory\n");
978 return (error);
979 }
980 sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
981
982 /* RSS table state shared with the device */
983 if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
984 size = sizeof(struct vmxnet3_rss_shared);
985 error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
986 &sc->vmx_rss_dma, 0);
987 if (error) {
988 device_printf(dev, "cannot alloc rss shared memory\n");
989 return (error);
990 }
991 sc->vmx_rss =
992 (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
993 }
994
995 return (0);
996 }
997
998 static void
vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1000 {
1001
1002 /* Free RSS table state shared with the device */
1003 if (sc->vmx_rss != NULL) {
1004 iflib_dma_free(&sc->vmx_rss_dma);
1005 sc->vmx_rss = NULL;
1006 }
1007
1008 /* Free top level state structure shared with the device */
1009 if (sc->vmx_ds != NULL) {
1010 iflib_dma_free(&sc->vmx_ds_dma);
1011 sc->vmx_ds = NULL;
1012 }
1013 }
1014
1015 static int
vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1017 {
1018 int error;
1019
1020 /* Multicast table state shared with the device */
1021 error = iflib_dma_alloc_align(sc->vmx_ctx,
1022 VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
1023 if (error)
1024 device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1025 else
1026 sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
1027
1028 return (error);
1029 }
1030
1031 static void
vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1033 {
1034
1035 /* Free multicast table state shared with the device */
1036 if (sc->vmx_mcast != NULL) {
1037 iflib_dma_free(&sc->vmx_mcast_dma);
1038 sc->vmx_mcast = NULL;
1039 }
1040 }
1041
1042 static void
vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1044 {
1045 struct vmxnet3_driver_shared *ds;
1046 if_softc_ctx_t scctx;
1047 struct vmxnet3_txqueue *txq;
1048 struct vmxnet3_txq_shared *txs;
1049 struct vmxnet3_rxqueue *rxq;
1050 struct vmxnet3_rxq_shared *rxs;
1051 int i;
1052
1053 ds = sc->vmx_ds;
1054 scctx = sc->vmx_scctx;
1055
/*
 * Initialize the fields of the shared data that remain the same
 * across reinits. Note the shared data is zeroed when allocated.
 */
1060
1061 ds->magic = VMXNET3_REV1_MAGIC;
1062
1063 /* DriverInfo */
1064 ds->version = VMXNET3_DRIVER_VERSION;
1065 ds->guest = VMXNET3_GOS_FREEBSD |
1066 #ifdef __LP64__
1067 VMXNET3_GOS_64BIT;
1068 #else
1069 VMXNET3_GOS_32BIT;
1070 #endif
1071 ds->vmxnet3_revision = 1;
1072 ds->upt_version = 1;
1073
1074 /* Misc. conf */
1075 ds->driver_data = vtophys(sc);
1076 ds->driver_data_len = sizeof(struct vmxnet3_softc);
1077 ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
1078 ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
1079 ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
1080
1081 /* RSS conf */
1082 if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1083 ds->rss.version = 1;
1084 ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
1085 ds->rss.len = sc->vmx_rss_dma.idi_size;
1086 }
1087
1088 /* Interrupt control. */
1089 ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1090 /*
1091 * Total number of interrupt indexes we are using in the shared
1092 * config data, even though we don't actually allocate interrupt
1093 * resources for the tx queues. Some versions of the device will
1094 * fail to initialize successfully if interrupt indexes are used in
1095 * the shared config that exceed the number of interrupts configured
1096 * here.
1097 */
1098 ds->nintr = (scctx->isc_vectors == 1) ?
1099 2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
1100 ds->evintr = sc->vmx_event_intr_idx;
1101 ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1102
1103 for (i = 0; i < ds->nintr; i++)
1104 ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1105
1106 /* Receive filter. */
1107 ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
1108 ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
1109
1110 /* Tx queues */
1111 for (i = 0; i < scctx->isc_ntxqsets; i++) {
1112 txq = &sc->vmx_txq[i];
1113 txs = txq->vxtxq_ts;
1114
1115 txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
1116 txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1117 txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
1118 txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1119 txs->driver_data = vtophys(txq);
1120 txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1121 }
1122
1123 /* Rx queues */
1124 for (i = 0; i < scctx->isc_nrxqsets; i++) {
1125 rxq = &sc->vmx_rxq[i];
1126 rxs = rxq->vxrxq_rs;
1127
1128 rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
1129 rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1130 rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
1131 rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1132 rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
1133 rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1134 rxs->driver_data = vtophys(rxq);
1135 rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1136 }
1137 }
1138
1139 static void
vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1141 {
1142 if_softc_ctx_t scctx;
1143 struct vmxnet3_rss_shared *rss;
1144 #ifdef RSS
1145 uint8_t rss_algo;
1146 #endif
1147 int i;
1148
1149 scctx = sc->vmx_scctx;
1150 rss = sc->vmx_rss;
1151
1152 rss->hash_type =
1153 UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1154 UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1155 rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1156 rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1157 rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1158 /*
1159 * Always use the kernel RSS key for consistent hashing.
1160 * If software RSS is configured to Toeplitz and RSS CPU steering
1161 * is available, use the RSS indirection table. Otherwise use
1162 * simple round-robin but still report hash as opaque to disengage
1163 * from software RSS when CPU steering is not available.
1164 */
1165 rss_getkey(rss->hash_key);
1166
1167 #ifdef RSS
1168 rss_algo = rss_gethashalgo();
1169 if (rss_algo == RSS_HASH_TOEPLITZ) {
1170 for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) {
1171 rss->ind_table[i] = rss_get_indirection_to_bucket(i) %
1172 scctx->isc_nrxqsets;
1173 }
1174 sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS;
1175 } else
1176 #endif
1177 {
1178 for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1179 rss->ind_table[i] = i % scctx->isc_nrxqsets;
1180 sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS;
1181 }
1182 }
1183
1184 static void
vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1186 {
1187 if_t ifp;
1188 struct vmxnet3_driver_shared *ds;
1189 if_softc_ctx_t scctx;
1190
1191 ifp = sc->vmx_ifp;
1192 ds = sc->vmx_ds;
1193 scctx = sc->vmx_scctx;
1194
1195 ds->mtu = if_getmtu(ifp);
1196 ds->ntxqueue = scctx->isc_ntxqsets;
1197 ds->nrxqueue = scctx->isc_nrxqsets;
1198
1199 ds->upt_features = 0;
1200 if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1201 ds->upt_features |= UPT1_F_CSUM;
1202 if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING)
1203 ds->upt_features |= UPT1_F_VLAN;
1204 if (if_getcapenable(ifp) & IFCAP_LRO)
1205 ds->upt_features |= UPT1_F_LRO;
1206
1207 if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1208 ds->upt_features |= UPT1_F_RSS;
1209 vmxnet3_reinit_rss_shared_data(sc);
1210 }
1211
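/*
 * Pass the physical address of the shared data area to the device,
 * low 32 bits first and then the high 32 bits.
 */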
1212 vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
1213 vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1214 (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
1215 }
1216
1217 static int
vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1219 {
1220 int error;
1221
1222 error = vmxnet3_alloc_shared_data(sc);
1223 if (error)
1224 return (error);
1225
1226 error = vmxnet3_alloc_mcast_table(sc);
1227 if (error)
1228 return (error);
1229
1230 vmxnet3_init_shared_data(sc);
1231
1232 return (0);
1233 }
1234
1235 static void
vmxnet3_free_data(struct vmxnet3_softc *sc)
1237 {
1238
1239 vmxnet3_free_mcast_table(sc);
1240 vmxnet3_free_shared_data(sc);
1241 }
1242
1243 static void
vmxnet3_evintr(struct vmxnet3_softc *sc)
1245 {
1246 device_t dev;
1247 struct vmxnet3_txq_shared *ts;
1248 struct vmxnet3_rxq_shared *rs;
1249 uint32_t event;
1250
1251 dev = sc->vmx_dev;
1252
1253 /* Clear events. */
1254 event = sc->vmx_ds->event;
1255 vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1256
1257 if (event & VMXNET3_EVENT_LINK)
1258 vmxnet3_link_status(sc);
1259
1260 if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1261 vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1262 ts = sc->vmx_txq[0].vxtxq_ts;
1263 if (ts->stopped != 0)
1264 device_printf(dev, "Tx queue error %#x\n", ts->error);
1265 rs = sc->vmx_rxq[0].vxrxq_rs;
1266 if (rs->stopped != 0)
1267 device_printf(dev, "Rx queue error %#x\n", rs->error);
1268
/* XXX - rely on the iflib watchdog to reset us? */
1270 device_printf(dev, "Rx/Tx queue error event ... "
1271 "waiting for iflib watchdog reset\n");
1272 }
1273
1274 if (event & VMXNET3_EVENT_DIC)
1275 device_printf(dev, "device implementation change event\n");
1276 if (event & VMXNET3_EVENT_DEBUG)
1277 device_printf(dev, "debug event\n");
1278 }
1279
1280 static int
vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
1282 {
1283 struct vmxnet3_softc *sc;
1284 struct vmxnet3_txqueue *txq;
1285 struct vmxnet3_txring *txr;
1286 struct vmxnet3_txdesc *txd, *sop;
1287 bus_dma_segment_t *segs;
1288 int nsegs;
1289 int pidx;
1290 int hdrlen;
1291 int i;
1292 int gen;
1293
1294 sc = vsc;
1295 txq = &sc->vmx_txq[pi->ipi_qsidx];
1296 txr = &txq->vxtxq_cmd_ring;
1297 segs = pi->ipi_segs;
1298 nsegs = pi->ipi_nsegs;
1299 pidx = pi->ipi_pidx;
1300
1301 KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
1302 ("%s: packet with too many segments %d", __func__, nsegs));
1303
1304 sop = &txr->vxtxr_txd[pidx];
gen = txr->vxtxr_gen ^ 1; /* Still owned by the CPU */
1306
1307 for (i = 0; i < nsegs; i++) {
1308 txd = &txr->vxtxr_txd[pidx];
1309
1310 txd->addr = segs[i].ds_addr;
1311 txd->len = segs[i].ds_len;
1312 txd->gen = gen;
1313 txd->dtype = 0;
1314 txd->offload_mode = VMXNET3_OM_NONE;
1315 txd->offload_pos = 0;
1316 txd->hlen = 0;
1317 txd->eop = 0;
1318 txd->compreq = 0;
1319 txd->vtag_mode = 0;
1320 txd->vtag = 0;
1321
1322 if (++pidx == txr->vxtxr_ndesc) {
1323 pidx = 0;
1324 txr->vxtxr_gen ^= 1;
1325 }
1326 gen = txr->vxtxr_gen;
1327 }
1328 txd->eop = 1;
1329 txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
1330 pi->ipi_new_pidx = pidx;
1331
1332 /*
1333 * VLAN
1334 */
1335 if (pi->ipi_mflags & M_VLANTAG) {
1336 sop->vtag_mode = 1;
1337 sop->vtag = pi->ipi_vtag;
1338 }
1339
1340 /*
1341 * TSO and checksum offloads
1342 */
1343 hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
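/*
 * For TSO, hlen covers the full L2/L3/L4 header and offload_pos
 * carries the segment size; for checksum offload, offload_pos is the
 * offset of the L4 checksum field.
 */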
1344 if (pi->ipi_csum_flags & CSUM_TSO) {
1345 sop->offload_mode = VMXNET3_OM_TSO;
1346 sop->hlen = hdrlen + pi->ipi_tcp_hlen;
1347 sop->offload_pos = pi->ipi_tso_segsz;
1348 } else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
1349 VMXNET3_CSUM_OFFLOAD_IPV6)) {
1350 sop->offload_mode = VMXNET3_OM_CSUM;
1351 sop->hlen = hdrlen;
1352 sop->offload_pos = hdrlen +
1353 ((pi->ipi_ipproto == IPPROTO_TCP) ?
1354 offsetof(struct tcphdr, th_sum) :
1355 offsetof(struct udphdr, uh_sum));
1356 }
1357
1358 /* Finally, change the ownership. */
1359 vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
1360 sop->gen ^= 1;
1361
1362 return (0);
1363 }
1364
1365 static void
vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
1367 {
1368 struct vmxnet3_softc *sc;
1369 struct vmxnet3_txqueue *txq;
1370
1371 sc = vsc;
1372 txq = &sc->vmx_txq[txqid];
1373
1374 /*
1375 * pidx is what we last set ipi_new_pidx to in
1376 * vmxnet3_isc_txd_encap()
1377 */
1378
1379 /*
1380 * Avoid expensive register updates if the flush request is
1381 * redundant.
1382 */
1383 if (txq->vxtxq_last_flush == pidx)
1384 return;
1385 txq->vxtxq_last_flush = pidx;
1386 vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
1387 }
1388
1389 static int
vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
1391 {
1392 struct vmxnet3_softc *sc;
1393 struct vmxnet3_txqueue *txq;
1394 struct vmxnet3_comp_ring *txc;
1395 struct vmxnet3_txcompdesc *txcd;
1396 struct vmxnet3_txring *txr;
1397 int processed;
1398
1399 sc = vsc;
1400 txq = &sc->vmx_txq[txqid];
1401 txc = &txq->vxtxq_comp_ring;
1402 txr = &txq->vxtxq_cmd_ring;
1403
1404 /*
1405 * If clear is true, we need to report the number of TX command ring
1406 * descriptors that have been processed by the device. If clear is
1407 * false, we just need to report whether or not at least one TX
1408 * command ring descriptor has been processed by the device.
1409 */
1410 processed = 0;
1411 for (;;) {
1412 txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1413 if (txcd->gen != txc->vxcr_gen)
1414 break;
1415 else if (!clear)
1416 return (1);
1417 vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1418
1419 MPASS(txc->vxcr_next < txc->vxcr_ndesc);
1420 if (++txc->vxcr_next >= txc->vxcr_ndesc) {
1421 txc->vxcr_next = 0;
1422 txc->vxcr_gen ^= 1;
1423 }
1424
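/*
 * Count the command ring descriptors completed by this entry,
 * handling the case where eop_idx has wrapped past the end of
 * the ring.
 */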
1425 if (txcd->eop_idx < txr->vxtxr_next)
1426 processed += txr->vxtxr_ndesc -
1427 (txr->vxtxr_next - txcd->eop_idx) + 1;
1428 else
1429 processed += txcd->eop_idx - txr->vxtxr_next + 1;
1430 txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1431 }
1432
1433 return (processed);
1434 }
1435
1436 static int
vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
1438 {
1439 struct vmxnet3_softc *sc;
1440 struct vmxnet3_rxqueue *rxq;
1441 struct vmxnet3_comp_ring *rxc;
1442 struct vmxnet3_rxcompdesc *rxcd;
1443 int avail;
1444 int completed_gen;
1445 #ifdef INVARIANTS
1446 int expect_sop = 1;
1447 #endif
1448 sc = vsc;
1449 rxq = &sc->vmx_rxq[rxqid];
1450 rxc = &rxq->vxrxq_comp_ring;
1451
1452 avail = 0;
1453 completed_gen = rxc->vxcr_gen;
1454 for (;;) {
1455 rxcd = &rxc->vxcr_u.rxcd[idx];
1456 if (rxcd->gen != completed_gen)
1457 break;
1458 vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1459
1460 #ifdef INVARIANTS
1461 if (expect_sop)
1462 KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1463 else
1464 KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
1465 expect_sop = rxcd->eop;
1466 #endif
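/* Only completed, non-zero-length packets count toward the budget. */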
1467 if (rxcd->eop && (rxcd->len != 0))
1468 avail++;
1469 if (avail > budget)
1470 break;
1471 if (++idx == rxc->vxcr_ndesc) {
1472 idx = 0;
1473 completed_gen ^= 1;
1474 }
1475 }
1476
1477 return (avail);
1478 }
1479
1480 static int
vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
1482 {
1483 struct vmxnet3_softc *sc;
1484 if_softc_ctx_t scctx;
1485 struct vmxnet3_rxqueue *rxq;
1486 struct vmxnet3_comp_ring *rxc;
1487 struct vmxnet3_rxcompdesc *rxcd;
1488 if_rxd_frag_t frag;
1489 int cqidx;
1490 uint16_t total_len;
1491 uint8_t nfrags;
1492 uint8_t i;
1493 uint8_t flid;
1494
1495 sc = vsc;
1496 scctx = sc->vmx_scctx;
1497 rxq = &sc->vmx_rxq[ri->iri_qsidx];
1498 rxc = &rxq->vxrxq_comp_ring;
1499
1500 /*
1501 * Get a single packet starting at the given index in the completion
1502 * queue. That we have been called indicates that
1503 * vmxnet3_isc_rxd_available() has already verified that either
1504 * there is a complete packet available starting at the given index,
1505 * or there are one or more zero length packets starting at the
1506 * given index followed by a complete packet, so no verification of
1507 * ownership of the descriptors (and no associated read barrier) is
1508 * required here.
1509 */
1510 cqidx = ri->iri_cidx;
1511 rxcd = &rxc->vxcr_u.rxcd[cqidx];
1512 while (rxcd->len == 0) {
1513 KASSERT(rxcd->sop && rxcd->eop,
1514 ("%s: zero-length packet without both sop and eop set",
1515 __func__));
1516 rxc->vxcr_zero_length++;
1517 if (++cqidx == rxc->vxcr_ndesc) {
1518 cqidx = 0;
1519 rxc->vxcr_gen ^= 1;
1520 }
1521 rxcd = &rxc->vxcr_u.rxcd[cqidx];
1522 }
1523 KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1524
1525 /*
1526 * RSS and flow ID.
1527 * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should
1528 * be used only if the software RSS is enabled and it uses the same
1529 * algorithm and the hash key as the "hardware". If the software RSS
1530 * is not enabled, then it's simply pointless to use those types.
1531 * If it's enabled but with different parameters, then hash values will
1532 * not match.
1533 */
1534 ri->iri_flowid = rxcd->rss_hash;
1535 #ifdef RSS
1536 if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) {
1537 switch (rxcd->rss_type) {
1538 case VMXNET3_RCD_RSS_TYPE_NONE:
1539 ri->iri_flowid = ri->iri_qsidx;
1540 ri->iri_rsstype = M_HASHTYPE_NONE;
1541 break;
1542 case VMXNET3_RCD_RSS_TYPE_IPV4:
1543 ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
1544 break;
1545 case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
1546 ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
1547 break;
1548 case VMXNET3_RCD_RSS_TYPE_IPV6:
1549 ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
1550 break;
1551 case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
1552 ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
1553 break;
1554 default:
1555 ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1556 break;
1557 }
1558 } else
1559 #endif
1560 {
1561 switch (rxcd->rss_type) {
1562 case VMXNET3_RCD_RSS_TYPE_NONE:
1563 ri->iri_flowid = ri->iri_qsidx;
1564 ri->iri_rsstype = M_HASHTYPE_NONE;
1565 break;
1566 default:
1567 ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1568 break;
1569 }
1570 }
1571
1572 /*
1573 * The queue numbering scheme used for rxcd->qid is as follows:
1574 * - All of the command ring 0s are numbered [0, nrxqsets - 1]
1575 * - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
1576 *
1577 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
1578 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
1579 * indicates command ring (and flid) 1.
1580 */
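/*
 * For example, with nrxqsets == 4, qid 2 refers to command ring 0 of
 * queue 2, while qid 6 refers to command ring 1 of that same queue.
 */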
1581 nfrags = 0;
1582 total_len = 0;
1583 do {
1584 rxcd = &rxc->vxcr_u.rxcd[cqidx];
1585 KASSERT(rxcd->gen == rxc->vxcr_gen,
1586 ("%s: generation mismatch", __func__));
1587 KASSERT(nfrags < IFLIB_MAX_RX_SEGS,
1588 ("%s: too many fragments", __func__));
1589 if (__predict_true(rxcd->len != 0)) {
1590 frag = &ri->iri_frags[nfrags];
1591 flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
1592 frag->irf_flid = flid;
1593 frag->irf_idx = rxcd->rxd_idx;
1594 frag->irf_len = rxcd->len;
1595 total_len += rxcd->len;
1596 nfrags++;
1597 } else {
1598 rxc->vcxr_zero_length_frag++;
1599 }
1600 if (++cqidx == rxc->vxcr_ndesc) {
1601 cqidx = 0;
1602 rxc->vxcr_gen ^= 1;
1603 }
1604 } while (!rxcd->eop);
1605
1606 ri->iri_cidx = cqidx;
1607 ri->iri_nfrags = nfrags;
1608 ri->iri_len = total_len;
1609
1610 /*
1611 * If there's an error, the last descriptor in the packet will
1612 * have the error indicator set. In this case, set all
1613 * fragment lengths to zero. This will cause iflib to discard
1614 * the packet, but process all associated descriptors through
1615 * the refill mechanism.
1616 */
1617 if (__predict_false(rxcd->error)) {
1618 rxc->vxcr_pkt_errors++;
1619 for (i = 0; i < nfrags; i++) {
1620 frag = &ri->iri_frags[i];
1621 frag->irf_len = 0;
1622 }
1623 } else {
1624 /* Checksum offload information is in the last descriptor. */
1625 if (!rxcd->no_csum) {
1626 uint32_t csum_flags = 0;
1627
1628 if (rxcd->ipv4) {
1629 csum_flags |= CSUM_IP_CHECKED;
1630 if (rxcd->ipcsum_ok)
1631 csum_flags |= CSUM_IP_VALID;
1632 }
1633 if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
1634 csum_flags |= CSUM_L4_CALC;
1635 if (rxcd->csum_ok) {
1636 csum_flags |= CSUM_L4_VALID;
1637 ri->iri_csum_data = 0xffff;
1638 }
1639 }
1640 ri->iri_csum_flags = csum_flags;
1641 }
1642
1643 /* VLAN information is in the last descriptor. */
1644 if (rxcd->vlan) {
1645 ri->iri_flags |= M_VLANTAG;
1646 ri->iri_vtag = rxcd->vtag;
1647 }
1648 }
1649
1650 return (0);
1651 }
1652
1653 static void
vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
1655 {
1656 struct vmxnet3_softc *sc;
1657 struct vmxnet3_rxqueue *rxq;
1658 struct vmxnet3_rxring *rxr;
1659 struct vmxnet3_rxdesc *rxd;
1660 uint64_t *paddrs;
1661 int count;
1662 int len;
1663 int idx;
1664 int i;
1665 uint8_t flid;
1666 uint8_t btype;
1667
1668 count = iru->iru_count;
1669 len = iru->iru_buf_size;
1670 flid = iru->iru_flidx;
1671 paddrs = iru->iru_paddrs;
1672
1673 sc = vsc;
1674 rxq = &sc->vmx_rxq[iru->iru_qsidx];
1675 rxr = &rxq->vxrxq_cmd_ring[flid];
1676 rxd = rxr->vxrxr_rxd;
1677
1678 /*
1679 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
1680 * command ring 1 is filled with BTYPE_BODY descriptors.
1681 */
1682 btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
1683 /*
1684 * The refill entries from iflib will advance monotonically,
1685 * but the refilled descriptors may not be contiguous due to
1686 * earlier skipping of descriptors by the device. The refill
1687 * entries from iflib need an entire state update, while the
1688 * descriptors previously skipped by the device only need to
1689 * have their generation numbers updated.
1690 */
1691 idx = rxr->vxrxr_refill_start;
1692 i = 0;
1693 do {
1694 if (idx == iru->iru_idxs[i]) {
1695 rxd[idx].addr = paddrs[i];
1696 rxd[idx].len = len;
1697 rxd[idx].btype = btype;
1698 i++;
1699 } else
1700 rxr->vxrxr_desc_skips++;
1701 rxd[idx].gen = rxr->vxrxr_gen;
1702
1703 if (++idx == rxr->vxrxr_ndesc) {
1704 idx = 0;
1705 rxr->vxrxr_gen ^= 1;
1706 }
1707 } while (i != count);
1708 rxr->vxrxr_refill_start = idx;
1709 }
1710
1711 static void
vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
1713 {
1714 struct vmxnet3_softc *sc;
1715 bus_size_t r;
1716
1717 sc = vsc;
1718
1719 if (flid == 0)
1720 r = VMXNET3_BAR0_RXH1(rxqid);
1721 else
1722 r = VMXNET3_BAR0_RXH2(rxqid);
1723
1724 vmxnet3_write_bar0(sc, r, pidx);
1725 }
1726
1727 static int
vmxnet3_legacy_intr(void *xsc)
1729 {
1730 struct vmxnet3_softc *sc;
1731 if_softc_ctx_t scctx;
1732 if_ctx_t ctx;
1733
1734 sc = xsc;
1735 scctx = sc->vmx_scctx;
1736 ctx = sc->vmx_ctx;
1737
1738 /*
1739 * When there is only a single interrupt configured, this routine
1740 * runs in fast interrupt context, following which the rxq 0 task
1741 * will be enqueued.
1742 */
1743 if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
1744 if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
1745 return (FILTER_HANDLED);
1746 }
1747 if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1748 vmxnet3_intr_disable_all(ctx);
1749
1750 if (sc->vmx_ds->event != 0)
1751 iflib_admin_intr_deferred(ctx);
1752
1753 /*
1754 * XXX - When there is both rxq and event activity, do we care
1755 * whether the rxq 0 task or the admin task re-enables the interrupt
1756 * first?
1757 */
1758 return (FILTER_SCHEDULE_THREAD);
1759 }
1760
1761 static int
vmxnet3_rxq_intr(void *vrxq)
1763 {
1764 struct vmxnet3_softc *sc;
1765 struct vmxnet3_rxqueue *rxq;
1766
1767 rxq = vrxq;
1768 sc = rxq->vxrxq_sc;
1769
1770 if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1771 vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
1772
1773 return (FILTER_SCHEDULE_THREAD);
1774 }
1775
1776 static int
vmxnet3_event_intr(void *vsc)
1778 {
1779 struct vmxnet3_softc *sc;
1780
1781 sc = vsc;
1782
1783 if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1784 vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
1785
/*
 * The work will be done via vmxnet3_update_admin_status(), and the
 * interrupt will be re-enabled by vmxnet3_link_intr_enable().
 */
1792 return (FILTER_SCHEDULE_THREAD);
1793 }
1794
1795 static void
vmxnet3_stop(if_ctx_t ctx)
1797 {
1798 struct vmxnet3_softc *sc;
1799
1800 sc = iflib_get_softc(ctx);
1801
1802 sc->vmx_link_active = 0;
1803 vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
1804 vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
1805 }
1806
1807 static void
vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
1809 {
1810 struct vmxnet3_txring *txr;
1811 struct vmxnet3_comp_ring *txc;
1812
1813 txq->vxtxq_last_flush = -1;
1814
1815 txr = &txq->vxtxq_cmd_ring;
1816 txr->vxtxr_next = 0;
1817 txr->vxtxr_gen = VMXNET3_INIT_GEN;
1818 /*
1819 * iflib has zeroed out the descriptor array during the prior attach
1820 * or stop
1821 */
1822
1823 txc = &txq->vxtxq_comp_ring;
1824 txc->vxcr_next = 0;
1825 txc->vxcr_gen = VMXNET3_INIT_GEN;
1826 /*
1827 * iflib has zeroed out the descriptor array during the prior attach
1828 * or stop
1829 */
1830 }
1831
1832 static void
1833 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
1834 {
1835 struct vmxnet3_rxring *rxr;
1836 struct vmxnet3_comp_ring *rxc;
1837 int i;
1838
1839 /*
1840 * The descriptors will be populated with buffers during a
1841 * subsequent invocation of vmxnet3_isc_rxd_refill()
1842 */
1843 for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
1844 rxr = &rxq->vxrxq_cmd_ring[i];
1845 rxr->vxrxr_gen = VMXNET3_INIT_GEN;
1846 rxr->vxrxr_desc_skips = 0;
1847 rxr->vxrxr_refill_start = 0;
1848 /*
1849 * iflib has zeroed out the descriptor array during the
1850 * prior attach or stop
1851 */
1852 }
1853
1854 for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
1855 rxr = &rxq->vxrxq_cmd_ring[i];
1856 rxr->vxrxr_gen = 0;
1857 rxr->vxrxr_desc_skips = 0;
1858 rxr->vxrxr_refill_start = 0;
1859 bzero(rxr->vxrxr_rxd,
1860 rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
1861 }
1862
1863 rxc = &rxq->vxrxq_comp_ring;
1864 rxc->vxcr_next = 0;
1865 rxc->vxcr_gen = VMXNET3_INIT_GEN;
1866 rxc->vxcr_zero_length = 0;
1867 rxc->vcxr_zero_length_frag = 0;
1868 rxc->vxcr_pkt_errors = 0;
1869 /*
1870 * iflib has zeroed out the descriptor array during the prior attach
1871 * or stop
1872 */
1873 }
1874
1875 static void
1876 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
1877 {
1878 if_softc_ctx_t scctx;
1879 int q;
1880
1881 scctx = sc->vmx_scctx;
1882
1883 for (q = 0; q < scctx->isc_ntxqsets; q++)
1884 vmxnet3_txinit(sc, &sc->vmx_txq[q]);
1885
1886 for (q = 0; q < scctx->isc_nrxqsets; q++)
1887 vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
1888 }
1889
1890 static int
1891 vmxnet3_enable_device(struct vmxnet3_softc *sc)
1892 {
1893 if_softc_ctx_t scctx;
1894 int q;
1895
1896 scctx = sc->vmx_scctx;
1897
1898 if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
1899 device_printf(sc->vmx_dev, "device enable command failed!\n");
1900 return (1);
1901 }
1902
1903 /* Reset the Rx queue heads. */
1904 for (q = 0; q < scctx->isc_nrxqsets; q++) {
1905 vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
1906 vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
1907 }
1908
1909 return (0);
1910 }
1911
1912 static void
1913 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
1914 {
1915 if_t ifp;
1916
1917 ifp = sc->vmx_ifp;
1918
1919 vmxnet3_set_rxfilter(sc, if_getflags(ifp));
1920
1921 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1922 bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
1923 sizeof(sc->vmx_ds->vlan_filter));
1924 else
1925 bzero(sc->vmx_ds->vlan_filter,
1926 sizeof(sc->vmx_ds->vlan_filter));
1927 vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
1928 }
1929
1930 static void
1931 vmxnet3_init(if_ctx_t ctx)
1932 {
1933 struct vmxnet3_softc *sc;
1934
1935 sc = iflib_get_softc(ctx);
1936
1937 /* Use the current MAC address. */
1938 bcopy(if_getlladdr(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1939 vmxnet3_set_lladdr(sc);
1940
1941 vmxnet3_reinit_shared_data(sc);
1942 vmxnet3_reinit_queues(sc);
1943
1944 vmxnet3_enable_device(sc);
1945
1946 vmxnet3_reinit_rxfilters(sc);
1947 vmxnet3_link_status(sc);
1948 }
1949
1950 static void
1951 vmxnet3_multi_set(if_ctx_t ctx)
1952 {
1953
1954 vmxnet3_set_rxfilter(iflib_get_softc(ctx),
1955 if_getflags(iflib_get_ifp(ctx)));
1956 }
1957
1958 static int
1959 vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
1960 {
1961 struct vmxnet3_softc *sc;
1962 if_softc_ctx_t scctx;
1963
1964 sc = iflib_get_softc(ctx);
1965 scctx = sc->vmx_scctx;
1966
1967 if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
1968 ETHER_CRC_LEN))
1969 return (EINVAL);
1970
1971 /*
1972 * Update the max frame size so that the rx mbuf size is
1973 * chosen based on the new mtu during the interface init that
1974 * will occur after this routine returns.
1975 */
1976 scctx->isc_max_frame_size = mtu +
1977 ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
1978 /* RX completion queue - n/a */
1979 scctx->isc_rxd_buf_size[0] = 0;
1980 /*
1981 * For header-type descriptors (used for first segment of
1982 * packet), let iflib determine the buffer size based on the
1983 * max frame size.
1984 */
1985 scctx->isc_rxd_buf_size[1] = 0;
1986 /*
1987 * For body-type descriptors (used for jumbo frames and LRO),
1988 * always use page-sized buffers.
1989 */
1990 scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE;
1991
1992 return (0);
1993 }
1994
1995 static void
1996 vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
1997 {
1998 struct vmxnet3_softc *sc;
1999
2000 sc = iflib_get_softc(ctx);
2001
2002 ifmr->ifm_status = IFM_AVALID;
2003 ifmr->ifm_active = IFM_ETHER;
2004
2005 if (vmxnet3_link_is_up(sc) != 0) {
2006 ifmr->ifm_status |= IFM_ACTIVE;
2007 ifmr->ifm_active |= IFM_AUTO;
2008 } else
2009 ifmr->ifm_active |= IFM_NONE;
2010 }
2011
2012 static int
2013 vmxnet3_media_change(if_ctx_t ctx)
2014 {
2015
2016 /* Ignore. */
2017 return (0);
2018 }
2019
2020 static int
2021 vmxnet3_promisc_set(if_ctx_t ctx, int flags)
2022 {
2023
2024 vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
2025
2026 return (0);
2027 }
2028
2029 static uint64_t
2030 vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
2031 {
2032 if_t ifp = iflib_get_ifp(ctx);
2033
2034 if (cnt < IFCOUNTERS)
2035 return (if_get_counter_default(ifp, cnt));
2036
2037 return (0);
2038 }
2039
2040 static void
2041 vmxnet3_update_admin_status(if_ctx_t ctx)
2042 {
2043 struct vmxnet3_softc *sc;
2044
2045 sc = iflib_get_softc(ctx);
2046 /*
2047 * iflib may invoke this routine before vmxnet3_attach_post() has
2048 * run, which is before the top level shared data area is
2049 * initialized and the device made aware of it.
2050 */
2051 if (sc->vmx_ds != NULL && sc->vmx_ds->event != 0)
2052 vmxnet3_evintr(sc);
2053
2054 vmxnet3_refresh_host_stats(sc);
2055 }
2056
2057 static void
2058 vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
2059 {
2060 /* Host stats refresh is global, so just trigger it on txq 0 */
2061 if (qid == 0)
2062 vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
2063 }
2064
2065 static void
2066 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
2067 {
2068 int idx, bit;
2069
2070 if (tag == 0 || tag > 4095)
2071 return;
2072
2073 idx = (tag >> 5) & 0x7F;
2074 bit = tag & 0x1F;
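	/*
	 * Worked example: tag 291 (0x123) selects word 9 (291 >> 5) and
	 * bit 3 (291 & 0x1f) of the VLAN filter bitvector.
	 */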
2075
2076 /* Update our private VLAN bitvector. */
2077 if (add)
2078 sc->vmx_vlan_filter[idx] |= (1 << bit);
2079 else
2080 sc->vmx_vlan_filter[idx] &= ~(1 << bit);
2081 }
2082
2083 static void
2084 vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
2085 {
2086
2087 vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
2088 }
2089
2090 static void
2091 vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
2092 {
2093
2094 vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
2095 }
2096
2097 static u_int
2098 vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count)
2099 {
2100 struct vmxnet3_softc *sc = arg;
2101
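	/*
	 * if_foreach_llmaddr() callback: copy each address into the
	 * multicast table while space remains, but always return 1 so the
	 * final count reflects every address seen; vmxnet3_set_rxfilter()
	 * uses that count to fall back to ALLMULTI on overflow.
	 */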
2102 if (count < VMXNET3_MULTICAST_MAX)
2103 bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN],
2104 ETHER_ADDR_LEN);
2105
2106 return (1);
2107 }
2108
2109 static void
2110 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
2111 {
2112 if_t ifp;
2113 struct vmxnet3_driver_shared *ds;
2114 u_int mode;
2115
2116 ifp = sc->vmx_ifp;
2117 ds = sc->vmx_ds;
2118
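	/*
	 * Unicast and broadcast are always accepted; promiscuous,
	 * all-multicast, and the multicast table are layered on top of
	 * that based on the interface flags and the size of the
	 * multicast address list.
	 */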
2119 mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
2120 if (flags & IFF_PROMISC)
2121 mode |= VMXNET3_RXMODE_PROMISC;
2122 if (flags & IFF_ALLMULTI)
2123 mode |= VMXNET3_RXMODE_ALLMULTI;
2124 else {
2125 int cnt;
2126
2127 cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc);
2128 if (cnt >= VMXNET3_MULTICAST_MAX) {
2129 cnt = 0;
2130 mode |= VMXNET3_RXMODE_ALLMULTI;
2131 } else if (cnt > 0)
2132 mode |= VMXNET3_RXMODE_MCAST;
2133 ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
2134 }
2135
2136 ds->rxmode = mode;
2137
2138 vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
2139 vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
2140 }
2141
2142 static void
2143 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
2144 {
2145
2146 vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
2147 }
2148
2149 static int
2150 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
2151 {
2152 uint32_t status;
2153
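	/*
	 * Only bit 0 (link up) of the GET_LINK result is consumed here;
	 * vmxnet3_link_status() reports a fixed 10Gbps regardless of the
	 * remaining bits.
	 */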
2154 status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
2155 return (!!(status & 0x1));
2156 }
2157
2158 static void
2159 vmxnet3_link_status(struct vmxnet3_softc *sc)
2160 {
2161 if_ctx_t ctx;
2162 uint64_t speed;
2163 int link;
2164
2165 ctx = sc->vmx_ctx;
2166 link = vmxnet3_link_is_up(sc);
2167 speed = IF_Gbps(10);
2168
2169 if (link != 0 && sc->vmx_link_active == 0) {
2170 sc->vmx_link_active = 1;
2171 iflib_link_state_change(ctx, LINK_STATE_UP, speed);
2172 } else if (link == 0 && sc->vmx_link_active != 0) {
2173 sc->vmx_link_active = 0;
2174 iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
2175 }
2176 }
2177
2178 static void
2179 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
2180 {
2181 uint32_t ml, mh;
2182
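	/*
	 * The MAC address is programmed as two little-endian words, e.g.
	 * 00:0c:29:aa:bb:cc yields MACL 0xaa290c00 and MACH 0x0000ccbb.
	 */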
2183 ml = sc->vmx_lladdr[0];
2184 ml |= sc->vmx_lladdr[1] << 8;
2185 ml |= sc->vmx_lladdr[2] << 16;
2186 ml |= sc->vmx_lladdr[3] << 24;
2187 vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
2188
2189 mh = sc->vmx_lladdr[4];
2190 mh |= sc->vmx_lladdr[5] << 8;
2191 vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
2192 }
2193
2194 static void
2195 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
2196 {
2197 uint32_t ml, mh;
2198
2199 ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
2200 mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
2201
2202 sc->vmx_lladdr[0] = ml;
2203 sc->vmx_lladdr[1] = ml >> 8;
2204 sc->vmx_lladdr[2] = ml >> 16;
2205 sc->vmx_lladdr[3] = ml >> 24;
2206 sc->vmx_lladdr[4] = mh;
2207 sc->vmx_lladdr[5] = mh >> 8;
2208 }
2209
2210 static void
2211 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
2212 struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2213 {
2214 struct sysctl_oid *node, *txsnode;
2215 struct sysctl_oid_list *list, *txslist;
2216 struct UPT1_TxStats *txstats;
2217 char namebuf[16];
2218
2219 txstats = &txq->vxtxq_ts->stats;
2220
2221 snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
2222 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2223 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
2224 txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
2225
2226 /*
2227 * Add statistics reported by the host. These are updated by the
2228 * iflib txq timer on txq 0.
2229 */
2230 txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2231 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2232 txslist = SYSCTL_CHILDREN(txsnode);
2233 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
2234 &txstats->TSO_packets, "TSO packets");
2235 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
2236 &txstats->TSO_bytes, "TSO bytes");
2237 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2238 &txstats->ucast_packets, "Unicast packets");
2239 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2240 &txstats->ucast_bytes, "Unicast bytes");
2241 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2242 &txstats->mcast_packets, "Multicast packets");
2243 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2244 &txstats->mcast_bytes, "Multicast bytes");
2245 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
2246 &txstats->error, "Errors");
2247 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
2248 &txstats->discard, "Discards");
2249 }
2250
2251 static void
2252 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
2253 struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2254 {
2255 struct sysctl_oid *node, *rxsnode;
2256 struct sysctl_oid_list *list, *rxslist;
2257 struct UPT1_RxStats *rxstats;
2258 char namebuf[16];
2259
2260 rxstats = &rxq->vxrxq_rs->stats;
2261
2262 snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
2263 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2264 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
2265 rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
2266
2267 /*
2268 * Add statistics reported by the host. These are updated by the
2269 * iflib txq timer on txq 0.
2270 */
2271 rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2272 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2273 rxslist = SYSCTL_CHILDREN(rxsnode);
2274 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
2275 &rxstats->LRO_packets, "LRO packets");
2276 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
2277 &rxstats->LRO_bytes, "LRO bytes");
2278 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2279 &rxstats->ucast_packets, "Unicast packets");
2280 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2281 &rxstats->ucast_bytes, "Unicast bytes");
2282 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2283 &rxstats->mcast_packets, "Multicast packets");
2284 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2285 &rxstats->mcast_bytes, "Multicast bytes");
2286 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
2287 &rxstats->bcast_packets, "Broadcast packets");
2288 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
2289 &rxstats->bcast_bytes, "Broadcast bytes");
2290 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
2291 &rxstats->nobuffer, "No buffer");
2292 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
2293 &rxstats->error, "Errors");
2294 }
2295
2296 static void
2297 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
2298 struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2299 {
2300 if_softc_ctx_t scctx;
2301 struct sysctl_oid *node;
2302 struct sysctl_oid_list *list;
2303 int i;
2304
2305 scctx = sc->vmx_scctx;
2306
2307 for (i = 0; i < scctx->isc_ntxqsets; i++) {
2308 struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
2309
2310 node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
2311 "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2312 list = SYSCTL_CHILDREN(node);
2313
2314 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
2315 &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
2316 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
2317 &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
2318 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
2319 &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
2320 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
2321 &txq->vxtxq_comp_ring.vxcr_next, 0, "");
2322 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2323 &txq->vxtxq_comp_ring.vxcr_ndesc, 0, "");
2324 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2325 &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
2326 }
2327
2328 for (i = 0; i < scctx->isc_nrxqsets; i++) {
2329 struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
2330
2331 node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
2332 "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2333 list = SYSCTL_CHILDREN(node);
2334
2335 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
2336 &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
2337 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
2338 &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
2339 SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD,
2340 &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, "");
2341 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
2342 &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
2343 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
2344 &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
2345 SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD,
2346 &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, "");
2347 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2348 &rxq->vxrxq_comp_ring.vxcr_ndesc, 0, "");
2349 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2350 &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
2351 SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD,
2352 &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, "");
2353 SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length_frag",
2354 CTLFLAG_RD, &rxq->vxrxq_comp_ring.vcxr_zero_length_frag,
2355 0, "");
2356 SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD,
2357 &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, "");
2358 }
2359 }
2360
2361 static void
2362 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
2363 struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2364 {
2365 if_softc_ctx_t scctx;
2366 int i;
2367
2368 scctx = sc->vmx_scctx;
2369
2370 for (i = 0; i < scctx->isc_ntxqsets; i++)
2371 vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
2372 for (i = 0; i < scctx->isc_nrxqsets; i++)
2373 vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
2374
2375 vmxnet3_setup_debug_sysctl(sc, ctx, child);
2376 }
2377
2378 static void
2379 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
2380 {
2381 device_t dev;
2382 struct sysctl_ctx_list *ctx;
2383 struct sysctl_oid *tree;
2384 struct sysctl_oid_list *child;
2385
2386 dev = sc->vmx_dev;
2387 ctx = device_get_sysctl_ctx(dev);
2388 tree = device_get_sysctl_tree(dev);
2389 child = SYSCTL_CHILDREN(tree);
2390
2391 vmxnet3_setup_queue_sysctl(sc, ctx, child);
2392 }
2393
2394 static void
2395 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2396 {
2397
2398 bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
2399 }
2400
2401 static uint32_t
2402 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
2403 {
2404
2405 return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
2406 }
2407
2408 static void
2409 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2410 {
2411
2412 bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
2413 }
2414
2415 static void
2416 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2417 {
2418
2419 vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
2420 }
2421
2422 static uint32_t
2423 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2424 {
2425
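	/*
	 * Commands are issued by writing to the BAR1 CMD register; for
	 * commands that return data, the result is then read back from
	 * the same register.
	 */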
2426 vmxnet3_write_cmd(sc, cmd);
2427 bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
2428 BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2429 return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
2430 }
2431
2432 static void
2433 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
2434 {
2435
2436 vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
2437 }
2438
2439 static void
2440 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
2441 {
2442
2443 vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
2444 }
2445
2446 static int
2447 vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2448 {
2449 /* Not using interrupts for TX */
2450 return (0);
2451 }
2452
2453 static int
2454 vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2455 {
2456 struct vmxnet3_softc *sc;
2457
2458 sc = iflib_get_softc(ctx);
2459 vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
2460 return (0);
2461 }
2462
2463 static void
2464 vmxnet3_link_intr_enable(if_ctx_t ctx)
2465 {
2466 struct vmxnet3_softc *sc;
2467
2468 sc = iflib_get_softc(ctx);
2469 vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2470 }
2471
2472 static void
2473 vmxnet3_intr_enable_all(if_ctx_t ctx)
2474 {
2475 struct vmxnet3_softc *sc;
2476 if_softc_ctx_t scctx;
2477 int i;
2478
2479 sc = iflib_get_softc(ctx);
2480 scctx = sc->vmx_scctx;
2481 sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
2482 for (i = 0; i < scctx->isc_vectors; i++)
2483 vmxnet3_enable_intr(sc, i);
2484 }
2485
2486 static void
2487 vmxnet3_intr_disable_all(if_ctx_t ctx)
2488 {
2489 struct vmxnet3_softc *sc;
2490 int i;
2491
2492 sc = iflib_get_softc(ctx);
2493 /*
2494 * iflib may invoke this routine before vmxnet3_attach_post() has
2495 * run, which is before the top level shared data area is
2496 * initialized and the device made aware of it.
2497 */
2498 if (sc->vmx_ds != NULL)
2499 sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
2500 for (i = 0; i < VMXNET3_MAX_INTRS; i++)
2501 vmxnet3_disable_intr(sc, i);
2502 }
2503
2504 static bool
2505 vmxnet3_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event)
2506 {
2507 switch (event) {
2508 case IFLIB_RESTART_VLAN_CONFIG:
2509 return (true);
2510 default:
2511 return (false);
2512 }
2513 }
2514
2515 /*
2516 * Since this is a purely paravirtualized device, we do not have
2517 * to worry about DMA coherency. But at times, we must make sure
2518 * both the compiler and CPU do not reorder memory operations.
2519 */
2520 static inline void
2521 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
2522 {
2523
2524 switch (type) {
2525 case VMXNET3_BARRIER_RD:
2526 rmb();
2527 break;
2528 case VMXNET3_BARRIER_WR:
2529 wmb();
2530 break;
2531 case VMXNET3_BARRIER_RDWR:
2532 mb();
2533 break;
2534 default:
2535 panic("%s: bad barrier type %d", __func__, type);
2536 }
2537 }
2538