1 /*-
2 * Copyright (c) 2013 Tsubai Masanari
3 * Copyright (c) 2013 Bryan Venteicher <bryanv@FreeBSD.org>
4 * Copyright (c) 2018 Patrick Kelsey
5 *
6 * Permission to use, copy, modify, and distribute this software for any
7 * purpose with or without fee is hereby granted, provided that the above
8 * copyright notice and this permission notice appear in all copies.
9 *
10 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
11 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
12 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
13 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
14 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
15 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
16 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
17 *
18 * $OpenBSD: src/sys/dev/pci/if_vmx.c,v 1.11 2013/06/22 00:28:10 uebayasi Exp $
19 */
20
21 /* Driver for VMware vmxnet3 virtual ethernet devices. */
22
23 #include <sys/cdefs.h>
24 #include "opt_rss.h"
25
26 #include <sys/param.h>
27 #include <sys/systm.h>
28 #include <sys/kernel.h>
29 #include <sys/endian.h>
30 #include <sys/sockio.h>
31 #include <sys/mbuf.h>
32 #include <sys/malloc.h>
33 #include <sys/module.h>
34 #include <sys/socket.h>
35 #include <sys/sysctl.h>
36 #include <sys/smp.h>
37 #include <vm/vm.h>
38 #include <vm/pmap.h>
39
40 #include <net/ethernet.h>
41 #include <net/if.h>
42 #include <net/if_var.h>
43 #include <net/if_arp.h>
44 #include <net/if_dl.h>
45 #include <net/if_types.h>
46 #include <net/if_media.h>
47 #include <net/if_vlan_var.h>
48 #include <net/iflib.h>
49 #ifdef RSS
50 #include <net/rss_config.h>
51 #endif
52
53 #include <netinet/in_systm.h>
54 #include <netinet/in.h>
55 #include <netinet/ip.h>
56 #include <netinet/ip6.h>
57 #include <netinet6/ip6_var.h>
58 #include <netinet/udp.h>
59 #include <netinet/tcp.h>
60
61 #include <machine/bus.h>
62 #include <machine/resource.h>
63 #include <sys/bus.h>
64 #include <sys/rman.h>
65
66 #include <dev/pci/pcireg.h>
67 #include <dev/pci/pcivar.h>
68
69 #include "ifdi_if.h"
70
71 #include "if_vmxreg.h"
72 #include "if_vmxvar.h"
73
74 #include "opt_inet.h"
75 #include "opt_inet6.h"
76
77 #define VMXNET3_VMWARE_VENDOR_ID 0x15AD
78 #define VMXNET3_VMWARE_DEVICE_ID 0x07B0
79
80 static const pci_vendor_info_t vmxnet3_vendor_info_array[] =
81 {
82 PVID(VMXNET3_VMWARE_VENDOR_ID, VMXNET3_VMWARE_DEVICE_ID, "VMware VMXNET3 Ethernet Adapter"),
83 /* required last entry */
84 PVID_END
85 };
86
87 static void *vmxnet3_register(device_t);
88 static int vmxnet3_attach_pre(if_ctx_t);
89 static int vmxnet3_msix_intr_assign(if_ctx_t, int);
90 static void vmxnet3_free_irqs(struct vmxnet3_softc *);
91 static int vmxnet3_attach_post(if_ctx_t);
92 static int vmxnet3_detach(if_ctx_t);
93 static int vmxnet3_shutdown(if_ctx_t);
94 static int vmxnet3_suspend(if_ctx_t);
95 static int vmxnet3_resume(if_ctx_t);
96
97 static int vmxnet3_alloc_resources(struct vmxnet3_softc *);
98 static void vmxnet3_free_resources(struct vmxnet3_softc *);
99 static int vmxnet3_check_version(struct vmxnet3_softc *);
100 static void vmxnet3_set_interrupt_idx(struct vmxnet3_softc *);
101
102 static int vmxnet3_queues_shared_alloc(struct vmxnet3_softc *);
103 static void vmxnet3_init_txq(struct vmxnet3_softc *, int);
104 static int vmxnet3_tx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
105 static void vmxnet3_init_rxq(struct vmxnet3_softc *, int, int);
106 static int vmxnet3_rx_queues_alloc(if_ctx_t, caddr_t *, uint64_t *, int, int);
107 static void vmxnet3_queues_free(if_ctx_t);
108
109 static int vmxnet3_alloc_shared_data(struct vmxnet3_softc *);
110 static void vmxnet3_free_shared_data(struct vmxnet3_softc *);
111 static int vmxnet3_alloc_mcast_table(struct vmxnet3_softc *);
112 static void vmxnet3_free_mcast_table(struct vmxnet3_softc *);
113 static void vmxnet3_init_shared_data(struct vmxnet3_softc *);
114 static void vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *);
115 static void vmxnet3_reinit_shared_data(struct vmxnet3_softc *);
116 static int vmxnet3_alloc_data(struct vmxnet3_softc *);
117 static void vmxnet3_free_data(struct vmxnet3_softc *);
118
119 static void vmxnet3_evintr(struct vmxnet3_softc *);
120 static int vmxnet3_isc_txd_encap(void *, if_pkt_info_t);
121 static void vmxnet3_isc_txd_flush(void *, uint16_t, qidx_t);
122 static int vmxnet3_isc_txd_credits_update(void *, uint16_t, bool);
123 static int vmxnet3_isc_rxd_available(void *, uint16_t, qidx_t, qidx_t);
124 static int vmxnet3_isc_rxd_pkt_get(void *, if_rxd_info_t);
125 static void vmxnet3_isc_rxd_refill(void *, if_rxd_update_t);
126 static void vmxnet3_isc_rxd_flush(void *, uint16_t, uint8_t, qidx_t);
127 static int vmxnet3_legacy_intr(void *);
128 static int vmxnet3_rxq_intr(void *);
129 static int vmxnet3_event_intr(void *);
130
131 static void vmxnet3_stop(if_ctx_t);
132
133 static void vmxnet3_txinit(struct vmxnet3_softc *, struct vmxnet3_txqueue *);
134 static void vmxnet3_rxinit(struct vmxnet3_softc *, struct vmxnet3_rxqueue *);
135 static void vmxnet3_reinit_queues(struct vmxnet3_softc *);
136 static int vmxnet3_enable_device(struct vmxnet3_softc *);
137 static void vmxnet3_reinit_rxfilters(struct vmxnet3_softc *);
138 static void vmxnet3_init(if_ctx_t);
139 static void vmxnet3_multi_set(if_ctx_t);
140 static int vmxnet3_mtu_set(if_ctx_t, uint32_t);
141 static void vmxnet3_media_status(if_ctx_t, struct ifmediareq *);
142 static int vmxnet3_media_change(if_ctx_t);
143 static int vmxnet3_promisc_set(if_ctx_t, int);
144 static uint64_t vmxnet3_get_counter(if_ctx_t, ift_counter);
145 static void vmxnet3_update_admin_status(if_ctx_t);
146 static void vmxnet3_txq_timer(if_ctx_t, uint16_t);
147
148 static void vmxnet3_update_vlan_filter(struct vmxnet3_softc *, int,
149 uint16_t);
150 static void vmxnet3_vlan_register(if_ctx_t, uint16_t);
151 static void vmxnet3_vlan_unregister(if_ctx_t, uint16_t);
152 static void vmxnet3_set_rxfilter(struct vmxnet3_softc *, int);
153
154 static void vmxnet3_refresh_host_stats(struct vmxnet3_softc *);
155 static int vmxnet3_link_is_up(struct vmxnet3_softc *);
156 static void vmxnet3_link_status(struct vmxnet3_softc *);
157 static void vmxnet3_set_lladdr(struct vmxnet3_softc *);
158 static void vmxnet3_get_lladdr(struct vmxnet3_softc *);
159
160 static void vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *,
161 struct sysctl_ctx_list *, struct sysctl_oid_list *);
162 static void vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *,
163 struct sysctl_ctx_list *, struct sysctl_oid_list *);
164 static void vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *,
165 struct sysctl_ctx_list *, struct sysctl_oid_list *);
166 static void vmxnet3_setup_sysctl(struct vmxnet3_softc *);
167
168 static void vmxnet3_write_bar0(struct vmxnet3_softc *, bus_size_t,
169 uint32_t);
170 static uint32_t vmxnet3_read_bar1(struct vmxnet3_softc *, bus_size_t);
171 static void vmxnet3_write_bar1(struct vmxnet3_softc *, bus_size_t,
172 uint32_t);
173 static void vmxnet3_write_cmd(struct vmxnet3_softc *, uint32_t);
174 static uint32_t vmxnet3_read_cmd(struct vmxnet3_softc *, uint32_t);
175
176 static int vmxnet3_tx_queue_intr_enable(if_ctx_t, uint16_t);
177 static int vmxnet3_rx_queue_intr_enable(if_ctx_t, uint16_t);
178 static void vmxnet3_link_intr_enable(if_ctx_t);
179 static void vmxnet3_enable_intr(struct vmxnet3_softc *, int);
180 static void vmxnet3_disable_intr(struct vmxnet3_softc *, int);
181 static void vmxnet3_intr_enable_all(if_ctx_t);
182 static void vmxnet3_intr_disable_all(if_ctx_t);
183 static bool vmxnet3_if_needs_restart(if_ctx_t, enum iflib_restart_event);
184
185 typedef enum {
186 VMXNET3_BARRIER_RD,
187 VMXNET3_BARRIER_WR,
188 VMXNET3_BARRIER_RDWR,
189 } vmxnet3_barrier_t;
190
191 static void vmxnet3_barrier(struct vmxnet3_softc *, vmxnet3_barrier_t);
192
193 static device_method_t vmxnet3_methods[] = {
194 /* Device interface */
195 DEVMETHOD(device_register, vmxnet3_register),
196 DEVMETHOD(device_probe, iflib_device_probe),
197 DEVMETHOD(device_attach, iflib_device_attach),
198 DEVMETHOD(device_detach, iflib_device_detach),
199 DEVMETHOD(device_shutdown, iflib_device_shutdown),
200 DEVMETHOD(device_suspend, iflib_device_suspend),
201 DEVMETHOD(device_resume, iflib_device_resume),
202 DEVMETHOD_END
203 };
204
205 static driver_t vmxnet3_driver = {
206 "vmx", vmxnet3_methods, sizeof(struct vmxnet3_softc)
207 };
208
209 DRIVER_MODULE(vmx, pci, vmxnet3_driver, 0, 0);
210 IFLIB_PNP_INFO(pci, vmx, vmxnet3_vendor_info_array);
211 MODULE_VERSION(vmx, 2);
212
213 MODULE_DEPEND(vmx, pci, 1, 1, 1);
214 MODULE_DEPEND(vmx, ether, 1, 1, 1);
215 MODULE_DEPEND(vmx, iflib, 1, 1, 1);
216
217 static device_method_t vmxnet3_iflib_methods[] = {
218 DEVMETHOD(ifdi_tx_queues_alloc, vmxnet3_tx_queues_alloc),
219 DEVMETHOD(ifdi_rx_queues_alloc, vmxnet3_rx_queues_alloc),
220 DEVMETHOD(ifdi_queues_free, vmxnet3_queues_free),
221
222 DEVMETHOD(ifdi_attach_pre, vmxnet3_attach_pre),
223 DEVMETHOD(ifdi_attach_post, vmxnet3_attach_post),
224 DEVMETHOD(ifdi_detach, vmxnet3_detach),
225
226 DEVMETHOD(ifdi_init, vmxnet3_init),
227 DEVMETHOD(ifdi_stop, vmxnet3_stop),
228 DEVMETHOD(ifdi_multi_set, vmxnet3_multi_set),
229 DEVMETHOD(ifdi_mtu_set, vmxnet3_mtu_set),
230 DEVMETHOD(ifdi_media_status, vmxnet3_media_status),
231 DEVMETHOD(ifdi_media_change, vmxnet3_media_change),
232 DEVMETHOD(ifdi_promisc_set, vmxnet3_promisc_set),
233 DEVMETHOD(ifdi_get_counter, vmxnet3_get_counter),
234 DEVMETHOD(ifdi_update_admin_status, vmxnet3_update_admin_status),
235 DEVMETHOD(ifdi_timer, vmxnet3_txq_timer),
236
237 DEVMETHOD(ifdi_tx_queue_intr_enable, vmxnet3_tx_queue_intr_enable),
238 DEVMETHOD(ifdi_rx_queue_intr_enable, vmxnet3_rx_queue_intr_enable),
239 DEVMETHOD(ifdi_link_intr_enable, vmxnet3_link_intr_enable),
240 DEVMETHOD(ifdi_intr_enable, vmxnet3_intr_enable_all),
241 DEVMETHOD(ifdi_intr_disable, vmxnet3_intr_disable_all),
242 DEVMETHOD(ifdi_msix_intr_assign, vmxnet3_msix_intr_assign),
243
244 DEVMETHOD(ifdi_vlan_register, vmxnet3_vlan_register),
245 DEVMETHOD(ifdi_vlan_unregister, vmxnet3_vlan_unregister),
246
247 DEVMETHOD(ifdi_shutdown, vmxnet3_shutdown),
248 DEVMETHOD(ifdi_suspend, vmxnet3_suspend),
249 DEVMETHOD(ifdi_resume, vmxnet3_resume),
250
251 DEVMETHOD(ifdi_needs_restart, vmxnet3_if_needs_restart),
252
253 DEVMETHOD_END
254 };
255
256 static driver_t vmxnet3_iflib_driver = {
257 "vmx", vmxnet3_iflib_methods, sizeof(struct vmxnet3_softc)
258 };
259
260 struct if_txrx vmxnet3_txrx = {
261 .ift_txd_encap = vmxnet3_isc_txd_encap,
262 .ift_txd_flush = vmxnet3_isc_txd_flush,
263 .ift_txd_credits_update = vmxnet3_isc_txd_credits_update,
264 .ift_rxd_available = vmxnet3_isc_rxd_available,
265 .ift_rxd_pkt_get = vmxnet3_isc_rxd_pkt_get,
266 .ift_rxd_refill = vmxnet3_isc_rxd_refill,
267 .ift_rxd_flush = vmxnet3_isc_rxd_flush,
268 .ift_legacy_intr = vmxnet3_legacy_intr
269 };
270
271 static struct if_shared_ctx vmxnet3_sctx_init = {
272 .isc_magic = IFLIB_MAGIC,
273 .isc_q_align = 512,
274
275 .isc_tx_maxsize = VMXNET3_TX_MAXSIZE,
276 .isc_tx_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
277 .isc_tso_maxsize = VMXNET3_TSO_MAXSIZE + sizeof(struct ether_vlan_header),
278 .isc_tso_maxsegsize = VMXNET3_TX_MAXSEGSIZE,
279
280 /*
281 * These values are used to configure the busdma tag used for
282 * receive descriptors. Each receive descriptor only points to one
283 * buffer.
284 */
285 .isc_rx_maxsize = VMXNET3_RX_MAXSEGSIZE, /* One buf per descriptor */
286 .isc_rx_nsegments = 1, /* One mapping per descriptor */
287 .isc_rx_maxsegsize = VMXNET3_RX_MAXSEGSIZE,
288
289 .isc_admin_intrcnt = 1,
290 .isc_vendor_info = vmxnet3_vendor_info_array,
291 .isc_driver_version = "2",
292 .isc_driver = &vmxnet3_iflib_driver,
293 .isc_flags = IFLIB_HAS_RXCQ | IFLIB_HAS_TXCQ | IFLIB_SINGLE_IRQ_RX_ONLY,
294
295 /*
296 * Number of receive queues per receive queue set, with associated
297 * descriptor settings for each.
298 */
299 .isc_nrxqs = 3,
300 .isc_nfl = 2, /* one free list for each receive command queue */
301 .isc_nrxd_min = {VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC, VMXNET3_MIN_RX_NDESC},
302 .isc_nrxd_max = {VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC, VMXNET3_MAX_RX_NDESC},
303 .isc_nrxd_default = {VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC, VMXNET3_DEF_RX_NDESC},
304
305 /*
306 * Number of transmit queues per transmit queue set, with associated
307 * descriptor settings for each.
308 */
309 .isc_ntxqs = 2,
310 .isc_ntxd_min = {VMXNET3_MIN_TX_NDESC, VMXNET3_MIN_TX_NDESC},
311 .isc_ntxd_max = {VMXNET3_MAX_TX_NDESC, VMXNET3_MAX_TX_NDESC},
312 .isc_ntxd_default = {VMXNET3_DEF_TX_NDESC, VMXNET3_DEF_TX_NDESC},
313 };
314
315 static void *
316 vmxnet3_register(device_t dev)
317 {
318 return (&vmxnet3_sctx_init);
319 }
320
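/*
 * Round val down to the largest power of two less than or equal to it
 * (e.g. 6 -> 4). Used below to constrain the iflib queue-set counts.
 */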
321 static int
322 trunc_powerof2(int val)
323 {
324
325 return (1U << (fls(val) - 1));
326 }
327
328 static int
329 vmxnet3_attach_pre(if_ctx_t ctx)
330 {
331 device_t dev;
332 if_softc_ctx_t scctx;
333 struct vmxnet3_softc *sc;
334 uint32_t intr_config;
335 int error;
336
337 dev = iflib_get_dev(ctx);
338 sc = iflib_get_softc(ctx);
339 sc->vmx_dev = dev;
340 sc->vmx_ctx = ctx;
341 sc->vmx_sctx = iflib_get_sctx(ctx);
342 sc->vmx_scctx = iflib_get_softc_ctx(ctx);
343 sc->vmx_ifp = iflib_get_ifp(ctx);
344 sc->vmx_media = iflib_get_media(ctx);
345 scctx = sc->vmx_scctx;
346
347 scctx->isc_tx_nsegments = VMXNET3_TX_MAXSEGS;
348 scctx->isc_tx_tso_segments_max = VMXNET3_TX_MAXSEGS;
349 /* isc_tx_tso_size_max doesn't include possible vlan header */
350 scctx->isc_tx_tso_size_max = VMXNET3_TSO_MAXSIZE;
351 scctx->isc_tx_tso_segsize_max = VMXNET3_TX_MAXSEGSIZE;
352 scctx->isc_txrx = &vmxnet3_txrx;
353
354 /* If 0, the iflib tunable was not set, so set to the default */
355 if (scctx->isc_nrxqsets == 0)
356 scctx->isc_nrxqsets = VMXNET3_DEF_RX_QUEUES;
357 scctx->isc_nrxqsets = trunc_powerof2(scctx->isc_nrxqsets);
358 scctx->isc_nrxqsets_max = min(VMXNET3_MAX_RX_QUEUES, mp_ncpus);
359 scctx->isc_nrxqsets_max = trunc_powerof2(scctx->isc_nrxqsets_max);
360
361 /* If 0, the iflib tunable was not set, so set to the default */
362 if (scctx->isc_ntxqsets == 0)
363 scctx->isc_ntxqsets = VMXNET3_DEF_TX_QUEUES;
364 scctx->isc_ntxqsets = trunc_powerof2(scctx->isc_ntxqsets);
365 scctx->isc_ntxqsets_max = min(VMXNET3_MAX_TX_QUEUES, mp_ncpus);
366 scctx->isc_ntxqsets_max = trunc_powerof2(scctx->isc_ntxqsets_max);
367
368 /*
369 * Enforce that the transmit completion queue descriptor count is
370 * the same as the transmit command queue descriptor count.
371 */
372 scctx->isc_ntxd[0] = scctx->isc_ntxd[1];
373 scctx->isc_txqsizes[0] =
374 sizeof(struct vmxnet3_txcompdesc) * scctx->isc_ntxd[0];
375 scctx->isc_txqsizes[1] =
376 sizeof(struct vmxnet3_txdesc) * scctx->isc_ntxd[1];
377
378 /*
379 * Enforce that the receive completion queue descriptor count is the
380 * sum of the receive command queue descriptor counts, and that the
381 * second receive command queue descriptor count is the same as the
382 * first one.
383 */
384 scctx->isc_nrxd[2] = scctx->isc_nrxd[1];
385 scctx->isc_nrxd[0] = scctx->isc_nrxd[1] + scctx->isc_nrxd[2];
386 scctx->isc_rxqsizes[0] =
387 sizeof(struct vmxnet3_rxcompdesc) * scctx->isc_nrxd[0];
388 scctx->isc_rxqsizes[1] =
389 sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[1];
390 scctx->isc_rxqsizes[2] =
391 sizeof(struct vmxnet3_rxdesc) * scctx->isc_nrxd[2];
392
393 /*
394 * Initialize the max frame size and descriptor queue buffer
395 * sizes.
396 */
397 vmxnet3_mtu_set(ctx, if_getmtu(sc->vmx_ifp));
398
399 scctx->isc_rss_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
400
401 /* Map PCI BARs */
402 error = vmxnet3_alloc_resources(sc);
403 if (error)
404 goto fail;
405
406 /* Check device versions */
407 error = vmxnet3_check_version(sc);
408 if (error)
409 goto fail;
410
411 /*
412 * The interrupt mode can be set in the hypervisor configuration via
413 * the parameter ethernet<N>.intrMode.
414 */
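/*
 * The low two bits of the returned configuration select the interrupt
 * delivery type (VMXNET3_IT_*) and the next two bits select the
 * interrupt masking mode (VMXNET3_IMM_*), extracted below.
 */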
415 intr_config = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_INTRCFG);
416 sc->vmx_intr_mask_mode = (intr_config >> 2) & 0x03;
417
418 /*
419 * Configure the softc context to attempt to configure the interrupt
420 * mode now indicated by intr_config. iflib will follow the usual
421 * fallback path MSI-X -> MSI -> LEGACY, starting at the configured
422 * starting mode.
423 */
424 switch (intr_config & 0x03) {
425 case VMXNET3_IT_AUTO:
426 case VMXNET3_IT_MSIX:
427 scctx->isc_msix_bar = pci_msix_table_bar(dev);
428 break;
429 case VMXNET3_IT_MSI:
430 scctx->isc_msix_bar = -1;
431 scctx->isc_disable_msix = 1;
432 break;
433 case VMXNET3_IT_LEGACY:
434 scctx->isc_msix_bar = 0;
435 break;
436 }
437
438 scctx->isc_tx_csum_flags = VMXNET3_CSUM_ALL_OFFLOAD;
439 scctx->isc_capabilities = scctx->isc_capenable =
440 IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6 |
441 IFCAP_TSO4 | IFCAP_TSO6 |
442 IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 |
443 IFCAP_VLAN_MTU | IFCAP_VLAN_HWTAGGING |
444 IFCAP_VLAN_HWCSUM | IFCAP_VLAN_HWTSO |
445 IFCAP_JUMBO_MTU;
446
447 /* These capabilities are not enabled by default. */
448 scctx->isc_capabilities |= IFCAP_LRO | IFCAP_VLAN_HWFILTER;
449
450 vmxnet3_get_lladdr(sc);
451 iflib_set_mac(ctx, sc->vmx_lladdr);
452
453 return (0);
454 fail:
455 /*
456 * We must completely clean up anything allocated above as iflib
457 * will not invoke any other driver entry points as a result of this
458 * failure.
459 */
460 vmxnet3_free_resources(sc);
461
462 return (error);
463 }
464
465 static int
466 vmxnet3_msix_intr_assign(if_ctx_t ctx, int msix)
467 {
468 struct vmxnet3_softc *sc;
469 if_softc_ctx_t scctx;
470 struct vmxnet3_rxqueue *rxq;
471 int error;
472 int i;
473 char irq_name[16];
474
475 sc = iflib_get_softc(ctx);
476 scctx = sc->vmx_scctx;
477
478 for (i = 0; i < scctx->isc_nrxqsets; i++) {
479 snprintf(irq_name, sizeof(irq_name), "rxq%d", i);
480
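/*
 * MSI-X interrupt resource IDs are 1-based, so rxq i is assigned
 * vector i + 1 here; the event interrupt below takes the vector
 * after the last receive queue.
 */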
481 rxq = &sc->vmx_rxq[i];
482 error = iflib_irq_alloc_generic(ctx, &rxq->vxrxq_irq, i + 1,
483 IFLIB_INTR_RXTX, vmxnet3_rxq_intr, rxq, i, irq_name);
484 if (error) {
485 device_printf(iflib_get_dev(ctx),
486 "Failed to register rxq %d interrupt handler\n", i);
487 return (error);
488 }
489 }
490
491 for (i = 0; i < scctx->isc_ntxqsets; i++) {
492 snprintf(irq_name, sizeof(irq_name), "txq%d", i);
493
494 /*
495 * Don't provide the corresponding rxq irq for reference -
496 * we want the transmit task to be attached to a task queue
497 * that is different from the one used by the corresponding
498 * rxq irq. That is because the TX doorbell writes are very
499 * expensive as virtualized MMIO operations, so we want to
500 * be able to defer them to another core when possible so
501 * that they don't steal receive processing cycles during
502 * stack turnarounds like TCP ACK generation. The other
503 * piece to this approach is enabling the iflib abdicate
504 * option (currently via an interface-specific
505 * tunable/sysctl).
506 */
507 iflib_softirq_alloc_generic(ctx, NULL, IFLIB_INTR_TX, NULL, i,
508 irq_name);
509 }
510
511 error = iflib_irq_alloc_generic(ctx, &sc->vmx_event_intr_irq,
512 scctx->isc_nrxqsets + 1, IFLIB_INTR_ADMIN, vmxnet3_event_intr, sc, 0,
513 "event");
514 if (error) {
515 device_printf(iflib_get_dev(ctx),
516 "Failed to register event interrupt handler\n");
517 return (error);
518 }
519
520 return (0);
521 }
522
523 static void
524 vmxnet3_free_irqs(struct vmxnet3_softc *sc)
525 {
526 if_softc_ctx_t scctx;
527 struct vmxnet3_rxqueue *rxq;
528 int i;
529
530 scctx = sc->vmx_scctx;
531
532 for (i = 0; i < scctx->isc_nrxqsets; i++) {
533 rxq = &sc->vmx_rxq[i];
534 iflib_irq_free(sc->vmx_ctx, &rxq->vxrxq_irq);
535 }
536
537 iflib_irq_free(sc->vmx_ctx, &sc->vmx_event_intr_irq);
538 }
539
540 static int
541 vmxnet3_attach_post(if_ctx_t ctx)
542 {
543 if_softc_ctx_t scctx;
544 struct vmxnet3_softc *sc;
545 int error;
546
547 scctx = iflib_get_softc_ctx(ctx);
548 sc = iflib_get_softc(ctx);
549
550 if (scctx->isc_nrxqsets > 1)
551 sc->vmx_flags |= VMXNET3_FLAG_RSS;
552
553 error = vmxnet3_alloc_data(sc);
554 if (error)
555 goto fail;
556
557 vmxnet3_set_interrupt_idx(sc);
558 vmxnet3_setup_sysctl(sc);
559
560 ifmedia_add(sc->vmx_media, IFM_ETHER | IFM_AUTO, 0, NULL);
561 ifmedia_set(sc->vmx_media, IFM_ETHER | IFM_AUTO);
562
563 fail:
564 return (error);
565 }
566
567 static int
568 vmxnet3_detach(if_ctx_t ctx)
569 {
570 struct vmxnet3_softc *sc;
571
572 sc = iflib_get_softc(ctx);
573
574 vmxnet3_free_irqs(sc);
575 vmxnet3_free_data(sc);
576 vmxnet3_free_resources(sc);
577
578 return (0);
579 }
580
581 static int
582 vmxnet3_shutdown(if_ctx_t ctx)
583 {
584
585 return (0);
586 }
587
588 static int
589 vmxnet3_suspend(if_ctx_t ctx)
590 {
591
592 return (0);
593 }
594
595 static int
596 vmxnet3_resume(if_ctx_t ctx)
597 {
598
599 return (0);
600 }
601
602 static int
603 vmxnet3_alloc_resources(struct vmxnet3_softc *sc)
604 {
605 device_t dev;
606 int rid;
607
608 dev = sc->vmx_dev;
609
610 rid = PCIR_BAR(0);
611 sc->vmx_res0 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
612 RF_ACTIVE);
613 if (sc->vmx_res0 == NULL) {
614 device_printf(dev,
615 "could not map BAR0 memory\n");
616 return (ENXIO);
617 }
618
619 sc->vmx_iot0 = rman_get_bustag(sc->vmx_res0);
620 sc->vmx_ioh0 = rman_get_bushandle(sc->vmx_res0);
621
622 rid = PCIR_BAR(1);
623 sc->vmx_res1 = bus_alloc_resource_any(dev, SYS_RES_MEMORY, &rid,
624 RF_ACTIVE);
625 if (sc->vmx_res1 == NULL) {
626 device_printf(dev,
627 "could not map BAR1 memory\n");
628 return (ENXIO);
629 }
630
631 sc->vmx_iot1 = rman_get_bustag(sc->vmx_res1);
632 sc->vmx_ioh1 = rman_get_bushandle(sc->vmx_res1);
633
634 return (0);
635 }
636
637 static void
638 vmxnet3_free_resources(struct vmxnet3_softc *sc)
639 {
640 device_t dev;
641
642 dev = sc->vmx_dev;
643
644 if (sc->vmx_res0 != NULL) {
645 bus_release_resource(dev, SYS_RES_MEMORY,
646 rman_get_rid(sc->vmx_res0), sc->vmx_res0);
647 sc->vmx_res0 = NULL;
648 }
649
650 if (sc->vmx_res1 != NULL) {
651 bus_release_resource(dev, SYS_RES_MEMORY,
652 rman_get_rid(sc->vmx_res1), sc->vmx_res1);
653 sc->vmx_res1 = NULL;
654 }
655 }
656
657 static int
658 vmxnet3_check_version(struct vmxnet3_softc *sc)
659 {
660 device_t dev;
661 uint32_t version;
662
663 dev = sc->vmx_dev;
664
665 version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_VRRS);
666 if ((version & 0x01) == 0) {
667 device_printf(dev, "unsupported hardware version %#x\n",
668 version);
669 return (ENOTSUP);
670 }
671 vmxnet3_write_bar1(sc, VMXNET3_BAR1_VRRS, 1);
672
673 version = vmxnet3_read_bar1(sc, VMXNET3_BAR1_UVRS);
674 if ((version & 0x01) == 0) {
675 device_printf(dev, "unsupported UPT version %#x\n", version);
676 return (ENOTSUP);
677 }
678 vmxnet3_write_bar1(sc, VMXNET3_BAR1_UVRS, 1);
679
680 return (0);
681 }
682
683 static void
684 vmxnet3_set_interrupt_idx(struct vmxnet3_softc *sc)
685 {
686 if_softc_ctx_t scctx;
687 struct vmxnet3_txqueue *txq;
688 struct vmxnet3_txq_shared *txs;
689 struct vmxnet3_rxqueue *rxq;
690 struct vmxnet3_rxq_shared *rxs;
691 int intr_idx;
692 int i;
693
694 scctx = sc->vmx_scctx;
695
696 /*
697 * There is always one interrupt per receive queue, assigned
698 * starting with the first interrupt. When there is only one
699 * interrupt available, the event interrupt shares the receive queue
700 * interrupt; otherwise it uses the interrupt following the last
701 * receive queue interrupt. Transmit queues are not assigned
702 * interrupts, so they are given indexes beyond the indexes that
703 * correspond to the real interrupts.
704 */
705
706 /* The event interrupt is always the last vector. */
707 sc->vmx_event_intr_idx = scctx->isc_vectors - 1;
708
709 intr_idx = 0;
710 for (i = 0; i < scctx->isc_nrxqsets; i++, intr_idx++) {
711 rxq = &sc->vmx_rxq[i];
712 rxs = rxq->vxrxq_rs;
713 rxq->vxrxq_intr_idx = intr_idx;
714 rxs->intr_idx = rxq->vxrxq_intr_idx;
715 }
716
717 /*
718 * Assign the tx queues interrupt indexes above what we are actually
719 * using. These interrupts will never be enabled.
720 */
721 intr_idx = scctx->isc_vectors;
722 for (i = 0; i < scctx->isc_ntxqsets; i++, intr_idx++) {
723 txq = &sc->vmx_txq[i];
724 txs = txq->vxtxq_ts;
725 txq->vxtxq_intr_idx = intr_idx;
726 txs->intr_idx = txq->vxtxq_intr_idx;
727 }
728 }
729
730 static int
731 vmxnet3_queues_shared_alloc(struct vmxnet3_softc *sc)
732 {
733 if_softc_ctx_t scctx;
734 int size;
735 int error;
736
737 scctx = sc->vmx_scctx;
738
739 /*
740 * The txq and rxq shared data areas must be allocated contiguously
741 * as vmxnet3_driver_shared contains only a single address member
742 * for the shared queue data area.
743 */
744 size = scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared) +
745 scctx->isc_nrxqsets * sizeof(struct vmxnet3_rxq_shared);
746 error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128, &sc->vmx_qs_dma, 0);
747 if (error) {
748 device_printf(sc->vmx_dev, "cannot alloc queue shared memory\n");
749 return (error);
750 }
751
752 return (0);
753 }
754
755 static void
756 vmxnet3_init_txq(struct vmxnet3_softc *sc, int q)
757 {
758 struct vmxnet3_txqueue *txq;
759 struct vmxnet3_comp_ring *txc;
760 struct vmxnet3_txring *txr;
761 if_softc_ctx_t scctx;
762
763 txq = &sc->vmx_txq[q];
764 txc = &txq->vxtxq_comp_ring;
765 txr = &txq->vxtxq_cmd_ring;
766 scctx = sc->vmx_scctx;
767
768 snprintf(txq->vxtxq_name, sizeof(txq->vxtxq_name), "%s-tx%d",
769 device_get_nameunit(sc->vmx_dev), q);
770
771 txq->vxtxq_sc = sc;
772 txq->vxtxq_id = q;
773 txc->vxcr_ndesc = scctx->isc_ntxd[0];
774 txr->vxtxr_ndesc = scctx->isc_ntxd[1];
775 }
776
777 static int
778 vmxnet3_tx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
779 int ntxqs, int ntxqsets)
780 {
781 struct vmxnet3_softc *sc;
782 int q;
783 int error;
784 caddr_t kva;
785
786 sc = iflib_get_softc(ctx);
787
788 /* Allocate the array of transmit queues */
789 sc->vmx_txq = malloc(sizeof(struct vmxnet3_txqueue) *
790 ntxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
791 if (sc->vmx_txq == NULL)
792 return (ENOMEM);
793
794 /* Initialize driver state for each transmit queue */
795 for (q = 0; q < ntxqsets; q++)
796 vmxnet3_init_txq(sc, q);
797
798 /*
799 * Allocate queue state that is shared with the device. This check
800 * and call is performed in both vmxnet3_tx_queues_alloc() and
801 * vmxnet3_rx_queues_alloc() so that we don't have to care which
802 * order iflib invokes those routines in.
803 */
804 if (sc->vmx_qs_dma.idi_size == 0) {
805 error = vmxnet3_queues_shared_alloc(sc);
806 if (error)
807 return (error);
808 }
809
810 kva = sc->vmx_qs_dma.idi_vaddr;
811 for (q = 0; q < ntxqsets; q++) {
812 sc->vmx_txq[q].vxtxq_ts = (struct vmxnet3_txq_shared *) kva;
813 kva += sizeof(struct vmxnet3_txq_shared);
814 }
815
816 /* Record descriptor ring vaddrs and paddrs */
817 for (q = 0; q < ntxqsets; q++) {
818 struct vmxnet3_txqueue *txq;
819 struct vmxnet3_txring *txr;
820 struct vmxnet3_comp_ring *txc;
821
822 txq = &sc->vmx_txq[q];
823 txc = &txq->vxtxq_comp_ring;
824 txr = &txq->vxtxq_cmd_ring;
825
826 /* Completion ring */
827 txc->vxcr_u.txcd =
828 (struct vmxnet3_txcompdesc *) vaddrs[q * ntxqs + 0];
829 txc->vxcr_paddr = paddrs[q * ntxqs + 0];
830
831 /* Command ring */
832 txr->vxtxr_txd =
833 (struct vmxnet3_txdesc *) vaddrs[q * ntxqs + 1];
834 txr->vxtxr_paddr = paddrs[q * ntxqs + 1];
835 }
836
837 return (0);
838 }
839
840 static void
841 vmxnet3_init_rxq(struct vmxnet3_softc *sc, int q, int nrxqs)
842 {
843 struct vmxnet3_rxqueue *rxq;
844 struct vmxnet3_comp_ring *rxc;
845 struct vmxnet3_rxring *rxr;
846 if_softc_ctx_t scctx;
847 int i;
848
849 rxq = &sc->vmx_rxq[q];
850 rxc = &rxq->vxrxq_comp_ring;
851 scctx = sc->vmx_scctx;
852
853 snprintf(rxq->vxrxq_name, sizeof(rxq->vxrxq_name), "%s-rx%d",
854 device_get_nameunit(sc->vmx_dev), q);
855
856 rxq->vxrxq_sc = sc;
857 rxq->vxrxq_id = q;
858
859 /*
860 * First rxq is the completion queue, so there are nrxqs - 1 command
861 * rings starting at iflib queue id 1.
862 */
863 rxc->vxcr_ndesc = scctx->isc_nrxd[0];
864 for (i = 0; i < nrxqs - 1; i++) {
865 rxr = &rxq->vxrxq_cmd_ring[i];
866 rxr->vxrxr_ndesc = scctx->isc_nrxd[i + 1];
867 }
868 }
869
870 static int
871 vmxnet3_rx_queues_alloc(if_ctx_t ctx, caddr_t *vaddrs, uint64_t *paddrs,
872 int nrxqs, int nrxqsets)
873 {
874 struct vmxnet3_softc *sc;
875 if_softc_ctx_t scctx;
876 int q;
877 int i;
878 int error;
879 caddr_t kva;
880
881 sc = iflib_get_softc(ctx);
882 scctx = sc->vmx_scctx;
883
884 /* Allocate the array of receive queues */
885 sc->vmx_rxq = malloc(sizeof(struct vmxnet3_rxqueue) *
886 nrxqsets, M_DEVBUF, M_NOWAIT | M_ZERO);
887 if (sc->vmx_rxq == NULL)
888 return (ENOMEM);
889
890 /* Initialize driver state for each receive queue */
891 for (q = 0; q < nrxqsets; q++)
892 vmxnet3_init_rxq(sc, q, nrxqs);
893
894 /*
895 * Allocate queue state that is shared with the device. This check
896 * and call is performed in both vmxnet3_tx_queues_alloc() and
897 * vmxnet3_rx_queues_alloc() so that we don't have to care which
898 * order iflib invokes those routines in.
899 */
900 if (sc->vmx_qs_dma.idi_size == 0) {
901 error = vmxnet3_queues_shared_alloc(sc);
902 if (error)
903 return (error);
904 }
905
906 kva = sc->vmx_qs_dma.idi_vaddr +
907 scctx->isc_ntxqsets * sizeof(struct vmxnet3_txq_shared);
908 for (q = 0; q < nrxqsets; q++) {
909 sc->vmx_rxq[q].vxrxq_rs = (struct vmxnet3_rxq_shared *) kva;
910 kva += sizeof(struct vmxnet3_rxq_shared);
911 }
912
913 /* Record descriptor ring vaddrs and paddrs */
914 for (q = 0; q < nrxqsets; q++) {
915 struct vmxnet3_rxqueue *rxq;
916 struct vmxnet3_rxring *rxr;
917 struct vmxnet3_comp_ring *rxc;
918
919 rxq = &sc->vmx_rxq[q];
920 rxc = &rxq->vxrxq_comp_ring;
921
922 /* Completion ring */
923 rxc->vxcr_u.rxcd =
924 (struct vmxnet3_rxcompdesc *) vaddrs[q * nrxqs + 0];
925 rxc->vxcr_paddr = paddrs[q * nrxqs + 0];
926
927 /* Command ring(s) */
928 for (i = 0; i < nrxqs - 1; i++) {
929 rxr = &rxq->vxrxq_cmd_ring[i];
930
931 rxr->vxrxr_rxd =
932 (struct vmxnet3_rxdesc *) vaddrs[q * nrxqs + 1 + i];
933 rxr->vxrxr_paddr = paddrs[q * nrxqs + 1 + i];
934 }
935 }
936
937 return (0);
938 }
939
940 static void
941 vmxnet3_queues_free(if_ctx_t ctx)
942 {
943 struct vmxnet3_softc *sc;
944
945 sc = iflib_get_softc(ctx);
946
947 /* Free queue state area that is shared with the device */
948 if (sc->vmx_qs_dma.idi_size != 0) {
949 iflib_dma_free(&sc->vmx_qs_dma);
950 sc->vmx_qs_dma.idi_size = 0;
951 }
952
953 /* Free array of receive queues */
954 if (sc->vmx_rxq != NULL) {
955 free(sc->vmx_rxq, M_DEVBUF);
956 sc->vmx_rxq = NULL;
957 }
958
959 /* Free array of transmit queues */
960 if (sc->vmx_txq != NULL) {
961 free(sc->vmx_txq, M_DEVBUF);
962 sc->vmx_txq = NULL;
963 }
964 }
965
966 static int
967 vmxnet3_alloc_shared_data(struct vmxnet3_softc *sc)
968 {
969 device_t dev;
970 size_t size;
971 int error;
972
973 dev = sc->vmx_dev;
974
975 /* Top level state structure shared with the device */
976 size = sizeof(struct vmxnet3_driver_shared);
977 error = iflib_dma_alloc_align(sc->vmx_ctx, size, 1, &sc->vmx_ds_dma, 0);
978 if (error) {
979 device_printf(dev, "cannot alloc shared memory\n");
980 return (error);
981 }
982 sc->vmx_ds = (struct vmxnet3_driver_shared *) sc->vmx_ds_dma.idi_vaddr;
983
984 /* RSS table state shared with the device */
985 if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
986 size = sizeof(struct vmxnet3_rss_shared);
987 error = iflib_dma_alloc_align(sc->vmx_ctx, size, 128,
988 &sc->vmx_rss_dma, 0);
989 if (error) {
990 device_printf(dev, "cannot alloc rss shared memory\n");
991 return (error);
992 }
993 sc->vmx_rss =
994 (struct vmxnet3_rss_shared *) sc->vmx_rss_dma.idi_vaddr;
995 }
996
997 return (0);
998 }
999
1000 static void
1001 vmxnet3_free_shared_data(struct vmxnet3_softc *sc)
1002 {
1003
1004 /* Free RSS table state shared with the device */
1005 if (sc->vmx_rss != NULL) {
1006 iflib_dma_free(&sc->vmx_rss_dma);
1007 sc->vmx_rss = NULL;
1008 }
1009
1010 /* Free top level state structure shared with the device */
1011 if (sc->vmx_ds != NULL) {
1012 iflib_dma_free(&sc->vmx_ds_dma);
1013 sc->vmx_ds = NULL;
1014 }
1015 }
1016
1017 static int
1018 vmxnet3_alloc_mcast_table(struct vmxnet3_softc *sc)
1019 {
1020 int error;
1021
1022 /* Multicast table state shared with the device */
1023 error = iflib_dma_alloc_align(sc->vmx_ctx,
1024 VMXNET3_MULTICAST_MAX * ETHER_ADDR_LEN, 32, &sc->vmx_mcast_dma, 0);
1025 if (error)
1026 device_printf(sc->vmx_dev, "unable to alloc multicast table\n");
1027 else
1028 sc->vmx_mcast = sc->vmx_mcast_dma.idi_vaddr;
1029
1030 return (error);
1031 }
1032
1033 static void
1034 vmxnet3_free_mcast_table(struct vmxnet3_softc *sc)
1035 {
1036
1037 /* Free multicast table state shared with the device */
1038 if (sc->vmx_mcast != NULL) {
1039 iflib_dma_free(&sc->vmx_mcast_dma);
1040 sc->vmx_mcast = NULL;
1041 }
1042 }
1043
1044 static void
1045 vmxnet3_init_shared_data(struct vmxnet3_softc *sc)
1046 {
1047 struct vmxnet3_driver_shared *ds;
1048 if_softc_ctx_t scctx;
1049 struct vmxnet3_txqueue *txq;
1050 struct vmxnet3_txq_shared *txs;
1051 struct vmxnet3_rxqueue *rxq;
1052 struct vmxnet3_rxq_shared *rxs;
1053 int i;
1054
1055 ds = sc->vmx_ds;
1056 scctx = sc->vmx_scctx;
1057
1058 /*
1059 * Initialize the fields of the shared data that remain the same across
1060 * reinits. Note that the shared data is zeroed when allocated.
1061 */
1062
1063 ds->magic = VMXNET3_REV1_MAGIC;
1064
1065 /* DriverInfo */
1066 ds->version = VMXNET3_DRIVER_VERSION;
1067 ds->guest = VMXNET3_GOS_FREEBSD |
1068 #ifdef __LP64__
1069 VMXNET3_GOS_64BIT;
1070 #else
1071 VMXNET3_GOS_32BIT;
1072 #endif
1073 ds->vmxnet3_revision = 1;
1074 ds->upt_version = 1;
1075
1076 /* Misc. conf */
1077 ds->driver_data = vtophys(sc);
1078 ds->driver_data_len = sizeof(struct vmxnet3_softc);
1079 ds->queue_shared = sc->vmx_qs_dma.idi_paddr;
1080 ds->queue_shared_len = sc->vmx_qs_dma.idi_size;
1081 ds->nrxsg_max = IFLIB_MAX_RX_SEGS;
1082
1083 /* RSS conf */
1084 if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1085 ds->rss.version = 1;
1086 ds->rss.paddr = sc->vmx_rss_dma.idi_paddr;
1087 ds->rss.len = sc->vmx_rss_dma.idi_size;
1088 }
1089
1090 /* Interrupt control. */
1091 ds->automask = sc->vmx_intr_mask_mode == VMXNET3_IMM_AUTO;
1092 /*
1093 * Total number of interrupt indexes we are using in the shared
1094 * config data, even though we don't actually allocate interrupt
1095 * resources for the tx queues. Some versions of the device will
1096 * fail to initialize successfully if interrupt indexes are used in
1097 * the shared config that exceed the number of interrupts configured
1098 * here.
1099 */
1100 ds->nintr = (scctx->isc_vectors == 1) ?
1101 2 : (scctx->isc_nrxqsets + scctx->isc_ntxqsets + 1);
1102 ds->evintr = sc->vmx_event_intr_idx;
1103 ds->ictrl = VMXNET3_ICTRL_DISABLE_ALL;
1104
1105 for (i = 0; i < ds->nintr; i++)
1106 ds->modlevel[i] = UPT1_IMOD_ADAPTIVE;
1107
1108 /* Receive filter. */
1109 ds->mcast_table = sc->vmx_mcast_dma.idi_paddr;
1110 ds->mcast_tablelen = sc->vmx_mcast_dma.idi_size;
1111
1112 /* Tx queues */
1113 for (i = 0; i < scctx->isc_ntxqsets; i++) {
1114 txq = &sc->vmx_txq[i];
1115 txs = txq->vxtxq_ts;
1116
1117 txs->cmd_ring = txq->vxtxq_cmd_ring.vxtxr_paddr;
1118 txs->cmd_ring_len = txq->vxtxq_cmd_ring.vxtxr_ndesc;
1119 txs->comp_ring = txq->vxtxq_comp_ring.vxcr_paddr;
1120 txs->comp_ring_len = txq->vxtxq_comp_ring.vxcr_ndesc;
1121 txs->driver_data = vtophys(txq);
1122 txs->driver_data_len = sizeof(struct vmxnet3_txqueue);
1123 }
1124
1125 /* Rx queues */
1126 for (i = 0; i < scctx->isc_nrxqsets; i++) {
1127 rxq = &sc->vmx_rxq[i];
1128 rxs = rxq->vxrxq_rs;
1129
1130 rxs->cmd_ring[0] = rxq->vxrxq_cmd_ring[0].vxrxr_paddr;
1131 rxs->cmd_ring_len[0] = rxq->vxrxq_cmd_ring[0].vxrxr_ndesc;
1132 rxs->cmd_ring[1] = rxq->vxrxq_cmd_ring[1].vxrxr_paddr;
1133 rxs->cmd_ring_len[1] = rxq->vxrxq_cmd_ring[1].vxrxr_ndesc;
1134 rxs->comp_ring = rxq->vxrxq_comp_ring.vxcr_paddr;
1135 rxs->comp_ring_len = rxq->vxrxq_comp_ring.vxcr_ndesc;
1136 rxs->driver_data = vtophys(rxq);
1137 rxs->driver_data_len = sizeof(struct vmxnet3_rxqueue);
1138 }
1139 }
1140
1141 static void
1142 vmxnet3_reinit_rss_shared_data(struct vmxnet3_softc *sc)
1143 {
1144 /*
1145 * Use the same key as the Linux driver until FreeBSD can do
1146 * RSS (presumably Toeplitz) in software.
1147 */
1148 static const uint8_t rss_key[UPT1_RSS_MAX_KEY_SIZE] = {
1149 0x3b, 0x56, 0xd1, 0x56, 0x13, 0x4a, 0xe7, 0xac,
1150 0xe8, 0x79, 0x09, 0x75, 0xe8, 0x65, 0x79, 0x28,
1151 0x35, 0x12, 0xb9, 0x56, 0x7c, 0x76, 0x4b, 0x70,
1152 0xd8, 0x56, 0xa3, 0x18, 0x9b, 0x0a, 0xee, 0xf3,
1153 0x96, 0xa6, 0x9f, 0x8f, 0x9e, 0x8c, 0x90, 0xc9,
1154 };
1155
1156 if_softc_ctx_t scctx;
1157 struct vmxnet3_rss_shared *rss;
1158 #ifdef RSS
1159 uint8_t rss_algo;
1160 #endif
1161 int i;
1162
1163 scctx = sc->vmx_scctx;
1164 rss = sc->vmx_rss;
1165
1166 rss->hash_type =
1167 UPT1_RSS_HASH_TYPE_IPV4 | UPT1_RSS_HASH_TYPE_TCP_IPV4 |
1168 UPT1_RSS_HASH_TYPE_IPV6 | UPT1_RSS_HASH_TYPE_TCP_IPV6;
1169 rss->hash_func = UPT1_RSS_HASH_FUNC_TOEPLITZ;
1170 rss->hash_key_size = UPT1_RSS_MAX_KEY_SIZE;
1171 rss->ind_table_size = UPT1_RSS_MAX_IND_TABLE_SIZE;
1172 #ifdef RSS
1173 /*
1174 * If the software RSS is configured to anything other than
1175 * Toeplitz, then just do Toeplitz in "hardware" for the sake of
1176 * the packet distribution, but report the hash as opaque to
1177 * disengage from the software RSS.
1178 */
1179 rss_algo = rss_gethashalgo();
1180 if (rss_algo == RSS_HASH_TOEPLITZ) {
1181 rss_getkey(rss->hash_key);
1182 for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++) {
1183 rss->ind_table[i] = rss_get_indirection_to_bucket(i) %
1184 scctx->isc_nrxqsets;
1185 }
1186 sc->vmx_flags |= VMXNET3_FLAG_SOFT_RSS;
1187 } else
1188 #endif
1189 {
1190 memcpy(rss->hash_key, rss_key, UPT1_RSS_MAX_KEY_SIZE);
1191 for (i = 0; i < UPT1_RSS_MAX_IND_TABLE_SIZE; i++)
1192 rss->ind_table[i] = i % scctx->isc_nrxqsets;
1193 sc->vmx_flags &= ~VMXNET3_FLAG_SOFT_RSS;
1194 }
1195 }
1196
1197 static void
1198 vmxnet3_reinit_shared_data(struct vmxnet3_softc *sc)
1199 {
1200 if_t ifp;
1201 struct vmxnet3_driver_shared *ds;
1202 if_softc_ctx_t scctx;
1203
1204 ifp = sc->vmx_ifp;
1205 ds = sc->vmx_ds;
1206 scctx = sc->vmx_scctx;
1207
1208 ds->mtu = if_getmtu(ifp);
1209 ds->ntxqueue = scctx->isc_ntxqsets;
1210 ds->nrxqueue = scctx->isc_nrxqsets;
1211
1212 ds->upt_features = 0;
1213 if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
1214 ds->upt_features |= UPT1_F_CSUM;
1215 if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING)
1216 ds->upt_features |= UPT1_F_VLAN;
1217 if (if_getcapenable(ifp) & IFCAP_LRO)
1218 ds->upt_features |= UPT1_F_LRO;
1219
1220 if (sc->vmx_flags & VMXNET3_FLAG_RSS) {
1221 ds->upt_features |= UPT1_F_RSS;
1222 vmxnet3_reinit_rss_shared_data(sc);
1223 }
1224
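/*
 * Hand the physical address of the driver shared area to the device,
 * low 32 bits first and then the high 32 bits.
 */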
1225 vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSL, sc->vmx_ds_dma.idi_paddr);
1226 vmxnet3_write_bar1(sc, VMXNET3_BAR1_DSH,
1227 (uint64_t) sc->vmx_ds_dma.idi_paddr >> 32);
1228 }
1229
1230 static int
1231 vmxnet3_alloc_data(struct vmxnet3_softc *sc)
1232 {
1233 int error;
1234
1235 error = vmxnet3_alloc_shared_data(sc);
1236 if (error)
1237 return (error);
1238
1239 error = vmxnet3_alloc_mcast_table(sc);
1240 if (error)
1241 return (error);
1242
1243 vmxnet3_init_shared_data(sc);
1244
1245 return (0);
1246 }
1247
1248 static void
1249 vmxnet3_free_data(struct vmxnet3_softc *sc)
1250 {
1251
1252 vmxnet3_free_mcast_table(sc);
1253 vmxnet3_free_shared_data(sc);
1254 }
1255
1256 static void
1257 vmxnet3_evintr(struct vmxnet3_softc *sc)
1258 {
1259 device_t dev;
1260 struct vmxnet3_txq_shared *ts;
1261 struct vmxnet3_rxq_shared *rs;
1262 uint32_t event;
1263
1264 dev = sc->vmx_dev;
1265
1266 /* Clear events. */
1267 event = sc->vmx_ds->event;
1268 vmxnet3_write_bar1(sc, VMXNET3_BAR1_EVENT, event);
1269
1270 if (event & VMXNET3_EVENT_LINK)
1271 vmxnet3_link_status(sc);
1272
1273 if (event & (VMXNET3_EVENT_TQERROR | VMXNET3_EVENT_RQERROR)) {
1274 vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_STATUS);
1275 ts = sc->vmx_txq[0].vxtxq_ts;
1276 if (ts->stopped != 0)
1277 device_printf(dev, "Tx queue error %#x\n", ts->error);
1278 rs = sc->vmx_rxq[0].vxrxq_rs;
1279 if (rs->stopped != 0)
1280 device_printf(dev, "Rx queue error %#x\n", rs->error);
1281
1282 /* XXX - rely on the iflib watchdog to reset us? */
1283 device_printf(dev, "Rx/Tx queue error event ... "
1284 "waiting for iflib watchdog reset\n");
1285 }
1286
1287 if (event & VMXNET3_EVENT_DIC)
1288 device_printf(dev, "device implementation change event\n");
1289 if (event & VMXNET3_EVENT_DEBUG)
1290 device_printf(dev, "debug event\n");
1291 }
1292
1293 static int
1294 vmxnet3_isc_txd_encap(void *vsc, if_pkt_info_t pi)
1295 {
1296 struct vmxnet3_softc *sc;
1297 struct vmxnet3_txqueue *txq;
1298 struct vmxnet3_txring *txr;
1299 struct vmxnet3_txdesc *txd, *sop;
1300 bus_dma_segment_t *segs;
1301 int nsegs;
1302 int pidx;
1303 int hdrlen;
1304 int i;
1305 int gen;
1306
1307 sc = vsc;
1308 txq = &sc->vmx_txq[pi->ipi_qsidx];
1309 txr = &txq->vxtxq_cmd_ring;
1310 segs = pi->ipi_segs;
1311 nsegs = pi->ipi_nsegs;
1312 pidx = pi->ipi_pidx;
1313
1314 KASSERT(nsegs <= VMXNET3_TX_MAXSEGS,
1315 ("%s: packet with too many segments %d", __func__, nsegs));
1316
1317 sop = &txr->vxtxr_txd[pidx];
1318 gen = txr->vxtxr_gen ^ 1; /* SOP stays owned by the CPU until the final flip below */
1319
1320 for (i = 0; i < nsegs; i++) {
1321 txd = &txr->vxtxr_txd[pidx];
1322
1323 txd->addr = segs[i].ds_addr;
1324 txd->len = segs[i].ds_len;
1325 txd->gen = gen;
1326 txd->dtype = 0;
1327 txd->offload_mode = VMXNET3_OM_NONE;
1328 txd->offload_pos = 0;
1329 txd->hlen = 0;
1330 txd->eop = 0;
1331 txd->compreq = 0;
1332 txd->vtag_mode = 0;
1333 txd->vtag = 0;
1334
1335 if (++pidx == txr->vxtxr_ndesc) {
1336 pidx = 0;
1337 txr->vxtxr_gen ^= 1;
1338 }
1339 gen = txr->vxtxr_gen;
1340 }
1341 txd->eop = 1;
1342 txd->compreq = !!(pi->ipi_flags & IPI_TX_INTR);
1343 pi->ipi_new_pidx = pidx;
1344
1345 /*
1346 * VLAN
1347 */
1348 if (pi->ipi_mflags & M_VLANTAG) {
1349 sop->vtag_mode = 1;
1350 sop->vtag = pi->ipi_vtag;
1351 }
1352
1353 /*
1354 * TSO and checksum offloads
1355 */
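/*
 * For TSO, hlen covers the Ethernet, IP, and TCP headers and
 * offload_pos carries the TSO segment size; for checksum offload,
 * hlen is the offset of the L4 header and offload_pos is the offset
 * of the checksum field the device must fill in.
 */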
1356 hdrlen = pi->ipi_ehdrlen + pi->ipi_ip_hlen;
1357 if (pi->ipi_csum_flags & CSUM_TSO) {
1358 sop->offload_mode = VMXNET3_OM_TSO;
1359 sop->hlen = hdrlen + pi->ipi_tcp_hlen;
1360 sop->offload_pos = pi->ipi_tso_segsz;
1361 } else if (pi->ipi_csum_flags & (VMXNET3_CSUM_OFFLOAD |
1362 VMXNET3_CSUM_OFFLOAD_IPV6)) {
1363 sop->offload_mode = VMXNET3_OM_CSUM;
1364 sop->hlen = hdrlen;
1365 sop->offload_pos = hdrlen +
1366 ((pi->ipi_ipproto == IPPROTO_TCP) ?
1367 offsetof(struct tcphdr, th_sum) :
1368 offsetof(struct udphdr, uh_sum));
1369 }
1370
1371 /* Finally, change the ownership. */
1372 vmxnet3_barrier(sc, VMXNET3_BARRIER_WR);
1373 sop->gen ^= 1;
1374
1375 return (0);
1376 }
1377
1378 static void
1379 vmxnet3_isc_txd_flush(void *vsc, uint16_t txqid, qidx_t pidx)
1380 {
1381 struct vmxnet3_softc *sc;
1382 struct vmxnet3_txqueue *txq;
1383
1384 sc = vsc;
1385 txq = &sc->vmx_txq[txqid];
1386
1387 /*
1388 * pidx is what we last set ipi_new_pidx to in
1389 * vmxnet3_isc_txd_encap()
1390 */
1391
1392 /*
1393 * Avoid expensive register updates if the flush request is
1394 * redundant.
1395 */
1396 if (txq->vxtxq_last_flush == pidx)
1397 return;
1398 txq->vxtxq_last_flush = pidx;
1399 vmxnet3_write_bar0(sc, VMXNET3_BAR0_TXH(txq->vxtxq_id), pidx);
1400 }
1401
1402 static int
1403 vmxnet3_isc_txd_credits_update(void *vsc, uint16_t txqid, bool clear)
1404 {
1405 struct vmxnet3_softc *sc;
1406 struct vmxnet3_txqueue *txq;
1407 struct vmxnet3_comp_ring *txc;
1408 struct vmxnet3_txcompdesc *txcd;
1409 struct vmxnet3_txring *txr;
1410 int processed;
1411
1412 sc = vsc;
1413 txq = &sc->vmx_txq[txqid];
1414 txc = &txq->vxtxq_comp_ring;
1415 txr = &txq->vxtxq_cmd_ring;
1416
1417 /*
1418 * If clear is true, we need to report the number of TX command ring
1419 * descriptors that have been processed by the device. If clear is
1420 * false, we just need to report whether or not at least one TX
1421 * command ring descriptor has been processed by the device.
1422 */
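/*
 * A completion descriptor belongs to the driver only while its
 * generation bit matches the ring's current generation; the
 * generation flips each time the completion ring wraps.
 */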
1423 processed = 0;
1424 for (;;) {
1425 txcd = &txc->vxcr_u.txcd[txc->vxcr_next];
1426 if (txcd->gen != txc->vxcr_gen)
1427 break;
1428 else if (!clear)
1429 return (1);
1430 vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1431
1432 MPASS(txc->vxcr_next < txc->vxcr_ndesc);
1433 if (++txc->vxcr_next >= txc->vxcr_ndesc) {
1434 txc->vxcr_next = 0;
1435 txc->vxcr_gen ^= 1;
1436 }
1437
1438 if (txcd->eop_idx < txr->vxtxr_next)
1439 processed += txr->vxtxr_ndesc -
1440 (txr->vxtxr_next - txcd->eop_idx) + 1;
1441 else
1442 processed += txcd->eop_idx - txr->vxtxr_next + 1;
1443 txr->vxtxr_next = (txcd->eop_idx + 1) % txr->vxtxr_ndesc;
1444 }
1445
1446 return (processed);
1447 }
1448
1449 static int
1450 vmxnet3_isc_rxd_available(void *vsc, uint16_t rxqid, qidx_t idx, qidx_t budget)
1451 {
1452 struct vmxnet3_softc *sc;
1453 struct vmxnet3_rxqueue *rxq;
1454 struct vmxnet3_comp_ring *rxc;
1455 struct vmxnet3_rxcompdesc *rxcd;
1456 int avail;
1457 int completed_gen;
1458 #ifdef INVARIANTS
1459 int expect_sop = 1;
1460 #endif
1461 sc = vsc;
1462 rxq = &sc->vmx_rxq[rxqid];
1463 rxc = &rxq->vxrxq_comp_ring;
1464
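/*
 * Walk the completion ring from idx, counting completed packets
 * (EOP descriptors with a non-zero length) until either the device
 * still owns a descriptor or the budget is exceeded.
 */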
1465 avail = 0;
1466 completed_gen = rxc->vxcr_gen;
1467 for (;;) {
1468 rxcd = &rxc->vxcr_u.rxcd[idx];
1469 if (rxcd->gen != completed_gen)
1470 break;
1471 vmxnet3_barrier(sc, VMXNET3_BARRIER_RD);
1472
1473 #ifdef INVARIANTS
1474 if (expect_sop)
1475 KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1476 else
1477 KASSERT(!rxcd->sop, ("%s: unexpected sop", __func__));
1478 expect_sop = rxcd->eop;
1479 #endif
1480 if (rxcd->eop && (rxcd->len != 0))
1481 avail++;
1482 if (avail > budget)
1483 break;
1484 if (++idx == rxc->vxcr_ndesc) {
1485 idx = 0;
1486 completed_gen ^= 1;
1487 }
1488 }
1489
1490 return (avail);
1491 }
1492
1493 static int
1494 vmxnet3_isc_rxd_pkt_get(void *vsc, if_rxd_info_t ri)
1495 {
1496 struct vmxnet3_softc *sc;
1497 if_softc_ctx_t scctx;
1498 struct vmxnet3_rxqueue *rxq;
1499 struct vmxnet3_comp_ring *rxc;
1500 struct vmxnet3_rxcompdesc *rxcd;
1501 if_rxd_frag_t frag;
1502 int cqidx;
1503 uint16_t total_len;
1504 uint8_t nfrags;
1505 uint8_t i;
1506 uint8_t flid;
1507
1508 sc = vsc;
1509 scctx = sc->vmx_scctx;
1510 rxq = &sc->vmx_rxq[ri->iri_qsidx];
1511 rxc = &rxq->vxrxq_comp_ring;
1512
1513 /*
1514 * Get a single packet starting at the given index in the completion
1515 * queue. That we have been called indicates that
1516 * vmxnet3_isc_rxd_available() has already verified that either
1517 * there is a complete packet available starting at the given index,
1518 * or there are one or more zero length packets starting at the
1519 * given index followed by a complete packet, so no verification of
1520 * ownership of the descriptors (and no associated read barrier) is
1521 * required here.
1522 */
1523 cqidx = ri->iri_cidx;
1524 rxcd = &rxc->vxcr_u.rxcd[cqidx];
1525 while (rxcd->len == 0) {
1526 KASSERT(rxcd->sop && rxcd->eop,
1527 ("%s: zero-length packet without both sop and eop set",
1528 __func__));
1529 rxc->vxcr_zero_length++;
1530 if (++cqidx == rxc->vxcr_ndesc) {
1531 cqidx = 0;
1532 rxc->vxcr_gen ^= 1;
1533 }
1534 rxcd = &rxc->vxcr_u.rxcd[cqidx];
1535 }
1536 KASSERT(rxcd->sop, ("%s: expected sop", __func__));
1537
1538 /*
1539 * RSS and flow ID.
1540 * Types other than M_HASHTYPE_NONE and M_HASHTYPE_OPAQUE_HASH should
1541 * be used only if the software RSS is enabled and it uses the same
1542 * algorithm and the hash key as the "hardware". If the software RSS
1543 * is not enabled, then it's simply pointless to use those types.
1544 * If it's enabled but with different parameters, then hash values will
1545 * not match.
1546 */
1547 ri->iri_flowid = rxcd->rss_hash;
1548 #ifdef RSS
1549 if ((sc->vmx_flags & VMXNET3_FLAG_SOFT_RSS) != 0) {
1550 switch (rxcd->rss_type) {
1551 case VMXNET3_RCD_RSS_TYPE_NONE:
1552 ri->iri_flowid = ri->iri_qsidx;
1553 ri->iri_rsstype = M_HASHTYPE_NONE;
1554 break;
1555 case VMXNET3_RCD_RSS_TYPE_IPV4:
1556 ri->iri_rsstype = M_HASHTYPE_RSS_IPV4;
1557 break;
1558 case VMXNET3_RCD_RSS_TYPE_TCPIPV4:
1559 ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV4;
1560 break;
1561 case VMXNET3_RCD_RSS_TYPE_IPV6:
1562 ri->iri_rsstype = M_HASHTYPE_RSS_IPV6;
1563 break;
1564 case VMXNET3_RCD_RSS_TYPE_TCPIPV6:
1565 ri->iri_rsstype = M_HASHTYPE_RSS_TCP_IPV6;
1566 break;
1567 default:
1568 ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1569 break;
1570 }
1571 } else
1572 #endif
1573 {
1574 switch (rxcd->rss_type) {
1575 case VMXNET3_RCD_RSS_TYPE_NONE:
1576 ri->iri_flowid = ri->iri_qsidx;
1577 ri->iri_rsstype = M_HASHTYPE_NONE;
1578 break;
1579 default:
1580 ri->iri_rsstype = M_HASHTYPE_OPAQUE_HASH;
1581 break;
1582 }
1583 }
1584
1585 /*
1586 * The queue numbering scheme used for rxcd->qid is as follows:
1587 * - All of the command ring 0s are numbered [0, nrxqsets - 1]
1588 * - All of the command ring 1s are numbered [nrxqsets, 2*nrxqsets - 1]
1589 *
1590 * Thus, rxcd->qid less than nrxqsets indicates command ring (and
1591 * flid) 0, and rxcd->qid greater than or equal to nrxqsets
1592 * indicates command ring (and flid) 1.
1593 */
1594 nfrags = 0;
1595 total_len = 0;
1596 do {
1597 rxcd = &rxc->vxcr_u.rxcd[cqidx];
1598 KASSERT(rxcd->gen == rxc->vxcr_gen,
1599 ("%s: generation mismatch", __func__));
1600 KASSERT(nfrags < IFLIB_MAX_RX_SEGS,
1601 ("%s: too many fragments", __func__));
1602 if (__predict_true(rxcd->len != 0)) {
1603 frag = &ri->iri_frags[nfrags];
1604 flid = (rxcd->qid >= scctx->isc_nrxqsets) ? 1 : 0;
1605 frag->irf_flid = flid;
1606 frag->irf_idx = rxcd->rxd_idx;
1607 frag->irf_len = rxcd->len;
1608 total_len += rxcd->len;
1609 nfrags++;
1610 } else {
1611 rxc->vcxr_zero_length_frag++;
1612 }
1613 if (++cqidx == rxc->vxcr_ndesc) {
1614 cqidx = 0;
1615 rxc->vxcr_gen ^= 1;
1616 }
1617 } while (!rxcd->eop);
1618
1619 ri->iri_cidx = cqidx;
1620 ri->iri_nfrags = nfrags;
1621 ri->iri_len = total_len;
1622
1623 /*
1624 * If there's an error, the last descriptor in the packet will
1625 * have the error indicator set. In this case, set all
1626 * fragment lengths to zero. This will cause iflib to discard
1627 * the packet, but process all associated descriptors through
1628 * the refill mechanism.
1629 */
1630 if (__predict_false(rxcd->error)) {
1631 rxc->vxcr_pkt_errors++;
1632 for (i = 0; i < nfrags; i++) {
1633 frag = &ri->iri_frags[i];
1634 frag->irf_len = 0;
1635 }
1636 } else {
1637 /* Checksum offload information is in the last descriptor. */
1638 if (!rxcd->no_csum) {
1639 uint32_t csum_flags = 0;
1640
1641 if (rxcd->ipv4) {
1642 csum_flags |= CSUM_IP_CHECKED;
1643 if (rxcd->ipcsum_ok)
1644 csum_flags |= CSUM_IP_VALID;
1645 }
1646 if (!rxcd->fragment && (rxcd->tcp || rxcd->udp)) {
1647 csum_flags |= CSUM_L4_CALC;
1648 if (rxcd->csum_ok) {
1649 csum_flags |= CSUM_L4_VALID;
1650 ri->iri_csum_data = 0xffff;
1651 }
1652 }
1653 ri->iri_csum_flags = csum_flags;
1654 }
1655
1656 /* VLAN information is in the last descriptor. */
1657 if (rxcd->vlan) {
1658 ri->iri_flags |= M_VLANTAG;
1659 ri->iri_vtag = rxcd->vtag;
1660 }
1661 }
1662
1663 return (0);
1664 }
1665
1666 static void
1667 vmxnet3_isc_rxd_refill(void *vsc, if_rxd_update_t iru)
1668 {
1669 struct vmxnet3_softc *sc;
1670 struct vmxnet3_rxqueue *rxq;
1671 struct vmxnet3_rxring *rxr;
1672 struct vmxnet3_rxdesc *rxd;
1673 uint64_t *paddrs;
1674 int count;
1675 int len;
1676 int idx;
1677 int i;
1678 uint8_t flid;
1679 uint8_t btype;
1680
1681 count = iru->iru_count;
1682 len = iru->iru_buf_size;
1683 flid = iru->iru_flidx;
1684 paddrs = iru->iru_paddrs;
1685
1686 sc = vsc;
1687 rxq = &sc->vmx_rxq[iru->iru_qsidx];
1688 rxr = &rxq->vxrxq_cmd_ring[flid];
1689 rxd = rxr->vxrxr_rxd;
1690
1691 /*
1692 * Command ring 0 is filled with BTYPE_HEAD descriptors, and
1693 * command ring 1 is filled with BTYPE_BODY descriptors.
1694 */
1695 btype = (flid == 0) ? VMXNET3_BTYPE_HEAD : VMXNET3_BTYPE_BODY;
1696 /*
1697 * The refill entries from iflib will advance monotonically,
1698 * but the refilled descriptors may not be contiguous due to
1699 * earlier skipping of descriptors by the device. The refill
1700 * entries from iflib need an entire state update, while the
1701 * descriptors previously skipped by the device only need to
1702 * have their generation numbers updated.
1703 */
1704 idx = rxr->vxrxr_refill_start;
1705 i = 0;
1706 do {
1707 if (idx == iru->iru_idxs[i]) {
1708 rxd[idx].addr = paddrs[i];
1709 rxd[idx].len = len;
1710 rxd[idx].btype = btype;
1711 i++;
1712 } else
1713 rxr->vxrxr_desc_skips++;
1714 rxd[idx].gen = rxr->vxrxr_gen;
1715
1716 if (++idx == rxr->vxrxr_ndesc) {
1717 idx = 0;
1718 rxr->vxrxr_gen ^= 1;
1719 }
1720 } while (i != count);
1721 rxr->vxrxr_refill_start = idx;
1722 }
1723
1724 static void
1725 vmxnet3_isc_rxd_flush(void *vsc, uint16_t rxqid, uint8_t flid, qidx_t pidx)
1726 {
1727 struct vmxnet3_softc *sc;
1728 bus_size_t r;
1729
1730 sc = vsc;
1731
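/*
 * Each command ring has its own head register: free list 0 maps to
 * RXH1 and free list 1 maps to RXH2 for the given queue.
 */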
1732 if (flid == 0)
1733 r = VMXNET3_BAR0_RXH1(rxqid);
1734 else
1735 r = VMXNET3_BAR0_RXH2(rxqid);
1736
1737 vmxnet3_write_bar0(sc, r, pidx);
1738 }
1739
1740 static int
1741 vmxnet3_legacy_intr(void *xsc)
1742 {
1743 struct vmxnet3_softc *sc;
1744 if_softc_ctx_t scctx;
1745 if_ctx_t ctx;
1746
1747 sc = xsc;
1748 scctx = sc->vmx_scctx;
1749 ctx = sc->vmx_ctx;
1750
1751 /*
1752 * When there is only a single interrupt configured, this routine
1753 * runs in fast interrupt context, following which the rxq 0 task
1754 * will be enqueued.
1755 */
1756 if (scctx->isc_intr == IFLIB_INTR_LEGACY) {
1757 if (vmxnet3_read_bar1(sc, VMXNET3_BAR1_INTR) == 0)
1758 return (FILTER_HANDLED);
1759 }
1760 if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1761 vmxnet3_intr_disable_all(ctx);
1762
1763 if (sc->vmx_ds->event != 0)
1764 iflib_admin_intr_deferred(ctx);
1765
1766 /*
1767 * XXX - When there is both rxq and event activity, do we care
1768 * whether the rxq 0 task or the admin task re-enables the interrupt
1769 * first?
1770 */
1771 return (FILTER_SCHEDULE_THREAD);
1772 }
1773
1774 static int
1775 vmxnet3_rxq_intr(void *vrxq)
1776 {
1777 struct vmxnet3_softc *sc;
1778 struct vmxnet3_rxqueue *rxq;
1779
1780 rxq = vrxq;
1781 sc = rxq->vxrxq_sc;
1782
1783 if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1784 vmxnet3_disable_intr(sc, rxq->vxrxq_intr_idx);
1785
1786 return (FILTER_SCHEDULE_THREAD);
1787 }
1788
1789 static int
1790 vmxnet3_event_intr(void *vsc)
1791 {
1792 struct vmxnet3_softc *sc;
1793
1794 sc = vsc;
1795
1796 if (sc->vmx_intr_mask_mode == VMXNET3_IMM_ACTIVE)
1797 vmxnet3_disable_intr(sc, sc->vmx_event_intr_idx);
1798
1799 /*
1800 * The work will be done via vmxnet3_update_admin_status(), and the
1801 * interrupt will be re-enabled by vmxnet3_link_intr_enable().
1804 */
1805 return (FILTER_SCHEDULE_THREAD);
1806 }
1807
1808 static void
1809 vmxnet3_stop(if_ctx_t ctx)
1810 {
1811 struct vmxnet3_softc *sc;
1812
1813 sc = iflib_get_softc(ctx);
1814
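/*
 * Quiesce the device and then reset it so that a subsequent
 * vmxnet3_init() starts from a clean state.
 */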
1815 sc->vmx_link_active = 0;
1816 vmxnet3_write_cmd(sc, VMXNET3_CMD_DISABLE);
1817 vmxnet3_write_cmd(sc, VMXNET3_CMD_RESET);
1818 }
1819
1820 static void
1821 vmxnet3_txinit(struct vmxnet3_softc *sc, struct vmxnet3_txqueue *txq)
1822 {
1823 struct vmxnet3_txring *txr;
1824 struct vmxnet3_comp_ring *txc;
1825
1826 txq->vxtxq_last_flush = -1;
1827
1828 txr = &txq->vxtxq_cmd_ring;
1829 txr->vxtxr_next = 0;
1830 txr->vxtxr_gen = VMXNET3_INIT_GEN;
1831 /*
1832 * iflib has zeroed out the descriptor array during the prior attach
1833 * or stop
1834 */
1835
1836 txc = &txq->vxtxq_comp_ring;
1837 txc->vxcr_next = 0;
1838 txc->vxcr_gen = VMXNET3_INIT_GEN;
1839 /*
1840 * iflib has zeroed out the descriptor array during the prior attach
1841 * or stop
1842 */
1843 }
1844
1845 static void
1846 vmxnet3_rxinit(struct vmxnet3_softc *sc, struct vmxnet3_rxqueue *rxq)
1847 {
1848 struct vmxnet3_rxring *rxr;
1849 struct vmxnet3_comp_ring *rxc;
1850 int i;
1851
1852 /*
1853 * The descriptors will be populated with buffers during a
1854 * subsequent invocation of vmxnet3_isc_rxd_refill()
1855 */
1856 for (i = 0; i < sc->vmx_sctx->isc_nrxqs - 1; i++) {
1857 rxr = &rxq->vxrxq_cmd_ring[i];
1858 rxr->vxrxr_gen = VMXNET3_INIT_GEN;
1859 rxr->vxrxr_desc_skips = 0;
1860 rxr->vxrxr_refill_start = 0;
1861 /*
1862 * iflib has zeroed out the descriptor array during the
1863 * prior attach or stop
1864 */
1865 }
1866
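/*
 * Any command rings beyond those backed by iflib free lists are unused;
 * clearing them and leaving their generation at 0 presumably keeps the
 * device from ever processing their descriptors.
 */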
1867 for (/**/; i < VMXNET3_RXRINGS_PERQ; i++) {
1868 rxr = &rxq->vxrxq_cmd_ring[i];
1869 rxr->vxrxr_gen = 0;
1870 rxr->vxrxr_desc_skips = 0;
1871 rxr->vxrxr_refill_start = 0;
1872 bzero(rxr->vxrxr_rxd,
1873 rxr->vxrxr_ndesc * sizeof(struct vmxnet3_rxdesc));
1874 }
1875
1876 rxc = &rxq->vxrxq_comp_ring;
1877 rxc->vxcr_next = 0;
1878 rxc->vxcr_gen = VMXNET3_INIT_GEN;
1879 rxc->vxcr_zero_length = 0;
1880 rxc->vcxr_zero_length_frag = 0;
1881 rxc->vxcr_pkt_errors = 0;
1882 /*
1883 * iflib has zeroed out the descriptor array during the prior attach
1884 * or stop
1885 */
1886 }
1887
1888 static void
1889 vmxnet3_reinit_queues(struct vmxnet3_softc *sc)
1890 {
1891 if_softc_ctx_t scctx;
1892 int q;
1893
1894 scctx = sc->vmx_scctx;
1895
1896 for (q = 0; q < scctx->isc_ntxqsets; q++)
1897 vmxnet3_txinit(sc, &sc->vmx_txq[q]);
1898
1899 for (q = 0; q < scctx->isc_nrxqsets; q++)
1900 vmxnet3_rxinit(sc, &sc->vmx_rxq[q]);
1901 }
1902
1903 static int
1904 vmxnet3_enable_device(struct vmxnet3_softc *sc)
1905 {
1906 if_softc_ctx_t scctx;
1907 int q;
1908
1909 scctx = sc->vmx_scctx;
1910
1911 if (vmxnet3_read_cmd(sc, VMXNET3_CMD_ENABLE) != 0) {
1912 device_printf(sc->vmx_dev, "device enable command failed!\n");
1913 return (1);
1914 }
1915
1916 /* Reset the Rx queue heads. */
1917 for (q = 0; q < scctx->isc_nrxqsets; q++) {
1918 vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH1(q), 0);
1919 vmxnet3_write_bar0(sc, VMXNET3_BAR0_RXH2(q), 0);
1920 }
1921
1922 return (0);
1923 }
1924
1925 static void
1926 vmxnet3_reinit_rxfilters(struct vmxnet3_softc *sc)
1927 {
1928 if_t ifp;
1929
1930 ifp = sc->vmx_ifp;
1931
1932 vmxnet3_set_rxfilter(sc, if_getflags(ifp));
1933
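/*
 * Push the VLAN filter state to the device: the software bit vector is
 * copied into the shared area when hardware VLAN filtering is enabled,
 * and cleared otherwise, before issuing the VLAN_FILTER command.
 */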
1934 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
1935 bcopy(sc->vmx_vlan_filter, sc->vmx_ds->vlan_filter,
1936 sizeof(sc->vmx_ds->vlan_filter));
1937 else
1938 bzero(sc->vmx_ds->vlan_filter,
1939 sizeof(sc->vmx_ds->vlan_filter));
1940 vmxnet3_write_cmd(sc, VMXNET3_CMD_VLAN_FILTER);
1941 }
1942
1943 static void
1944 vmxnet3_init(if_ctx_t ctx)
1945 {
1946 struct vmxnet3_softc *sc;
1947
1948 sc = iflib_get_softc(ctx);
1949
1950 /* Use the current MAC address. */
1951 bcopy(if_getlladdr(sc->vmx_ifp), sc->vmx_lladdr, ETHER_ADDR_LEN);
1952 vmxnet3_set_lladdr(sc);
1953
1954 vmxnet3_reinit_shared_data(sc);
1955 vmxnet3_reinit_queues(sc);
1956
1957 vmxnet3_enable_device(sc);
1958
1959 vmxnet3_reinit_rxfilters(sc);
1960 vmxnet3_link_status(sc);
1961 }
1962
1963 static void
1964 vmxnet3_multi_set(if_ctx_t ctx)
1965 {
1966
1967 vmxnet3_set_rxfilter(iflib_get_softc(ctx),
1968 if_getflags(iflib_get_ifp(ctx)));
1969 }
1970
1971 static int
1972 vmxnet3_mtu_set(if_ctx_t ctx, uint32_t mtu)
1973 {
1974 struct vmxnet3_softc *sc;
1975 if_softc_ctx_t scctx;
1976
1977 sc = iflib_get_softc(ctx);
1978 scctx = sc->vmx_scctx;
1979
1980 if (mtu > VMXNET3_TX_MAXSIZE - (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN +
1981 ETHER_CRC_LEN))
1982 return (EINVAL);
1983
1984 /*
1985 * Update the max frame size so that the rx mbuf size is
1986 * chosen based on the new mtu during the interface init that
1987 * will occur after this routine returns.
1988 */
1989 scctx->isc_max_frame_size = mtu +
1990 ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN + ETHER_CRC_LEN;
1991 /* RX completion queue - n/a */
1992 scctx->isc_rxd_buf_size[0] = 0;
1993 /*
1994 * For header-type descriptors (used for first segment of
1995 * packet), let iflib determine the buffer size based on the
1996 * max frame size.
1997 */
1998 scctx->isc_rxd_buf_size[1] = 0;
1999 /*
2000 * For body-type descriptors (used for jumbo frames and LRO),
2001 * always use page-sized buffers.
2002 */
2003 scctx->isc_rxd_buf_size[2] = MJUMPAGESIZE;
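/*
 * Hypothetical example: an MTU of 9000 yields an isc_max_frame_size of
 * 9022 with the header and CRC overhead above; iflib sizes the head-ring
 * buffers from that, while the body ring always uses page-sized clusters.
 */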
2004
2005 return (0);
2006 }
2007
2008 static void
2009 vmxnet3_media_status(if_ctx_t ctx, struct ifmediareq *ifmr)
2010 {
2011 struct vmxnet3_softc *sc;
2012
2013 sc = iflib_get_softc(ctx);
2014
2015 ifmr->ifm_status = IFM_AVALID;
2016 ifmr->ifm_active = IFM_ETHER;
2017
2018 if (vmxnet3_link_is_up(sc) != 0) {
2019 ifmr->ifm_status |= IFM_ACTIVE;
2020 ifmr->ifm_active |= IFM_AUTO;
2021 } else
2022 ifmr->ifm_active |= IFM_NONE;
2023 }
2024
2025 static int
2026 vmxnet3_media_change(if_ctx_t ctx)
2027 {
2028
2029 /* Ignore. */
2030 return (0);
2031 }
2032
2033 static int
2034 vmxnet3_promisc_set(if_ctx_t ctx, int flags)
2035 {
2036
2037 vmxnet3_set_rxfilter(iflib_get_softc(ctx), flags);
2038
2039 return (0);
2040 }
2041
2042 static uint64_t
2043 vmxnet3_get_counter(if_ctx_t ctx, ift_counter cnt)
2044 {
2045 if_t ifp = iflib_get_ifp(ctx);
2046
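/*
 * All standard ifnet counters are maintained by the stack, so defer to
 * the defaults; anything out of range reports zero.
 */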
2047 if (cnt < IFCOUNTERS)
2048 return if_get_counter_default(ifp, cnt);
2049
2050 return (0);
2051 }
2052
2053 static void
2054 vmxnet3_update_admin_status(if_ctx_t ctx)
2055 {
2056 struct vmxnet3_softc *sc;
2057
2058 sc = iflib_get_softc(ctx);
2059 if (sc->vmx_ds->event != 0)
2060 vmxnet3_evintr(sc);
2061
2062 vmxnet3_refresh_host_stats(sc);
2063 }
2064
2065 static void
2066 vmxnet3_txq_timer(if_ctx_t ctx, uint16_t qid)
2067 {
2068 /* Host stats refresh is global, so just trigger it on txq 0 */
2069 if (qid == 0)
2070 vmxnet3_refresh_host_stats(iflib_get_softc(ctx));
2071 }
2072
2073 static void
2074 vmxnet3_update_vlan_filter(struct vmxnet3_softc *sc, int add, uint16_t tag)
2075 {
2076 int idx, bit;
2077
2078 if (tag == 0 || tag > 4095)
2079 return;
2080
2081 idx = (tag >> 5) & 0x7F;
2082 bit = tag & 0x1F;
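/*
 * Hypothetical example: tag 100 selects bit 4 (100 & 0x1f) of word 3
 * (100 >> 5) in the 4096-bit filter bitmap.
 */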
2083
2084 /* Update our private VLAN bitvector. */
2085 if (add)
2086 sc->vmx_vlan_filter[idx] |= (1 << bit);
2087 else
2088 sc->vmx_vlan_filter[idx] &= ~(1 << bit);
2089 }
2090
2091 static void
2092 vmxnet3_vlan_register(if_ctx_t ctx, uint16_t tag)
2093 {
2094
2095 vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 1, tag);
2096 }
2097
2098 static void
2099 vmxnet3_vlan_unregister(if_ctx_t ctx, uint16_t tag)
2100 {
2101
2102 vmxnet3_update_vlan_filter(iflib_get_softc(ctx), 0, tag);
2103 }
2104
2105 static u_int
2106 vmxnet3_hash_maddr(void *arg, struct sockaddr_dl *sdl, u_int count)
2107 {
2108 struct vmxnet3_softc *sc = arg;
2109
2110 if (count < VMXNET3_MULTICAST_MAX)
2111 bcopy(LLADDR(sdl), &sc->vmx_mcast[count * ETHER_ADDR_LEN],
2112 ETHER_ADDR_LEN);
2113
2114 return (1);
2115 }
2116
2117 static void
2118 vmxnet3_set_rxfilter(struct vmxnet3_softc *sc, int flags)
2119 {
2120 if_t ifp;
2121 struct vmxnet3_driver_shared *ds;
2122 u_int mode;
2123
2124 ifp = sc->vmx_ifp;
2125 ds = sc->vmx_ds;
2126
2127 mode = VMXNET3_RXMODE_UCAST | VMXNET3_RXMODE_BCAST;
2128 if (flags & IFF_PROMISC)
2129 mode |= VMXNET3_RXMODE_PROMISC;
2130 if (flags & IFF_ALLMULTI)
2131 mode |= VMXNET3_RXMODE_ALLMULTI;
2132 else {
2133 int cnt;
2134
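/*
 * Walk the interface's link-level multicast list: vmxnet3_hash_maddr()
 * copies up to VMXNET3_MULTICAST_MAX addresses into the shared table and
 * the accumulated return value is the total number of addresses seen.
 * If the table would overflow, fall back to receiving all multicast.
 */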
2135 cnt = if_foreach_llmaddr(ifp, vmxnet3_hash_maddr, sc);
2136 if (cnt >= VMXNET3_MULTICAST_MAX) {
2137 cnt = 0;
2138 mode |= VMXNET3_RXMODE_ALLMULTI;
2139 } else if (cnt > 0)
2140 mode |= VMXNET3_RXMODE_MCAST;
2141 ds->mcast_tablelen = cnt * ETHER_ADDR_LEN;
2142 }
2143
2144 ds->rxmode = mode;
2145
2146 vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_FILTER);
2147 vmxnet3_write_cmd(sc, VMXNET3_CMD_SET_RXMODE);
2148 }
2149
2150 static void
2151 vmxnet3_refresh_host_stats(struct vmxnet3_softc *sc)
2152 {
2153
2154 vmxnet3_write_cmd(sc, VMXNET3_CMD_GET_STATS);
2155 }
2156
2157 static int
2158 vmxnet3_link_is_up(struct vmxnet3_softc *sc)
2159 {
2160 uint32_t status;
2161
2162 status = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_LINK);
2163 return !!(status & 0x1);
2164 }
2165
2166 static void
2167 vmxnet3_link_status(struct vmxnet3_softc *sc)
2168 {
2169 if_ctx_t ctx;
2170 uint64_t speed;
2171 int link;
2172
2173 ctx = sc->vmx_ctx;
2174 link = vmxnet3_link_is_up(sc);
2175 speed = IF_Gbps(10);
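/* The paravirtual link has no negotiated rate; 10 Gbps is always reported here. */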
2176
2177 if (link != 0 && sc->vmx_link_active == 0) {
2178 sc->vmx_link_active = 1;
2179 iflib_link_state_change(ctx, LINK_STATE_UP, speed);
2180 } else if (link == 0 && sc->vmx_link_active != 0) {
2181 sc->vmx_link_active = 0;
2182 iflib_link_state_change(ctx, LINK_STATE_DOWN, speed);
2183 }
2184 }
2185
2186 static void
2187 vmxnet3_set_lladdr(struct vmxnet3_softc *sc)
2188 {
2189 uint32_t ml, mh;
2190
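/*
 * The MAC address is programmed as two little-endian words, e.g. a
 * hypothetical 00:0c:29:aa:bb:cc is written as MACL = 0xaa290c00 and
 * MACH = 0x0000ccbb.
 */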
2191 ml = sc->vmx_lladdr[0];
2192 ml |= sc->vmx_lladdr[1] << 8;
2193 ml |= sc->vmx_lladdr[2] << 16;
2194 ml |= sc->vmx_lladdr[3] << 24;
2195 vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACL, ml);
2196
2197 mh = sc->vmx_lladdr[4];
2198 mh |= sc->vmx_lladdr[5] << 8;
2199 vmxnet3_write_bar1(sc, VMXNET3_BAR1_MACH, mh);
2200 }
2201
2202 static void
2203 vmxnet3_get_lladdr(struct vmxnet3_softc *sc)
2204 {
2205 uint32_t ml, mh;
2206
2207 ml = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACL);
2208 mh = vmxnet3_read_cmd(sc, VMXNET3_CMD_GET_MACH);
2209
2210 sc->vmx_lladdr[0] = ml;
2211 sc->vmx_lladdr[1] = ml >> 8;
2212 sc->vmx_lladdr[2] = ml >> 16;
2213 sc->vmx_lladdr[3] = ml >> 24;
2214 sc->vmx_lladdr[4] = mh;
2215 sc->vmx_lladdr[5] = mh >> 8;
2216 }
2217
2218 static void
2219 vmxnet3_setup_txq_sysctl(struct vmxnet3_txqueue *txq,
2220 struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2221 {
2222 struct sysctl_oid *node, *txsnode;
2223 struct sysctl_oid_list *list, *txslist;
2224 struct UPT1_TxStats *txstats;
2225 char namebuf[16];
2226
2227 txstats = &txq->vxtxq_ts->stats;
2228
2229 snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vxtxq_id);
2230 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2231 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
2232 txq->vxtxq_sysctl = list = SYSCTL_CHILDREN(node);
2233
2234 /*
2235 * Add statistics reported by the host. These are updated by the
2236 * iflib txq timer on txq 0.
2237 */
2238 txsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2239 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2240 txslist = SYSCTL_CHILDREN(txsnode);
2241 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_packets", CTLFLAG_RD,
2242 &txstats->TSO_packets, "TSO packets");
2243 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "tso_bytes", CTLFLAG_RD,
2244 &txstats->TSO_bytes, "TSO bytes");
2245 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2246 &txstats->ucast_packets, "Unicast packets");
2247 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2248 &txstats->ucast_bytes, "Unicast bytes");
2249 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2250 &txstats->mcast_packets, "Multicast packets");
2251 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2252 &txstats->mcast_bytes, "Multicast bytes");
2253 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "error", CTLFLAG_RD,
2254 &txstats->error, "Errors");
2255 SYSCTL_ADD_UQUAD(ctx, txslist, OID_AUTO, "discard", CTLFLAG_RD,
2256 &txstats->discard, "Discards");
2257 }
2258
2259 static void
2260 vmxnet3_setup_rxq_sysctl(struct vmxnet3_rxqueue *rxq,
2261 struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2262 {
2263 struct sysctl_oid *node, *rxsnode;
2264 struct sysctl_oid_list *list, *rxslist;
2265 struct UPT1_RxStats *rxstats;
2266 char namebuf[16];
2267
2268 rxstats = &rxq->vxrxq_rs->stats;
2269
2270 snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vxrxq_id);
2271 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
2272 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
2273 rxq->vxrxq_sysctl = list = SYSCTL_CHILDREN(node);
2274
2275 /*
2276 * Add statistics reported by the host. These are updated by the
2277 * iflib txq timer on txq 0.
2278 */
2279 rxsnode = SYSCTL_ADD_NODE(ctx, list, OID_AUTO, "hstats",
2280 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Host Statistics");
2281 rxslist = SYSCTL_CHILDREN(rxsnode);
2282 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_packets", CTLFLAG_RD,
2283 &rxstats->LRO_packets, "LRO packets");
2284 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "lro_bytes", CTLFLAG_RD,
2285 &rxstats->LRO_bytes, "LRO bytes");
2286 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "ucast_packets", CTLFLAG_RD,
2287 &rxstats->ucast_packets, "Unicast packets");
2288 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "unicast_bytes", CTLFLAG_RD,
2289 &rxstats->ucast_bytes, "Unicast bytes");
2290 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_packets", CTLFLAG_RD,
2291 &rxstats->mcast_packets, "Multicast packets");
2292 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "mcast_bytes", CTLFLAG_RD,
2293 &rxstats->mcast_bytes, "Multicast bytes");
2294 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_packets", CTLFLAG_RD,
2295 &rxstats->bcast_packets, "Broadcast packets");
2296 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "bcast_bytes", CTLFLAG_RD,
2297 &rxstats->bcast_bytes, "Broadcast bytes");
2298 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "nobuffer", CTLFLAG_RD,
2299 &rxstats->nobuffer, "No buffer");
2300 SYSCTL_ADD_UQUAD(ctx, rxslist, OID_AUTO, "error", CTLFLAG_RD,
2301 &rxstats->error, "Errors");
2302 }
2303
2304 static void
2305 vmxnet3_setup_debug_sysctl(struct vmxnet3_softc *sc,
2306 struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2307 {
2308 if_softc_ctx_t scctx;
2309 struct sysctl_oid *node;
2310 struct sysctl_oid_list *list;
2311 int i;
2312
2313 scctx = sc->vmx_scctx;
2314
2315 for (i = 0; i < scctx->isc_ntxqsets; i++) {
2316 struct vmxnet3_txqueue *txq = &sc->vmx_txq[i];
2317
2318 node = SYSCTL_ADD_NODE(ctx, txq->vxtxq_sysctl, OID_AUTO,
2319 "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2320 list = SYSCTL_CHILDREN(node);
2321
2322 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_next", CTLFLAG_RD,
2323 &txq->vxtxq_cmd_ring.vxtxr_next, 0, "");
2324 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd_ndesc", CTLFLAG_RD,
2325 &txq->vxtxq_cmd_ring.vxtxr_ndesc, 0, "");
2326 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd_gen", CTLFLAG_RD,
2327 &txq->vxtxq_cmd_ring.vxtxr_gen, 0, "");
2328 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_next", CTLFLAG_RD,
2329 &txq->vxtxq_comp_ring.vxcr_next, 0, "");
2330 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2331 &txq->vxtxq_comp_ring.vxcr_ndesc, 0,"");
2332 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2333 &txq->vxtxq_comp_ring.vxcr_gen, 0, "");
2334 }
2335
2336 for (i = 0; i < scctx->isc_nrxqsets; i++) {
2337 struct vmxnet3_rxqueue *rxq = &sc->vmx_rxq[i];
2338
2339 node = SYSCTL_ADD_NODE(ctx, rxq->vxrxq_sysctl, OID_AUTO,
2340 "debug", CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "");
2341 list = SYSCTL_CHILDREN(node);
2342
2343 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd0_ndesc", CTLFLAG_RD,
2344 &rxq->vxrxq_cmd_ring[0].vxrxr_ndesc, 0, "");
2345 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd0_gen", CTLFLAG_RD,
2346 &rxq->vxrxq_cmd_ring[0].vxrxr_gen, 0, "");
2347 SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd0_desc_skips", CTLFLAG_RD,
2348 &rxq->vxrxq_cmd_ring[0].vxrxr_desc_skips, 0, "");
2349 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "cmd1_ndesc", CTLFLAG_RD,
2350 &rxq->vxrxq_cmd_ring[1].vxrxr_ndesc, 0, "");
2351 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "cmd1_gen", CTLFLAG_RD,
2352 &rxq->vxrxq_cmd_ring[1].vxrxr_gen, 0, "");
2353 SYSCTL_ADD_U64(ctx, list, OID_AUTO, "cmd1_desc_skips", CTLFLAG_RD,
2354 &rxq->vxrxq_cmd_ring[1].vxrxr_desc_skips, 0, "");
2355 SYSCTL_ADD_UINT(ctx, list, OID_AUTO, "comp_ndesc", CTLFLAG_RD,
2356 &rxq->vxrxq_comp_ring.vxcr_ndesc, 0,"");
2357 SYSCTL_ADD_INT(ctx, list, OID_AUTO, "comp_gen", CTLFLAG_RD,
2358 &rxq->vxrxq_comp_ring.vxcr_gen, 0, "");
2359 SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length", CTLFLAG_RD,
2360 &rxq->vxrxq_comp_ring.vxcr_zero_length, 0, "");
2361 SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_zero_length_frag",
2362 CTLFLAG_RD, &rxq->vxrxq_comp_ring.vcxr_zero_length_frag,
2363 0, "");
2364 SYSCTL_ADD_U64(ctx, list, OID_AUTO, "comp_pkt_errors", CTLFLAG_RD,
2365 &rxq->vxrxq_comp_ring.vxcr_pkt_errors, 0, "");
2366 }
2367 }
2368
2369 static void
2370 vmxnet3_setup_queue_sysctl(struct vmxnet3_softc *sc,
2371 struct sysctl_ctx_list *ctx, struct sysctl_oid_list *child)
2372 {
2373 if_softc_ctx_t scctx;
2374 int i;
2375
2376 scctx = sc->vmx_scctx;
2377
2378 for (i = 0; i < scctx->isc_ntxqsets; i++)
2379 vmxnet3_setup_txq_sysctl(&sc->vmx_txq[i], ctx, child);
2380 for (i = 0; i < scctx->isc_nrxqsets; i++)
2381 vmxnet3_setup_rxq_sysctl(&sc->vmx_rxq[i], ctx, child);
2382
2383 vmxnet3_setup_debug_sysctl(sc, ctx, child);
2384 }
2385
2386 static void
2387 vmxnet3_setup_sysctl(struct vmxnet3_softc *sc)
2388 {
2389 device_t dev;
2390 struct sysctl_ctx_list *ctx;
2391 struct sysctl_oid *tree;
2392 struct sysctl_oid_list *child;
2393
2394 dev = sc->vmx_dev;
2395 ctx = device_get_sysctl_ctx(dev);
2396 tree = device_get_sysctl_tree(dev);
2397 child = SYSCTL_CHILDREN(tree);
2398
2399 vmxnet3_setup_queue_sysctl(sc, ctx, child);
2400 }
2401
2402 static void
2403 vmxnet3_write_bar0(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2404 {
2405
2406 bus_space_write_4(sc->vmx_iot0, sc->vmx_ioh0, r, v);
2407 }
2408
2409 static uint32_t
2410 vmxnet3_read_bar1(struct vmxnet3_softc *sc, bus_size_t r)
2411 {
2412
2413 return (bus_space_read_4(sc->vmx_iot1, sc->vmx_ioh1, r));
2414 }
2415
2416 static void
2417 vmxnet3_write_bar1(struct vmxnet3_softc *sc, bus_size_t r, uint32_t v)
2418 {
2419
2420 bus_space_write_4(sc->vmx_iot1, sc->vmx_ioh1, r, v);
2421 }
2422
2423 static void
2424 vmxnet3_write_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2425 {
2426
2427 vmxnet3_write_bar1(sc, VMXNET3_BAR1_CMD, cmd);
2428 }
2429
2430 static uint32_t
2431 vmxnet3_read_cmd(struct vmxnet3_softc *sc, uint32_t cmd)
2432 {
2433
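/*
 * The BAR1 command register doubles as the result register: writing the
 * command triggers it in the hypervisor, and the read that follows the
 * barrier returns its result.
 */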
2434 vmxnet3_write_cmd(sc, cmd);
2435 bus_space_barrier(sc->vmx_iot1, sc->vmx_ioh1, 0, 0,
2436 BUS_SPACE_BARRIER_READ | BUS_SPACE_BARRIER_WRITE);
2437 return (vmxnet3_read_bar1(sc, VMXNET3_BAR1_CMD));
2438 }
2439
2440 static void
2441 vmxnet3_enable_intr(struct vmxnet3_softc *sc, int irq)
2442 {
2443
2444 vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 0);
2445 }
2446
2447 static void
2448 vmxnet3_disable_intr(struct vmxnet3_softc *sc, int irq)
2449 {
2450
2451 vmxnet3_write_bar0(sc, VMXNET3_BAR0_IMASK(irq), 1);
2452 }
2453
2454 static int
2455 vmxnet3_tx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2456 {
2457 /* Not using interrupts for TX */
2458 return (0);
2459 }
2460
2461 static int
2462 vmxnet3_rx_queue_intr_enable(if_ctx_t ctx, uint16_t qid)
2463 {
2464 struct vmxnet3_softc *sc;
2465
2466 sc = iflib_get_softc(ctx);
2467 vmxnet3_enable_intr(sc, sc->vmx_rxq[qid].vxrxq_intr_idx);
2468 return (0);
2469 }
2470
2471 static void
2472 vmxnet3_link_intr_enable(if_ctx_t ctx)
2473 {
2474 struct vmxnet3_softc *sc;
2475
2476 sc = iflib_get_softc(ctx);
2477 vmxnet3_enable_intr(sc, sc->vmx_event_intr_idx);
2478 }
2479
2480 static void
2481 vmxnet3_intr_enable_all(if_ctx_t ctx)
2482 {
2483 struct vmxnet3_softc *sc;
2484 if_softc_ctx_t scctx;
2485 int i;
2486
2487 sc = iflib_get_softc(ctx);
2488 scctx = sc->vmx_scctx;
2489 sc->vmx_ds->ictrl &= ~VMXNET3_ICTRL_DISABLE_ALL;
2490 for (i = 0; i < scctx->isc_vectors; i++)
2491 vmxnet3_enable_intr(sc, i);
2492 }
2493
2494 static void
2495 vmxnet3_intr_disable_all(if_ctx_t ctx)
2496 {
2497 struct vmxnet3_softc *sc;
2498 int i;
2499
2500 sc = iflib_get_softc(ctx);
2501 /*
2502 * iflib may invoke this routine before vmxnet3_attach_post() has
2503 * run, which is before the top level shared data area is
2504 * initialized and the device made aware of it.
2505 */
2506 if (sc->vmx_ds != NULL)
2507 sc->vmx_ds->ictrl |= VMXNET3_ICTRL_DISABLE_ALL;
2508 for (i = 0; i < VMXNET3_MAX_INTRS; i++)
2509 vmxnet3_disable_intr(sc, i);
2510 }
2511
2512 static bool
2513 vmxnet3_if_needs_restart(if_ctx_t ctx __unused, enum iflib_restart_event event)
2514 {
2515 switch (event) {
2516 case IFLIB_RESTART_VLAN_CONFIG:
2517 return (true);
2518 default:
2519 return (false);
2520 }
2521 }
2522
2523 /*
2524 * Since this is a purely paravirtualized device, we do not have
2525 * to worry about DMA coherency. But at times, we must make sure
2526 * both the compiler and CPU do not reorder memory operations.
2527 */
2528 static inline void
2529 vmxnet3_barrier(struct vmxnet3_softc *sc, vmxnet3_barrier_t type)
2530 {
2531
2532 switch (type) {
2533 case VMXNET3_BARRIER_RD:
2534 rmb();
2535 break;
2536 case VMXNET3_BARRIER_WR:
2537 wmb();
2538 break;
2539 case VMXNET3_BARRIER_RDWR:
2540 mb();
2541 break;
2542 default:
2543 panic("%s: bad barrier type %d", __func__, type);
2544 }
2545 }
2546