1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /* Driver for VirtIO network devices. */
30
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33
34 #include <sys/param.h>
35 #include <sys/eventhandler.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/sockio.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/module.h>
42 #include <sys/msan.h>
43 #include <sys/sbuf.h>
44 #include <sys/socket.h>
45 #include <sys/sysctl.h>
46 #include <sys/random.h>
47 #include <sys/sglist.h>
48 #include <sys/lock.h>
49 #include <sys/mutex.h>
50 #include <sys/taskqueue.h>
51 #include <sys/smp.h>
52 #include <machine/smp.h>
53
54 #include <vm/uma.h>
55
56 #include <net/debugnet.h>
57 #include <net/ethernet.h>
58 #include <net/pfil.h>
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_types.h>
64 #include <net/if_media.h>
65 #include <net/if_vlan_var.h>
66
67 #include <net/bpf.h>
68
69 #include <netinet/in_systm.h>
70 #include <netinet/in.h>
71 #include <netinet/ip.h>
72 #include <netinet/ip6.h>
73 #include <netinet6/ip6_var.h>
74 #include <netinet/udp.h>
75 #include <netinet/tcp.h>
76 #include <netinet/tcp_lro.h>
77
78 #include <machine/bus.h>
79 #include <machine/resource.h>
80 #include <sys/bus.h>
81 #include <sys/rman.h>
82
83 #include <dev/virtio/virtio.h>
84 #include <dev/virtio/virtqueue.h>
85 #include <dev/virtio/network/virtio_net.h>
86 #include <dev/virtio/network/if_vtnetvar.h>
87 #include "virtio_if.h"
88
89 #if defined(INET) || defined(INET6)
90 #include <machine/in_cksum.h>
91 #endif
92
93 #ifdef __NO_STRICT_ALIGNMENT
94 #define VTNET_ETHER_ALIGN 0
95 #else /* Strict alignment */
96 #define VTNET_ETHER_ALIGN ETHER_ALIGN
97 #endif
98
99 static int vtnet_modevent(module_t, int, void *);
100
101 static int vtnet_probe(device_t);
102 static int vtnet_attach(device_t);
103 static int vtnet_detach(device_t);
104 static int vtnet_suspend(device_t);
105 static int vtnet_resume(device_t);
106 static int vtnet_shutdown(device_t);
107 static int vtnet_attach_completed(device_t);
108 static int vtnet_config_change(device_t);
109
110 static int vtnet_negotiate_features(struct vtnet_softc *);
111 static int vtnet_setup_features(struct vtnet_softc *);
112 static int vtnet_init_rxq(struct vtnet_softc *, int);
113 static int vtnet_init_txq(struct vtnet_softc *, int);
114 static int vtnet_alloc_rxtx_queues(struct vtnet_softc *);
115 static void vtnet_free_rxtx_queues(struct vtnet_softc *);
116 static int vtnet_alloc_rx_filters(struct vtnet_softc *);
117 static void vtnet_free_rx_filters(struct vtnet_softc *);
118 static int vtnet_alloc_virtqueues(struct vtnet_softc *);
119 static void vtnet_alloc_interface(struct vtnet_softc *);
120 static int vtnet_setup_interface(struct vtnet_softc *);
121 static int vtnet_ioctl_mtu(struct vtnet_softc *, u_int);
122 static int vtnet_ioctl_ifflags(struct vtnet_softc *);
123 static int vtnet_ioctl_multi(struct vtnet_softc *);
124 static int vtnet_ioctl_ifcap(struct vtnet_softc *, struct ifreq *);
125 static int vtnet_ioctl(if_t, u_long, caddr_t);
126 static uint64_t vtnet_get_counter(if_t, ift_counter);
127
128 static int vtnet_rxq_populate(struct vtnet_rxq *);
129 static void vtnet_rxq_free_mbufs(struct vtnet_rxq *);
130 static struct mbuf *
131 vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
132 static int vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
133 struct mbuf *, int);
134 static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
135 static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
136 static int vtnet_rxq_new_buf(struct vtnet_rxq *);
137 #if defined(INET) || defined(INET6)
138 static int vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
139 bool, int, struct virtio_net_hdr *);
140 static void vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
141 int);
142 static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
143 struct virtio_net_hdr *);
144 #endif
145 static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
146 static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
147 static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
148 static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
149 struct virtio_net_hdr *);
150 static int vtnet_rxq_eof(struct vtnet_rxq *);
151 static void vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries);
152 static void vtnet_rx_vq_intr(void *);
153 static void vtnet_rxq_tq_intr(void *, int);
154
155 static int vtnet_txq_intr_threshold(struct vtnet_txq *);
156 static int vtnet_txq_below_threshold(struct vtnet_txq *);
157 static int vtnet_txq_notify(struct vtnet_txq *);
158 static void vtnet_txq_free_mbufs(struct vtnet_txq *);
159 static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
160 int *, int *, int *);
161 static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
162 int, struct virtio_net_hdr *);
163 static struct mbuf *
164 vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
165 struct virtio_net_hdr *);
166 static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
167 struct vtnet_tx_header *);
168 static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int);
169
170 /* Required for ALTQ */
171 static void vtnet_start_locked(struct vtnet_txq *, if_t);
172 static void vtnet_start(if_t);
173
174 /* Required for MQ */
175 static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
176 static int vtnet_txq_mq_start(if_t, struct mbuf *);
177 static void vtnet_txq_tq_deferred(void *, int);
178 static void vtnet_qflush(if_t);
179
180
181 static void vtnet_txq_start(struct vtnet_txq *);
182 static void vtnet_txq_tq_intr(void *, int);
183 static int vtnet_txq_eof(struct vtnet_txq *);
184 static void vtnet_tx_vq_intr(void *);
185 static void vtnet_tx_start_all(struct vtnet_softc *);
186
187 static int vtnet_watchdog(struct vtnet_txq *);
188 static void vtnet_accum_stats(struct vtnet_softc *,
189 struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
190 static void vtnet_tick(void *);
191
192 static void vtnet_start_taskqueues(struct vtnet_softc *);
193 static void vtnet_free_taskqueues(struct vtnet_softc *);
194 static void vtnet_drain_taskqueues(struct vtnet_softc *);
195
196 static void vtnet_drain_rxtx_queues(struct vtnet_softc *);
197 static void vtnet_stop_rendezvous(struct vtnet_softc *);
198 static void vtnet_stop(struct vtnet_softc *);
199 static int vtnet_virtio_reinit(struct vtnet_softc *);
200 static void vtnet_init_rx_filters(struct vtnet_softc *);
201 static int vtnet_init_rx_queues(struct vtnet_softc *);
202 static int vtnet_init_tx_queues(struct vtnet_softc *);
203 static int vtnet_init_rxtx_queues(struct vtnet_softc *);
204 static void vtnet_set_active_vq_pairs(struct vtnet_softc *);
205 static void vtnet_update_rx_offloads(struct vtnet_softc *);
206 static int vtnet_reinit(struct vtnet_softc *);
207 static void vtnet_init_locked(struct vtnet_softc *, int);
208 static void vtnet_init(void *);
209
210 static void vtnet_free_ctrl_vq(struct vtnet_softc *);
211 static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
212 struct sglist *, int, int);
213 static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
214 static int vtnet_ctrl_guest_offloads(struct vtnet_softc *, uint64_t);
215 static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
216 static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, uint8_t, bool);
217 static int vtnet_set_promisc(struct vtnet_softc *, bool);
218 static int vtnet_set_allmulti(struct vtnet_softc *, bool);
219 static void vtnet_rx_filter(struct vtnet_softc *);
220 static void vtnet_rx_filter_mac(struct vtnet_softc *);
221 static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
222 static void vtnet_rx_filter_vlan(struct vtnet_softc *);
223 static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
224 static void vtnet_register_vlan(void *, if_t, uint16_t);
225 static void vtnet_unregister_vlan(void *, if_t, uint16_t);
226
227 static void vtnet_update_speed_duplex(struct vtnet_softc *);
228 static int vtnet_is_link_up(struct vtnet_softc *);
229 static void vtnet_update_link_status(struct vtnet_softc *);
230 static int vtnet_ifmedia_upd(if_t);
231 static void vtnet_ifmedia_sts(if_t, struct ifmediareq *);
232 static void vtnet_get_macaddr(struct vtnet_softc *);
233 static void vtnet_set_macaddr(struct vtnet_softc *);
234 static void vtnet_attached_set_macaddr(struct vtnet_softc *);
235 static void vtnet_vlan_tag_remove(struct mbuf *);
236 static void vtnet_set_rx_process_limit(struct vtnet_softc *);
237
238 static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
239 struct sysctl_oid_list *, struct vtnet_rxq *);
240 static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
241 struct sysctl_oid_list *, struct vtnet_txq *);
242 static void vtnet_setup_queue_sysctl(struct vtnet_softc *);
243 static void vtnet_load_tunables(struct vtnet_softc *);
244 static void vtnet_setup_sysctl(struct vtnet_softc *);
245
246 static int vtnet_rxq_enable_intr(struct vtnet_rxq *);
247 static void vtnet_rxq_disable_intr(struct vtnet_rxq *);
248 static int vtnet_txq_enable_intr(struct vtnet_txq *);
249 static void vtnet_txq_disable_intr(struct vtnet_txq *);
250 static void vtnet_enable_rx_interrupts(struct vtnet_softc *);
251 static void vtnet_enable_tx_interrupts(struct vtnet_softc *);
252 static void vtnet_enable_interrupts(struct vtnet_softc *);
253 static void vtnet_disable_rx_interrupts(struct vtnet_softc *);
254 static void vtnet_disable_tx_interrupts(struct vtnet_softc *);
255 static void vtnet_disable_interrupts(struct vtnet_softc *);
256
257 static int vtnet_tunable_int(struct vtnet_softc *, const char *, int);
258
259 DEBUGNET_DEFINE(vtnet);
260
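/*
 * Byte order helpers: modern (VIRTIO_F_VERSION_1) devices use little-endian
 * config and header fields, while legacy devices use guest-native byte
 * order, so each access is wrapped based on vtnet_modern().
 */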
261 #define vtnet_htog16(_sc, _val) virtio_htog16(vtnet_modern(_sc), _val)
262 #define vtnet_htog32(_sc, _val) virtio_htog32(vtnet_modern(_sc), _val)
263 #define vtnet_htog64(_sc, _val) virtio_htog64(vtnet_modern(_sc), _val)
264 #define vtnet_gtoh16(_sc, _val) virtio_gtoh16(vtnet_modern(_sc), _val)
265 #define vtnet_gtoh32(_sc, _val) virtio_gtoh32(vtnet_modern(_sc), _val)
266 #define vtnet_gtoh64(_sc, _val) virtio_gtoh64(vtnet_modern(_sc), _val)
267
268 /* Tunables. */
269 static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
270 "VirtIO Net driver parameters");
271
272 static int vtnet_csum_disable = 0;
273 SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
274 &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
275
276 static int vtnet_fixup_needs_csum = 0;
277 SYSCTL_INT(_hw_vtnet, OID_AUTO, fixup_needs_csum, CTLFLAG_RDTUN,
278 &vtnet_fixup_needs_csum, 0,
279 "Calculate valid checksum for NEEDS_CSUM packets");
280
281 static int vtnet_tso_disable = 0;
282 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN,
283 &vtnet_tso_disable, 0, "Disables TSO");
284
285 static int vtnet_lro_disable = 1;
286 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN,
287 &vtnet_lro_disable, 0, "Disables hardware LRO");
288
289 static int vtnet_mq_disable = 0;
290 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN,
291 &vtnet_mq_disable, 0, "Disables multiqueue support");
292
293 static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
294 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
295 &vtnet_mq_max_pairs, 0, "Maximum number of multiqueue pairs");
296
297 static int vtnet_tso_maxlen = IP_MAXPACKET;
298 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
299 &vtnet_tso_maxlen, 0, "TSO burst limit");
300
301 static int vtnet_rx_process_limit = 1024;
302 SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
303 &vtnet_rx_process_limit, 0,
304 "Number of RX segments processed in one pass");
305
306 static int vtnet_lro_entry_count = 128;
307 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
308 &vtnet_lro_entry_count, 0, "Software LRO entry count");
309
310 /* Enable sorted LRO, and the depth of the mbuf queue. */
311 static int vtnet_lro_mbufq_depth = 0;
312 SYSCTL_UINT(_hw_vtnet, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
313 &vtnet_lro_mbufq_depth, 0, "Depth of software LRO mbuf queue");
314
315 /* Deactivate ALTQ Support */
316 static int vtnet_altq_disable = 0;
317 SYSCTL_INT(_hw_vtnet, OID_AUTO, altq_disable, CTLFLAG_RDTUN,
318 &vtnet_altq_disable, 0, "Disables ALTQ Support");
319
320 /*
321 * For the driver to be considered as having ALTQ enabled, it must be
322 * compiled into an ALTQ-capable kernel and the tunable
323 * hw.vtnet.altq_disable must be zero.
324 */
325 #define VTNET_ALTQ_ENABLED (VTNET_ALTQ_CAPABLE && (!vtnet_altq_disable))
326
327
328 static uma_zone_t vtnet_tx_header_zone;
329
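/* Human-readable feature names, registered with virtio_set_feature_desc(). */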
330 static struct virtio_feature_desc vtnet_feature_desc[] = {
331 { VIRTIO_NET_F_CSUM, "TxChecksum" },
332 { VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" },
333 { VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, "CtrlRxOffloads" },
334 { VIRTIO_NET_F_MAC, "MAC" },
335 { VIRTIO_NET_F_GSO, "TxGSO" },
336 { VIRTIO_NET_F_GUEST_TSO4, "RxLROv4" },
337 { VIRTIO_NET_F_GUEST_TSO6, "RxLROv6" },
338 { VIRTIO_NET_F_GUEST_ECN, "RxLROECN" },
339 { VIRTIO_NET_F_GUEST_UFO, "RxUFO" },
340 { VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" },
341 { VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" },
342 { VIRTIO_NET_F_HOST_ECN, "TxTSOECN" },
343 { VIRTIO_NET_F_HOST_UFO, "TxUFO" },
344 { VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" },
345 { VIRTIO_NET_F_STATUS, "Status" },
346 { VIRTIO_NET_F_CTRL_VQ, "CtrlVq" },
347 { VIRTIO_NET_F_CTRL_RX, "CtrlRxMode" },
348 { VIRTIO_NET_F_CTRL_VLAN, "CtrlVLANFilter" },
349 { VIRTIO_NET_F_CTRL_RX_EXTRA, "CtrlRxModeExtra" },
350 { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" },
351 { VIRTIO_NET_F_MQ, "Multiqueue" },
352 { VIRTIO_NET_F_CTRL_MAC_ADDR, "CtrlMacAddr" },
353 { VIRTIO_NET_F_SPEED_DUPLEX, "SpeedDuplex" },
354
355 { 0, NULL }
356 };
357
358 static device_method_t vtnet_methods[] = {
359 /* Device methods. */
360 DEVMETHOD(device_probe, vtnet_probe),
361 DEVMETHOD(device_attach, vtnet_attach),
362 DEVMETHOD(device_detach, vtnet_detach),
363 DEVMETHOD(device_suspend, vtnet_suspend),
364 DEVMETHOD(device_resume, vtnet_resume),
365 DEVMETHOD(device_shutdown, vtnet_shutdown),
366
367 /* VirtIO methods. */
368 DEVMETHOD(virtio_attach_completed, vtnet_attach_completed),
369 DEVMETHOD(virtio_config_change, vtnet_config_change),
370
371 DEVMETHOD_END
372 };
373
374 #ifdef DEV_NETMAP
375 #include <dev/netmap/if_vtnet_netmap.h>
376 #endif
377
378 static driver_t vtnet_driver = {
379 .name = "vtnet",
380 .methods = vtnet_methods,
381 .size = sizeof(struct vtnet_softc)
382 };
383 VIRTIO_DRIVER_MODULE(vtnet, vtnet_driver, vtnet_modevent, NULL);
384 MODULE_VERSION(vtnet, 1);
385 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
386 #ifdef DEV_NETMAP
387 MODULE_DEPEND(vtnet, netmap, 1, 1, 1);
388 #endif
389
390 VIRTIO_SIMPLE_PNPINFO(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter");
391
392 static int
393 vtnet_modevent(module_t mod __unused, int type, void *unused __unused)
394 {
395 int error = 0;
396 static int loaded = 0;
397
398 switch (type) {
399 case MOD_LOAD:
400 if (loaded++ == 0) {
401 vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
402 sizeof(struct vtnet_tx_header),
403 NULL, NULL, NULL, NULL, 0, 0);
404 #ifdef DEBUGNET
405 /*
406 * We need to allocate from this zone in the transmit path, so ensure
407 * that we have at least one item per header available.
408 * XXX: add a separate zone like we do for mbufs? Otherwise we may
409 * allocate buckets.
410 */
411 uma_zone_reserve(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
412 uma_prealloc(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
413 #endif
414 }
415 break;
416 case MOD_QUIESCE:
417 if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
418 error = EBUSY;
419 break;
420 case MOD_UNLOAD:
421 if (--loaded == 0) {
422 uma_zdestroy(vtnet_tx_header_zone);
423 vtnet_tx_header_zone = NULL;
424 }
425 break;
426 case MOD_SHUTDOWN:
427 break;
428 default:
429 error = EOPNOTSUPP;
430 break;
431 }
432
433 return (error);
434 }
435
436 static int
437 vtnet_probe(device_t dev)
438 {
439 return (VIRTIO_SIMPLE_PROBE(dev, vtnet));
440 }
441
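/*
 * Attach: negotiate features, then allocate the Rx filters, queue state,
 * and virtqueues, create the ifnet, and set up interrupts. Any failure
 * unwinds through vtnet_detach().
 */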
442 static int
443 vtnet_attach(device_t dev)
444 {
445 struct vtnet_softc *sc;
446 int error;
447
448 sc = device_get_softc(dev);
449 sc->vtnet_dev = dev;
450 virtio_set_feature_desc(dev, vtnet_feature_desc);
451
452 VTNET_CORE_LOCK_INIT(sc);
453 callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
454 vtnet_load_tunables(sc);
455
456 vtnet_alloc_interface(sc);
457 vtnet_setup_sysctl(sc);
458
459 error = vtnet_setup_features(sc);
460 if (error) {
461 device_printf(dev, "cannot setup features\n");
462 goto fail;
463 }
464
465 error = vtnet_alloc_rx_filters(sc);
466 if (error) {
467 device_printf(dev, "cannot allocate Rx filters\n");
468 goto fail;
469 }
470
471 error = vtnet_alloc_rxtx_queues(sc);
472 if (error) {
473 device_printf(dev, "cannot allocate queues\n");
474 goto fail;
475 }
476
477 error = vtnet_alloc_virtqueues(sc);
478 if (error) {
479 device_printf(dev, "cannot allocate virtqueues\n");
480 goto fail;
481 }
482
483 error = vtnet_setup_interface(sc);
484 if (error) {
485 device_printf(dev, "cannot setup interface\n");
486 goto fail;
487 }
488
489 error = virtio_setup_intr(dev, INTR_TYPE_NET);
490 if (error) {
491 device_printf(dev, "cannot setup interrupts\n");
492 ether_ifdetach(sc->vtnet_ifp);
493 goto fail;
494 }
495
496 #ifdef DEV_NETMAP
497 vtnet_netmap_attach(sc);
498 #endif
499 vtnet_start_taskqueues(sc);
500
501 fail:
502 if (error)
503 vtnet_detach(dev);
504
505 return (error);
506 }
507
508 static int
509 vtnet_detach(device_t dev)
510 {
511 struct vtnet_softc *sc;
512 if_t ifp;
513
514 sc = device_get_softc(dev);
515 ifp = sc->vtnet_ifp;
516
517 if (device_is_attached(dev)) {
518 VTNET_CORE_LOCK(sc);
519 vtnet_stop(sc);
520 VTNET_CORE_UNLOCK(sc);
521
522 callout_drain(&sc->vtnet_tick_ch);
523 vtnet_drain_taskqueues(sc);
524
525 ether_ifdetach(ifp);
526 }
527
528 #ifdef DEV_NETMAP
529 netmap_detach(ifp);
530 #endif
531
532 if (sc->vtnet_pfil != NULL) {
533 pfil_head_unregister(sc->vtnet_pfil);
534 sc->vtnet_pfil = NULL;
535 }
536
537 vtnet_free_taskqueues(sc);
538
539 if (sc->vtnet_vlan_attach != NULL) {
540 EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
541 sc->vtnet_vlan_attach = NULL;
542 }
543 if (sc->vtnet_vlan_detach != NULL) {
544 EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
545 sc->vtnet_vlan_detach = NULL;
546 }
547
548 ifmedia_removeall(&sc->vtnet_media);
549
550 if (ifp != NULL) {
551 if_free(ifp);
552 sc->vtnet_ifp = NULL;
553 }
554
555 vtnet_free_rxtx_queues(sc);
556 vtnet_free_rx_filters(sc);
557
558 if (sc->vtnet_ctrl_vq != NULL)
559 vtnet_free_ctrl_vq(sc);
560
561 VTNET_CORE_LOCK_DESTROY(sc);
562
563 return (0);
564 }
565
566 static int
567 vtnet_suspend(device_t dev)
568 {
569 struct vtnet_softc *sc;
570
571 sc = device_get_softc(dev);
572
573 VTNET_CORE_LOCK(sc);
574 vtnet_stop(sc);
575 sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
576 VTNET_CORE_UNLOCK(sc);
577
578 return (0);
579 }
580
581 static int
582 vtnet_resume(device_t dev)
583 {
584 struct vtnet_softc *sc;
585 if_t ifp;
586
587 sc = device_get_softc(dev);
588 ifp = sc->vtnet_ifp;
589
590 VTNET_CORE_LOCK(sc);
591 if (if_getflags(ifp) & IFF_UP)
592 vtnet_init_locked(sc, 0);
593 sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
594 VTNET_CORE_UNLOCK(sc);
595
596 return (0);
597 }
598
599 static int
600 vtnet_shutdown(device_t dev)
601 {
602 /*
603 * Suspend already does all of what we need to
604 * do here; we just never expect to be resumed.
605 */
606 return (vtnet_suspend(dev));
607 }
608
609 static int
610 vtnet_attach_completed(device_t dev)
611 {
612 struct vtnet_softc *sc;
613
614 sc = device_get_softc(dev);
615
616 VTNET_CORE_LOCK(sc);
617 vtnet_attached_set_macaddr(sc);
618 VTNET_CORE_UNLOCK(sc);
619
620 return (0);
621 }
622
623 static int
624 vtnet_config_change(device_t dev)
625 {
626 struct vtnet_softc *sc;
627
628 sc = device_get_softc(dev);
629
630 VTNET_CORE_LOCK(sc);
631 vtnet_update_link_status(sc);
632 if (sc->vtnet_link_active != 0)
633 vtnet_tx_start_all(sc);
634 VTNET_CORE_UNLOCK(sc);
635
636 return (0);
637 }
638
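/*
 * Negotiate the feature set with the host: start from the modern or legacy
 * feature mask, strip features disabled by tunables, and back off the MTU,
 * multiqueue, and LRO features when the device configuration cannot
 * support them.
 */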
639 static int
640 vtnet_negotiate_features(struct vtnet_softc *sc)
641 {
642 device_t dev;
643 uint64_t features, negotiated_features;
644 int no_csum;
645
646 dev = sc->vtnet_dev;
647 features = virtio_bus_is_modern(dev) ? VTNET_MODERN_FEATURES :
648 VTNET_LEGACY_FEATURES;
649
650 /*
651 * TSO and LRO are only available when their corresponding checksum
652 * offload feature is also negotiated.
653 */
654 no_csum = vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable);
655 if (no_csum)
656 features &= ~(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM);
657 if (no_csum || vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
658 features &= ~VTNET_TSO_FEATURES;
659 if (no_csum || vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
660 features &= ~VTNET_LRO_FEATURES;
661
662 /* Disable the MQ feature if the driver has ALTQ enabled or MQ is explicitly disabled. */
663 if (VTNET_ALTQ_ENABLED || vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
664 features &= ~VIRTIO_NET_F_MQ;
665
666 negotiated_features = virtio_negotiate_features(dev, features);
667
668 if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
669 uint16_t mtu;
670
671 mtu = virtio_read_dev_config_2(dev,
672 offsetof(struct virtio_net_config, mtu));
673 if (mtu < VTNET_MIN_MTU) {
674 device_printf(dev, "Invalid MTU value: %d. "
675 "MTU feature disabled.\n", mtu);
676 features &= ~VIRTIO_NET_F_MTU;
677 negotiated_features =
678 virtio_negotiate_features(dev, features);
679 }
680 }
681
682 if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
683 uint16_t npairs;
684
685 npairs = virtio_read_dev_config_2(dev,
686 offsetof(struct virtio_net_config, max_virtqueue_pairs));
687 if (npairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
688 npairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) {
689 device_printf(dev, "Invalid max_virtqueue_pairs value: "
690 "%d. Multiqueue feature disabled.\n", npairs);
691 features &= ~VIRTIO_NET_F_MQ;
692 negotiated_features =
693 virtio_negotiate_features(dev, features);
694 }
695 }
696
697 if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
698 virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
699 /*
700 * LRO without mergeable buffers requires special care. This
701 * is not ideal because every receive buffer must be large
702 * enough to hold the maximum TCP packet, the Ethernet header,
703 * and the virtio-net header. This requires up to 34 descriptors with
704 * MCLBYTES clusters. If we do not have indirect descriptors,
705 * LRO is disabled since the virtqueue will not contain very
706 * many receive buffers.
707 */
708 if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
709 device_printf(dev,
710 "Host LRO disabled since both mergeable buffers "
711 "and indirect descriptors were not negotiated\n");
712 features &= ~VTNET_LRO_FEATURES;
713 negotiated_features =
714 virtio_negotiate_features(dev, features);
715 } else
716 sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
717 }
718
719 sc->vtnet_features = negotiated_features;
720 sc->vtnet_negotiated_features = negotiated_features;
721
722 return (virtio_finalize_features(dev));
723 }
724
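/*
 * Derive driver state from the negotiated features: header size, the
 * number of Rx/Tx segments per buffer, and the number of virtqueue pairs
 * to request.
 */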
725 static int
726 vtnet_setup_features(struct vtnet_softc *sc)
727 {
728 device_t dev;
729 int error;
730
731 dev = sc->vtnet_dev;
732
733 error = vtnet_negotiate_features(sc);
734 if (error)
735 return (error);
736
737 if (virtio_with_feature(dev, VIRTIO_F_VERSION_1))
738 sc->vtnet_flags |= VTNET_FLAG_MODERN;
739 if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
740 sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
741 if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
742 sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
743
744 if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
745 /* This feature should always be negotiated. */
746 sc->vtnet_flags |= VTNET_FLAG_MAC;
747 }
748
749 if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
750 sc->vtnet_max_mtu = virtio_read_dev_config_2(dev,
751 offsetof(struct virtio_net_config, mtu));
752 } else
753 sc->vtnet_max_mtu = VTNET_MAX_MTU;
754
755 if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
756 sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
757 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
758 } else if (vtnet_modern(sc)) {
759 /* This is identical to the mergeable header. */
760 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1);
761 } else
762 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
763
764 if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
765 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE;
766 else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
767 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG;
768 else
769 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE;
770
771 /*
772 * Favor "hardware" LRO if negotiated, but support software LRO as
773 * a fallback; there is usually little benefit (or worse) with both.
774 */
775 if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) == 0 &&
776 virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) == 0)
777 sc->vtnet_flags |= VTNET_FLAG_SW_LRO;
778
779 if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
780 virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
781 virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
782 sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX;
783 else
784 sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN;
785
786 sc->vtnet_req_vq_pairs = 1;
787 sc->vtnet_max_vq_pairs = 1;
788
789 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
790 sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
791
792 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
793 sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
794 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
795 sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
796 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
797 sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
798
799 if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
800 sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
801 offsetof(struct virtio_net_config,
802 max_virtqueue_pairs));
803 }
804 }
805
806 if (sc->vtnet_max_vq_pairs > 1) {
807 int req;
808
809 /*
810 * Limit the maximum number of requested queue pairs to the
811 * number of CPUs and the configured maximum.
812 */
813 req = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
814 if (req < 0)
815 req = 1;
816 if (req == 0)
817 req = mp_ncpus;
818 if (req > sc->vtnet_max_vq_pairs)
819 req = sc->vtnet_max_vq_pairs;
820 if (req > mp_ncpus)
821 req = mp_ncpus;
822 if (req > 1) {
823 sc->vtnet_req_vq_pairs = req;
824 sc->vtnet_flags |= VTNET_FLAG_MQ;
825 }
826 }
827
828 return (0);
829 }
830
831 static int
832 vtnet_init_rxq(struct vtnet_softc *sc, int id)
833 {
834 struct vtnet_rxq *rxq;
835
836 rxq = &sc->vtnet_rxqs[id];
837
838 snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
839 device_get_nameunit(sc->vtnet_dev), id);
840 mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
841
842 rxq->vtnrx_sc = sc;
843 rxq->vtnrx_id = id;
844
845 rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
846 if (rxq->vtnrx_sg == NULL)
847 return (ENOMEM);
848
849 #if defined(INET) || defined(INET6)
850 if (vtnet_software_lro(sc)) {
851 if (tcp_lro_init_args(&rxq->vtnrx_lro, sc->vtnet_ifp,
852 sc->vtnet_lro_entry_count, sc->vtnet_lro_mbufq_depth) != 0)
853 return (ENOMEM);
854 }
855 #endif
856
857 NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
858 rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
859 taskqueue_thread_enqueue, &rxq->vtnrx_tq);
860
861 return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
862 }
863
864 static int
865 vtnet_init_txq(struct vtnet_softc *sc, int id)
866 {
867 struct vtnet_txq *txq;
868
869 txq = &sc->vtnet_txqs[id];
870
871 snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
872 device_get_nameunit(sc->vtnet_dev), id);
873 mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
874
875 txq->vtntx_sc = sc;
876 txq->vtntx_id = id;
877
878 txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
879 if (txq->vtntx_sg == NULL)
880 return (ENOMEM);
881
882 if (!VTNET_ALTQ_ENABLED) {
883 txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
884 M_NOWAIT, &txq->vtntx_mtx);
885 if (txq->vtntx_br == NULL)
886 return (ENOMEM);
887
888 TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
889 }
890 TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
891 txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
892 taskqueue_thread_enqueue, &txq->vtntx_tq);
893 if (txq->vtntx_tq == NULL)
894 return (ENOMEM);
895
896 return (0);
897 }
898
899 static int
900 vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
901 {
902 int i, npairs, error;
903
904 npairs = sc->vtnet_max_vq_pairs;
905
906 sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
907 M_NOWAIT | M_ZERO);
908 sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
909 M_NOWAIT | M_ZERO);
910 if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
911 return (ENOMEM);
912
913 for (i = 0; i < npairs; i++) {
914 error = vtnet_init_rxq(sc, i);
915 if (error)
916 return (error);
917 error = vtnet_init_txq(sc, i);
918 if (error)
919 return (error);
920 }
921
922 vtnet_set_rx_process_limit(sc);
923 vtnet_setup_queue_sysctl(sc);
924
925 return (0);
926 }
927
928 static void
929 vtnet_destroy_rxq(struct vtnet_rxq *rxq)
930 {
931
932 rxq->vtnrx_sc = NULL;
933 rxq->vtnrx_id = -1;
934
935 #if defined(INET) || defined(INET6)
936 tcp_lro_free(&rxq->vtnrx_lro);
937 #endif
938
939 if (rxq->vtnrx_sg != NULL) {
940 sglist_free(rxq->vtnrx_sg);
941 rxq->vtnrx_sg = NULL;
942 }
943
944 if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
945 mtx_destroy(&rxq->vtnrx_mtx);
946 }
947
948 static void
949 vtnet_destroy_txq(struct vtnet_txq *txq)
950 {
951
952 txq->vtntx_sc = NULL;
953 txq->vtntx_id = -1;
954
955 if (txq->vtntx_sg != NULL) {
956 sglist_free(txq->vtntx_sg);
957 txq->vtntx_sg = NULL;
958 }
959
960 if (!VTNET_ALTQ_ENABLED) {
961 if (txq->vtntx_br != NULL) {
962 buf_ring_free(txq->vtntx_br, M_DEVBUF);
963 txq->vtntx_br = NULL;
964 }
965 }
966
967 if (mtx_initialized(&txq->vtntx_mtx) != 0)
968 mtx_destroy(&txq->vtntx_mtx);
969 }
970
971 static void
972 vtnet_free_rxtx_queues(struct vtnet_softc *sc)
973 {
974 int i;
975
976 if (sc->vtnet_rxqs != NULL) {
977 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
978 vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
979 free(sc->vtnet_rxqs, M_DEVBUF);
980 sc->vtnet_rxqs = NULL;
981 }
982
983 if (sc->vtnet_txqs != NULL) {
984 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
985 vtnet_destroy_txq(&sc->vtnet_txqs[i]);
986 free(sc->vtnet_txqs, M_DEVBUF);
987 sc->vtnet_txqs = NULL;
988 }
989 }
990
991 static int
992 vtnet_alloc_rx_filters(struct vtnet_softc *sc)
993 {
994
995 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
996 sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
997 M_DEVBUF, M_NOWAIT | M_ZERO);
998 if (sc->vtnet_mac_filter == NULL)
999 return (ENOMEM);
1000 }
1001
1002 if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
1003 sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
1004 VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
1005 if (sc->vtnet_vlan_filter == NULL)
1006 return (ENOMEM);
1007 }
1008
1009 return (0);
1010 }
1011
1012 static void
1013 vtnet_free_rx_filters(struct vtnet_softc *sc)
1014 {
1015
1016 if (sc->vtnet_mac_filter != NULL) {
1017 free(sc->vtnet_mac_filter, M_DEVBUF);
1018 sc->vtnet_mac_filter = NULL;
1019 }
1020
1021 if (sc->vtnet_vlan_filter != NULL) {
1022 free(sc->vtnet_vlan_filter, M_DEVBUF);
1023 sc->vtnet_vlan_filter = NULL;
1024 }
1025 }
1026
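/*
 * Allocate the virtqueues in device order: receiveq0/transmitq0 through
 * the last supported pair, followed by the control virtqueue when
 * VIRTIO_NET_F_CTRL_VQ was negotiated. Pairs beyond the requested count
 * are still allocated, but with no interrupt handler or segment limit.
 */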
1027 static int
1028 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
1029 {
1030 device_t dev;
1031 struct vq_alloc_info *info;
1032 struct vtnet_rxq *rxq;
1033 struct vtnet_txq *txq;
1034 int i, idx, nvqs, error;
1035
1036 dev = sc->vtnet_dev;
1037
1038 nvqs = sc->vtnet_max_vq_pairs * 2;
1039 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
1040 nvqs++;
1041
1042 info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
1043 if (info == NULL)
1044 return (ENOMEM);
1045
1046 for (i = 0, idx = 0; i < sc->vtnet_req_vq_pairs; i++, idx += 2) {
1047 rxq = &sc->vtnet_rxqs[i];
1048 VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
1049 vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
1050 "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
1051
1052 txq = &sc->vtnet_txqs[i];
1053 VQ_ALLOC_INFO_INIT(&info[idx + 1], sc->vtnet_tx_nsegs,
1054 vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
1055 "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
1056 }
1057
1058 /* These queues will not be used so allocate the minimum resources. */
1059 for (; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
1060 rxq = &sc->vtnet_rxqs[i];
1061 VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, rxq, &rxq->vtnrx_vq,
1062 "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
1063
1064 txq = &sc->vtnet_txqs[i];
1065 VQ_ALLOC_INFO_INIT(&info[idx + 1], 0, NULL, txq, &txq->vtntx_vq,
1066 "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
1067 }
1068
1069 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
1070 VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
1071 &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
1072 }
1073
1074 error = virtio_alloc_virtqueues(dev, nvqs, info);
1075 free(info, M_TEMP);
1076
1077 return (error);
1078 }
1079
1080 static void
1081 vtnet_alloc_interface(struct vtnet_softc *sc)
1082 {
1083 device_t dev;
1084 if_t ifp;
1085
1086 dev = sc->vtnet_dev;
1087
1088 ifp = if_alloc(IFT_ETHER);
1089 sc->vtnet_ifp = ifp;
1090 if_setsoftc(ifp, sc);
1091 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1092 }
1093
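/*
 * Configure the ifnet: choose the ALTQ or multiqueue transmit path,
 * advertise the offload capabilities implied by the negotiated features,
 * and attach the Ethernet interface and pfil hook.
 */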
1094 static int
1095 vtnet_setup_interface(struct vtnet_softc *sc)
1096 {
1097 device_t dev;
1098 struct pfil_head_args pa;
1099 if_t ifp;
1100
1101 dev = sc->vtnet_dev;
1102 ifp = sc->vtnet_ifp;
1103
1104 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
1105 if_setbaudrate(ifp, IF_Gbps(10));
1106 if_setinitfn(ifp, vtnet_init);
1107 if_setioctlfn(ifp, vtnet_ioctl);
1108 if_setgetcounterfn(ifp, vtnet_get_counter);
1109
1110 if (!VTNET_ALTQ_ENABLED) {
1111 if_settransmitfn(ifp, vtnet_txq_mq_start);
1112 if_setqflushfn(ifp, vtnet_qflush);
1113 } else {
1114 struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
1115 if_setstartfn(ifp, vtnet_start);
1116 if_setsendqlen(ifp, virtqueue_size(vq) - 1);
1117 if_setsendqready(ifp);
1118 }
1119
1120 vtnet_get_macaddr(sc);
1121
1122 if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
1123 if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0);
1124
1125 ifmedia_init(&sc->vtnet_media, 0, vtnet_ifmedia_upd, vtnet_ifmedia_sts);
1126 ifmedia_add(&sc->vtnet_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1127 ifmedia_set(&sc->vtnet_media, IFM_ETHER | IFM_AUTO);
1128
1129 if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
1130 int gso;
1131
1132 if_setcapabilitiesbit(ifp, IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6, 0);
1133
1134 gso = virtio_with_feature(dev, VIRTIO_NET_F_GSO);
1135 if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
1136 if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
1137 if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
1138 if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
1139 if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
1140 sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
1141
1142 if (if_getcapabilities(ifp) & (IFCAP_TSO4 | IFCAP_TSO6)) {
1143 int tso_maxlen;
1144
1145 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTSO, 0);
1146
1147 tso_maxlen = vtnet_tunable_int(sc, "tso_maxlen",
1148 vtnet_tso_maxlen);
1149 if_sethwtsomax(ifp, tso_maxlen -
1150 (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
1151 if_sethwtsomaxsegcount(ifp, sc->vtnet_tx_nsegs - 1);
1152 if_sethwtsomaxsegsize(ifp, PAGE_SIZE);
1153 }
1154 }
1155
1156 if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
1157 /* BMV: Rx checksums not distinguished between IPv4 and IPv6. */
1158 if_setcapabilitiesbit(ifp, IFCAP_RXCSUM, 0);
1159 if_setcapabilitiesbit(ifp, IFCAP_RXCSUM_IPV6, 0);
1160
1161 if (vtnet_tunable_int(sc, "fixup_needs_csum",
1162 vtnet_fixup_needs_csum) != 0)
1163 sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM;
1164
1165 /* Support either "hardware" or software LRO. */
1166 if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
1167 }
1168
1169 if (if_getcapabilities(ifp) & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6)) {
1170 /*
1171 * VirtIO does not support VLAN tagging, but we can fake
1172 * it by inserting and removing the 802.1Q header during
1173 * transmit and receive. We are then able to do checksum
1174 * offloading of VLAN frames.
1175 */
1176 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM, 0);
1177 }
1178
1179 if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO)
1180 if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
1181 if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
1182 if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0);
1183
1184 /*
1185 * Capabilities after here are not enabled by default.
1186 */
1187 if_setcapenable(ifp, if_getcapabilities(ifp));
1188
1189 if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
1190 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
1191
1192 sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1193 vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1194 sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
1195 vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1196 }
1197
1198 ether_ifattach(ifp, sc->vtnet_hwaddr);
1199
1200 /* Tell the upper layer(s) we support long frames. */
1201 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
1202
1203 DEBUGNET_SET(ifp, vtnet);
1204
1205 pa.pa_version = PFIL_VERSION;
1206 pa.pa_flags = PFIL_IN;
1207 pa.pa_type = PFIL_TYPE_ETHERNET;
1208 pa.pa_headname = if_name(ifp);
1209 sc->vtnet_pfil = pfil_head_register(&pa);
1210
1211 return (0);
1212 }
1213
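/*
 * Select the receive mbuf cluster size: a page-sized cluster with
 * mergeable buffers, MCLBYTES chains for LRO without mergeable buffers,
 * and otherwise a size scaled from the MTU plus the prepended headers.
 */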
1214 static int
1215 vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu)
1216 {
1217 int framesz;
1218
1219 if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
1220 return (MJUMPAGESIZE);
1221 else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1222 return (MCLBYTES);
1223
1224 /*
1225 * Try to scale the receive mbuf cluster size from the MTU. We
1226 * could also use the VQ size to influence the selected size,
1227 * but that would only matter for very small queues.
1228 */
1229 if (vtnet_modern(sc)) {
1230 MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1));
1231 framesz = sizeof(struct virtio_net_hdr_v1);
1232 } else
1233 framesz = sizeof(struct vtnet_rx_header);
1234 framesz += sizeof(struct ether_vlan_header) + mtu;
1235 /*
1236 * Account for the alignment offset applied elsewhere so we allocate
1237 * the right size for the MTU.
1238 */
1239 if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) {
1240 framesz += VTNET_ETHER_ALIGN;
1241 }
1242
1243 if (framesz <= MCLBYTES)
1244 return (MCLBYTES);
1245 else if (framesz <= MJUMPAGESIZE)
1246 return (MJUMPAGESIZE);
1247 else if (framesz <= MJUM9BYTES)
1248 return (MJUM9BYTES);
1249
1250 /* Sane default; avoid 16KB clusters. */
1251 return (MCLBYTES);
1252 }
1253
1254 static int
1255 vtnet_ioctl_mtu(struct vtnet_softc *sc, u_int mtu)
1256 {
1257 if_t ifp;
1258 int clustersz;
1259
1260 ifp = sc->vtnet_ifp;
1261 VTNET_CORE_LOCK_ASSERT(sc);
1262
1263 if (if_getmtu(ifp) == mtu)
1264 return (0);
1265 else if (mtu < ETHERMIN || mtu > sc->vtnet_max_mtu)
1266 return (EINVAL);
1267
1268 if_setmtu(ifp, mtu);
1269 clustersz = vtnet_rx_cluster_size(sc, mtu);
1270
1271 if (clustersz != sc->vtnet_rx_clustersz &&
1272 if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1273 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1274 vtnet_init_locked(sc, 0);
1275 }
1276
1277 return (0);
1278 }
1279
1280 static int
1281 vtnet_ioctl_ifflags(struct vtnet_softc *sc)
1282 {
1283 if_t ifp;
1284 int drv_running;
1285
1286 ifp = sc->vtnet_ifp;
1287 drv_running = (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0;
1288
1289 VTNET_CORE_LOCK_ASSERT(sc);
1290
1291 if ((if_getflags(ifp) & IFF_UP) == 0) {
1292 if (drv_running)
1293 vtnet_stop(sc);
1294 goto out;
1295 }
1296
1297 if (!drv_running) {
1298 vtnet_init_locked(sc, 0);
1299 goto out;
1300 }
1301
1302 if ((if_getflags(ifp) ^ sc->vtnet_if_flags) &
1303 (IFF_PROMISC | IFF_ALLMULTI)) {
1304 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
1305 vtnet_rx_filter(sc);
1306 else {
1307 /*
1308 * We don't support filtering out multicast, so
1309 * ALLMULTI is always set.
1310 */
1311 if_setflagbits(ifp, IFF_ALLMULTI, 0);
1312 if_setflagbits(ifp, IFF_PROMISC, 0);
1313 }
1314 }
1315
1316 out:
1317 sc->vtnet_if_flags = if_getflags(ifp);
1318 return (0);
1319 }
1320
1321 static int
1322 vtnet_ioctl_multi(struct vtnet_softc *sc)
1323 {
1324 if_t ifp;
1325
1326 ifp = sc->vtnet_ifp;
1327
1328 VTNET_CORE_LOCK_ASSERT(sc);
1329
1330 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX &&
1331 if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1332 vtnet_rx_filter_mac(sc);
1333
1334 return (0);
1335 }
1336
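/*
 * SIOCSIFCAP handler: toggle offload capabilities while enforcing their
 * dependencies (TSO requires the matching Tx checksum offload, LRO
 * requires Rx checksum offload), reinitializing or updating the guest
 * offloads only when the Rx features actually change.
 */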
1337 static int
1338 vtnet_ioctl_ifcap(struct vtnet_softc *sc, struct ifreq *ifr)
1339 {
1340 if_t ifp;
1341 int mask, reinit, update;
1342
1343 ifp = sc->vtnet_ifp;
1344 mask = (ifr->ifr_reqcap & if_getcapabilities(ifp)) ^ if_getcapenable(ifp);
1345 reinit = update = 0;
1346
1347 VTNET_CORE_LOCK_ASSERT(sc);
1348
1349 if (mask & IFCAP_TXCSUM) {
1350 if (if_getcapenable(ifp) & IFCAP_TXCSUM &&
1351 if_getcapenable(ifp) & IFCAP_TSO4) {
1352 /* Disable tso4, because txcsum will be disabled. */
1353 if_setcapenablebit(ifp, 0, IFCAP_TSO4);
1354 if_sethwassistbits(ifp, 0, CSUM_IP_TSO);
1355 mask &= ~IFCAP_TSO4;
1356 }
1357 if_togglecapenable(ifp, IFCAP_TXCSUM);
1358 if_togglehwassist(ifp, VTNET_CSUM_OFFLOAD);
1359 }
1360 if (mask & IFCAP_TXCSUM_IPV6) {
1361 if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6 &&
1362 if_getcapenable(ifp) & IFCAP_TSO6) {
1363 /* Disable tso6, because txcsum6 will be disabled. */
1364 if_setcapenablebit(ifp, 0, IFCAP_TSO6);
1365 if_sethwassistbits(ifp, 0, CSUM_IP6_TSO);
1366 mask &= ~IFCAP_TSO6;
1367 }
1368 if_togglecapenable(ifp, IFCAP_TXCSUM_IPV6);
1369 if_togglehwassist(ifp, VTNET_CSUM_OFFLOAD_IPV6);
1370 }
1371 if (mask & IFCAP_TSO4) {
1372 if (if_getcapenable(ifp) & (IFCAP_TXCSUM | IFCAP_TSO4)) {
1373 /* tso4 can only be enabled, if txcsum is enabled. */
1374 if_togglecapenable(ifp, IFCAP_TSO4);
1375 if_togglehwassist(ifp, CSUM_IP_TSO);
1376 }
1377 }
1378 if (mask & IFCAP_TSO6) {
1379 if (if_getcapenable(ifp) & (IFCAP_TXCSUM_IPV6 | IFCAP_TSO6)) {
1380 /* tso6 can only be enabled, if txcsum6 is enabled. */
1381 if_togglecapenable(ifp, IFCAP_TSO6);
1382 if_togglehwassist(ifp, CSUM_IP6_TSO);
1383 }
1384 }
1385
1386 if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) {
1387 /*
1388 * These Rx features require the negotiated features to
1389 * be updated. Avoid a full reinit if possible.
1390 */
1391 if (sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
1392 update = 1;
1393 else
1394 reinit = 1;
1395
1396 /* BMV: Avoid needless renegotiation for just software LRO. */
1397 if ((mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) ==
1398 IFCAP_LRO && vtnet_software_lro(sc))
1399 reinit = update = 0;
1400 /*
1401 * VirtIO does not distinguish between receive checksum offload
1402 * for IPv4 and IPv6 packets, so treat them as a pair.
1403 */
1404 if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
1405 if_togglecapenable(ifp, IFCAP_RXCSUM);
1406 if_togglecapenable(ifp, IFCAP_RXCSUM_IPV6);
1407 }
1408 if (mask & IFCAP_LRO)
1409 if_togglecapenable(ifp, IFCAP_LRO);
1410 /* Both SW and HW TCP LRO require receive checksum offload. */
1411 if ((if_getcapenable(ifp) &
1412 (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0)
1413 if_setcapenablebit(ifp, 0, IFCAP_LRO);
1414 }
1415
1416 if (mask & IFCAP_VLAN_HWFILTER) {
1417 /* These Rx features require renegotiation. */
1418 reinit = 1;
1419
1420 if (mask & IFCAP_VLAN_HWFILTER)
1421 if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1422 }
1423
1424 if (mask & IFCAP_VLAN_HWTSO)
1425 if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1426 if (mask & IFCAP_VLAN_HWTAGGING)
1427 if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
1428
1429 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1430 if (reinit) {
1431 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1432 vtnet_init_locked(sc, 0);
1433 } else if (update)
1434 vtnet_update_rx_offloads(sc);
1435 }
1436
1437 return (0);
1438 }
1439
1440 static int
1441 vtnet_ioctl(if_t ifp, u_long cmd, caddr_t data)
1442 {
1443 struct vtnet_softc *sc;
1444 struct ifreq *ifr;
1445 int error;
1446
1447 sc = if_getsoftc(ifp);
1448 ifr = (struct ifreq *) data;
1449 error = 0;
1450
1451 switch (cmd) {
1452 case SIOCSIFMTU:
1453 VTNET_CORE_LOCK(sc);
1454 error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu);
1455 VTNET_CORE_UNLOCK(sc);
1456 break;
1457
1458 case SIOCSIFFLAGS:
1459 VTNET_CORE_LOCK(sc);
1460 error = vtnet_ioctl_ifflags(sc);
1461 VTNET_CORE_UNLOCK(sc);
1462 break;
1463
1464 case SIOCADDMULTI:
1465 case SIOCDELMULTI:
1466 VTNET_CORE_LOCK(sc);
1467 error = vtnet_ioctl_multi(sc);
1468 VTNET_CORE_UNLOCK(sc);
1469 break;
1470
1471 case SIOCSIFMEDIA:
1472 case SIOCGIFMEDIA:
1473 error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1474 break;
1475
1476 case SIOCSIFCAP:
1477 VTNET_CORE_LOCK(sc);
1478 error = vtnet_ioctl_ifcap(sc, ifr);
1479 VTNET_CORE_UNLOCK(sc);
1480 VLAN_CAPABILITIES(ifp);
1481 break;
1482
1483 default:
1484 error = ether_ioctl(ifp, cmd, data);
1485 break;
1486 }
1487
1488 VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
1489
1490 return (error);
1491 }
1492
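/*
 * Fill the receive virtqueue with buffers until it is full. Running out
 * of space for the last buffer (EMSGSIZE) is not treated as an error.
 */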
1493 static int
1494 vtnet_rxq_populate(struct vtnet_rxq *rxq)
1495 {
1496 struct virtqueue *vq;
1497 int nbufs, error;
1498
1499 #ifdef DEV_NETMAP
1500 error = vtnet_netmap_rxq_populate(rxq);
1501 if (error >= 0)
1502 return (error);
1503 #endif /* DEV_NETMAP */
1504
1505 vq = rxq->vtnrx_vq;
1506 error = ENOSPC;
1507
1508 for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1509 error = vtnet_rxq_new_buf(rxq);
1510 if (error)
1511 break;
1512 }
1513
1514 if (nbufs > 0) {
1515 virtqueue_notify(vq);
1516 /*
1517 * EMSGSIZE signifies the virtqueue did not have enough
1518 * entries available to hold the last mbuf. This is not
1519 * an error.
1520 */
1521 if (error == EMSGSIZE)
1522 error = 0;
1523 }
1524
1525 return (error);
1526 }
1527
1528 static void
1529 vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1530 {
1531 struct virtqueue *vq;
1532 struct mbuf *m;
1533 int last;
1534 #ifdef DEV_NETMAP
1535 struct netmap_kring *kring = netmap_kring_on(NA(rxq->vtnrx_sc->vtnet_ifp),
1536 rxq->vtnrx_id, NR_RX);
1537 #else /* !DEV_NETMAP */
1538 void *kring = NULL;
1539 #endif /* !DEV_NETMAP */
1540
1541 vq = rxq->vtnrx_vq;
1542 last = 0;
1543
1544 while ((m = virtqueue_drain(vq, &last)) != NULL) {
1545 if (kring == NULL)
1546 m_freem(m);
1547 }
1548
1549 KASSERT(virtqueue_empty(vq),
1550 ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1551 }
1552
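/*
 * Allocate a receive mbuf cluster, or a chain of nbufs clusters when LRO
 * without mergeable buffers is negotiated, offset for alignment when the
 * header size would otherwise misalign the Ethernet payload.
 */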
1553 static struct mbuf *
1554 vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1555 {
1556 struct mbuf *m_head, *m_tail, *m;
1557 int i, size;
1558
1559 m_head = NULL;
1560 size = sc->vtnet_rx_clustersz;
1561
1562 KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1563 ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs));
1564
1565 for (i = 0; i < nbufs; i++) {
1566 m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size);
1567 if (m == NULL) {
1568 sc->vtnet_stats.mbuf_alloc_failed++;
1569 m_freem(m_head);
1570 return (NULL);
1571 }
1572
1573 m->m_len = size;
1574 /*
1575 * Need to offset the mbuf if the header we're going to add
1576 * will misalign.
1577 */
1578 if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) {
1579 m_adj(m, VTNET_ETHER_ALIGN);
1580 }
1581 if (m_head != NULL) {
1582 m_tail->m_next = m;
1583 m_tail = m;
1584 } else
1585 m_head = m_tail = m;
1586 }
1587
1588 if (m_tailp != NULL)
1589 *m_tailp = m_tail;
1590
1591 return (m_head);
1592 }
1593
1594 /*
1595 * Slow path for when LRO without mergeable buffers is negotiated.
1596 */
1597 static int
1598 vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
1599 int len0)
1600 {
1601 struct vtnet_softc *sc;
1602 struct mbuf *m, *m_prev, *m_new, *m_tail;
1603 int len, clustersz, nreplace, error;
1604
1605 sc = rxq->vtnrx_sc;
1606 clustersz = sc->vtnet_rx_clustersz;
1607 /*
1608 * Need to offset the mbuf if the header we're going to add will
1609 * misalign, account for that here.
1610 */
1611 if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0)
1612 clustersz -= VTNET_ETHER_ALIGN;
1613
1614 m_prev = NULL;
1615 m_tail = NULL;
1616 nreplace = 0;
1617
1618 m = m0;
1619 len = len0;
1620
1621 /*
1622 * Since these mbuf chains are so large, avoid allocating a complete
1623 * replacement when the received frame did not consume the entire
1624 * chain. Unused mbufs are moved to the tail of the replacement mbuf.
1625 */
1626 while (len > 0) {
1627 if (m == NULL) {
1628 sc->vtnet_stats.rx_frame_too_large++;
1629 return (EMSGSIZE);
1630 }
1631
1632 /*
1633 * Every mbuf should have the expected cluster size since that
1634 * is also used to allocate the replacements.
1635 */
1636 KASSERT(m->m_len == clustersz,
1637 ("%s: mbuf size %d not expected cluster size %d", __func__,
1638 m->m_len, clustersz));
1639
1640 m->m_len = MIN(m->m_len, len);
1641 len -= m->m_len;
1642
1643 m_prev = m;
1644 m = m->m_next;
1645 nreplace++;
1646 }
1647
1648 KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
1649 ("%s: invalid replacement mbuf count %d max %d", __func__,
1650 nreplace, sc->vtnet_rx_nmbufs));
1651
1652 m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
1653 if (m_new == NULL) {
1654 m_prev->m_len = clustersz;
1655 return (ENOBUFS);
1656 }
1657
1658 /*
1659 * Move any unused mbufs from the received mbuf chain onto the
1660 * end of the replacement chain.
1661 */
1662 if (m_prev->m_next != NULL) {
1663 m_tail->m_next = m_prev->m_next;
1664 m_prev->m_next = NULL;
1665 }
1666
1667 error = vtnet_rxq_enqueue_buf(rxq, m_new);
1668 if (error) {
1669 /*
1670 * The replacement is supposed to be a copy of the one
1671 * dequeued, so this is a very unexpected error.
1672 *
1673 * Restore the m0 chain to the original state if it was
1674 * modified so we can then discard it.
1675 */
1676 if (m_tail->m_next != NULL) {
1677 m_prev->m_next = m_tail->m_next;
1678 m_tail->m_next = NULL;
1679 }
1680 m_prev->m_len = clustersz;
1681 sc->vtnet_stats.rx_enq_replacement_failed++;
1682 m_freem(m_new);
1683 }
1684
1685 return (error);
1686 }
1687
1688 static int
1689 vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1690 {
1691 struct vtnet_softc *sc;
1692 struct mbuf *m_new;
1693 int error;
1694
1695 sc = rxq->vtnrx_sc;
1696
1697 if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1698 return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len));
1699
1700 MPASS(m->m_next == NULL);
1701 if (m->m_len < len)
1702 return (EMSGSIZE);
1703
1704 m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1705 if (m_new == NULL)
1706 return (ENOBUFS);
1707
1708 error = vtnet_rxq_enqueue_buf(rxq, m_new);
1709 if (error) {
1710 sc->vtnet_stats.rx_enq_replacement_failed++;
1711 m_freem(m_new);
1712 } else
1713 m->m_len = len;
1714
1715 return (error);
1716 }
1717
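/*
 * Build the scatter/gather list for a receive buffer. Modern and
 * mergeable-buffer devices carry the header inline in the buffer itself;
 * otherwise the legacy header is appended as a separate segment from the
 * vtnet_rx_header at the front of the mbuf.
 */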
1718 static int
1719 vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1720 {
1721 struct vtnet_softc *sc;
1722 struct sglist *sg;
1723 int header_inlined, error;
1724
1725 sc = rxq->vtnrx_sc;
1726 sg = rxq->vtnrx_sg;
1727
1728 KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1729 ("%s: mbuf chain without LRO_NOMRG", __func__));
1730 VTNET_RXQ_LOCK_ASSERT(rxq);
1731
1732 sglist_reset(sg);
1733 header_inlined = vtnet_modern(sc) ||
1734 (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */
1735
1736 /*
1737 * Note: The mbuf has been already adjusted when we allocate it if we
1738 * have to do strict alignment.
1739 */
1740 if (header_inlined)
1741 error = sglist_append_mbuf(sg, m);
1742 else {
1743 struct vtnet_rx_header *rxhdr =
1744 mtod(m, struct vtnet_rx_header *);
1745 MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
1746
1747 /* Append the header and remaining mbuf data. */
1748 error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
1749 if (error)
1750 return (error);
1751 error = sglist_append(sg, &rxhdr[1],
1752 m->m_len - sizeof(struct vtnet_rx_header));
1753 if (error)
1754 return (error);
1755
1756 if (m->m_next != NULL)
1757 error = sglist_append_mbuf(sg, m->m_next);
1758 }
1759
1760 if (error)
1761 return (error);
1762
1763 return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg));
1764 }
1765
1766 static int
1767 vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1768 {
1769 struct vtnet_softc *sc;
1770 struct mbuf *m;
1771 int error;
1772
1773 sc = rxq->vtnrx_sc;
1774
1775 m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1776 if (m == NULL)
1777 return (ENOBUFS);
1778
1779 error = vtnet_rxq_enqueue_buf(rxq, m);
1780 if (error)
1781 m_freem(m);
1782
1783 return (error);
1784 }
1785
1786 #if defined(INET) || defined(INET6)
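/*
 * VIRTIO_NET_HDR_F_NEEDS_CSUM case: the host supplied only a partial
 * checksum, and the field to be filled in lives csum_offset bytes past
 * csum_start in the packet.
 */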
1787 static int
1788 vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, bool isipv6,
1789 int protocol, struct virtio_net_hdr *hdr)
1790 {
1791 struct vtnet_softc *sc;
1792
1793 /*
1794 * The packet is likely from another VM on the same host, or from the
1795 * host itself, where checksum offloading means Tx/Rx is basically a
1796 * memcpy and the packet's checksum is not yet complete.
1797 */
1798
1799 KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP,
1800 ("%s: unsupported IP protocol %d", __func__, protocol));
1801
1802 /*
1803 * If the user doesn't want us to fix up the checksum here by
1804 * computing it, pass the request along by setting the corresponding
1805 * mbuf flag (e.g., CSUM_TCP) so the checksum is computed later.
1806 */
1807 sc = rxq->vtnrx_sc;
1808 if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
1809 switch (protocol) {
1810 case IPPROTO_TCP:
1811 m->m_pkthdr.csum_flags |=
1812 (isipv6 ? CSUM_TCP_IPV6 : CSUM_TCP);
1813 break;
1814 case IPPROTO_UDP:
1815 m->m_pkthdr.csum_flags |=
1816 (isipv6 ? CSUM_UDP_IPV6 : CSUM_UDP);
1817 break;
1818 }
1819 m->m_pkthdr.csum_data = hdr->csum_offset;
1820 return (0);
1821 }
1822
1823 /*
1824 * Compute the checksum in the driver so the packet will contain a
1825 * valid checksum. The checksum is at csum_offset from csum_start.
1826 */
1827 int csum_off, csum_end;
1828 uint16_t csum;
1829
1830 csum_off = hdr->csum_start + hdr->csum_offset;
1831 csum_end = csum_off + sizeof(uint16_t);
1832
1833 /* Assume checksum will be in the first mbuf. */
1834 if (m->m_len < csum_end || m->m_pkthdr.len < csum_end) {
1835 sc->vtnet_stats.rx_csum_bad_offset++;
1836 return (1);
1837 }
1838
1839 /*
1840 * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
1841 * checksum and write it at the specified offset. We could
1842 * try to verify the packet: csum_start should probably
1843 * correspond to the start of the TCP/UDP header.
1844 *
1845 * BMV: Need to properly handle UDP with zero checksum. Is
1846 * the IPv4 header checksum implicitly validated?
1847 */
1848 csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
1849 *(uint16_t *)(mtodo(m, csum_off)) = csum;
1850 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1851 m->m_pkthdr.csum_data = 0xFFFF;
1852
1853 return (0);
1854 }
1855
1856 static void
1857 vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m, int protocol)
1858 {
1859 KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP,
1860 ("%s: unsupported IP protocol %d", __func__, protocol));
1861
1862 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1863 m->m_pkthdr.csum_data = 0xFFFF;
1864 }
1865
1866 static int
1867 vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
1868 struct virtio_net_hdr *hdr)
1869 {
1870 const struct ether_header *eh;
1871 struct vtnet_softc *sc;
1872 int hoff, protocol;
1873 uint16_t etype;
1874 bool isipv6;
1875
1876 KASSERT(hdr->flags &
1877 (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID),
1878 ("%s: missing checksum offloading flag %x", __func__, hdr->flags));
1879
1880 eh = mtod(m, const struct ether_header *);
1881 etype = ntohs(eh->ether_type);
1882 if (etype == ETHERTYPE_VLAN) {
1883 /* TODO BMV: Handle QinQ. */
1884 const struct ether_vlan_header *evh =
1885 mtod(m, const struct ether_vlan_header *);
1886 etype = ntohs(evh->evl_proto);
1887 hoff = sizeof(struct ether_vlan_header);
1888 } else
1889 hoff = sizeof(struct ether_header);
1890
1891 sc = rxq->vtnrx_sc;
1892
1893 /* Check whether ethernet type is IP or IPv6, and get protocol. */
1894 switch (etype) {
1895 #if defined(INET)
1896 case ETHERTYPE_IP:
1897 if (__predict_false(m->m_len < hoff + sizeof(struct ip))) {
1898 sc->vtnet_stats.rx_csum_inaccessible_ipproto++;
1899 return (1);
1900 } else {
1901 struct ip *ip = (struct ip *)(m->m_data + hoff);
1902 protocol = ip->ip_p;
1903 }
1904 isipv6 = false;
1905 break;
1906 #endif
1907 #if defined(INET6)
1908 case ETHERTYPE_IPV6:
1909 if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
1910 || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0) {
1911 sc->vtnet_stats.rx_csum_inaccessible_ipproto++;
1912 return (1);
1913 }
1914 isipv6 = true;
1915 break;
1916 #endif
1917 default:
1918 sc->vtnet_stats.rx_csum_bad_ethtype++;
1919 return (1);
1920 }
1921
1922 /* Check whether protocol is TCP or UDP. */
1923 switch (protocol) {
1924 case IPPROTO_TCP:
1925 case IPPROTO_UDP:
1926 break;
1927 default:
1928 /*
1929 * FreeBSD does not support checksum offloading of this
1930 * protocol here.
1931 */
1932 sc->vtnet_stats.rx_csum_bad_ipproto++;
1933 return (1);
1934 }
1935
1936 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1937 return (vtnet_rxq_csum_needs_csum(rxq, m, isipv6, protocol,
1938 hdr));
1939 else /* VIRTIO_NET_HDR_F_DATA_VALID */
1940 vtnet_rxq_csum_data_valid(rxq, m, protocol);
1941
1942 return (0);
1943 }
1944 #endif
1945
1946 static void
1947 vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
1948 {
1949 struct mbuf *m;
1950
1951 while (--nbufs > 0) {
1952 m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
1953 if (m == NULL)
1954 break;
1955 vtnet_rxq_discard_buf(rxq, m);
1956 }
1957 }
1958
1959 static void
1960 vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1961 {
1962 int error __diagused;
1963
1964 /*
1965 * Requeue the discarded mbuf. This should always be successful
1966 * since it was just dequeued.
1967 */
1968 error = vtnet_rxq_enqueue_buf(rxq, m);
1969 KASSERT(error == 0,
1970 ("%s: cannot requeue discarded mbuf %d", __func__, error));
1971 }
1972
1973 static int
1974 vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
1975 {
1976 struct vtnet_softc *sc;
1977 struct virtqueue *vq;
1978 struct mbuf *m_tail;
1979
1980 sc = rxq->vtnrx_sc;
1981 vq = rxq->vtnrx_vq;
1982 m_tail = m_head;
1983
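/*
 * Dequeue each remaining buffer of the mergeable chain, post a
 * replacement buffer, and append the data to the packet; on any
 * failure the partially assembled chain is freed below.
 */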
1984 while (--nbufs > 0) {
1985 struct mbuf *m;
1986 uint32_t len;
1987
1988 m = virtqueue_dequeue(vq, &len);
1989 if (m == NULL) {
1990 rxq->vtnrx_stats.vrxs_ierrors++;
1991 goto fail;
1992 }
1993
1994 if (vtnet_rxq_new_buf(rxq) != 0) {
1995 rxq->vtnrx_stats.vrxs_iqdrops++;
1996 vtnet_rxq_discard_buf(rxq, m);
1997 if (nbufs > 1)
1998 vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1999 goto fail;
2000 }
2001
2002 if (m->m_len < len)
2003 len = m->m_len;
2004
2005 m->m_len = len;
2006 m->m_flags &= ~M_PKTHDR;
2007
2008 m_head->m_pkthdr.len += len;
2009 m_tail->m_next = m;
2010 m_tail = m;
2011 }
2012
2013 return (0);
2014
2015 fail:
2016 sc->vtnet_stats.rx_mergeable_failed++;
2017 m_freem(m_head);
2018
2019 return (1);
2020 }
2021
2022 #if defined(INET) || defined(INET6)
2023 static int
2024 vtnet_lro_rx(struct vtnet_rxq *rxq, struct mbuf *m)
2025 {
2026 struct lro_ctrl *lro;
2027
2028 lro = &rxq->vtnrx_lro;
2029
2030 if (lro->lro_mbuf_max != 0) {
2031 tcp_lro_queue_mbuf(lro, m);
2032 return (0);
2033 }
2034
2035 return (tcp_lro_rx(lro, m, 0));
2036 }
2037 #endif
2038
2039 static void
2040 vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
2041 struct virtio_net_hdr *hdr)
2042 {
2043 struct vtnet_softc *sc;
2044 if_t ifp;
2045
2046 sc = rxq->vtnrx_sc;
2047 ifp = sc->vtnet_ifp;
2048
2049 if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
2050 struct ether_header *eh = mtod(m, struct ether_header *);
2051 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2052 vtnet_vlan_tag_remove(m);
2053 /*
2054 * With the 802.1Q header removed, update the
2055 * checksum starting location accordingly.
2056 */
2057 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
2058 hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
2059 }
2060 }
2061
2062 if (sc->vtnet_act_vq_pairs == 1) {
2063 /*
2064 * With a single active rx queue there is no useful flow hash;
2065 * clear the hash type so the upper layer computes one if needed.
2066 */
2067 M_HASHTYPE_CLEAR(m);
2068 } else {
2069 m->m_pkthdr.flowid = rxq->vtnrx_id;
2070 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2071 }
2072
2073 if (hdr->flags &
2074 (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) {
2075 #if defined(INET) || defined(INET6)
2076 if (vtnet_rxq_csum(rxq, m, hdr) == 0)
2077 rxq->vtnrx_stats.vrxs_csum++;
2078 else
2079 rxq->vtnrx_stats.vrxs_csum_failed++;
2080 #else
2081 sc->vtnet_stats.rx_csum_bad_ethtype++;
2082 rxq->vtnrx_stats.vrxs_csum_failed++;
2083 #endif
2084 }
2085
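/*
 * A nonzero gso_size indicates the host coalesced this packet (LRO),
 * so estimate the number of original segments for the stack's
 * accounting: roughly pkthdr.len / gso_size, rounded up.
 */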
2086 if (hdr->gso_size != 0) {
2087 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2088 case VIRTIO_NET_HDR_GSO_TCPV4:
2089 case VIRTIO_NET_HDR_GSO_TCPV6:
2090 m->m_pkthdr.lro_nsegs =
2091 howmany(m->m_pkthdr.len, hdr->gso_size);
2092 rxq->vtnrx_stats.vrxs_host_lro++;
2093 break;
2094 }
2095 }
2096
2097 rxq->vtnrx_stats.vrxs_ipackets++;
2098 rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
2099
2100 #if defined(INET) || defined(INET6)
2101 if (vtnet_software_lro(sc) && if_getcapenable(ifp) & IFCAP_LRO) {
2102 if (vtnet_lro_rx(rxq, m) == 0)
2103 return;
2104 }
2105 #endif
2106
2107 if_input(ifp, m);
2108 }
2109
2110 static int
2111 vtnet_rxq_eof(struct vtnet_rxq *rxq)
2112 {
2113 struct virtio_net_hdr lhdr, *hdr;
2114 struct vtnet_softc *sc;
2115 if_t ifp;
2116 struct virtqueue *vq;
2117 int deq, count;
2118
2119 sc = rxq->vtnrx_sc;
2120 vq = rxq->vtnrx_vq;
2121 ifp = sc->vtnet_ifp;
2122 deq = 0;
2123 count = sc->vtnet_rx_process_limit;
2124
2125 VTNET_RXQ_LOCK_ASSERT(rxq);
2126
2127 CURVNET_SET(if_getvnet(ifp));
2128 while (count-- > 0) {
2129 struct mbuf *m;
2130 uint32_t len, nbufs, adjsz;
2131
2132 m = virtqueue_dequeue(vq, &len);
2133 if (m == NULL)
2134 break;
2135 deq++;
2136
2137 if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
2138 rxq->vtnrx_stats.vrxs_ierrors++;
2139 vtnet_rxq_discard_buf(rxq, m);
2140 continue;
2141 }
2142
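/*
 * Determine how many descriptors make up this packet and how much
 * header to strip: mergeable buffers carry num_buffers in their
 * header, the modern V1 header always describes a single buffer,
 * and the legacy layout uses a separate padded header.
 */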
2143 if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
2144 struct virtio_net_hdr_mrg_rxbuf *mhdr =
2145 mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
2146 kmsan_mark(mhdr, sizeof(*mhdr), KMSAN_STATE_INITED);
2147 nbufs = vtnet_htog16(sc, mhdr->num_buffers);
2148 adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2149 } else if (vtnet_modern(sc)) {
2150 nbufs = 1; /* num_buffers is always 1 */
2151 adjsz = sizeof(struct virtio_net_hdr_v1);
2152 } else {
2153 nbufs = 1;
2154 adjsz = sizeof(struct vtnet_rx_header);
2155 /*
2156 * Account for our gap between the header and start of
2157 * data to keep the segments separated.
2158 */
2159 len += VTNET_RX_HEADER_PAD;
2160 }
2161
2162 if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
2163 rxq->vtnrx_stats.vrxs_iqdrops++;
2164 vtnet_rxq_discard_buf(rxq, m);
2165 if (nbufs > 1)
2166 vtnet_rxq_discard_merged_bufs(rxq, nbufs);
2167 continue;
2168 }
2169
2170 m->m_pkthdr.len = len;
2171 m->m_pkthdr.rcvif = ifp;
2172 m->m_pkthdr.csum_flags = 0;
2173
2174 if (nbufs > 1) {
2175 /* Dequeue the rest of chain. */
2176 if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
2177 continue;
2178 }
2179
2180 kmsan_mark_mbuf(m, KMSAN_STATE_INITED);
2181
2182 /*
2183 * Save an endian-swapped copy of the header before it is
2184 * stripped. The header is always at the start of the mbuf
2185 * data. num_buffers was already saved (and is not needed here),
2186 * so the standard header layout suffices.
2187 */
2188 hdr = mtod(m, struct virtio_net_hdr *);
2189 lhdr.flags = hdr->flags;
2190 lhdr.gso_type = hdr->gso_type;
2191 lhdr.hdr_len = vtnet_htog16(sc, hdr->hdr_len);
2192 lhdr.gso_size = vtnet_htog16(sc, hdr->gso_size);
2193 lhdr.csum_start = vtnet_htog16(sc, hdr->csum_start);
2194 lhdr.csum_offset = vtnet_htog16(sc, hdr->csum_offset);
2195 m_adj(m, adjsz);
2196
2197 if (PFIL_HOOKED_IN(sc->vtnet_pfil)) {
2198 pfil_return_t pfil;
2199
2200 pfil = pfil_mbuf_in(sc->vtnet_pfil, &m, ifp, NULL);
2201 switch (pfil) {
2202 case PFIL_DROPPED:
2203 case PFIL_CONSUMED:
2204 continue;
2205 default:
2206 KASSERT(pfil == PFIL_PASS,
2207 ("Filter returned %d!", pfil));
2208 }
2209 }
2210
2211 vtnet_rxq_input(rxq, m, &lhdr);
2212 }
2213
2214 if (deq > 0) {
2215 #if defined(INET) || defined(INET6)
2216 if (vtnet_software_lro(sc))
2217 tcp_lro_flush_all(&rxq->vtnrx_lro);
2218 #endif
2219 virtqueue_notify(vq);
2220 }
2221 CURVNET_RESTORE();
2222
2223 return (count > 0 ? 0 : EAGAIN);
2224 }
2225
2226 static void
2227 vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries)
2228 {
2229 struct vtnet_softc *sc;
2230 if_t ifp;
2231 u_int more;
2232 #ifdef DEV_NETMAP
2233 int nmirq;
2234 #endif /* DEV_NETMAP */
2235
2236 sc = rxq->vtnrx_sc;
2237 ifp = sc->vtnet_ifp;
2238
2239 if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
2240 /*
2241 * Ignore this interrupt. Either this is a spurious interrupt
2242 * or multiqueue without per-VQ MSIX so every queue needs to
2243 * be polled (a brain dead configuration we could try harder
2244 * to avoid).
2245 */
2246 vtnet_rxq_disable_intr(rxq);
2247 return;
2248 }
2249
2250 VTNET_RXQ_LOCK(rxq);
2251
2252 #ifdef DEV_NETMAP
2253 /*
2254 * We call netmap_rx_irq() under lock to prevent concurrent calls.
2255 * This is not necessary to serialize the access to the RX vq, but
2256 * rather to avoid races that may happen if this interface is
2257 * attached to a VALE switch, which would cause received packets
2258 * to stall in the RX queue (nm_kr_tryget() could find the kring
2259 * busy when called from netmap_bwrap_intr_notify()).
2260 */
2261 nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more);
2262 if (nmirq != NM_IRQ_PASS) {
2263 VTNET_RXQ_UNLOCK(rxq);
2264 if (nmirq == NM_IRQ_RESCHED) {
2265 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2266 }
2267 return;
2268 }
2269 #endif /* DEV_NETMAP */
2270
2271 again:
2272 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
2273 VTNET_RXQ_UNLOCK(rxq);
2274 return;
2275 }
2276
2277 more = vtnet_rxq_eof(rxq);
2278 if (more || vtnet_rxq_enable_intr(rxq) != 0) {
2279 if (!more)
2280 vtnet_rxq_disable_intr(rxq);
2281 /*
2282 * This is an occasional condition or race (when !more),
2283 * so retry a few times before scheduling the taskqueue.
2284 */
2285 if (tries-- > 0)
2286 goto again;
2287
2288 rxq->vtnrx_stats.vrxs_rescheduled++;
2289 VTNET_RXQ_UNLOCK(rxq);
2290 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2291 } else
2292 VTNET_RXQ_UNLOCK(rxq);
2293 }
2294
2295 static void
2296 vtnet_rx_vq_intr(void *xrxq)
2297 {
2298 struct vtnet_rxq *rxq;
2299
2300 rxq = xrxq;
2301 vtnet_rx_vq_process(rxq, VTNET_INTR_DISABLE_RETRIES);
2302 }
2303
2304 static void
2305 vtnet_rxq_tq_intr(void *xrxq, int pending __unused)
2306 {
2307 struct vtnet_rxq *rxq;
2308
2309 rxq = xrxq;
2310 vtnet_rx_vq_process(rxq, 0);
2311 }
2312
2313 static int
2314 vtnet_txq_intr_threshold(struct vtnet_txq *txq)
2315 {
2316 struct vtnet_softc *sc;
2317 int threshold;
2318
2319 sc = txq->vtntx_sc;
2320
2321 /*
2322 * The Tx interrupt is disabled until the queue free count falls
2323 * below our threshold. Completed frames are drained from the Tx
2324 * virtqueue before transmitting new frames and in the watchdog
2325 * callout, so the frequency of Tx interrupts is greatly reduced,
2326 * at the cost of not freeing mbufs as quickly as they otherwise
2327 * would be.
2328 */
2329 threshold = virtqueue_size(txq->vtntx_vq) / 4;
2330
2331 /*
2332 * Without indirect descriptors, leave enough room for the most
2333 * segments we handle.
2334 */
2335 if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
2336 threshold < sc->vtnet_tx_nsegs)
2337 threshold = sc->vtnet_tx_nsegs;
2338
2339 return (threshold);
2340 }
2341
2342 static int
2343 vtnet_txq_below_threshold(struct vtnet_txq *txq)
2344 {
2345 struct virtqueue *vq;
2346
2347 vq = txq->vtntx_vq;
2348
2349 return (virtqueue_nfree(vq) <= txq->vtntx_intr_threshold);
2350 }
2351
2352 static int
2353 vtnet_txq_notify(struct vtnet_txq *txq)
2354 {
2355 struct virtqueue *vq;
2356
2357 vq = txq->vtntx_vq;
2358
2359 txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2360 virtqueue_notify(vq);
2361
2362 if (vtnet_txq_enable_intr(txq) == 0)
2363 return (0);
2364
2365 /*
2366 * Drain frames that were completed since last checked. If this
2367 * causes the queue to go above the threshold, the caller should
2368 * continue transmitting.
2369 */
2370 if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
2371 virtqueue_disable_intr(vq);
2372 return (1);
2373 }
2374
2375 return (0);
2376 }
2377
2378 static void
2379 vtnet_txq_free_mbufs(struct vtnet_txq *txq)
2380 {
2381 struct virtqueue *vq;
2382 struct vtnet_tx_header *txhdr;
2383 int last;
2384 #ifdef DEV_NETMAP
2385 struct netmap_kring *kring = netmap_kring_on(NA(txq->vtntx_sc->vtnet_ifp),
2386 txq->vtntx_id, NR_TX);
2387 #else /* !DEV_NETMAP */
2388 void *kring = NULL;
2389 #endif /* !DEV_NETMAP */
2390
2391 vq = txq->vtntx_vq;
2392 last = 0;
2393
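/*
 * If the queue is attached to a netmap kring, the drained cookies are
 * not driver-owned tx headers and must not be freed here.
 */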
2394 while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
2395 if (kring == NULL) {
2396 m_freem(txhdr->vth_mbuf);
2397 uma_zfree(vtnet_tx_header_zone, txhdr);
2398 }
2399 }
2400
2401 KASSERT(virtqueue_empty(vq),
2402 ("%s: mbufs remaining in tx queue %p", __func__, txq));
2403 }
2404
2405 /*
2406 * BMV: This can go away once we finally have offsets in the mbuf header.
2407 */
2408 static int
2409 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype,
2410 int *proto, int *start)
2411 {
2412 struct vtnet_softc *sc;
2413 struct ether_vlan_header *evh;
2414 #if defined(INET) || defined(INET6)
2415 int offset;
2416 #endif
2417
2418 sc = txq->vtntx_sc;
2419
2420 evh = mtod(m, struct ether_vlan_header *);
2421 if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2422 /* BMV: We should handle nested VLAN tags too. */
2423 *etype = ntohs(evh->evl_proto);
2424 #if defined(INET) || defined(INET6)
2425 offset = sizeof(struct ether_vlan_header);
2426 #endif
2427 } else {
2428 *etype = ntohs(evh->evl_encap_proto);
2429 #if defined(INET) || defined(INET6)
2430 offset = sizeof(struct ether_header);
2431 #endif
2432 }
2433
2434 switch (*etype) {
2435 #if defined(INET)
2436 case ETHERTYPE_IP: {
2437 struct ip *ip, iphdr;
2438 if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2439 m_copydata(m, offset, sizeof(struct ip),
2440 (caddr_t) &iphdr);
2441 ip = &iphdr;
2442 } else
2443 ip = (struct ip *)(m->m_data + offset);
2444 *proto = ip->ip_p;
2445 *start = offset + (ip->ip_hl << 2);
2446 break;
2447 }
2448 #endif
2449 #if defined(INET6)
2450 case ETHERTYPE_IPV6:
2451 *proto = -1;
2452 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2453 /* Assert the network stack sent us a valid packet. */
2454 KASSERT(*start > offset,
2455 ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2456 *start, offset, *proto));
2457 break;
2458 #endif
2459 default:
2460 sc->vtnet_stats.tx_csum_unknown_ethtype++;
2461 return (EINVAL);
2462 }
2463
2464 return (0);
2465 }
2466
2467 static int
2468 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
2469 int offset, struct virtio_net_hdr *hdr)
2470 {
2471 static struct timeval lastecn;
2472 static int curecn;
2473 struct vtnet_softc *sc;
2474 struct tcphdr *tcp, tcphdr;
2475
2476 sc = txq->vtntx_sc;
2477
2478 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
2479 m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
2480 tcp = &tcphdr;
2481 } else
2482 tcp = (struct tcphdr *)(m->m_data + offset);
2483
2484 hdr->hdr_len = vtnet_gtoh16(sc, offset + (tcp->th_off << 2));
2485 hdr->gso_size = vtnet_gtoh16(sc, m->m_pkthdr.tso_segsz);
2486 hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
2487 VIRTIO_NET_HDR_GSO_TCPV6;
2488
2489 if (__predict_false(tcp_get_flags(tcp) & TH_CWR)) {
2490 /*
2491 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In
2492 * FreeBSD, ECN support is not on a per-interface basis,
2493 * but globally via the net.inet.tcp.ecn.enable sysctl
2494 * knob. The default is off.
2495 */
2496 if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
2497 if (ppsratecheck(&lastecn, &curecn, 1))
2498 if_printf(sc->vtnet_ifp,
2499 "TSO with ECN not negotiated with host\n");
2500 return (ENOTSUP);
2501 }
2502 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
2503 }
2504
2505 txq->vtntx_stats.vtxs_tso++;
2506
2507 return (0);
2508 }
2509
2510 static struct mbuf *
2511 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
2512 struct virtio_net_hdr *hdr)
2513 {
2514 struct vtnet_softc *sc;
2515 int flags, etype, csum_start, proto, error;
2516
2517 sc = txq->vtntx_sc;
2518 flags = m->m_pkthdr.csum_flags;
2519
2520 error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
2521 if (error)
2522 goto drop;
2523
2524 if (flags & (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6)) {
2525 /* Sanity check the parsed mbuf matches the offload flags. */
2526 if (__predict_false((flags & VTNET_CSUM_OFFLOAD &&
2527 etype != ETHERTYPE_IP) || (flags & VTNET_CSUM_OFFLOAD_IPV6
2528 && etype != ETHERTYPE_IPV6))) {
2529 sc->vtnet_stats.tx_csum_proto_mismatch++;
2530 goto drop;
2531 }
2532
2533 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
2534 hdr->csum_start = vtnet_gtoh16(sc, csum_start);
2535 hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data);
2536 txq->vtntx_stats.vtxs_csum++;
2537 }
2538
2539 if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) {
2540 /*
2541 * Sanity check that the parsed IP protocol is TCP, since
2542 * VirtIO TSO requires the checksum offloading above.
2543 */
2544 if (__predict_false(proto != IPPROTO_TCP)) {
2545 sc->vtnet_stats.tx_tso_not_tcp++;
2546 goto drop;
2547 } else if (__predict_false((hdr->flags &
2548 VIRTIO_NET_HDR_F_NEEDS_CSUM) == 0)) {
2549 sc->vtnet_stats.tx_tso_without_csum++;
2550 goto drop;
2551 }
2552
2553 error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
2554 if (error)
2555 goto drop;
2556 }
2557
2558 return (m);
2559
2560 drop:
2561 m_freem(m);
2562 return (NULL);
2563 }
2564
2565 static int
2566 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
2567 struct vtnet_tx_header *txhdr)
2568 {
2569 struct vtnet_softc *sc;
2570 struct virtqueue *vq;
2571 struct sglist *sg;
2572 struct mbuf *m;
2573 int error;
2574
2575 sc = txq->vtntx_sc;
2576 vq = txq->vtntx_vq;
2577 sg = txq->vtntx_sg;
2578 m = *m_head;
2579
2580 sglist_reset(sg);
2581 error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
2582 if (error != 0 || sg->sg_nseg != 1) {
2583 KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d",
2584 __func__, error, sg->sg_nseg));
2585 goto fail;
2586 }
2587
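/*
 * If the mbuf chain has more segments than the sglist can hold, try a
 * single defrag pass and retry the append once before giving up.
 */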
2588 error = sglist_append_mbuf(sg, m);
2589 if (error) {
2590 m = m_defrag(m, M_NOWAIT);
2591 if (m == NULL) {
2592 sc->vtnet_stats.tx_defrag_failed++;
2593 goto fail;
2594 }
2595
2596 *m_head = m;
2597 sc->vtnet_stats.tx_defragged++;
2598
2599 error = sglist_append_mbuf(sg, m);
2600 if (error)
2601 goto fail;
2602 }
2603
2604 txhdr->vth_mbuf = m;
2605 error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);
2606
2607 return (error);
2608
2609 fail:
2610 m_freem(*m_head);
2611 *m_head = NULL;
2612
2613 return (ENOBUFS);
2614 }
2615
2616 static int
2617 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
2618 {
2619 struct vtnet_tx_header *txhdr;
2620 struct virtio_net_hdr *hdr;
2621 struct mbuf *m;
2622 int error;
2623
2624 m = *m_head;
2625 M_ASSERTPKTHDR(m);
2626
2627 txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO);
2628 if (txhdr == NULL) {
2629 m_freem(m);
2630 *m_head = NULL;
2631 return (ENOMEM);
2632 }
2633
2634 /*
2635 * Always use the non-mergeable header, regardless of whether mergeable
2636 * headers were negotiated, because for transmit num_buffers is always
2637 * zero. vtnet_hdr_size is used to enqueue the correct header size.
2638 */
2639 hdr = &txhdr->vth_uhdr.hdr;
2640
2641 if (m->m_flags & M_VLANTAG) {
2642 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2643 if ((*m_head = m) == NULL) {
2644 error = ENOBUFS;
2645 goto fail;
2646 }
2647 m->m_flags &= ~M_VLANTAG;
2648 }
2649
2650 if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
2651 m = vtnet_txq_offload(txq, m, hdr);
2652 if ((*m_head = m) == NULL) {
2653 error = ENOBUFS;
2654 goto fail;
2655 }
2656 }
2657
2658 error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
2659 fail:
2660 if (error)
2661 uma_zfree(vtnet_tx_header_zone, txhdr);
2662
2663 return (error);
2664 }
2665
2666
2667 static void
2668 vtnet_start_locked(struct vtnet_txq *txq, if_t ifp)
2669 {
2670 struct vtnet_softc *sc;
2671 struct virtqueue *vq;
2672 struct mbuf *m0;
2673 int tries, enq;
2674
2675 sc = txq->vtntx_sc;
2676 vq = txq->vtntx_vq;
2677 tries = 0;
2678
2679 VTNET_TXQ_LOCK_ASSERT(txq);
2680
2681 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
2682 sc->vtnet_link_active == 0)
2683 return;
2684
2685 vtnet_txq_eof(txq);
2686
2687 again:
2688 enq = 0;
2689
2690 while (!if_sendq_empty(ifp)) {
2691 if (virtqueue_full(vq))
2692 break;
2693
2694 m0 = if_dequeue(ifp);
2695 if (m0 == NULL)
2696 break;
2697
2698 if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) {
2699 if (m0 != NULL)
2700 if_sendq_prepend(ifp, m0);
2701 break;
2702 }
2703
2704 enq++;
2705 ETHER_BPF_MTAP(ifp, m0);
2706 }
2707
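/*
 * vtnet_txq_notify() returns nonzero when draining completed frames
 * left enough room to keep transmitting, so loop a few times before
 * deferring to the taskqueue.
 */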
2708 if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2709 if (tries++ < VTNET_NOTIFY_RETRIES)
2710 goto again;
2711
2712 txq->vtntx_stats.vtxs_rescheduled++;
2713 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2714 }
2715 }
2716
2717 static void
2718 vtnet_start(if_t ifp)
2719 {
2720 struct vtnet_softc *sc;
2721 struct vtnet_txq *txq;
2722
2723 sc = if_getsoftc(ifp);
2724 txq = &sc->vtnet_txqs[0];
2725
2726 VTNET_TXQ_LOCK(txq);
2727 vtnet_start_locked(txq, ifp);
2728 VTNET_TXQ_UNLOCK(txq);
2729 }
2730
2731
2732 static int
2733 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2734 {
2735 struct vtnet_softc *sc;
2736 struct virtqueue *vq;
2737 struct buf_ring *br;
2738 if_t ifp;
2739 int enq, tries, error;
2740
2741 sc = txq->vtntx_sc;
2742 vq = txq->vtntx_vq;
2743 br = txq->vtntx_br;
2744 ifp = sc->vtnet_ifp;
2745 tries = 0;
2746 error = 0;
2747
2748 VTNET_TXQ_LOCK_ASSERT(txq);
2749
2750 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
2751 sc->vtnet_link_active == 0) {
2752 if (m != NULL)
2753 error = drbr_enqueue(ifp, br, m);
2754 return (error);
2755 }
2756
2757 if (m != NULL) {
2758 error = drbr_enqueue(ifp, br, m);
2759 if (error)
2760 return (error);
2761 }
2762
2763 vtnet_txq_eof(txq);
2764
2765 again:
2766 enq = 0;
2767
2768 while ((m = drbr_peek(ifp, br)) != NULL) {
2769 if (virtqueue_full(vq)) {
2770 drbr_putback(ifp, br, m);
2771 break;
2772 }
2773
2774 if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) {
2775 if (m != NULL)
2776 drbr_putback(ifp, br, m);
2777 else
2778 drbr_advance(ifp, br);
2779 break;
2780 }
2781 drbr_advance(ifp, br);
2782
2783 enq++;
2784 ETHER_BPF_MTAP(ifp, m);
2785 }
2786
2787 if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2788 if (tries++ < VTNET_NOTIFY_RETRIES)
2789 goto again;
2790
2791 txq->vtntx_stats.vtxs_rescheduled++;
2792 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2793 }
2794
2795 return (0);
2796 }
2797
2798 static int
2799 vtnet_txq_mq_start(if_t ifp, struct mbuf *m)
2800 {
2801 struct vtnet_softc *sc;
2802 struct vtnet_txq *txq;
2803 int i, npairs, error;
2804
2805 sc = if_getsoftc(ifp);
2806 npairs = sc->vtnet_act_vq_pairs;
2807
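/*
 * Pick a transmit queue: use the mbuf's flow id when a hash is
 * present so a flow stays on one queue, otherwise fall back to the
 * current CPU.
 */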
2808 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2809 i = m->m_pkthdr.flowid % npairs;
2810 else
2811 i = curcpu % npairs;
2812
2813 txq = &sc->vtnet_txqs[i];
2814
2815 if (VTNET_TXQ_TRYLOCK(txq) != 0) {
2816 error = vtnet_txq_mq_start_locked(txq, m);
2817 VTNET_TXQ_UNLOCK(txq);
2818 } else {
2819 error = drbr_enqueue(ifp, txq->vtntx_br, m);
2820 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
2821 }
2822
2823 return (error);
2824 }
2825
2826 static void
2827 vtnet_txq_tq_deferred(void *xtxq, int pending __unused)
2828 {
2829 struct vtnet_softc *sc;
2830 struct vtnet_txq *txq;
2831
2832 txq = xtxq;
2833 sc = txq->vtntx_sc;
2834
2835 VTNET_TXQ_LOCK(txq);
2836 if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
2837 vtnet_txq_mq_start_locked(txq, NULL);
2838 VTNET_TXQ_UNLOCK(txq);
2839 }
2840
2841
2842 static void
2843 vtnet_txq_start(struct vtnet_txq *txq)
2844 {
2845 struct vtnet_softc *sc;
2846 if_t ifp;
2847
2848 sc = txq->vtntx_sc;
2849 ifp = sc->vtnet_ifp;
2850
2851 if (!VTNET_ALTQ_ENABLED) {
2852 if (!drbr_empty(ifp, txq->vtntx_br))
2853 vtnet_txq_mq_start_locked(txq, NULL);
2854 } else {
2855 if (!if_sendq_empty(ifp))
2856 vtnet_start_locked(txq, ifp);
2857
2858 }
2859 }
2860
2861 static void
2862 vtnet_txq_tq_intr(void *xtxq, int pending __unused)
2863 {
2864 struct vtnet_softc *sc;
2865 struct vtnet_txq *txq;
2866 if_t ifp;
2867
2868 txq = xtxq;
2869 sc = txq->vtntx_sc;
2870 ifp = sc->vtnet_ifp;
2871
2872 VTNET_TXQ_LOCK(txq);
2873
2874 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
2875 VTNET_TXQ_UNLOCK(txq);
2876 return;
2877 }
2878
2879 vtnet_txq_eof(txq);
2880 vtnet_txq_start(txq);
2881
2882 VTNET_TXQ_UNLOCK(txq);
2883 }
2884
2885 static int
2886 vtnet_txq_eof(struct vtnet_txq *txq)
2887 {
2888 struct virtqueue *vq;
2889 struct vtnet_tx_header *txhdr;
2890 struct mbuf *m;
2891 int deq;
2892
2893 vq = txq->vtntx_vq;
2894 deq = 0;
2895 VTNET_TXQ_LOCK_ASSERT(txq);
2896
2897 while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
2898 m = txhdr->vth_mbuf;
2899 deq++;
2900
2901 txq->vtntx_stats.vtxs_opackets++;
2902 txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
2903 if (m->m_flags & M_MCAST)
2904 txq->vtntx_stats.vtxs_omcasts++;
2905
2906 m_freem(m);
2907 uma_zfree(vtnet_tx_header_zone, txhdr);
2908 }
2909
2910 if (virtqueue_empty(vq))
2911 txq->vtntx_watchdog = 0;
2912
2913 return (deq);
2914 }
2915
2916 static void
2917 vtnet_tx_vq_intr(void *xtxq)
2918 {
2919 struct vtnet_softc *sc;
2920 struct vtnet_txq *txq;
2921 if_t ifp;
2922
2923 txq = xtxq;
2924 sc = txq->vtntx_sc;
2925 ifp = sc->vtnet_ifp;
2926
2927 if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
2928 /*
2929 * Ignore this interrupt. Either this is a spurious interrupt
2930 * or multiqueue without per-VQ MSIX so every queue needs to
2931 * be polled (a brain dead configuration we could try harder
2932 * to avoid).
2933 */
2934 vtnet_txq_disable_intr(txq);
2935 return;
2936 }
2937
2938 #ifdef DEV_NETMAP
2939 if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
2940 return;
2941 #endif /* DEV_NETMAP */
2942
2943 VTNET_TXQ_LOCK(txq);
2944
2945 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
2946 VTNET_TXQ_UNLOCK(txq);
2947 return;
2948 }
2949
2950 vtnet_txq_eof(txq);
2951 vtnet_txq_start(txq);
2952
2953 VTNET_TXQ_UNLOCK(txq);
2954 }
2955
2956 static void
2957 vtnet_tx_start_all(struct vtnet_softc *sc)
2958 {
2959 struct vtnet_txq *txq;
2960 int i;
2961
2962 VTNET_CORE_LOCK_ASSERT(sc);
2963
2964 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2965 txq = &sc->vtnet_txqs[i];
2966
2967 VTNET_TXQ_LOCK(txq);
2968 vtnet_txq_start(txq);
2969 VTNET_TXQ_UNLOCK(txq);
2970 }
2971 }
2972
2973 static void
2974 vtnet_qflush(if_t ifp)
2975 {
2976 struct vtnet_softc *sc;
2977 struct vtnet_txq *txq;
2978 struct mbuf *m;
2979 int i;
2980
2981 sc = if_getsoftc(ifp);
2982
2983 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2984 txq = &sc->vtnet_txqs[i];
2985
2986 VTNET_TXQ_LOCK(txq);
2987 while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
2988 m_freem(m);
2989 VTNET_TXQ_UNLOCK(txq);
2990 }
2991
2992 if_qflush(ifp);
2993 }
2994
2995 static int
2996 vtnet_watchdog(struct vtnet_txq *txq)
2997 {
2998 if_t ifp;
2999
3000 ifp = txq->vtntx_sc->vtnet_ifp;
3001
3002 VTNET_TXQ_LOCK(txq);
3003 if (txq->vtntx_watchdog == 1) {
3004 /*
3005 * Only drain completed frames if the watchdog is about to
3006 * expire. If any frames were drained, there may be enough
3007 * free descriptors now available to transmit queued frames.
3008 * In that case, the timer will immediately be decremented
3009 * below, but the timeout is generous enough that this should
3010 * not be a problem.
3011 */
3012 if (vtnet_txq_eof(txq) != 0)
3013 vtnet_txq_start(txq);
3014 }
3015
3016 if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
3017 VTNET_TXQ_UNLOCK(txq);
3018 return (0);
3019 }
3020 VTNET_TXQ_UNLOCK(txq);
3021
3022 if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
3023 return (1);
3024 }
3025
3026 static void
3027 vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc,
3028 struct vtnet_txq_stats *txacc)
3029 {
3030
3031 bzero(rxacc, sizeof(struct vtnet_rxq_stats));
3032 bzero(txacc, sizeof(struct vtnet_txq_stats));
3033
3034 for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3035 struct vtnet_rxq_stats *rxst;
3036 struct vtnet_txq_stats *txst;
3037
3038 rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
3039 rxacc->vrxs_ipackets += rxst->vrxs_ipackets;
3040 rxacc->vrxs_ibytes += rxst->vrxs_ibytes;
3041 rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops;
3042 rxacc->vrxs_csum += rxst->vrxs_csum;
3043 rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed;
3044 rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled;
3045
3046 txst = &sc->vtnet_txqs[i].vtntx_stats;
3047 txacc->vtxs_opackets += txst->vtxs_opackets;
3048 txacc->vtxs_obytes += txst->vtxs_obytes;
3049 txacc->vtxs_csum += txst->vtxs_csum;
3050 txacc->vtxs_tso += txst->vtxs_tso;
3051 txacc->vtxs_rescheduled += txst->vtxs_rescheduled;
3052 }
3053 }
3054
3055 static uint64_t
3056 vtnet_get_counter(if_t ifp, ift_counter cnt)
3057 {
3058 struct vtnet_softc *sc;
3059 struct vtnet_rxq_stats rxaccum;
3060 struct vtnet_txq_stats txaccum;
3061
3062 sc = if_getsoftc(ifp);
3063 vtnet_accum_stats(sc, &rxaccum, &txaccum);
3064
3065 switch (cnt) {
3066 case IFCOUNTER_IPACKETS:
3067 return (rxaccum.vrxs_ipackets);
3068 case IFCOUNTER_IQDROPS:
3069 return (rxaccum.vrxs_iqdrops);
3070 case IFCOUNTER_IERRORS:
3071 return (rxaccum.vrxs_ierrors);
3072 case IFCOUNTER_IBYTES:
3073 return (rxaccum.vrxs_ibytes);
3074 case IFCOUNTER_OPACKETS:
3075 return (txaccum.vtxs_opackets);
3076 case IFCOUNTER_OBYTES:
3077 return (txaccum.vtxs_obytes);
3078 case IFCOUNTER_OMCASTS:
3079 return (txaccum.vtxs_omcasts);
3080 default:
3081 return (if_get_counter_default(ifp, cnt));
3082 }
3083 }
3084
3085 static void
3086 vtnet_tick(void *xsc)
3087 {
3088 struct vtnet_softc *sc;
3089 if_t ifp;
3090 int i, timedout;
3091
3092 sc = xsc;
3093 ifp = sc->vtnet_ifp;
3094 timedout = 0;
3095
3096 VTNET_CORE_LOCK_ASSERT(sc);
3097
3098 for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3099 timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
3100
3101 if (timedout != 0) {
3102 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3103 vtnet_init_locked(sc, 0);
3104 } else
3105 callout_schedule(&sc->vtnet_tick_ch, hz);
3106 }
3107
3108 static void
3109 vtnet_start_taskqueues(struct vtnet_softc *sc)
3110 {
3111 device_t dev;
3112 struct vtnet_rxq *rxq;
3113 struct vtnet_txq *txq;
3114 int i, error;
3115
3116 dev = sc->vtnet_dev;
3117
3118 /*
3119 * Errors here are very difficult to recover from: we cannot
3120 * easily fail because, during boot, freeing any taskqueues that
3121 * were successfully started would hang since the scheduler is
3122 * not up yet.
3123 *
3124 * Most drivers simply ignore the return value; it can only fail
3125 * with ENOMEM, so an error is unlikely.
3126 */
3127 for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
3128 rxq = &sc->vtnet_rxqs[i];
3129 error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
3130 "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
3131 if (error) {
3132 device_printf(dev, "failed to start rx taskq %d\n",
3133 rxq->vtnrx_id);
3134 }
3135
3136 txq = &sc->vtnet_txqs[i];
3137 error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
3138 "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
3139 if (error) {
3140 device_printf(dev, "failed to start tx taskq %d\n",
3141 txq->vtntx_id);
3142 }
3143 }
3144 }
3145
3146 static void
3147 vtnet_free_taskqueues(struct vtnet_softc *sc)
3148 {
3149 struct vtnet_rxq *rxq;
3150 struct vtnet_txq *txq;
3151 int i;
3152
3153 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3154 rxq = &sc->vtnet_rxqs[i];
3155 if (rxq->vtnrx_tq != NULL) {
3156 taskqueue_free(rxq->vtnrx_tq);
3157 rxq->vtnrx_tq = NULL;
3158 }
3159
3160 txq = &sc->vtnet_txqs[i];
3161 if (txq->vtntx_tq != NULL) {
3162 taskqueue_free(txq->vtntx_tq);
3163 txq->vtntx_tq = NULL;
3164 }
3165 }
3166 }
3167
3168 static void
3169 vtnet_drain_taskqueues(struct vtnet_softc *sc)
3170 {
3171 struct vtnet_rxq *rxq;
3172 struct vtnet_txq *txq;
3173 int i;
3174
3175 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3176 rxq = &sc->vtnet_rxqs[i];
3177 if (rxq->vtnrx_tq != NULL)
3178 taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
3179
3180 txq = &sc->vtnet_txqs[i];
3181 if (txq->vtntx_tq != NULL) {
3182 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
3183 if (!VTNET_ALTQ_ENABLED)
3184 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
3185 }
3186 }
3187 }
3188
3189 static void
3190 vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
3191 {
3192 struct vtnet_rxq *rxq;
3193 struct vtnet_txq *txq;
3194 int i;
3195
3196 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3197 rxq = &sc->vtnet_rxqs[i];
3198 vtnet_rxq_free_mbufs(rxq);
3199
3200 txq = &sc->vtnet_txqs[i];
3201 vtnet_txq_free_mbufs(txq);
3202 }
3203 }
3204
3205 static void
3206 vtnet_stop_rendezvous(struct vtnet_softc *sc)
3207 {
3208 struct vtnet_rxq *rxq;
3209 struct vtnet_txq *txq;
3210 int i;
3211
3212 VTNET_CORE_LOCK_ASSERT(sc);
3213
3214 /*
3215 * Lock and unlock the per-queue mutex so we know the stop
3216 * state is visible. Doing only the active queues should be
3217 * sufficient, but it does not cost much extra to do all the
3218 * queues.
3219 */
3220 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3221 rxq = &sc->vtnet_rxqs[i];
3222 VTNET_RXQ_LOCK(rxq);
3223 VTNET_RXQ_UNLOCK(rxq);
3224
3225 txq = &sc->vtnet_txqs[i];
3226 VTNET_TXQ_LOCK(txq);
3227 VTNET_TXQ_UNLOCK(txq);
3228 }
3229 }
3230
3231 static void
3232 vtnet_stop(struct vtnet_softc *sc)
3233 {
3234 device_t dev;
3235 if_t ifp;
3236
3237 dev = sc->vtnet_dev;
3238 ifp = sc->vtnet_ifp;
3239
3240 VTNET_CORE_LOCK_ASSERT(sc);
3241
3242 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3243 sc->vtnet_link_active = 0;
3244 callout_stop(&sc->vtnet_tick_ch);
3245
3246 /* Only advisory. */
3247 vtnet_disable_interrupts(sc);
3248
3249 #ifdef DEV_NETMAP
3250 /* Stop any pending txsync/rxsync and disable them. */
3251 netmap_disable_all_rings(ifp);
3252 #endif /* DEV_NETMAP */
3253
3254 /*
3255 * Stop the host adapter. This resets it to the pre-initialized
3256 * state. It will not generate any interrupts until after it is
3257 * reinitialized.
3258 */
3259 virtio_stop(dev);
3260 vtnet_stop_rendezvous(sc);
3261
3262 vtnet_drain_rxtx_queues(sc);
3263 sc->vtnet_act_vq_pairs = 1;
3264 }
3265
3266 static int
3267 vtnet_virtio_reinit(struct vtnet_softc *sc)
3268 {
3269 device_t dev;
3270 if_t ifp;
3271 uint64_t features;
3272 int error;
3273
3274 dev = sc->vtnet_dev;
3275 ifp = sc->vtnet_ifp;
3276 features = sc->vtnet_negotiated_features;
3277
3278 /*
3279 * Re-negotiate with the host, removing any disabled receive
3280 * features. Transmit features are disabled only on our side
3281 * via if_capenable and if_hwassist.
3282 */
3283
3284 if ((if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0)
3285 features &= ~(VIRTIO_NET_F_GUEST_CSUM | VTNET_LRO_FEATURES);
3286
3287 if ((if_getcapenable(ifp) & IFCAP_LRO) == 0)
3288 features &= ~VTNET_LRO_FEATURES;
3289
3290 if ((if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) == 0)
3291 features &= ~VIRTIO_NET_F_CTRL_VLAN;
3292
3293 error = virtio_reinit(dev, features);
3294 if (error) {
3295 device_printf(dev, "virtio reinit error %d\n", error);
3296 return (error);
3297 }
3298
3299 sc->vtnet_features = features;
3300 virtio_reinit_complete(dev);
3301
3302 return (0);
3303 }
3304
3305 static void
3306 vtnet_init_rx_filters(struct vtnet_softc *sc)
3307 {
3308 if_t ifp;
3309
3310 ifp = sc->vtnet_ifp;
3311
3312 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
3313 vtnet_rx_filter(sc);
3314 vtnet_rx_filter_mac(sc);
3315 }
3316
3317 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
3318 vtnet_rx_filter_vlan(sc);
3319 }
3320
3321 static int
3322 vtnet_init_rx_queues(struct vtnet_softc *sc)
3323 {
3324 device_t dev;
3325 if_t ifp;
3326 struct vtnet_rxq *rxq;
3327 int i, clustersz, error;
3328
3329 dev = sc->vtnet_dev;
3330 ifp = sc->vtnet_ifp;
3331
3332 clustersz = vtnet_rx_cluster_size(sc, if_getmtu(ifp));
3333 sc->vtnet_rx_clustersz = clustersz;
3334
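/*
 * When LRO was negotiated without mergeable buffers (LRO_NOMRG), each
 * receive buffer must hold a maximum-sized packet, so chain enough
 * mbuf clusters per buffer to cover VTNET_MAX_RX_SIZE.
 */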
3335 if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) {
3336 sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) +
3337 VTNET_MAX_RX_SIZE, clustersz);
3338 KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
3339 ("%s: too many rx mbufs %d for %d segments", __func__,
3340 sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
3341 } else
3342 sc->vtnet_rx_nmbufs = 1;
3343
3344 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3345 rxq = &sc->vtnet_rxqs[i];
3346
3347 /* Hold the lock to satisfy asserts. */
3348 VTNET_RXQ_LOCK(rxq);
3349 error = vtnet_rxq_populate(rxq);
3350 VTNET_RXQ_UNLOCK(rxq);
3351
3352 if (error) {
3353 device_printf(dev, "cannot populate Rx queue %d\n", i);
3354 return (error);
3355 }
3356 }
3357
3358 return (0);
3359 }
3360
3361 static int
3362 vtnet_init_tx_queues(struct vtnet_softc *sc)
3363 {
3364 struct vtnet_txq *txq;
3365 int i;
3366
3367 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3368 txq = &sc->vtnet_txqs[i];
3369 txq->vtntx_watchdog = 0;
3370 txq->vtntx_intr_threshold = vtnet_txq_intr_threshold(txq);
3371 #ifdef DEV_NETMAP
3372 netmap_reset(NA(sc->vtnet_ifp), NR_TX, i, 0);
3373 #endif /* DEV_NETMAP */
3374 }
3375
3376 return (0);
3377 }
3378
3379 static int
3380 vtnet_init_rxtx_queues(struct vtnet_softc *sc)
3381 {
3382 int error;
3383
3384 error = vtnet_init_rx_queues(sc);
3385 if (error)
3386 return (error);
3387
3388 error = vtnet_init_tx_queues(sc);
3389 if (error)
3390 return (error);
3391
3392 return (0);
3393 }
3394
3395 static void
3396 vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
3397 {
3398 device_t dev;
3399 int npairs;
3400
3401 dev = sc->vtnet_dev;
3402
3403 if ((sc->vtnet_flags & VTNET_FLAG_MQ) == 0) {
3404 sc->vtnet_act_vq_pairs = 1;
3405 return;
3406 }
3407
3408 npairs = sc->vtnet_req_vq_pairs;
3409
3410 if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
3411 device_printf(dev, "cannot set active queue pairs to %d, "
3412 "falling back to 1 queue pair\n", npairs);
3413 npairs = 1;
3414 }
3415
3416 sc->vtnet_act_vq_pairs = npairs;
3417 }
3418
3419 static void
3420 vtnet_update_rx_offloads(struct vtnet_softc *sc)
3421 {
3422 if_t ifp;
3423 uint64_t features;
3424 int error;
3425
3426 ifp = sc->vtnet_ifp;
3427 features = sc->vtnet_features;
3428
3429 VTNET_CORE_LOCK_ASSERT(sc);
3430
3431 if (if_getcapabilities(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
3432 if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
3433 features |= VIRTIO_NET_F_GUEST_CSUM;
3434 else
3435 features &= ~VIRTIO_NET_F_GUEST_CSUM;
3436 }
3437
3438 if (if_getcapabilities(ifp) & IFCAP_LRO && !vtnet_software_lro(sc)) {
3439 if (if_getcapenable(ifp) & IFCAP_LRO)
3440 features |= VTNET_LRO_FEATURES;
3441 else
3442 features &= ~VTNET_LRO_FEATURES;
3443 }
3444
3445 error = vtnet_ctrl_guest_offloads(sc,
3446 features & (VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 |
3447 VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN |
3448 VIRTIO_NET_F_GUEST_UFO));
3449 if (error) {
3450 device_printf(sc->vtnet_dev,
3451 "%s: cannot update Rx features\n", __func__);
3452 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
3453 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3454 vtnet_init_locked(sc, 0);
3455 }
3456 } else
3457 sc->vtnet_features = features;
3458 }
3459
3460 static int
3461 vtnet_reinit(struct vtnet_softc *sc)
3462 {
3463 if_t ifp;
3464 int error;
3465
3466 ifp = sc->vtnet_ifp;
3467
3468 bcopy(if_getlladdr(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3469
3470 error = vtnet_virtio_reinit(sc);
3471 if (error)
3472 return (error);
3473
3474 vtnet_set_macaddr(sc);
3475 vtnet_set_active_vq_pairs(sc);
3476
3477 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
3478 vtnet_init_rx_filters(sc);
3479
3480 if_sethwassist(ifp, 0);
3481 if (if_getcapenable(ifp) & IFCAP_TXCSUM)
3482 if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD, 0);
3483 if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
3484 if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD_IPV6, 0);
3485 if (if_getcapenable(ifp) & IFCAP_TSO4)
3486 if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
3487 if (if_getcapenable(ifp) & IFCAP_TSO6)
3488 if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
3489
3490 error = vtnet_init_rxtx_queues(sc);
3491 if (error)
3492 return (error);
3493
3494 return (0);
3495 }
3496
3497 static void
3498 vtnet_init_locked(struct vtnet_softc *sc, int init_mode)
3499 {
3500 if_t ifp;
3501
3502 ifp = sc->vtnet_ifp;
3503
3504 VTNET_CORE_LOCK_ASSERT(sc);
3505
3506 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
3507 return;
3508
3509 vtnet_stop(sc);
3510
3511 #ifdef DEV_NETMAP
3512 /* Once stopped we can update the netmap flags, if necessary. */
3513 switch (init_mode) {
3514 case VTNET_INIT_NETMAP_ENTER:
3515 nm_set_native_flags(NA(ifp));
3516 break;
3517 case VTNET_INIT_NETMAP_EXIT:
3518 nm_clear_native_flags(NA(ifp));
3519 break;
3520 }
3521 #endif /* DEV_NETMAP */
3522
3523 if (vtnet_reinit(sc) != 0) {
3524 vtnet_stop(sc);
3525 return;
3526 }
3527
3528 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
3529 vtnet_update_link_status(sc);
3530 vtnet_enable_interrupts(sc);
3531 callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
3532
3533 #ifdef DEV_NETMAP
3534 /* Re-enable txsync/rxsync. */
3535 netmap_enable_all_rings(ifp);
3536 #endif /* DEV_NETMAP */
3537 }
3538
3539 static void
3540 vtnet_init(void *xsc)
3541 {
3542 struct vtnet_softc *sc;
3543
3544 sc = xsc;
3545
3546 VTNET_CORE_LOCK(sc);
3547 vtnet_init_locked(sc, 0);
3548 VTNET_CORE_UNLOCK(sc);
3549 }
3550
3551 static void
3552 vtnet_free_ctrl_vq(struct vtnet_softc *sc)
3553 {
3554
3555 /*
3556 * The control virtqueue is only polled and therefore it should
3557 * already be empty.
3558 */
3559 KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
3560 ("%s: ctrl vq %p not empty", __func__, sc->vtnet_ctrl_vq));
3561 }
3562
3563 static void
3564 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
3565 struct sglist *sg, int readable, int writable)
3566 {
3567 struct virtqueue *vq;
3568
3569 vq = sc->vtnet_ctrl_vq;
3570
3571 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ);
3572 VTNET_CORE_LOCK_ASSERT(sc);
3573
3574 if (!virtqueue_empty(vq))
3575 return;
3576
3577 /*
3578 * Poll for the response; the command has likely already completed
3579 * by the time the notify returns.
3580 */
3581 if (virtqueue_enqueue(vq, cookie, sg, readable, writable) == 0) {
3582 virtqueue_notify(vq);
3583 virtqueue_poll(vq, NULL);
3584 }
3585 }
3586
3587 static int
3588 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3589 {
3590 struct sglist_seg segs[3];
3591 struct sglist sg;
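/*
 * The command header, payload, and ack byte are packed into one
 * on-stack structure; the alignment and pad fields keep each piece
 * addressable as its own sglist segment below.
 */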
3592 struct {
3593 struct virtio_net_ctrl_hdr hdr __aligned(2);
3594 uint8_t pad1;
3595 uint8_t addr[ETHER_ADDR_LEN] __aligned(8);
3596 uint8_t pad2;
3597 uint8_t ack;
3598 } s;
3599 int error;
3600
3601 error = 0;
3602 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC);
3603
3604 s.hdr.class = VIRTIO_NET_CTRL_MAC;
3605 s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3606 bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN);
3607 s.ack = VIRTIO_NET_ERR;
3608
3609 sglist_init(&sg, nitems(segs), segs);
3610 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3611 error |= sglist_append(&sg, &s.addr[0], ETHER_ADDR_LEN);
3612 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3613 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3614
3615 if (error == 0)
3616 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3617
3618 return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3619 }
3620
3621 static int
3622 vtnet_ctrl_guest_offloads(struct vtnet_softc *sc, uint64_t offloads)
3623 {
3624 struct sglist_seg segs[3];
3625 struct sglist sg;
3626 struct {
3627 struct virtio_net_ctrl_hdr hdr __aligned(2);
3628 uint8_t pad1;
3629 uint64_t offloads __aligned(8);
3630 uint8_t pad2;
3631 uint8_t ack;
3632 } s;
3633 int error;
3634
3635 error = 0;
3636 MPASS(sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3637
3638 s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS;
3639 s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET;
3640 s.offloads = vtnet_gtoh64(sc, offloads);
3641 s.ack = VIRTIO_NET_ERR;
3642
3643 sglist_init(&sg, nitems(segs), segs);
3644 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3645 error |= sglist_append(&sg, &s.offloads, sizeof(uint64_t));
3646 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3647 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3648
3649 if (error == 0)
3650 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3651
3652 return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3653 }
3654
3655 static int
3656 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3657 {
3658 struct sglist_seg segs[3];
3659 struct sglist sg;
3660 struct {
3661 struct virtio_net_ctrl_hdr hdr __aligned(2);
3662 uint8_t pad1;
3663 struct virtio_net_ctrl_mq mq __aligned(2);
3664 uint8_t pad2;
3665 uint8_t ack;
3666 } s;
3667 int error;
3668
3669 error = 0;
3670 MPASS(sc->vtnet_flags & VTNET_FLAG_MQ);
3671
3672 s.hdr.class = VIRTIO_NET_CTRL_MQ;
3673 s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
3674 s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs);
3675 s.ack = VIRTIO_NET_ERR;
3676
3677 sglist_init(&sg, nitems(segs), segs);
3678 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3679 error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
3680 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3681 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3682
3683 if (error == 0)
3684 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3685
3686 return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3687 }
3688
3689 static int
3690 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, bool on)
3691 {
3692 struct sglist_seg segs[3];
3693 struct sglist sg;
3694 struct {
3695 struct virtio_net_ctrl_hdr hdr __aligned(2);
3696 uint8_t pad1;
3697 uint8_t onoff;
3698 uint8_t pad2;
3699 uint8_t ack;
3700 } s;
3701 int error;
3702
3703 error = 0;
3704 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
3705
3706 s.hdr.class = VIRTIO_NET_CTRL_RX;
3707 s.hdr.cmd = cmd;
3708 s.onoff = on;
3709 s.ack = VIRTIO_NET_ERR;
3710
3711 sglist_init(&sg, nitems(segs), segs);
3712 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3713 error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
3714 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3715 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3716
3717 if (error == 0)
3718 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3719
3720 return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3721 }
3722
3723 static int
3724 vtnet_set_promisc(struct vtnet_softc *sc, bool on)
3725 {
3726 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
3727 }
3728
3729 static int
3730 vtnet_set_allmulti(struct vtnet_softc *sc, bool on)
3731 {
3732 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
3733 }
3734
3735 static void
3736 vtnet_rx_filter(struct vtnet_softc *sc)
3737 {
3738 device_t dev;
3739 if_t ifp;
3740
3741 dev = sc->vtnet_dev;
3742 ifp = sc->vtnet_ifp;
3743
3744 VTNET_CORE_LOCK_ASSERT(sc);
3745
3746 if (vtnet_set_promisc(sc, if_getflags(ifp) & IFF_PROMISC) != 0) {
3747 device_printf(dev, "cannot %s promiscuous mode\n",
3748 if_getflags(ifp) & IFF_PROMISC ? "enable" : "disable");
3749 }
3750
3751 if (vtnet_set_allmulti(sc, if_getflags(ifp) & IFF_ALLMULTI) != 0) {
3752 device_printf(dev, "cannot %s all-multicast mode\n",
3753 if_getflags(ifp) & IFF_ALLMULTI ? "enable" : "disable");
3754 }
3755 }
3756
3757 static u_int
3758 vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt)
3759 {
3760 struct vtnet_softc *sc = arg;
3761
3762 if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
3763 return (0);
3764
3765 if (ucnt < VTNET_MAX_MAC_ENTRIES)
3766 bcopy(LLADDR(sdl),
3767 &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt],
3768 ETHER_ADDR_LEN);
3769
3770 return (1);
3771 }
3772
3773 static u_int
3774 vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt)
3775 {
3776 struct vtnet_mac_filter *filter = arg;
3777
3778 if (mcnt < VTNET_MAX_MAC_ENTRIES)
3779 bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt],
3780 ETHER_ADDR_LEN);
3781
3782 return (1);
3783 }
3784
3785 static void
3786 vtnet_rx_filter_mac(struct vtnet_softc *sc)
3787 {
3788 struct virtio_net_ctrl_hdr hdr __aligned(2);
3789 struct vtnet_mac_filter *filter;
3790 struct sglist_seg segs[4];
3791 struct sglist sg;
3792 if_t ifp;
3793 bool promisc, allmulti;
3794 u_int ucnt, mcnt;
3795 int error;
3796 uint8_t ack;
3797
3798 ifp = sc->vtnet_ifp;
3799 filter = sc->vtnet_mac_filter;
3800 error = 0;
3801
3802 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
3803 VTNET_CORE_LOCK_ASSERT(sc);
3804
3805 /* Unicast MAC addresses: */
3806 ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc);
3807 promisc = (ucnt > VTNET_MAX_MAC_ENTRIES);
3808
3809 if (promisc) {
3810 ucnt = 0;
3811 if_printf(ifp, "more than %d MAC addresses assigned, "
3812 "falling back to promiscuous mode\n",
3813 VTNET_MAX_MAC_ENTRIES);
3814 }
3815
3816 /* Multicast MAC addresses: */
3817 mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter);
3818 allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES);
3819
3820 if (allmulti) {
3821 mcnt = 0;
3822 if_printf(ifp, "more than %d multicast MAC addresses "
3823 "assigned, falling back to all-multicast mode\n",
3824 VTNET_MAX_MAC_ENTRIES);
3825 }
3826
3827 if (promisc && allmulti)
3828 goto out;
3829
3830 filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt);
3831 filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt);
3832
3833 hdr.class = VIRTIO_NET_CTRL_MAC;
3834 hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
3835 ack = VIRTIO_NET_ERR;
3836
3837 sglist_init(&sg, nitems(segs), segs);
3838 error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3839 error |= sglist_append(&sg, &filter->vmf_unicast,
3840 sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN);
3841 error |= sglist_append(&sg, &filter->vmf_multicast,
3842 sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN);
3843 error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3844 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3845
3846 if (error == 0)
3847 vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3848 if (ack != VIRTIO_NET_OK)
3849 if_printf(ifp, "error setting host MAC filter table\n");
3850
3851 out:
3852 if (promisc && vtnet_set_promisc(sc, true) != 0)
3853 if_printf(ifp, "cannot enable promiscuous mode\n");
3854 if (allmulti && vtnet_set_allmulti(sc, true) != 0)
3855 if_printf(ifp, "cannot enable all-multicast mode\n");
3856 }
3857
3858 static int
3859 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3860 {
3861 struct sglist_seg segs[3];
3862 struct sglist sg;
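	/*
	 * On-stack control command.  The pad bytes plus __aligned(2) keep the
	 * tag on a 16-bit boundary; hdr, tag, and ack are appended to the
	 * sglist as separate segments below.
	 */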
3863 struct {
3864 struct virtio_net_ctrl_hdr hdr __aligned(2);
3865 uint8_t pad1;
3866 uint16_t tag __aligned(2);
3867 uint8_t pad2;
3868 uint8_t ack;
3869 } s;
3870 int error;
3871
3872 error = 0;
3873 MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
3874
3875 s.hdr.class = VIRTIO_NET_CTRL_VLAN;
3876 s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
3877 s.tag = vtnet_gtoh16(sc, tag);
3878 s.ack = VIRTIO_NET_ERR;
3879
3880 sglist_init(&sg, nitems(segs), segs);
3881 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3882 error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
3883 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3884 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3885
3886 if (error == 0)
3887 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3888
3889 return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3890 }
3891
3892 static void
3893 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
3894 {
3895 int i, bit;
3896 uint32_t w;
3897 uint16_t tag;
3898
3899 MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
3900 VTNET_CORE_LOCK_ASSERT(sc);
3901
3902 /* Enable the filter for each configured VLAN. */
3903 for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
3904 w = sc->vtnet_vlan_filter[i];
3905
3906 while ((bit = ffs(w) - 1) != -1) {
3907 w &= ~(1 << bit);
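			/* Recover the VLAN ID: each filter word tracks 32 tags. */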
3908 tag = sizeof(w) * CHAR_BIT * i + bit;
3909
3910 if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
3911 device_printf(sc->vtnet_dev,
3912 "cannot enable VLAN %d filter\n", tag);
3913 }
3914 }
3915 }
3916 }
3917
3918 static void
3919 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3920 {
3921 if_t ifp;
3922 int idx, bit;
3923
3924 ifp = sc->vtnet_ifp;
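	/* The shadow filter is a bitmap: 32 VLAN IDs per 32-bit word. */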
3925 idx = (tag >> 5) & 0x7F;
3926 bit = tag & 0x1F;
3927
3928 if (tag == 0 || tag > 4095)
3929 return;
3930
3931 VTNET_CORE_LOCK(sc);
3932
3933 if (add)
3934 sc->vtnet_vlan_filter[idx] |= (1 << bit);
3935 else
3936 sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
3937
3938 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER &&
3939 if_getdrvflags(ifp) & IFF_DRV_RUNNING &&
3940 vtnet_exec_vlan_filter(sc, add, tag) != 0) {
3941 device_printf(sc->vtnet_dev,
3942 "cannot %s VLAN %d %s the host filter table\n",
3943 add ? "add" : "remove", tag, add ? "to" : "from");
3944 }
3945
3946 VTNET_CORE_UNLOCK(sc);
3947 }
3948
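/*
 * VLAN configuration eventhandlers.  These run for every interface, so the
 * softc comparison filters out events that belong to other interfaces.
 */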
3949 static void
3950 vtnet_register_vlan(void *arg, if_t ifp, uint16_t tag)
3951 {
3952
3953 if (if_getsoftc(ifp) != arg)
3954 return;
3955
3956 vtnet_update_vlan_filter(arg, 1, tag);
3957 }
3958
3959 static void
3960 vtnet_unregister_vlan(void *arg, if_t ifp, uint16_t tag)
3961 {
3962
3963 if (if_getsoftc(ifp) != arg)
3964 return;
3965
3966 vtnet_update_vlan_filter(arg, 0, tag);
3967 }
3968
3969 static void
3970 vtnet_update_speed_duplex(struct vtnet_softc *sc)
3971 {
3972 if_t ifp;
3973 uint32_t speed;
3974
3975 ifp = sc->vtnet_ifp;
3976
3977 if ((sc->vtnet_features & VIRTIO_NET_F_SPEED_DUPLEX) == 0)
3978 return;
3979
3980 /* BMV: Ignore duplex. */
3981 speed = virtio_read_dev_config_4(sc->vtnet_dev,
3982 offsetof(struct virtio_net_config, speed));
3983 if (speed != UINT32_MAX)
3984 if_setbaudrate(ifp, IF_Mbps(speed));
3985 }
3986
3987 static int
3988 vtnet_is_link_up(struct vtnet_softc *sc)
3989 {
3990 uint16_t status;
3991
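	/* Without VIRTIO_NET_F_STATUS, assume the link is always up. */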
3992 if ((sc->vtnet_features & VIRTIO_NET_F_STATUS) == 0)
3993 return (1);
3994
3995 status = virtio_read_dev_config_2(sc->vtnet_dev,
3996 offsetof(struct virtio_net_config, status));
3997
3998 return ((status & VIRTIO_NET_S_LINK_UP) != 0);
3999 }
4000
4001 static void
4002 vtnet_update_link_status(struct vtnet_softc *sc)
4003 {
4004 if_t ifp;
4005 int link;
4006
4007 ifp = sc->vtnet_ifp;
4008 VTNET_CORE_LOCK_ASSERT(sc);
4009 link = vtnet_is_link_up(sc);
4010
4011 /* Notify if the link status has changed. */
4012 if (link != 0 && sc->vtnet_link_active == 0) {
4013 vtnet_update_speed_duplex(sc);
4014 sc->vtnet_link_active = 1;
4015 if_link_state_change(ifp, LINK_STATE_UP);
4016 } else if (link == 0 && sc->vtnet_link_active != 0) {
4017 sc->vtnet_link_active = 0;
4018 if_link_state_change(ifp, LINK_STATE_DOWN);
4019 }
4020 }
4021
4022 static int
4023 vtnet_ifmedia_upd(if_t ifp __unused)
4024 {
4025 return (EOPNOTSUPP);
4026 }
4027
4028 static void
4029 vtnet_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr)
4030 {
4031 struct vtnet_softc *sc;
4032
4033 sc = if_getsoftc(ifp);
4034
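	/* VirtIO has no real media; report a fixed 10G full-duplex link. */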
4035 ifmr->ifm_status = IFM_AVALID;
4036 ifmr->ifm_active = IFM_ETHER;
4037
4038 VTNET_CORE_LOCK(sc);
4039 if (vtnet_is_link_up(sc) != 0) {
4040 ifmr->ifm_status |= IFM_ACTIVE;
4041 ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
4042 } else
4043 ifmr->ifm_active |= IFM_NONE;
4044 VTNET_CORE_UNLOCK(sc);
4045 }
4046
4047 static void
4048 vtnet_get_macaddr(struct vtnet_softc *sc)
4049 {
4050
4051 if (sc->vtnet_flags & VTNET_FLAG_MAC) {
4052 virtio_read_device_config_array(sc->vtnet_dev,
4053 offsetof(struct virtio_net_config, mac),
4054 &sc->vtnet_hwaddr[0], sizeof(uint8_t), ETHER_ADDR_LEN);
4055 } else {
4056 /* Generate a random locally administered unicast address. */
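		/* 0xB2 sets the locally administered bit and clears the group bit. */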
4057 sc->vtnet_hwaddr[0] = 0xB2;
4058 arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
4059 }
4060 }
4061
4062 static void
4063 vtnet_set_macaddr(struct vtnet_softc *sc)
4064 {
4065 device_t dev;
4066 int error;
4067
4068 dev = sc->vtnet_dev;
4069
4070 if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
4071 error = vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr);
4072 if (error)
4073 device_printf(dev, "unable to set MAC address\n");
4074 return;
4075 }
4076
4077 /* MAC in config is read-only in modern VirtIO. */
4078 if (!vtnet_modern(sc) && sc->vtnet_flags & VTNET_FLAG_MAC) {
4079 for (int i = 0; i < ETHER_ADDR_LEN; i++) {
4080 virtio_write_dev_config_1(dev,
4081 offsetof(struct virtio_net_config, mac) + i,
4082 sc->vtnet_hwaddr[i]);
4083 }
4084 }
4085 }
4086
4087 static void
4088 vtnet_attached_set_macaddr(struct vtnet_softc *sc)
4089 {
4090
4091 /* Assign MAC address if it was generated. */
4092 if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0)
4093 vtnet_set_macaddr(sc);
4094 }
4095
4096 static void
4097 vtnet_vlan_tag_remove(struct mbuf *m)
4098 {
4099 struct ether_vlan_header *evh;
4100
4101 evh = mtod(m, struct ether_vlan_header *);
4102 m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
4103 m->m_flags |= M_VLANTAG;
4104
4105 /* Strip the 802.1Q header. */
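	/*
	 * Slide the destination and source addresses forward over the 4-byte
	 * tag, then trim the now-stale leading bytes with m_adj().
	 */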
4106 bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
4107 ETHER_HDR_LEN - ETHER_TYPE_LEN);
4108 m_adj(m, ETHER_VLAN_ENCAP_LEN);
4109 }
4110
4111 static void
4112 vtnet_set_rx_process_limit(struct vtnet_softc *sc)
4113 {
4114 int limit;
4115
4116 limit = vtnet_tunable_int(sc, "rx_process_limit",
4117 vtnet_rx_process_limit);
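	/* A negative tunable value removes the per-pass limit. */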
4118 if (limit < 0)
4119 limit = INT_MAX;
4120 sc->vtnet_rx_process_limit = limit;
4121 }
4122
4123 static void
4124 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
4125 struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
4126 {
4127 struct sysctl_oid *node;
4128 struct sysctl_oid_list *list;
4129 struct vtnet_rxq_stats *stats;
4130 char namebuf[16];
4131
4132 snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
4133 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
4134 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
4135 list = SYSCTL_CHILDREN(node);
4136
4137 stats = &rxq->vtnrx_stats;
4138
4139 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets",
4140 CTLFLAG_RD | CTLFLAG_STATS,
4141 &stats->vrxs_ipackets, "Receive packets");
4142 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes",
4143 CTLFLAG_RD | CTLFLAG_STATS,
4144 &stats->vrxs_ibytes, "Receive bytes");
4145 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops",
4146 CTLFLAG_RD | CTLFLAG_STATS,
4147 &stats->vrxs_iqdrops, "Receive drops");
4148 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors",
4149 CTLFLAG_RD | CTLFLAG_STATS,
4150 &stats->vrxs_ierrors, "Receive errors");
4151 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum",
4152 CTLFLAG_RD | CTLFLAG_STATS,
4153 &stats->vrxs_csum, "Receive checksum offloaded");
4154 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed",
4155 CTLFLAG_RD | CTLFLAG_STATS,
4156 &stats->vrxs_csum_failed, "Receive checksum offload failed");
4157 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro",
4158 CTLFLAG_RD | CTLFLAG_STATS,
4159 &stats->vrxs_host_lro, "Receive host segmentation offloaded");
4160 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled",
4161 CTLFLAG_RD | CTLFLAG_STATS,
4162 &stats->vrxs_rescheduled,
4163 "Receive interrupt handler rescheduled");
4164 }
4165
4166 static void
4167 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
4168 struct sysctl_oid_list *child, struct vtnet_txq *txq)
4169 {
4170 struct sysctl_oid *node;
4171 struct sysctl_oid_list *list;
4172 struct vtnet_txq_stats *stats;
4173 char namebuf[16];
4174
4175 snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
4176 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
4177 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
4178 list = SYSCTL_CHILDREN(node);
4179
4180 stats = &txq->vtntx_stats;
4181
4182 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets",
4183 CTLFLAG_RD | CTLFLAG_STATS,
4184 &stats->vtxs_opackets, "Transmit packets");
4185 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes",
4186 CTLFLAG_RD | CTLFLAG_STATS,
4187 &stats->vtxs_obytes, "Transmit bytes");
4188 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts",
4189 CTLFLAG_RD | CTLFLAG_STATS,
4190 &stats->vtxs_omcasts, "Transmit multicasts");
4191 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum",
4192 CTLFLAG_RD | CTLFLAG_STATS,
4193 &stats->vtxs_csum, "Transmit checksum offloaded");
4194 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso",
4195 CTLFLAG_RD | CTLFLAG_STATS,
4196 &stats->vtxs_tso, "Transmit TCP segmentation offloaded");
4197 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled",
4198 CTLFLAG_RD | CTLFLAG_STATS,
4199 &stats->vtxs_rescheduled,
4200 "Transmit interrupt handler rescheduled");
4201 }
4202
4203 static void
4204 vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
4205 {
4206 device_t dev;
4207 struct sysctl_ctx_list *ctx;
4208 struct sysctl_oid *tree;
4209 struct sysctl_oid_list *child;
4210 int i;
4211
4212 dev = sc->vtnet_dev;
4213 ctx = device_get_sysctl_ctx(dev);
4214 tree = device_get_sysctl_tree(dev);
4215 child = SYSCTL_CHILDREN(tree);
4216
4217 for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
4218 vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
4219 vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
4220 }
4221 }
4222
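/*
 * The sysctl handlers below report driver-wide totals by summing the
 * per-queue counters on each read.
 */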
4223 static int
4224 vtnet_sysctl_rx_csum_failed(SYSCTL_HANDLER_ARGS)
4225 {
4226 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4227 struct vtnet_statistics *stats = &sc->vtnet_stats;
4228 struct vtnet_rxq_stats *rxst;
4229 int i;
4230
4231 stats->rx_csum_failed = 0;
4232 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4233 rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
4234 stats->rx_csum_failed += rxst->vrxs_csum_failed;
4235 }
4236 return (sysctl_handle_64(oidp, NULL, stats->rx_csum_failed, req));
4237 }
4238
4239 static int
4240 vtnet_sysctl_rx_csum_offloaded(SYSCTL_HANDLER_ARGS)
4241 {
4242 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4243 struct vtnet_statistics *stats = &sc->vtnet_stats;
4244 struct vtnet_rxq_stats *rxst;
4245 int i;
4246
4247 stats->rx_csum_offloaded = 0;
4248 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4249 rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
4250 stats->rx_csum_offloaded += rxst->vrxs_csum;
4251 }
4252 return (sysctl_handle_64(oidp, NULL, stats->rx_csum_offloaded, req));
4253 }
4254
4255 static int
4256 vtnet_sysctl_rx_task_rescheduled(SYSCTL_HANDLER_ARGS)
4257 {
4258 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4259 struct vtnet_statistics *stats = &sc->vtnet_stats;
4260 struct vtnet_rxq_stats *rxst;
4261 int i;
4262
4263 stats->rx_task_rescheduled = 0;
4264 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4265 rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
4266 stats->rx_task_rescheduled += rxst->vrxs_rescheduled;
4267 }
4268 return (sysctl_handle_64(oidp, NULL, stats->rx_task_rescheduled, req));
4269 }
4270
4271 static int
4272 vtnet_sysctl_tx_csum_offloaded(SYSCTL_HANDLER_ARGS)
4273 {
4274 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4275 struct vtnet_statistics *stats = &sc->vtnet_stats;
4276 struct vtnet_txq_stats *txst;
4277 int i;
4278
4279 stats->tx_csum_offloaded = 0;
4280 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4281 txst = &sc->vtnet_txqs[i].vtntx_stats;
4282 stats->tx_csum_offloaded += txst->vtxs_csum;
4283 }
4284 return (sysctl_handle_64(oidp, NULL, stats->tx_csum_offloaded, req));
4285 }
4286
4287 static int
4288 vtnet_sysctl_tx_tso_offloaded(SYSCTL_HANDLER_ARGS)
4289 {
4290 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4291 struct vtnet_statistics *stats = &sc->vtnet_stats;
4292 struct vtnet_txq_stats *txst;
4293 int i;
4294
4295 stats->tx_tso_offloaded = 0;
4296 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4297 txst = &sc->vtnet_txqs[i].vtntx_stats;
4298 stats->tx_tso_offloaded += txst->vtxs_tso;
4299 }
4300 return (sysctl_handle_64(oidp, NULL, stats->tx_tso_offloaded, req));
4301 }
4302
4303 static int
4304 vtnet_sysctl_tx_task_rescheduled(SYSCTL_HANDLER_ARGS)
4305 {
4306 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4307 struct vtnet_statistics *stats = &sc->vtnet_stats;
4308 struct vtnet_txq_stats *txst;
4309 int i;
4310
4311 stats->tx_task_rescheduled = 0;
4312 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4313 txst = &sc->vtnet_txqs[i].vtntx_stats;
4314 stats->tx_task_rescheduled += txst->vtxs_rescheduled;
4315 }
4316 return (sysctl_handle_64(oidp, NULL, stats->tx_task_rescheduled, req));
4317 }
4318
4319 static void
4320 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
4321 struct sysctl_oid_list *child, struct vtnet_softc *sc)
4322 {
4323 struct vtnet_statistics *stats;
4324 struct vtnet_rxq_stats rxaccum;
4325 struct vtnet_txq_stats txaccum;
4326
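	/*
	 * Snapshot the current per-queue totals; the *_PROC sysctl handlers
	 * recompute these sums on every read.
	 */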
4327 vtnet_accum_stats(sc, &rxaccum, &txaccum);
4328
4329 stats = &sc->vtnet_stats;
4330 stats->rx_csum_offloaded = rxaccum.vrxs_csum;
4331 stats->rx_csum_failed = rxaccum.vrxs_csum_failed;
4332 stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
4333 stats->tx_csum_offloaded = txaccum.vtxs_csum;
4334 stats->tx_tso_offloaded = txaccum.vtxs_tso;
4335 stats->tx_task_rescheduled = txaccum.vtxs_rescheduled;
4336
4337 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
4338 CTLFLAG_RD | CTLFLAG_STATS, &stats->mbuf_alloc_failed,
4339 "Mbuf cluster allocation failures");
4340
4341 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
4342 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_frame_too_large,
4343 "Received frame larger than the mbuf chain");
4344 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
4345 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_enq_replacement_failed,
4346 "Enqueuing the replacement receive mbuf failed");
4347 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
4348 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_mergeable_failed,
4349 "Mergeable buffers receive failures");
4350 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
4351 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_ethtype,
4352 "Received checksum offloaded buffer with unsupported "
4353 "Ethernet type");
4354 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
4355 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_ipproto,
4356 "Received checksum offloaded buffer with incorrect IP protocol");
4357 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
4358 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_offset,
4359 "Received checksum offloaded buffer with incorrect offset");
4360 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_inaccessible_ipproto",
4361 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_inaccessible_ipproto,
4362 "Received checksum offloaded buffer with inaccessible IP protocol");
4363 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_csum_failed",
4364 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4365 sc, 0, vtnet_sysctl_rx_csum_failed, "QU",
4366 "Received buffer checksum offload failed");
4367 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_csum_offloaded",
4368 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4369 sc, 0, vtnet_sysctl_rx_csum_offloaded, "QU",
4370 "Received buffer checksum offload succeeded");
4371 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_task_rescheduled",
4372 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4373 sc, 0, vtnet_sysctl_rx_task_rescheduled, "QU",
4374 "Times the receive interrupt task rescheduled itself");
4375
4376 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype",
4377 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_csum_unknown_ethtype,
4378 "Aborted transmit of checksum offloaded buffer with unknown "
4379 "Ethernet type");
4380 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch",
4381 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_csum_proto_mismatch,
4382 "Aborted transmit of checksum offloaded buffer because of "
4383 "mismatched protocols");
4384 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
4385 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_tso_not_tcp,
4386 "Aborted transmit of TSO buffer with non-TCP protocol");
4387 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum",
4388 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_tso_without_csum,
4389 "Aborted transmit of TSO buffer without TCP checksum offload");
4390 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
4391 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_defragged,
4392 "Transmit mbufs defragged");
4393 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
4394 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_defrag_failed,
4395 "Aborted transmit of buffer because defrag failed");
4396 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_csum_offloaded",
4397 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4398 sc, 0, vtnet_sysctl_tx_csum_offloaded, "QU",
4399 "Offloaded checksum of transmitted buffer");
4400 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_tso_offloaded",
4401 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4402 sc, 0, vtnet_sysctl_tx_tso_offloaded, "QU",
4403 "Segmentation offload of transmitted buffer");
4404 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_task_rescheduled",
4405 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4406 sc, 0, vtnet_sysctl_tx_task_rescheduled, "QU",
4407 "Times the transmit interrupt task rescheduled itself");
4408 }
4409
4410 static int
4411 vtnet_sysctl_features(SYSCTL_HANDLER_ARGS)
4412 {
4413 struct sbuf sb;
4414 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4415 int error;
4416
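	/* Render the negotiated feature bits using the %b format string. */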
4417 sbuf_new_for_sysctl(&sb, NULL, 0, req);
4418 sbuf_printf(&sb, "%b", (uint32_t)sc->vtnet_features,
4419 VIRTIO_NET_FEATURE_BITS);
4420 error = sbuf_finish(&sb);
4421 sbuf_delete(&sb);
4422 return (error);
4423 }
4424
4425 static int
4426 vtnet_sysctl_flags(SYSCTL_HANDLER_ARGS)
4427 {
4428 struct sbuf sb;
4429 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4430 int error;
4431
4432 sbuf_new_for_sysctl(&sb, NULL, 0, req);
4433 sbuf_printf(&sb, "%b", sc->vtnet_flags, VTNET_FLAGS_BITS);
4434 error = sbuf_finish(&sb);
4435 sbuf_delete(&sb);
4436 return (error);
4437 }
4438
4439 static void
4440 vtnet_setup_sysctl(struct vtnet_softc *sc)
4441 {
4442 device_t dev;
4443 struct sysctl_ctx_list *ctx;
4444 struct sysctl_oid *tree;
4445 struct sysctl_oid_list *child;
4446
4447 dev = sc->vtnet_dev;
4448 ctx = device_get_sysctl_ctx(dev);
4449 tree = device_get_sysctl_tree(dev);
4450 child = SYSCTL_CHILDREN(tree);
4451
4452 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
4453 CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
4454 "Number of maximum supported virtqueue pairs");
4455 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "req_vq_pairs",
4456 CTLFLAG_RD, &sc->vtnet_req_vq_pairs, 0,
4457 "Number of requested virtqueue pairs");
4458 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
4459 CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
4460 "Number of active virtqueue pairs");
4461 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "features",
4462 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
4463 vtnet_sysctl_features, "A", "Features");
4464 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "flags",
4465 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
4466 vtnet_sysctl_flags, "A", "Flags");
4467
4468 vtnet_setup_stat_sysctl(ctx, child, sc);
4469 }
4470
4471 static void
4472 vtnet_load_tunables(struct vtnet_softc *sc)
4473 {
4474
4475 sc->vtnet_lro_entry_count = vtnet_tunable_int(sc,
4476 "lro_entry_count", vtnet_lro_entry_count);
4477 if (sc->vtnet_lro_entry_count < TCP_LRO_ENTRIES)
4478 sc->vtnet_lro_entry_count = TCP_LRO_ENTRIES;
4479
4480 sc->vtnet_lro_mbufq_depth = vtnet_tunable_int(sc,
4481 "lro_mbufq_depth", vtnet_lro_mbufq_depth);
4482 }
4483
4484 static int
4485 vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
4486 {
4487
4488 return (virtqueue_enable_intr(rxq->vtnrx_vq));
4489 }
4490
4491 static void
4492 vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
4493 {
4494
4495 virtqueue_disable_intr(rxq->vtnrx_vq);
4496 }
4497
4498 static int
4499 vtnet_txq_enable_intr(struct vtnet_txq *txq)
4500 {
4501 struct virtqueue *vq;
4502
4503 vq = txq->vtntx_vq;
4504
4505 if (vtnet_txq_below_threshold(txq) != 0)
4506 return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
4507
4508 /*
4509 * The free count is above our threshold. Keep the Tx interrupt
4510 * disabled until the queue is fuller.
4511 */
4512 return (0);
4513 }
4514
4515 static void
4516 vtnet_txq_disable_intr(struct vtnet_txq *txq)
4517 {
4518
4519 virtqueue_disable_intr(txq->vtntx_vq);
4520 }
4521
4522 static void
4523 vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
4524 {
4525 struct vtnet_rxq *rxq;
4526 int i;
4527
4528 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
4529 rxq = &sc->vtnet_rxqs[i];
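		/*
		 * If entries were already pending when the interrupt was
		 * re-enabled, reschedule the Rx task to process them.
		 */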
4530 if (vtnet_rxq_enable_intr(rxq) != 0)
4531 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
4532 }
4533 }
4534
4535 static void
4536 vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
4537 {
4538 int i;
4539
4540 for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4541 vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
4542 }
4543
4544 static void
4545 vtnet_enable_interrupts(struct vtnet_softc *sc)
4546 {
4547
4548 vtnet_enable_rx_interrupts(sc);
4549 vtnet_enable_tx_interrupts(sc);
4550 }
4551
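/*
 * Note that interrupts are disabled across every allocated queue pair
 * (vtnet_max_vq_pairs), while only the active pairs are enabled above.
 */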
4552 static void
4553 vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
4554 {
4555 int i;
4556
4557 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
4558 vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
4559 }
4560
4561 static void
4562 vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
4563 {
4564 int i;
4565
4566 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
4567 vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
4568 }
4569
4570 static void
4571 vtnet_disable_interrupts(struct vtnet_softc *sc)
4572 {
4573
4574 vtnet_disable_rx_interrupts(sc);
4575 vtnet_disable_tx_interrupts(sc);
4576 }
4577
4578 static int
4579 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
4580 {
4581 char path[64];
4582
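	/*
	 * Per-device knobs live under hw.vtnet.<unit>.<name>.  If the tunable
	 * is not set, the supplied default is returned unchanged.
	 */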
4583 snprintf(path, sizeof(path),
4584 "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
4585 TUNABLE_INT_FETCH(path, &def);
4586
4587 return (def);
4588 }
4589
4590 #ifdef DEBUGNET
4591 static void
4592 vtnet_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize)
4593 {
4594 struct vtnet_softc *sc;
4595
4596 sc = if_getsoftc(ifp);
4597
4598 VTNET_CORE_LOCK(sc);
4599 *nrxr = sc->vtnet_req_vq_pairs;
4600 *ncl = DEBUGNET_MAX_IN_FLIGHT;
4601 *clsize = sc->vtnet_rx_clustersz;
4602 VTNET_CORE_UNLOCK(sc);
4603 }
4604
4605 static void
4606 vtnet_debugnet_event(if_t ifp __unused, enum debugnet_ev event)
4607 {
4608 struct vtnet_softc *sc;
4609 static bool sw_lro_enabled = false;
4610
4611 /*
4612 * Disable software LRO, since it would require entering the network
4613 * epoch when calling vtnet_txq_eof() in vtnet_debugnet_poll().
4614 */
4615 sc = if_getsoftc(ifp);
4616 switch (event) {
4617 case DEBUGNET_START:
4618 sw_lro_enabled = (sc->vtnet_flags & VTNET_FLAG_SW_LRO) != 0;
4619 if (sw_lro_enabled)
4620 sc->vtnet_flags &= ~VTNET_FLAG_SW_LRO;
4621 break;
4622 case DEBUGNET_END:
4623 if (sw_lro_enabled)
4624 sc->vtnet_flags |= VTNET_FLAG_SW_LRO;
4625 break;
4626 }
4627 }
4628
4629 static int
4630 vtnet_debugnet_transmit(if_t ifp, struct mbuf *m)
4631 {
4632 struct vtnet_softc *sc;
4633 struct vtnet_txq *txq;
4634 int error;
4635
4636 sc = if_getsoftc(ifp);
4637 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4638 IFF_DRV_RUNNING)
4639 return (EBUSY);
4640
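	/* Debugnet transmits only on the first Tx queue. */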
4641 txq = &sc->vtnet_txqs[0];
4642 error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE);
4643 if (error == 0)
4644 (void)vtnet_txq_notify(txq);
4645 return (error);
4646 }
4647
4648 static int
4649 vtnet_debugnet_poll(if_t ifp, int count)
4650 {
4651 struct vtnet_softc *sc;
4652 int i;
4653
4654 sc = if_getsoftc(ifp);
4655 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4656 IFF_DRV_RUNNING)
4657 return (EBUSY);
4658
4659 (void)vtnet_txq_eof(&sc->vtnet_txqs[0]);
4660 for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4661 (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]);
4662 return (0);
4663 }
4664 #endif /* DEBUGNET */
4665