1 /*-
2 * SPDX-License-Identifier: BSD-2-Clause
3 *
4 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
5 * All rights reserved.
6 *
7 * Redistribution and use in source and binary forms, with or without
8 * modification, are permitted provided that the following conditions
9 * are met:
10 * 1. Redistributions of source code must retain the above copyright
11 * notice unmodified, this list of conditions, and the following
12 * disclaimer.
13 * 2. Redistributions in binary form must reproduce the above copyright
14 * notice, this list of conditions and the following disclaimer in the
15 * documentation and/or other materials provided with the distribution.
16 *
17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
27 */
28
29 /* Driver for VirtIO network devices. */
30
31 #include "opt_inet.h"
32 #include "opt_inet6.h"
33
34 #include <sys/param.h>
35 #include <sys/eventhandler.h>
36 #include <sys/systm.h>
37 #include <sys/kernel.h>
38 #include <sys/sockio.h>
39 #include <sys/malloc.h>
40 #include <sys/mbuf.h>
41 #include <sys/module.h>
42 #include <sys/msan.h>
43 #include <sys/sbuf.h>
44 #include <sys/socket.h>
45 #include <sys/sysctl.h>
46 #include <sys/random.h>
47 #include <sys/sglist.h>
48 #include <sys/lock.h>
49 #include <sys/mutex.h>
50 #include <sys/taskqueue.h>
51 #include <sys/smp.h>
52 #include <machine/smp.h>
53
54 #include <vm/uma.h>
55
56 #include <net/debugnet.h>
57 #include <net/ethernet.h>
58 #include <net/pfil.h>
59 #include <net/if.h>
60 #include <net/if_var.h>
61 #include <net/if_arp.h>
62 #include <net/if_dl.h>
63 #include <net/if_types.h>
64 #include <net/if_media.h>
65 #include <net/if_vlan_var.h>
66
67 #include <net/bpf.h>
68
69 #include <netinet/in_systm.h>
70 #include <netinet/in.h>
71 #include <netinet/ip.h>
72 #include <netinet/ip6.h>
73 #include <netinet6/ip6_var.h>
74 #include <netinet/udp.h>
75 #include <netinet/tcp.h>
76 #include <netinet/tcp_lro.h>
77
78 #include <machine/bus.h>
79 #include <machine/resource.h>
80 #include <sys/bus.h>
81 #include <sys/rman.h>
82
83 #include <dev/virtio/virtio.h>
84 #include <dev/virtio/virtqueue.h>
85 #include <dev/virtio/network/virtio_net.h>
86 #include <dev/virtio/network/if_vtnetvar.h>
87 #include "virtio_if.h"
88
89 #if defined(INET) || defined(INET6)
90 #include <machine/in_cksum.h>
91 #endif
92
93 #ifdef __NO_STRICT_ALIGNMENT
94 #define VTNET_ETHER_ALIGN 0
95 #else /* Strict alignment */
96 #define VTNET_ETHER_ALIGN ETHER_ALIGN
97 #endif
98
99 static int vtnet_modevent(module_t, int, void *);
100
101 static int vtnet_probe(device_t);
102 static int vtnet_attach(device_t);
103 static int vtnet_detach(device_t);
104 static int vtnet_suspend(device_t);
105 static int vtnet_resume(device_t);
106 static int vtnet_shutdown(device_t);
107 static int vtnet_attach_completed(device_t);
108 static int vtnet_config_change(device_t);
109
110 static int vtnet_negotiate_features(struct vtnet_softc *);
111 static int vtnet_setup_features(struct vtnet_softc *);
112 static int vtnet_init_rxq(struct vtnet_softc *, int);
113 static int vtnet_init_txq(struct vtnet_softc *, int);
114 static int vtnet_alloc_rxtx_queues(struct vtnet_softc *);
115 static void vtnet_free_rxtx_queues(struct vtnet_softc *);
116 static int vtnet_alloc_rx_filters(struct vtnet_softc *);
117 static void vtnet_free_rx_filters(struct vtnet_softc *);
118 static int vtnet_alloc_virtqueues(struct vtnet_softc *);
119 static void vtnet_alloc_interface(struct vtnet_softc *);
120 static int vtnet_setup_interface(struct vtnet_softc *);
121 static int vtnet_ioctl_mtu(struct vtnet_softc *, u_int);
122 static int vtnet_ioctl_ifflags(struct vtnet_softc *);
123 static int vtnet_ioctl_multi(struct vtnet_softc *);
124 static int vtnet_ioctl_ifcap(struct vtnet_softc *, struct ifreq *);
125 static int vtnet_ioctl(if_t, u_long, caddr_t);
126 static uint64_t vtnet_get_counter(if_t, ift_counter);
127
128 static int vtnet_rxq_populate(struct vtnet_rxq *);
129 static void vtnet_rxq_free_mbufs(struct vtnet_rxq *);
130 static struct mbuf *
		    vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
132 static int vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
133 struct mbuf *, int);
134 static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
135 static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
136 static int vtnet_rxq_new_buf(struct vtnet_rxq *);
137 #if defined(INET) || defined(INET6)
138 static int vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
139 bool, int, struct virtio_net_hdr *);
140 static void vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
141 int);
142 static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
143 struct virtio_net_hdr *);
144 #endif
145 static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
146 static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
147 static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
148 static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
149 struct virtio_net_hdr *);
150 static int vtnet_rxq_eof(struct vtnet_rxq *);
151 static void vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries);
152 static void vtnet_rx_vq_intr(void *);
153 static void vtnet_rxq_tq_intr(void *, int);
154
155 static int vtnet_txq_intr_threshold(struct vtnet_txq *);
156 static int vtnet_txq_below_threshold(struct vtnet_txq *);
157 static int vtnet_txq_notify(struct vtnet_txq *);
158 static void vtnet_txq_free_mbufs(struct vtnet_txq *);
159 static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
160 int *, int *, int *);
161 static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
162 int, struct virtio_net_hdr *);
163 static struct mbuf *
164 vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
165 struct virtio_net_hdr *);
166 static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
167 struct vtnet_tx_header *);
168 static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int);
169
170 /* Required for ALTQ */
171 static void vtnet_start_locked(struct vtnet_txq *, if_t);
172 static void vtnet_start(if_t);
173
174 /* Required for MQ */
175 static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
176 static int vtnet_txq_mq_start(if_t, struct mbuf *);
177 static void vtnet_txq_tq_deferred(void *, int);
178 static void vtnet_qflush(if_t);
179
180
181 static void vtnet_txq_start(struct vtnet_txq *);
182 static void vtnet_txq_tq_intr(void *, int);
183 static int vtnet_txq_eof(struct vtnet_txq *);
184 static void vtnet_tx_vq_intr(void *);
185 static void vtnet_tx_start_all(struct vtnet_softc *);
186
187 static int vtnet_watchdog(struct vtnet_txq *);
188 static void vtnet_accum_stats(struct vtnet_softc *,
189 struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
190 static void vtnet_tick(void *);
191
192 static void vtnet_start_taskqueues(struct vtnet_softc *);
193 static void vtnet_free_taskqueues(struct vtnet_softc *);
194 static void vtnet_drain_taskqueues(struct vtnet_softc *);
195
196 static void vtnet_drain_rxtx_queues(struct vtnet_softc *);
197 static void vtnet_stop_rendezvous(struct vtnet_softc *);
198 static void vtnet_stop(struct vtnet_softc *);
199 static int vtnet_virtio_reinit(struct vtnet_softc *);
200 static void vtnet_init_rx_filters(struct vtnet_softc *);
201 static int vtnet_init_rx_queues(struct vtnet_softc *);
202 static int vtnet_init_tx_queues(struct vtnet_softc *);
203 static int vtnet_init_rxtx_queues(struct vtnet_softc *);
204 static void vtnet_set_active_vq_pairs(struct vtnet_softc *);
205 static void vtnet_update_rx_offloads(struct vtnet_softc *);
206 static int vtnet_reinit(struct vtnet_softc *);
207 static void vtnet_init_locked(struct vtnet_softc *, int);
208 static void vtnet_init(void *);
209
210 static void vtnet_free_ctrl_vq(struct vtnet_softc *);
211 static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
212 struct sglist *, int, int);
213 static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
214 static int vtnet_ctrl_guest_offloads(struct vtnet_softc *, uint64_t);
215 static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
216 static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, uint8_t, bool);
217 static int vtnet_set_promisc(struct vtnet_softc *, bool);
218 static int vtnet_set_allmulti(struct vtnet_softc *, bool);
219 static void vtnet_rx_filter(struct vtnet_softc *);
220 static void vtnet_rx_filter_mac(struct vtnet_softc *);
221 static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
222 static void vtnet_rx_filter_vlan(struct vtnet_softc *);
223 static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
224 static void vtnet_register_vlan(void *, if_t, uint16_t);
225 static void vtnet_unregister_vlan(void *, if_t, uint16_t);
226
227 static void vtnet_update_speed_duplex(struct vtnet_softc *);
228 static int vtnet_is_link_up(struct vtnet_softc *);
229 static void vtnet_update_link_status(struct vtnet_softc *);
230 static int vtnet_ifmedia_upd(if_t);
231 static void vtnet_ifmedia_sts(if_t, struct ifmediareq *);
232 static void vtnet_get_macaddr(struct vtnet_softc *);
233 static void vtnet_set_macaddr(struct vtnet_softc *);
234 static void vtnet_attached_set_macaddr(struct vtnet_softc *);
235 static void vtnet_vlan_tag_remove(struct mbuf *);
236 static void vtnet_set_rx_process_limit(struct vtnet_softc *);
237
238 static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
239 struct sysctl_oid_list *, struct vtnet_rxq *);
240 static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
241 struct sysctl_oid_list *, struct vtnet_txq *);
242 static void vtnet_setup_queue_sysctl(struct vtnet_softc *);
243 static void vtnet_load_tunables(struct vtnet_softc *);
244 static void vtnet_setup_sysctl(struct vtnet_softc *);
245
246 static int vtnet_rxq_enable_intr(struct vtnet_rxq *);
247 static void vtnet_rxq_disable_intr(struct vtnet_rxq *);
248 static int vtnet_txq_enable_intr(struct vtnet_txq *);
249 static void vtnet_txq_disable_intr(struct vtnet_txq *);
250 static void vtnet_enable_rx_interrupts(struct vtnet_softc *);
251 static void vtnet_enable_tx_interrupts(struct vtnet_softc *);
252 static void vtnet_enable_interrupts(struct vtnet_softc *);
253 static void vtnet_disable_rx_interrupts(struct vtnet_softc *);
254 static void vtnet_disable_tx_interrupts(struct vtnet_softc *);
255 static void vtnet_disable_interrupts(struct vtnet_softc *);
256
257 static int vtnet_tunable_int(struct vtnet_softc *, const char *, int);
258
259 DEBUGNET_DEFINE(vtnet);
260
261 #define vtnet_htog16(_sc, _val) virtio_htog16(vtnet_modern(_sc), _val)
262 #define vtnet_htog32(_sc, _val) virtio_htog32(vtnet_modern(_sc), _val)
263 #define vtnet_htog64(_sc, _val) virtio_htog64(vtnet_modern(_sc), _val)
264 #define vtnet_gtoh16(_sc, _val) virtio_gtoh16(vtnet_modern(_sc), _val)
265 #define vtnet_gtoh32(_sc, _val) virtio_gtoh32(vtnet_modern(_sc), _val)
266 #define vtnet_gtoh64(_sc, _val) virtio_gtoh64(vtnet_modern(_sc), _val)
267
268 /* Tunables. */
269 static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
270 "VirtIO Net driver parameters");
271
272 static int vtnet_csum_disable = 0;
273 SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
274 &vtnet_csum_disable, 0, "Disables receive and send checksum offload");
275
276 static int vtnet_fixup_needs_csum = 0;
277 SYSCTL_INT(_hw_vtnet, OID_AUTO, fixup_needs_csum, CTLFLAG_RDTUN,
278 &vtnet_fixup_needs_csum, 0,
279 "Calculate valid checksum for NEEDS_CSUM packets");
280
281 static int vtnet_tso_disable = 0;
282 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN,
283 &vtnet_tso_disable, 0, "Disables TSO");
284
285 static int vtnet_lro_disable = 1;
286 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN,
287 &vtnet_lro_disable, 0, "Disables hardware LRO");
288
289 static int vtnet_mq_disable = 0;
290 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN,
291 &vtnet_mq_disable, 0, "Disables multiqueue support");
292
293 static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
294 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
295 &vtnet_mq_max_pairs, 0, "Maximum number of multiqueue pairs");
296
297 static int vtnet_tso_maxlen = IP_MAXPACKET;
298 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
299 &vtnet_tso_maxlen, 0, "TSO burst limit");
300
301 static int vtnet_rx_process_limit = 1024;
302 SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
303 &vtnet_rx_process_limit, 0,
304 "Number of RX segments processed in one pass");
305
306 static int vtnet_lro_entry_count = 128;
307 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
308 &vtnet_lro_entry_count, 0, "Software LRO entry count");
309
310 /* Depth of the software LRO mbuf queue; a nonzero value enables sorted LRO. */
311 static int vtnet_lro_mbufq_depth = 0;
312 SYSCTL_UINT(_hw_vtnet, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
313 &vtnet_lro_mbufq_depth, 0, "Depth of software LRO mbuf queue");
314
315 /* Disable ALTQ support. */
316 static int vtnet_altq_disable = 0;
317 SYSCTL_INT(_hw_vtnet, OID_AUTO, altq_disable, CTLFLAG_RDTUN,
318 &vtnet_altq_disable, 0, "Disables ALTQ Support");
319
320 /*
321  * For the driver to be considered to have ALTQ enabled, it must be
322  * compiled into an ALTQ-capable kernel and the tunable
323  * hw.vtnet.altq_disable must be zero.
324  */
325 #define VTNET_ALTQ_ENABLED (VTNET_ALTQ_CAPABLE && (!vtnet_altq_disable))
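/*
 * Illustrative example (values are not defaults): the CTLFLAG_RDTUN sysctls
 * above are loader tunables and may be set from /boot/loader.conf, e.g.:
 *
 *   hw.vtnet.csum_disable=1
 *   hw.vtnet.mq_max_pairs=4
 *   hw.vtnet.altq_disable=1
 */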
326
327
328 static uma_zone_t vtnet_tx_header_zone;
329
330 static struct virtio_feature_desc vtnet_feature_desc[] = {
331 { VIRTIO_NET_F_CSUM, "TxChecksum" },
332 { VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" },
333 { VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, "CtrlRxOffloads" },
334 { VIRTIO_NET_F_MAC, "MAC" },
335 { VIRTIO_NET_F_GSO, "TxGSO" },
336 { VIRTIO_NET_F_GUEST_TSO4, "RxLROv4" },
337 { VIRTIO_NET_F_GUEST_TSO6, "RxLROv6" },
338 { VIRTIO_NET_F_GUEST_ECN, "RxLROECN" },
339 { VIRTIO_NET_F_GUEST_UFO, "RxUFO" },
340 { VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" },
341 { VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" },
342 { VIRTIO_NET_F_HOST_ECN, "TxTSOECN" },
343 { VIRTIO_NET_F_HOST_UFO, "TxUFO" },
344 { VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" },
345 { VIRTIO_NET_F_STATUS, "Status" },
346 { VIRTIO_NET_F_CTRL_VQ, "CtrlVq" },
347 { VIRTIO_NET_F_CTRL_RX, "CtrlRxMode" },
348 { VIRTIO_NET_F_CTRL_VLAN, "CtrlVLANFilter" },
349 { VIRTIO_NET_F_CTRL_RX_EXTRA, "CtrlRxModeExtra" },
350 { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" },
351 { VIRTIO_NET_F_MQ, "Multiqueue" },
352 { VIRTIO_NET_F_CTRL_MAC_ADDR, "CtrlMacAddr" },
353 { VIRTIO_NET_F_SPEED_DUPLEX, "SpeedDuplex" },
354
355 { 0, NULL }
356 };
357
358 static device_method_t vtnet_methods[] = {
359 /* Device methods. */
360 DEVMETHOD(device_probe, vtnet_probe),
361 DEVMETHOD(device_attach, vtnet_attach),
362 DEVMETHOD(device_detach, vtnet_detach),
363 DEVMETHOD(device_suspend, vtnet_suspend),
364 DEVMETHOD(device_resume, vtnet_resume),
365 DEVMETHOD(device_shutdown, vtnet_shutdown),
366
367 /* VirtIO methods. */
368 DEVMETHOD(virtio_attach_completed, vtnet_attach_completed),
369 DEVMETHOD(virtio_config_change, vtnet_config_change),
370
371 DEVMETHOD_END
372 };
373
374 #ifdef DEV_NETMAP
375 #include <dev/netmap/if_vtnet_netmap.h>
376 #endif
377
378 static driver_t vtnet_driver = {
379 .name = "vtnet",
380 .methods = vtnet_methods,
381 .size = sizeof(struct vtnet_softc)
382 };
383 VIRTIO_DRIVER_MODULE(vtnet, vtnet_driver, vtnet_modevent, NULL);
384 MODULE_VERSION(vtnet, 1);
385 MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
386 #ifdef DEV_NETMAP
387 MODULE_DEPEND(vtnet, netmap, 1, 1, 1);
388 #endif
389
390 VIRTIO_SIMPLE_PNPINFO(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter");
391
392 static int
393 vtnet_modevent(module_t mod __unused, int type, void *unused __unused)
394 {
395 int error = 0;
396 static int loaded = 0;
397
398 switch (type) {
399 case MOD_LOAD:
400 if (loaded++ == 0) {
401 vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
402 sizeof(struct vtnet_tx_header),
403 NULL, NULL, NULL, NULL, 0, 0);
404 #ifdef DEBUGNET
405 /*
406 * We need to allocate from this zone in the transmit path, so ensure
407 * that we have at least one item per header available.
408 * XXX add a separate zone like we do for mbufs? otherwise we may alloc
409 * buckets
410 */
411 uma_zone_reserve(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
412 uma_prealloc(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2);
413 #endif
414 }
415 break;
416 case MOD_QUIESCE:
417 if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
418 error = EBUSY;
419 break;
420 case MOD_UNLOAD:
421 if (--loaded == 0) {
422 uma_zdestroy(vtnet_tx_header_zone);
423 vtnet_tx_header_zone = NULL;
424 }
425 break;
426 case MOD_SHUTDOWN:
427 break;
428 default:
429 error = EOPNOTSUPP;
430 break;
431 }
432
433 return (error);
434 }
435
436 static int
437 vtnet_probe(device_t dev)
438 {
439 return (VIRTIO_SIMPLE_PROBE(dev, vtnet));
440 }
441
442 static int
443 vtnet_attach(device_t dev)
444 {
445 struct vtnet_softc *sc;
446 int error;
447
448 sc = device_get_softc(dev);
449 sc->vtnet_dev = dev;
450 virtio_set_feature_desc(dev, vtnet_feature_desc);
451
452 VTNET_CORE_LOCK_INIT(sc);
453 callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
454 vtnet_load_tunables(sc);
455
456 vtnet_alloc_interface(sc);
457 vtnet_setup_sysctl(sc);
458
459 error = vtnet_setup_features(sc);
460 if (error) {
461 device_printf(dev, "cannot setup features\n");
462 goto fail;
463 }
464
465 error = vtnet_alloc_rx_filters(sc);
466 if (error) {
467 device_printf(dev, "cannot allocate Rx filters\n");
468 goto fail;
469 }
470
471 error = vtnet_alloc_rxtx_queues(sc);
472 if (error) {
473 device_printf(dev, "cannot allocate queues\n");
474 goto fail;
475 }
476
477 error = vtnet_alloc_virtqueues(sc);
478 if (error) {
479 device_printf(dev, "cannot allocate virtqueues\n");
480 goto fail;
481 }
482
483 error = vtnet_setup_interface(sc);
484 if (error) {
485 device_printf(dev, "cannot setup interface\n");
486 goto fail;
487 }
488
489 error = virtio_setup_intr(dev, INTR_TYPE_NET);
490 if (error) {
491 device_printf(dev, "cannot setup interrupts\n");
492 ether_ifdetach(sc->vtnet_ifp);
493 goto fail;
494 }
495
496 #ifdef DEV_NETMAP
497 vtnet_netmap_attach(sc);
498 #endif
499 vtnet_start_taskqueues(sc);
500
501 fail:
502 if (error)
503 vtnet_detach(dev);
504
505 return (error);
506 }
507
508 static int
509 vtnet_detach(device_t dev)
510 {
511 struct vtnet_softc *sc;
512 if_t ifp;
513
514 sc = device_get_softc(dev);
515 ifp = sc->vtnet_ifp;
516
517 if (device_is_attached(dev)) {
518 VTNET_CORE_LOCK(sc);
519 vtnet_stop(sc);
520 VTNET_CORE_UNLOCK(sc);
521
522 callout_drain(&sc->vtnet_tick_ch);
523 vtnet_drain_taskqueues(sc);
524
525 ether_ifdetach(ifp);
526 }
527
528 #ifdef DEV_NETMAP
529 netmap_detach(ifp);
530 #endif
531
532 if (sc->vtnet_pfil != NULL) {
533 pfil_head_unregister(sc->vtnet_pfil);
534 sc->vtnet_pfil = NULL;
535 }
536
537 vtnet_free_taskqueues(sc);
538
539 if (sc->vtnet_vlan_attach != NULL) {
540 EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
541 sc->vtnet_vlan_attach = NULL;
542 }
543 if (sc->vtnet_vlan_detach != NULL) {
544 EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
545 sc->vtnet_vlan_detach = NULL;
546 }
547
548 ifmedia_removeall(&sc->vtnet_media);
549
550 if (ifp != NULL) {
551 if_free(ifp);
552 sc->vtnet_ifp = NULL;
553 }
554
555 vtnet_free_rxtx_queues(sc);
556 vtnet_free_rx_filters(sc);
557
558 if (sc->vtnet_ctrl_vq != NULL)
559 vtnet_free_ctrl_vq(sc);
560
561 VTNET_CORE_LOCK_DESTROY(sc);
562
563 return (0);
564 }
565
566 static int
567 vtnet_suspend(device_t dev)
568 {
569 struct vtnet_softc *sc;
570
571 sc = device_get_softc(dev);
572
573 VTNET_CORE_LOCK(sc);
574 vtnet_stop(sc);
575 sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
576 VTNET_CORE_UNLOCK(sc);
577
578 return (0);
579 }
580
581 static int
582 vtnet_resume(device_t dev)
583 {
584 struct vtnet_softc *sc;
585 if_t ifp;
586
587 sc = device_get_softc(dev);
588 ifp = sc->vtnet_ifp;
589
590 VTNET_CORE_LOCK(sc);
591 if (if_getflags(ifp) & IFF_UP)
592 vtnet_init_locked(sc, 0);
593 sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
594 VTNET_CORE_UNLOCK(sc);
595
596 return (0);
597 }
598
599 static int
600 vtnet_shutdown(device_t dev)
601 {
602 /*
603 * Suspend already does all of what we need to
604 * do here; we just never expect to be resumed.
605 */
606 return (vtnet_suspend(dev));
607 }
608
609 static int
610 vtnet_attach_completed(device_t dev)
611 {
612 struct vtnet_softc *sc;
613
614 sc = device_get_softc(dev);
615
616 VTNET_CORE_LOCK(sc);
617 vtnet_attached_set_macaddr(sc);
618 VTNET_CORE_UNLOCK(sc);
619
620 return (0);
621 }
622
623 static int
624 vtnet_config_change(device_t dev)
625 {
626 struct vtnet_softc *sc;
627
628 sc = device_get_softc(dev);
629
630 VTNET_CORE_LOCK(sc);
631 vtnet_update_link_status(sc);
632 if (sc->vtnet_link_active != 0)
633 vtnet_tx_start_all(sc);
634 VTNET_CORE_UNLOCK(sc);
635
636 return (0);
637 }
638
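/*
 * Negotiate the feature set with the host. Checksum, TSO, LRO, and MQ
 * features are masked off according to the tunables above, and features
 * are re-negotiated if the device advertises an invalid MTU or an invalid
 * max_virtqueue_pairs value.
 */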
639 static int
640 vtnet_negotiate_features(struct vtnet_softc *sc)
641 {
642 device_t dev;
643 uint64_t features, negotiated_features;
644 int no_csum;
645
646 dev = sc->vtnet_dev;
647 features = virtio_bus_is_modern(dev) ? VTNET_MODERN_FEATURES :
648 VTNET_LEGACY_FEATURES;
649
650 /*
651 * TSO and LRO are only available when their corresponding checksum
652 * offload feature is also negotiated.
653 */
654 no_csum = vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable);
655 if (no_csum)
656 features &= ~(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM);
657 if (no_csum || vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
658 features &= ~VTNET_TSO_FEATURES;
659 if (no_csum || vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
660 features &= ~VTNET_LRO_FEATURES;
661
662 /* Disable the MQ feature if ALTQ is enabled or MQ is explicitly disabled. */
663 if (VTNET_ALTQ_ENABLED || vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
664 features &= ~VIRTIO_NET_F_MQ;
665
666 negotiated_features = virtio_negotiate_features(dev, features);
667
668 if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
669 uint16_t mtu;
670
671 mtu = virtio_read_dev_config_2(dev,
672 offsetof(struct virtio_net_config, mtu));
673 if (mtu < VTNET_MIN_MTU) {
674 device_printf(dev, "Invalid MTU value: %d. "
675 "MTU feature disabled.\n", mtu);
676 features &= ~VIRTIO_NET_F_MTU;
677 negotiated_features =
678 virtio_negotiate_features(dev, features);
679 }
680 }
681
682 if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
683 uint16_t npairs;
684
685 npairs = virtio_read_dev_config_2(dev,
686 offsetof(struct virtio_net_config, max_virtqueue_pairs));
687 if (npairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
688 npairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) {
689 device_printf(dev, "Invalid max_virtqueue_pairs value: "
690 "%d. Multiqueue feature disabled.\n", npairs);
691 features &= ~VIRTIO_NET_F_MQ;
692 negotiated_features =
693 virtio_negotiate_features(dev, features);
694 }
695 }
696
697 if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
698 virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
699 /*
700 * LRO without mergeable buffers requires special care. This
701 * is not ideal because every receive buffer must be large
702 * enough to hold the maximum TCP packet, the Ethernet header,
703 * and the virtio-net header. This requires up to 34 descriptors with
704 * MCLBYTES clusters. If we do not have indirect descriptors,
705 * LRO is disabled since the virtqueue will not contain very
706 * many receive buffers.
707 */
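/*
 * Rough arithmetic behind the figure above: a 64KB LRO frame split across
 * 2KB MCLBYTES clusters alone needs about 32 descriptors; the Ethernet
 * and virtio-net headers push the budget to VTNET_RX_SEGS_LRO_NOMRG
 * segments per receive chain.
 */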
708 if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
709 device_printf(dev,
710 "Host LRO disabled since both mergeable buffers "
711 "and indirect descriptors were not negotiated\n");
712 features &= ~VTNET_LRO_FEATURES;
713 negotiated_features =
714 virtio_negotiate_features(dev, features);
715 } else
716 sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
717 }
718
719 sc->vtnet_features = negotiated_features;
720 sc->vtnet_negotiated_features = negotiated_features;
721
722 return (virtio_finalize_features(dev));
723 }
724
725 static int
726 vtnet_setup_features(struct vtnet_softc *sc)
727 {
728 device_t dev;
729 int error;
730
731 dev = sc->vtnet_dev;
732
733 error = vtnet_negotiate_features(sc);
734 if (error)
735 return (error);
736
737 if (virtio_with_feature(dev, VIRTIO_F_VERSION_1))
738 sc->vtnet_flags |= VTNET_FLAG_MODERN;
739 if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
740 sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
741 if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
742 sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;
743
744 if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
745 /* This feature should always be negotiated. */
746 sc->vtnet_flags |= VTNET_FLAG_MAC;
747 }
748
749 if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
750 sc->vtnet_max_mtu = virtio_read_dev_config_2(dev,
751 offsetof(struct virtio_net_config, mtu));
752 } else
753 sc->vtnet_max_mtu = VTNET_MAX_MTU;
754
755 if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
756 sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
757 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
758 } else if (vtnet_modern(sc)) {
759 /* This is identical to the mergeable header. */
760 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1);
761 } else
762 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);
763
764 if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
765 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE;
766 else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
767 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG;
768 else
769 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE;
770
771 /*
772 * Favor "hardware" LRO if negotiated, but support software LRO as
773 * a fallback; there is usually little benefit (or worse) with both.
774 */
775 if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) == 0 &&
776 virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) == 0)
777 sc->vtnet_flags |= VTNET_FLAG_SW_LRO;
778
779 if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
780 virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
781 virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
782 sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX;
783 else
784 sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN;
785
786 sc->vtnet_req_vq_pairs = 1;
787 sc->vtnet_max_vq_pairs = 1;
788
789 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
790 sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;
791
792 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
793 sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
794 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
795 sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
796 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
797 sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
798
799 if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
800 sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
801 offsetof(struct virtio_net_config,
802 max_virtqueue_pairs));
803 }
804 }
805
806 if (sc->vtnet_max_vq_pairs > 1) {
807 int req;
808
809 /*
810 * Limit the maximum number of requested queue pairs to the
811 * number of CPUs and the configured maximum.
812 */
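/*
 * For example (illustrative): with the default tunable (assumed to exceed
 * the CPU count), a 4-vCPU guest attached to a device advertising 8
 * virtqueue pairs ends up with min(8, 4) = 4 requested pairs, so
 * VTNET_FLAG_MQ is set.
 */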
813 req = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
814 if (req < 0)
815 req = 1;
816 if (req == 0)
817 req = mp_ncpus;
818 if (req > sc->vtnet_max_vq_pairs)
819 req = sc->vtnet_max_vq_pairs;
820 if (req > mp_ncpus)
821 req = mp_ncpus;
822 if (req > 1) {
823 sc->vtnet_req_vq_pairs = req;
824 sc->vtnet_flags |= VTNET_FLAG_MQ;
825 }
826 }
827
828 return (0);
829 }
830
831 static int
832 vtnet_init_rxq(struct vtnet_softc *sc, int id)
833 {
834 struct vtnet_rxq *rxq;
835
836 rxq = &sc->vtnet_rxqs[id];
837
838 snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
839 device_get_nameunit(sc->vtnet_dev), id);
840 mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);
841
842 rxq->vtnrx_sc = sc;
843 rxq->vtnrx_id = id;
844
845 rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
846 if (rxq->vtnrx_sg == NULL)
847 return (ENOMEM);
848
849 #if defined(INET) || defined(INET6)
850 if (vtnet_software_lro(sc)) {
851 if (tcp_lro_init_args(&rxq->vtnrx_lro, sc->vtnet_ifp,
852 sc->vtnet_lro_entry_count, sc->vtnet_lro_mbufq_depth) != 0)
853 return (ENOMEM);
854 }
855 #endif
856
857 NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
858 rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
859 taskqueue_thread_enqueue, &rxq->vtnrx_tq);
860
861 return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
862 }
863
864 static int
865 vtnet_init_txq(struct vtnet_softc *sc, int id)
866 {
867 struct vtnet_txq *txq;
868
869 txq = &sc->vtnet_txqs[id];
870
871 snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
872 device_get_nameunit(sc->vtnet_dev), id);
873 mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);
874
875 txq->vtntx_sc = sc;
876 txq->vtntx_id = id;
877
878 txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
879 if (txq->vtntx_sg == NULL)
880 return (ENOMEM);
881
882 if (!VTNET_ALTQ_ENABLED) {
883 txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
884 M_NOWAIT, &txq->vtntx_mtx);
885 if (txq->vtntx_br == NULL)
886 return (ENOMEM);
887
888 TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
889 }
890 TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
891 txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
892 taskqueue_thread_enqueue, &txq->vtntx_tq);
893 if (txq->vtntx_tq == NULL)
894 return (ENOMEM);
895
896 return (0);
897 }
898
899 static int
900 vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
901 {
902 int i, npairs, error;
903
904 npairs = sc->vtnet_max_vq_pairs;
905
906 sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
907 M_NOWAIT | M_ZERO);
908 sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
909 M_NOWAIT | M_ZERO);
910 if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
911 return (ENOMEM);
912
913 for (i = 0; i < npairs; i++) {
914 error = vtnet_init_rxq(sc, i);
915 if (error)
916 return (error);
917 error = vtnet_init_txq(sc, i);
918 if (error)
919 return (error);
920 }
921
922 vtnet_set_rx_process_limit(sc);
923 vtnet_setup_queue_sysctl(sc);
924
925 return (0);
926 }
927
928 static void
929 vtnet_destroy_rxq(struct vtnet_rxq *rxq)
930 {
931
932 rxq->vtnrx_sc = NULL;
933 rxq->vtnrx_id = -1;
934
935 #if defined(INET) || defined(INET6)
936 tcp_lro_free(&rxq->vtnrx_lro);
937 #endif
938
939 if (rxq->vtnrx_sg != NULL) {
940 sglist_free(rxq->vtnrx_sg);
941 rxq->vtnrx_sg = NULL;
942 }
943
944 if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
945 mtx_destroy(&rxq->vtnrx_mtx);
946 }
947
948 static void
949 vtnet_destroy_txq(struct vtnet_txq *txq)
950 {
951
952 txq->vtntx_sc = NULL;
953 txq->vtntx_id = -1;
954
955 if (txq->vtntx_sg != NULL) {
956 sglist_free(txq->vtntx_sg);
957 txq->vtntx_sg = NULL;
958 }
959
960 if (!VTNET_ALTQ_ENABLED) {
961 if (txq->vtntx_br != NULL) {
962 buf_ring_free(txq->vtntx_br, M_DEVBUF);
963 txq->vtntx_br = NULL;
964 }
965 }
966
967 if (mtx_initialized(&txq->vtntx_mtx) != 0)
968 mtx_destroy(&txq->vtntx_mtx);
969 }
970
971 static void
972 vtnet_free_rxtx_queues(struct vtnet_softc *sc)
973 {
974 int i;
975
976 if (sc->vtnet_rxqs != NULL) {
977 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
978 vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
979 free(sc->vtnet_rxqs, M_DEVBUF);
980 sc->vtnet_rxqs = NULL;
981 }
982
983 if (sc->vtnet_txqs != NULL) {
984 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
985 vtnet_destroy_txq(&sc->vtnet_txqs[i]);
986 free(sc->vtnet_txqs, M_DEVBUF);
987 sc->vtnet_txqs = NULL;
988 }
989 }
990
991 static int
992 vtnet_alloc_rx_filters(struct vtnet_softc *sc)
993 {
994
995 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
996 sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
997 M_DEVBUF, M_NOWAIT | M_ZERO);
998 if (sc->vtnet_mac_filter == NULL)
999 return (ENOMEM);
1000 }
1001
1002 if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
1003 sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
1004 VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
1005 if (sc->vtnet_vlan_filter == NULL)
1006 return (ENOMEM);
1007 }
1008
1009 return (0);
1010 }
1011
1012 static void
1013 vtnet_free_rx_filters(struct vtnet_softc *sc)
1014 {
1015
1016 if (sc->vtnet_mac_filter != NULL) {
1017 free(sc->vtnet_mac_filter, M_DEVBUF);
1018 sc->vtnet_mac_filter = NULL;
1019 }
1020
1021 if (sc->vtnet_vlan_filter != NULL) {
1022 free(sc->vtnet_vlan_filter, M_DEVBUF);
1023 sc->vtnet_vlan_filter = NULL;
1024 }
1025 }
1026
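/*
 * Allocate the virtqueues: one RX/TX pair per supported queue pair, plus
 * the control virtqueue when negotiated. Only the first vtnet_req_vq_pairs
 * pairs are given interrupt handlers; the remaining pairs are allocated
 * with minimal resources since they will not be used.
 */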
1027 static int
1028 vtnet_alloc_virtqueues(struct vtnet_softc *sc)
1029 {
1030 device_t dev;
1031 struct vq_alloc_info *info;
1032 struct vtnet_rxq *rxq;
1033 struct vtnet_txq *txq;
1034 int i, idx, nvqs, error;
1035
1036 dev = sc->vtnet_dev;
1037
1038 nvqs = sc->vtnet_max_vq_pairs * 2;
1039 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
1040 nvqs++;
1041
1042 info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
1043 if (info == NULL)
1044 return (ENOMEM);
1045
1046 for (i = 0, idx = 0; i < sc->vtnet_req_vq_pairs; i++, idx += 2) {
1047 rxq = &sc->vtnet_rxqs[i];
1048 VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
1049 vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
1050 "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
1051
1052 txq = &sc->vtnet_txqs[i];
1053 VQ_ALLOC_INFO_INIT(&info[idx + 1], sc->vtnet_tx_nsegs,
1054 vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
1055 "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
1056 }
1057
1058 /* These queues will not be used so allocate the minimum resources. */
1059 for (; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
1060 rxq = &sc->vtnet_rxqs[i];
1061 VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, rxq, &rxq->vtnrx_vq,
1062 "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);
1063
1064 txq = &sc->vtnet_txqs[i];
1065 VQ_ALLOC_INFO_INIT(&info[idx + 1], 0, NULL, txq, &txq->vtntx_vq,
1066 "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
1067 }
1068
1069 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
1070 VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
1071 &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
1072 }
1073
1074 error = virtio_alloc_virtqueues(dev, nvqs, info);
1075 free(info, M_TEMP);
1076
1077 return (error);
1078 }
1079
1080 static void
1081 vtnet_alloc_interface(struct vtnet_softc *sc)
1082 {
1083 device_t dev;
1084 if_t ifp;
1085
1086 dev = sc->vtnet_dev;
1087
1088 ifp = if_alloc(IFT_ETHER);
1089 sc->vtnet_ifp = ifp;
1090 if_setsoftc(ifp, sc);
1091 if_initname(ifp, device_get_name(dev), device_get_unit(dev));
1092 }
1093
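/*
 * Create and configure the ifnet: capabilities (checksum, TSO, LRO, VLAN)
 * are derived from the negotiated features before the Ethernet interface
 * is attached and the pfil head is registered.
 */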
1094 static int
1095 vtnet_setup_interface(struct vtnet_softc *sc)
1096 {
1097 device_t dev;
1098 struct pfil_head_args pa;
1099 if_t ifp;
1100
1101 dev = sc->vtnet_dev;
1102 ifp = sc->vtnet_ifp;
1103
1104 if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
1105 if_setbaudrate(ifp, IF_Gbps(10));
1106 if_setinitfn(ifp, vtnet_init);
1107 if_setioctlfn(ifp, vtnet_ioctl);
1108 if_setgetcounterfn(ifp, vtnet_get_counter);
1109
1110 if (!VTNET_ALTQ_ENABLED) {
1111 if_settransmitfn(ifp, vtnet_txq_mq_start);
1112 if_setqflushfn(ifp, vtnet_qflush);
1113 } else {
1114 struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
1115 if_setstartfn(ifp, vtnet_start);
1116 if_setsendqlen(ifp, virtqueue_size(vq) - 1);
1117 if_setsendqready(ifp);
1118 }
1119
1120 vtnet_get_macaddr(sc);
1121
1122 if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
1123 if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0);
1124
1125 ifmedia_init(&sc->vtnet_media, 0, vtnet_ifmedia_upd, vtnet_ifmedia_sts);
1126 ifmedia_add(&sc->vtnet_media, IFM_ETHER | IFM_AUTO, 0, NULL);
1127 ifmedia_set(&sc->vtnet_media, IFM_ETHER | IFM_AUTO);
1128
1129 if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
1130 int gso;
1131
1132 if_setcapabilitiesbit(ifp, IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6, 0);
1133
1134 gso = virtio_with_feature(dev, VIRTIO_NET_F_GSO);
1135 if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
1136 if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
1137 if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
1138 if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
1139 if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
1140 sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
1141
1142 if (if_getcapabilities(ifp) & (IFCAP_TSO4 | IFCAP_TSO6)) {
1143 int tso_maxlen;
1144
1145 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTSO, 0);
1146
1147 tso_maxlen = vtnet_tunable_int(sc, "tso_maxlen",
1148 vtnet_tso_maxlen);
1149 if_sethwtsomax(ifp, tso_maxlen -
1150 (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
1151 if_sethwtsomaxsegcount(ifp, sc->vtnet_tx_nsegs - 1);
1152 if_sethwtsomaxsegsize(ifp, PAGE_SIZE);
1153 }
1154 }
1155
1156 if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
1157 /* BMV: Rx checksums not distinguished between IPv4 and IPv6. */
1158 if_setcapabilitiesbit(ifp, IFCAP_RXCSUM, 0);
1159 if_setcapabilitiesbit(ifp, IFCAP_RXCSUM_IPV6, 0);
1160
1161 if (vtnet_tunable_int(sc, "fixup_needs_csum",
1162 vtnet_fixup_needs_csum) != 0)
1163 sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM;
1164
1165 /* Support either "hardware" or software LRO. */
1166 if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
1167 }
1168
1169 if (if_getcapabilities(ifp) & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6)) {
1170 /*
1171 * VirtIO does not support VLAN tagging, but we can fake
1172 * it by inserting and removing the 802.1Q header during
1173 * transmit and receive. We are then able to do checksum
1174 * offloading of VLAN frames.
1175 */
1176 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM, 0);
1177 }
1178
1179 if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO)
1180 if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
1181 if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
1182 if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0);
1183
1184 /*
1185 * Capabilities after here are not enabled by default.
1186 */
1187 if_setcapenable(ifp, if_getcapabilities(ifp));
1188
1189 if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
1190 if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);
1191
1192 sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
1193 vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
1194 sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
1195 vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
1196 }
1197
1198 ether_ifattach(ifp, sc->vtnet_hwaddr);
1199
1200 /* Tell the upper layer(s) we support long frames. */
1201 if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));
1202
1203 DEBUGNET_SET(ifp, vtnet);
1204
1205 pa.pa_version = PFIL_VERSION;
1206 pa.pa_flags = PFIL_IN;
1207 pa.pa_type = PFIL_TYPE_ETHERNET;
1208 pa.pa_headname = if_name(ifp);
1209 sc->vtnet_pfil = pfil_head_register(&pa);
1210
1211 return (0);
1212 }
1213
1214 static int
1215 vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu)
1216 {
1217 int framesz;
1218
1219 if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
1220 return (MJUMPAGESIZE);
1221 else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1222 return (MCLBYTES);
1223
1224 /*
1225 * Try to scale the receive mbuf cluster size from the MTU. We
1226 * could also use the VQ size to influence the selected size,
1227 * but that would only matter for very small queues.
1228 */
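/*
 * Worked example (modern device, no strict-alignment padding): an MTU of
 * 1500 gives framesz = 12 (virtio_net_hdr_v1) + 18 (ether_vlan_header) +
 * 1500 = 1530, which fits in MCLBYTES; an MTU of 9000 gives 9030 and
 * selects MJUM9BYTES.
 */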
1229 if (vtnet_modern(sc)) {
1230 MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1));
1231 framesz = sizeof(struct virtio_net_hdr_v1);
1232 } else
1233 framesz = sizeof(struct vtnet_rx_header);
1234 framesz += sizeof(struct ether_vlan_header) + mtu;
1235 /*
1236 * Account for the offsetting we'll do elsewhere so we allocate the
1237 * right size for the mtu.
1238 */
1239 if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) {
1240 framesz += VTNET_ETHER_ALIGN;
1241 }
1242
1243 if (framesz <= MCLBYTES)
1244 return (MCLBYTES);
1245 else if (framesz <= MJUMPAGESIZE)
1246 return (MJUMPAGESIZE);
1247 else if (framesz <= MJUM9BYTES)
1248 return (MJUM9BYTES);
1249
1250 /* Sane default; avoid 16KB clusters. */
1251 return (MCLBYTES);
1252 }
1253
1254 static int
1255 vtnet_ioctl_mtu(struct vtnet_softc *sc, u_int mtu)
1256 {
1257 if_t ifp;
1258 int clustersz;
1259
1260 ifp = sc->vtnet_ifp;
1261 VTNET_CORE_LOCK_ASSERT(sc);
1262
1263 if (if_getmtu(ifp) == mtu)
1264 return (0);
1265 else if (mtu < ETHERMIN || mtu > sc->vtnet_max_mtu)
1266 return (EINVAL);
1267
1268 if_setmtu(ifp, mtu);
1269 clustersz = vtnet_rx_cluster_size(sc, mtu);
1270
1271 if (clustersz != sc->vtnet_rx_clustersz &&
1272 if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1273 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1274 vtnet_init_locked(sc, 0);
1275 }
1276
1277 return (0);
1278 }
1279
1280 static int
1281 vtnet_ioctl_ifflags(struct vtnet_softc *sc)
1282 {
1283 if_t ifp;
1284 int drv_running;
1285
1286 ifp = sc->vtnet_ifp;
1287 drv_running = (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0;
1288
1289 VTNET_CORE_LOCK_ASSERT(sc);
1290
1291 if ((if_getflags(ifp) & IFF_UP) == 0) {
1292 if (drv_running)
1293 vtnet_stop(sc);
1294 goto out;
1295 }
1296
1297 if (!drv_running) {
1298 vtnet_init_locked(sc, 0);
1299 goto out;
1300 }
1301
1302 if ((if_getflags(ifp) ^ sc->vtnet_if_flags) &
1303 (IFF_PROMISC | IFF_ALLMULTI)) {
1304 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
1305 vtnet_rx_filter(sc);
1306 else {
1307 /*
1308 * We don't support filtering out multicast, so
1309 * ALLMULTI is always set.
1310 */
1311 if_setflagbits(ifp, IFF_ALLMULTI, 0);
1312 if_setflagbits(ifp, IFF_PROMISC, 0);
1313 }
1314 }
1315
1316 out:
1317 sc->vtnet_if_flags = if_getflags(ifp);
1318 return (0);
1319 }
1320
1321 static int
1322 vtnet_ioctl_multi(struct vtnet_softc *sc)
1323 {
1324 if_t ifp;
1325
1326 ifp = sc->vtnet_ifp;
1327
1328 VTNET_CORE_LOCK_ASSERT(sc);
1329
1330 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX &&
1331 if_getdrvflags(ifp) & IFF_DRV_RUNNING)
1332 vtnet_rx_filter_mac(sc);
1333
1334 return (0);
1335 }
1336
1337 static int
1338 vtnet_ioctl_ifcap(struct vtnet_softc *sc, struct ifreq *ifr)
1339 {
1340 if_t ifp;
1341 int mask, reinit, update;
1342
1343 ifp = sc->vtnet_ifp;
1344 mask = (ifr->ifr_reqcap & if_getcapabilities(ifp)) ^ if_getcapenable(ifp);
1345 reinit = update = 0;
1346
1347 VTNET_CORE_LOCK_ASSERT(sc);
1348
1349 if (mask & IFCAP_TXCSUM) {
1350 if (if_getcapenable(ifp) & IFCAP_TXCSUM &&
1351 if_getcapenable(ifp) & IFCAP_TSO4) {
1352 /* Disable tso4, because txcsum will be disabled. */
1353 if_setcapenablebit(ifp, 0, IFCAP_TSO4);
1354 if_sethwassistbits(ifp, 0, CSUM_IP_TSO);
1355 mask &= ~IFCAP_TSO4;
1356 }
1357 if_togglecapenable(ifp, IFCAP_TXCSUM);
1358 if_togglehwassist(ifp, VTNET_CSUM_OFFLOAD);
1359 }
1360 if (mask & IFCAP_TXCSUM_IPV6) {
1361 if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6 &&
1362 if_getcapenable(ifp) & IFCAP_TSO6) {
1363 /* Disable tso6, because txcsum6 will be disabled. */
1364 if_setcapenablebit(ifp, 0, IFCAP_TSO6);
1365 if_sethwassistbits(ifp, 0, CSUM_IP6_TSO);
1366 mask &= ~IFCAP_TSO6;
1367 }
1368 if_togglecapenable(ifp, IFCAP_TXCSUM_IPV6);
1369 if_togglehwassist(ifp, VTNET_CSUM_OFFLOAD_IPV6);
1370 }
1371 if (mask & IFCAP_TSO4) {
1372 if (if_getcapenable(ifp) & (IFCAP_TXCSUM | IFCAP_TSO4)) {
1373 /* tso4 can only be enabled, if txcsum is enabled. */
1374 if_togglecapenable(ifp, IFCAP_TSO4);
1375 if_togglehwassist(ifp, CSUM_IP_TSO);
1376 }
1377 }
1378 if (mask & IFCAP_TSO6) {
1379 if (if_getcapenable(ifp) & (IFCAP_TXCSUM_IPV6 | IFCAP_TSO6)) {
1380 /* tso6 can only be enabled, if txcsum6 is enabled. */
1381 if_togglecapenable(ifp, IFCAP_TSO6);
1382 if_togglehwassist(ifp, CSUM_IP6_TSO);
1383 }
1384 }
1385
1386 if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) {
1387 /*
1388 * These Rx features require the negotiated features to
1389 * be updated. Avoid a full reinit if possible.
1390 */
1391 if (sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
1392 update = 1;
1393 else
1394 reinit = 1;
1395
1396 /* BMV: Avoid needless renegotiation for just software LRO. */
1397 if ((mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) ==
1398 IFCAP_LRO && vtnet_software_lro(sc))
1399 reinit = update = 0;
1400 /*
1401 * VirtIO does not distinguish between receive checksum offload
1402 * for IPv4 and IPv6 packets, so treat them as a pair.
1403 */
1404 if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
1405 if_togglecapenable(ifp, IFCAP_RXCSUM);
1406 if_togglecapenable(ifp, IFCAP_RXCSUM_IPV6);
1407 }
1408 if (mask & IFCAP_LRO)
1409 if_togglecapenable(ifp, IFCAP_LRO);
1410 /* Both SW and HW TCP LRO require receive checksum offload. */
1411 if ((if_getcapenable(ifp) &
1412 (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0)
1413 if_setcapenablebit(ifp, 0, IFCAP_LRO);
1414 }
1415
1416 if (mask & IFCAP_VLAN_HWFILTER) {
1417 /* These Rx features require renegotiation. */
1418 reinit = 1;
1419
1420 if (mask & IFCAP_VLAN_HWFILTER)
1421 if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
1422 }
1423
1424 if (mask & IFCAP_VLAN_HWTSO)
1425 if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
1426 if (mask & IFCAP_VLAN_HWTAGGING)
1427 if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);
1428
1429 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
1430 if (reinit) {
1431 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
1432 vtnet_init_locked(sc, 0);
1433 } else if (update)
1434 vtnet_update_rx_offloads(sc);
1435 }
1436
1437 return (0);
1438 }
1439
1440 static int
1441 vtnet_ioctl(if_t ifp, u_long cmd, caddr_t data)
1442 {
1443 struct vtnet_softc *sc;
1444 struct ifreq *ifr;
1445 int error;
1446
1447 sc = if_getsoftc(ifp);
1448 ifr = (struct ifreq *) data;
1449 error = 0;
1450
1451 switch (cmd) {
1452 case SIOCSIFMTU:
1453 VTNET_CORE_LOCK(sc);
1454 error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu);
1455 VTNET_CORE_UNLOCK(sc);
1456 break;
1457
1458 case SIOCSIFFLAGS:
1459 VTNET_CORE_LOCK(sc);
1460 error = vtnet_ioctl_ifflags(sc);
1461 VTNET_CORE_UNLOCK(sc);
1462 break;
1463
1464 case SIOCADDMULTI:
1465 case SIOCDELMULTI:
1466 VTNET_CORE_LOCK(sc);
1467 error = vtnet_ioctl_multi(sc);
1468 VTNET_CORE_UNLOCK(sc);
1469 break;
1470
1471 case SIOCSIFMEDIA:
1472 case SIOCGIFMEDIA:
1473 error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
1474 break;
1475
1476 case SIOCSIFCAP:
1477 VTNET_CORE_LOCK(sc);
1478 error = vtnet_ioctl_ifcap(sc, ifr);
1479 VTNET_CORE_UNLOCK(sc);
1480 VLAN_CAPABILITIES(ifp);
1481 break;
1482
1483 default:
1484 error = ether_ioctl(ifp, cmd, data);
1485 break;
1486 }
1487
1488 VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);
1489
1490 return (error);
1491 }
1492
1493 static int
1494 vtnet_rxq_populate(struct vtnet_rxq *rxq)
1495 {
1496 struct virtqueue *vq;
1497 int nbufs, error;
1498
1499 #ifdef DEV_NETMAP
1500 error = vtnet_netmap_rxq_populate(rxq);
1501 if (error >= 0)
1502 return (error);
1503 #endif /* DEV_NETMAP */
1504
1505 vq = rxq->vtnrx_vq;
1506 error = ENOSPC;
1507
1508 for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
1509 error = vtnet_rxq_new_buf(rxq);
1510 if (error)
1511 break;
1512 }
1513
1514 if (nbufs > 0) {
1515 virtqueue_notify(vq);
1516 /*
1517 * EMSGSIZE signifies the virtqueue did not have enough
1518 * entries available to hold the last mbuf. This is not
1519 * an error.
1520 */
1521 if (error == EMSGSIZE)
1522 error = 0;
1523 }
1524
1525 return (error);
1526 }
1527
1528 static void
1529 vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
1530 {
1531 struct virtqueue *vq;
1532 struct mbuf *m;
1533 int last;
1534 #ifdef DEV_NETMAP
1535 struct netmap_kring *kring = netmap_kring_on(NA(rxq->vtnrx_sc->vtnet_ifp),
1536 rxq->vtnrx_id, NR_RX);
1537 #else /* !DEV_NETMAP */
1538 void *kring = NULL;
1539 #endif /* !DEV_NETMAP */
1540
1541 vq = rxq->vtnrx_vq;
1542 last = 0;
1543
1544 while ((m = virtqueue_drain(vq, &last)) != NULL) {
1545 if (kring == NULL)
1546 m_freem(m);
1547 }
1548
1549 KASSERT(virtqueue_empty(vq),
1550 ("%s: mbufs remaining in rx queue %p", __func__, rxq));
1551 }
1552
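/*
 * Allocate a receive mbuf of the configured cluster size, or a chain of
 * nbufs such mbufs when LRO without mergeable buffers is negotiated. The
 * tail of the chain is optionally returned through m_tailp.
 */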
1553 static struct mbuf *
1554 vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
1555 {
1556 struct mbuf *m_head, *m_tail, *m;
1557 int i, size;
1558
1559 m_head = NULL;
1560 size = sc->vtnet_rx_clustersz;
1561
1562 KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1563 ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs));
1564
1565 for (i = 0; i < nbufs; i++) {
1566 m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size);
1567 if (m == NULL) {
1568 sc->vtnet_stats.mbuf_alloc_failed++;
1569 m_freem(m_head);
1570 return (NULL);
1571 }
1572
1573 m->m_len = size;
1574 /*
1575 * Need to offset the mbuf if the header we're going to add
1576 * will misalign.
1577 */
1578 if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) {
1579 m_adj(m, VTNET_ETHER_ALIGN);
1580 }
1581 if (m_head != NULL) {
1582 m_tail->m_next = m;
1583 m_tail = m;
1584 } else
1585 m_head = m_tail = m;
1586 }
1587
1588 if (m_tailp != NULL)
1589 *m_tailp = m_tail;
1590
1591 return (m_head);
1592 }
1593
1594 /*
1595 * Slow path for when LRO without mergeable buffers is negotiated.
1596 */
1597 static int
1598 vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
1599 int len0)
1600 {
1601 struct vtnet_softc *sc;
1602 struct mbuf *m, *m_prev, *m_new, *m_tail;
1603 int len, clustersz, nreplace, error;
1604
1605 sc = rxq->vtnrx_sc;
1606 clustersz = sc->vtnet_rx_clustersz;
1607 /*
1608 * Need to offset the mbuf if the header we're going to add will
1609 * misalign; account for that here.
1610 */
1611 if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0)
1612 clustersz -= VTNET_ETHER_ALIGN;
1613
1614 m_prev = NULL;
1615 m_tail = NULL;
1616 nreplace = 0;
1617
1618 m = m0;
1619 len = len0;
1620
1621 /*
1622 * Since these mbuf chains are so large, avoid allocating a complete
1623 * replacement when the received frame did not consume the entire
1624 * chain. Unused mbufs are moved to the tail of the replacement chain.
1625 */
1626 while (len > 0) {
1627 if (m == NULL) {
1628 sc->vtnet_stats.rx_frame_too_large++;
1629 return (EMSGSIZE);
1630 }
1631
1632 /*
1633 * Every mbuf should have the expected cluster size since that
1634 * is also used to allocate the replacements.
1635 */
1636 KASSERT(m->m_len == clustersz,
1637 ("%s: mbuf size %d not expected cluster size %d", __func__,
1638 m->m_len, clustersz));
1639
1640 m->m_len = MIN(m->m_len, len);
1641 len -= m->m_len;
1642
1643 m_prev = m;
1644 m = m->m_next;
1645 nreplace++;
1646 }
1647
1648 KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
1649 ("%s: invalid replacement mbuf count %d max %d", __func__,
1650 nreplace, sc->vtnet_rx_nmbufs));
1651
1652 m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
1653 if (m_new == NULL) {
1654 m_prev->m_len = clustersz;
1655 return (ENOBUFS);
1656 }
1657
1658 /*
1659 * Move any unused mbufs from the received mbuf chain onto the
1660 * end of the replacement chain.
1661 */
1662 if (m_prev->m_next != NULL) {
1663 m_tail->m_next = m_prev->m_next;
1664 m_prev->m_next = NULL;
1665 }
1666
1667 error = vtnet_rxq_enqueue_buf(rxq, m_new);
1668 if (error) {
1669 /*
1670 * The replacement is supposed to be a copy of the one just
1671 * dequeued, so this is a very unexpected error.
1672 *
1673 * Restore the m0 chain to the original state if it was
1674 * modified so we can then discard it.
1675 */
1676 if (m_tail->m_next != NULL) {
1677 m_prev->m_next = m_tail->m_next;
1678 m_tail->m_next = NULL;
1679 }
1680 m_prev->m_len = clustersz;
1681 sc->vtnet_stats.rx_enq_replacement_failed++;
1682 m_freem(m_new);
1683 }
1684
1685 return (error);
1686 }
1687
1688 static int
1689 vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
1690 {
1691 struct vtnet_softc *sc;
1692 struct mbuf *m_new;
1693 int error;
1694
1695 sc = rxq->vtnrx_sc;
1696
1697 if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
1698 return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len));
1699
1700 MPASS(m->m_next == NULL);
1701 if (m->m_len < len)
1702 return (EMSGSIZE);
1703
1704 m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
1705 if (m_new == NULL)
1706 return (ENOBUFS);
1707
1708 error = vtnet_rxq_enqueue_buf(rxq, m_new);
1709 if (error) {
1710 sc->vtnet_stats.rx_enq_replacement_failed++;
1711 m_freem(m_new);
1712 } else
1713 m->m_len = len;
1714
1715 return (error);
1716 }
1717
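/*
 * Post a receive mbuf (chain) to the virtqueue. Modern and mergeable-buffer
 * devices carry the virtio-net header inline at the start of the buffer;
 * legacy devices use a separate header segment from the vtnet_rx_header.
 */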
1718 static int
1719 vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1720 {
1721 struct vtnet_softc *sc;
1722 struct sglist *sg;
1723 int header_inlined, error;
1724
1725 sc = rxq->vtnrx_sc;
1726 sg = rxq->vtnrx_sg;
1727
1728 KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
1729 ("%s: mbuf chain without LRO_NOMRG", __func__));
1730 VTNET_RXQ_LOCK_ASSERT(rxq);
1731
1732 sglist_reset(sg);
1733 header_inlined = vtnet_modern(sc) ||
1734 (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */
1735
1736 /*
1737 * Note: The mbuf has already been adjusted at allocation time if we
1738 * have to do strict alignment.
1739 */
1740 if (header_inlined)
1741 error = sglist_append_mbuf(sg, m);
1742 else {
1743 struct vtnet_rx_header *rxhdr =
1744 mtod(m, struct vtnet_rx_header *);
1745 MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
1746
1747 /* Append the header and remaining mbuf data. */
1748 error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
1749 if (error)
1750 return (error);
1751 error = sglist_append(sg, &rxhdr[1],
1752 m->m_len - sizeof(struct vtnet_rx_header));
1753 if (error)
1754 return (error);
1755
1756 if (m->m_next != NULL)
1757 error = sglist_append_mbuf(sg, m->m_next);
1758 }
1759
1760 if (error)
1761 return (error);
1762
1763 return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg));
1764 }
1765
1766 static int
1767 vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
1768 {
1769 struct vtnet_softc *sc;
1770 struct mbuf *m;
1771 int error;
1772
1773 sc = rxq->vtnrx_sc;
1774
1775 m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
1776 if (m == NULL)
1777 return (ENOBUFS);
1778
1779 error = vtnet_rxq_enqueue_buf(rxq, m);
1780 if (error)
1781 m_freem(m);
1782
1783 return (error);
1784 }
1785
1786 #if defined(INET) || defined(INET6)
1787 static int
1788 vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, bool isipv6,
1789 int protocol, struct virtio_net_hdr *hdr)
1790 {
1791 struct vtnet_softc *sc;
1792
1793 /*
1794 * The packet likely came from another VM on the same host or from the
1795 * host itself, where checksum offloading was performed; Tx/Rx is
1796 * basically a memcpy, so the checksum has little value so far.
1797 */
1798
1799 KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP,
1800 ("%s: unsupported IP protocol %d", __func__, protocol));
1801
1802 /*
1803 * If the user doesn't want us to fix it up here by computing the
1804 * checksum, simply pass the checksum request along by setting the
1805 * corresponding mbuf flag (e.g., CSUM_TCP).
1806 */
1807 sc = rxq->vtnrx_sc;
1808 if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
1809 switch (protocol) {
1810 case IPPROTO_TCP:
1811 m->m_pkthdr.csum_flags |=
1812 (isipv6 ? CSUM_TCP_IPV6 : CSUM_TCP);
1813 break;
1814 case IPPROTO_UDP:
1815 m->m_pkthdr.csum_flags |=
1816 (isipv6 ? CSUM_UDP_IPV6 : CSUM_UDP);
1817 break;
1818 }
1819 m->m_pkthdr.csum_data = hdr->csum_offset;
1820 return (0);
1821 }
1822
1823 /*
1824 * Compute the checksum in the driver so the packet will contain a
1825 * valid checksum. The checksum is at csum_offset from csum_start.
1826 */
1827 int csum_off, csum_end;
1828 uint16_t csum;
1829
1830 csum_off = hdr->csum_start + hdr->csum_offset;
1831 csum_end = csum_off + sizeof(uint16_t);
1832
1833 /* Assume checksum will be in the first mbuf. */
1834 if (m->m_len < csum_end || m->m_pkthdr.len < csum_end) {
1835 sc->vtnet_stats.rx_csum_bad_offset++;
1836 return (1);
1837 }
1838
1839 /*
1840 * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
1841 * checksum and write it at the specified offset. We could
1842 * try to verify the packet: csum_start should probably
1843 * correspond to the start of the TCP/UDP header.
1844 *
1845 * BMV: Need to properly handle UDP with zero checksum. Is
1846 * the IPv4 header checksum implicitly validated?
1847 */
1848 csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
1849 *(uint16_t *)(mtodo(m, csum_off)) = csum;
1850 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1851 m->m_pkthdr.csum_data = 0xFFFF;
1852
1853 return (0);
1854 }
1855
1856 static void
1857 vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m, int protocol)
1858 {
1859 KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP,
1860 ("%s: unsupported IP protocol %d", __func__, protocol));
1861
1862 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
1863 m->m_pkthdr.csum_data = 0xFFFF;
1864 }
1865
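/*
 * Handle the checksum hints in the virtio-net header of a received frame:
 * parse the Ethernet and IP/IPv6 headers to find the transport protocol,
 * then either complete a partial checksum (NEEDS_CSUM) or mark the data
 * as already validated (DATA_VALID). Returns nonzero if the frame cannot
 * be handled.
 */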
1866 static int
1867 vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
1868 struct virtio_net_hdr *hdr)
1869 {
1870 const struct ether_header *eh;
1871 struct vtnet_softc *sc;
1872 int hoff, protocol;
1873 uint16_t etype;
1874 bool isipv6;
1875
1876 KASSERT(hdr->flags &
1877 (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID),
1878 ("%s: missing checksum offloading flag %x", __func__, hdr->flags));
1879
1880 eh = mtod(m, const struct ether_header *);
1881 etype = ntohs(eh->ether_type);
1882 if (etype == ETHERTYPE_VLAN) {
1883 /* TODO BMV: Handle QinQ. */
1884 const struct ether_vlan_header *evh =
1885 mtod(m, const struct ether_vlan_header *);
1886 etype = ntohs(evh->evl_proto);
1887 hoff = sizeof(struct ether_vlan_header);
1888 } else
1889 hoff = sizeof(struct ether_header);
1890
1891 sc = rxq->vtnrx_sc;
1892
1893 /* Check whether ethernet type is IP or IPv6, and get protocol. */
1894 switch (etype) {
1895 #if defined(INET)
1896 case ETHERTYPE_IP:
1897 if (__predict_false(m->m_len < hoff + sizeof(struct ip))) {
1898 sc->vtnet_stats.rx_csum_inaccessible_ipproto++;
1899 return (1);
1900 } else {
1901 struct ip *ip = (struct ip *)(m->m_data + hoff);
1902 protocol = ip->ip_p;
1903 }
1904 isipv6 = false;
1905 break;
1906 #endif
1907 #if defined(INET6)
1908 case ETHERTYPE_IPV6:
1909 if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
1910 || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0) {
1911 sc->vtnet_stats.rx_csum_inaccessible_ipproto++;
1912 return (1);
1913 }
1914 isipv6 = true;
1915 break;
1916 #endif
1917 default:
1918 sc->vtnet_stats.rx_csum_bad_ethtype++;
1919 return (1);
1920 }
1921
1922 /* Check whether protocol is TCP or UDP. */
1923 switch (protocol) {
1924 case IPPROTO_TCP:
1925 case IPPROTO_UDP:
1926 break;
1927 default:
1928 /*
1929 * FreeBSD does not support checksum offloading of this
1930 * protocol here.
1931 */
1932 sc->vtnet_stats.rx_csum_bad_ipproto++;
1933 return (1);
1934 }
1935
1936 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
1937 return (vtnet_rxq_csum_needs_csum(rxq, m, isipv6, protocol,
1938 hdr));
1939 else /* VIRTIO_NET_HDR_F_DATA_VALID */
1940 vtnet_rxq_csum_data_valid(rxq, m, protocol);
1941
1942 return (0);
1943 }
1944 #endif
1945
1946 static void
1947 vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
1948 {
1949 struct mbuf *m;
1950
1951 while (--nbufs > 0) {
1952 m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
1953 if (m == NULL)
1954 break;
1955 vtnet_rxq_discard_buf(rxq, m);
1956 }
1957 }
1958
1959 static void
1960 vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
1961 {
1962 int error __diagused;
1963
1964 /*
1965 * Requeue the discarded mbuf. This should always be successful
1966 * since it was just dequeued.
1967 */
1968 error = vtnet_rxq_enqueue_buf(rxq, m);
1969 KASSERT(error == 0,
1970 ("%s: cannot requeue discarded mbuf %d", __func__, error));
1971 }
1972
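/*
 * Dequeue the remaining buffers of a mergeable receive chain, replace
 * each one in the virtqueue, and append it to m_head. On failure the
 * partial chain is freed and counted in rx_mergeable_failed.
 */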
1973 static int
1974 vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
1975 {
1976 struct vtnet_softc *sc;
1977 struct virtqueue *vq;
1978 struct mbuf *m_tail;
1979
1980 sc = rxq->vtnrx_sc;
1981 vq = rxq->vtnrx_vq;
1982 m_tail = m_head;
1983
1984 while (--nbufs > 0) {
1985 struct mbuf *m;
1986 uint32_t len;
1987
1988 m = virtqueue_dequeue(vq, &len);
1989 if (m == NULL) {
1990 rxq->vtnrx_stats.vrxs_ierrors++;
1991 goto fail;
1992 }
1993
1994 if (vtnet_rxq_new_buf(rxq) != 0) {
1995 rxq->vtnrx_stats.vrxs_iqdrops++;
1996 vtnet_rxq_discard_buf(rxq, m);
1997 if (nbufs > 1)
1998 vtnet_rxq_discard_merged_bufs(rxq, nbufs);
1999 goto fail;
2000 }
2001
2002 if (m->m_len < len)
2003 len = m->m_len;
2004
2005 m->m_len = len;
2006 m->m_flags &= ~M_PKTHDR;
2007
2008 m_head->m_pkthdr.len += len;
2009 m_tail->m_next = m;
2010 m_tail = m;
2011 }
2012
2013 return (0);
2014
2015 fail:
2016 sc->vtnet_stats.rx_mergeable_failed++;
2017 m_freem(m_head);
2018
2019 return (1);
2020 }
2021
2022 #if defined(INET) || defined(INET6)
2023 static int
2024 vtnet_lro_rx(struct vtnet_rxq *rxq, struct mbuf *m)
2025 {
2026 struct lro_ctrl *lro;
2027
2028 lro = &rxq->vtnrx_lro;
2029
2030 if (lro->lro_mbuf_max != 0) {
2031 tcp_lro_queue_mbuf(lro, m);
2032 return (0);
2033 }
2034
2035 return (tcp_lro_rx(lro, m, 0));
2036 }
2037 #endif
2038
2039 static void
2040 vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
2041 struct virtio_net_hdr *hdr)
2042 {
2043 struct vtnet_softc *sc;
2044 if_t ifp;
2045
2046 sc = rxq->vtnrx_sc;
2047 ifp = sc->vtnet_ifp;
2048
2049 if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
2050 struct ether_header *eh = mtod(m, struct ether_header *);
2051 if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
2052 vtnet_vlan_tag_remove(m);
2053 /*
2054 * With the 802.1Q header removed, update the
2055 * checksum starting location accordingly.
2056 */
2057 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
2058 hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
2059 }
2060 }
2061
2062 m->m_pkthdr.flowid = rxq->vtnrx_id;
2063 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);
2064
2065 if (hdr->flags &
2066 (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) {
2067 #if defined(INET) || defined(INET6)
2068 if (vtnet_rxq_csum(rxq, m, hdr) == 0)
2069 rxq->vtnrx_stats.vrxs_csum++;
2070 else
2071 rxq->vtnrx_stats.vrxs_csum_failed++;
2072 #else
2073 sc->vtnet_stats.rx_csum_bad_ethtype++;
2074 rxq->vtnrx_stats.vrxs_csum_failed++;
2075 #endif
2076 }
2077
2078 if (hdr->gso_size != 0) {
2079 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) {
2080 case VIRTIO_NET_HDR_GSO_TCPV4:
2081 case VIRTIO_NET_HDR_GSO_TCPV6:
2082 m->m_pkthdr.lro_nsegs =
2083 howmany(m->m_pkthdr.len, hdr->gso_size);
2084 rxq->vtnrx_stats.vrxs_host_lro++;
2085 break;
2086 }
2087 }
2088
2089 rxq->vtnrx_stats.vrxs_ipackets++;
2090 rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;
2091
2092 #if defined(INET) || defined(INET6)
2093 if (vtnet_software_lro(sc) && if_getcapenable(ifp) & IFCAP_LRO) {
2094 if (vtnet_lro_rx(rxq, m) == 0)
2095 return;
2096 }
2097 #endif
2098
2099 if_input(ifp, m);
2100 }
2101
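/*
 * Process completed receive descriptors, up to the per-queue process
 * limit. Each dequeued mbuf is replaced in the virtqueue, the virtio-net
 * header is saved (byte-swapped as needed) and stripped, and the frame
 * is handed to vtnet_rxq_input(). Returns EAGAIN if the process limit
 * was exhausted, zero otherwise.
 */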
2102 static int
2103 vtnet_rxq_eof(struct vtnet_rxq *rxq)
2104 {
2105 struct virtio_net_hdr lhdr, *hdr;
2106 struct vtnet_softc *sc;
2107 if_t ifp;
2108 struct virtqueue *vq;
2109 int deq, count;
2110
2111 sc = rxq->vtnrx_sc;
2112 vq = rxq->vtnrx_vq;
2113 ifp = sc->vtnet_ifp;
2114 deq = 0;
2115 count = sc->vtnet_rx_process_limit;
2116
2117 VTNET_RXQ_LOCK_ASSERT(rxq);
2118
2119 CURVNET_SET(if_getvnet(ifp));
2120 while (count-- > 0) {
2121 struct mbuf *m;
2122 uint32_t len, nbufs, adjsz;
2123
2124 m = virtqueue_dequeue(vq, &len);
2125 if (m == NULL)
2126 break;
2127 deq++;
2128
2129 if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
2130 rxq->vtnrx_stats.vrxs_ierrors++;
2131 vtnet_rxq_discard_buf(rxq, m);
2132 continue;
2133 }
2134
2135 if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
2136 struct virtio_net_hdr_mrg_rxbuf *mhdr =
2137 mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
2138 kmsan_mark(mhdr, sizeof(*mhdr), KMSAN_STATE_INITED);
2139 nbufs = vtnet_htog16(sc, mhdr->num_buffers);
2140 adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
2141 } else if (vtnet_modern(sc)) {
2142 nbufs = 1; /* num_buffers is always 1 */
2143 adjsz = sizeof(struct virtio_net_hdr_v1);
2144 } else {
2145 nbufs = 1;
2146 adjsz = sizeof(struct vtnet_rx_header);
2147 /*
2148 * Account for our gap between the header and start of
2149 * data to keep the segments separated.
2150 */
2151 len += VTNET_RX_HEADER_PAD;
2152 }
2153
2154 if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
2155 rxq->vtnrx_stats.vrxs_iqdrops++;
2156 vtnet_rxq_discard_buf(rxq, m);
2157 if (nbufs > 1)
2158 vtnet_rxq_discard_merged_bufs(rxq, nbufs);
2159 continue;
2160 }
2161
2162 m->m_pkthdr.len = len;
2163 m->m_pkthdr.rcvif = ifp;
2164 m->m_pkthdr.csum_flags = 0;
2165
2166 if (nbufs > 1) {
2167 /* Dequeue the rest of chain. */
2168 if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
2169 continue;
2170 }
2171
2172 kmsan_mark_mbuf(m, KMSAN_STATE_INITED);
2173
2174 /*
2175 * Save an endian swapped version of the header prior to it
2176 * being stripped. The header is always at the start of the
2177 * mbuf data. num_buffers was already saved (and not needed)
2178 * so use the standard header.
2179 */
2180 hdr = mtod(m, struct virtio_net_hdr *);
2181 lhdr.flags = hdr->flags;
2182 lhdr.gso_type = hdr->gso_type;
2183 lhdr.hdr_len = vtnet_htog16(sc, hdr->hdr_len);
2184 lhdr.gso_size = vtnet_htog16(sc, hdr->gso_size);
2185 lhdr.csum_start = vtnet_htog16(sc, hdr->csum_start);
2186 lhdr.csum_offset = vtnet_htog16(sc, hdr->csum_offset);
2187 m_adj(m, adjsz);
2188
2189 if (PFIL_HOOKED_IN(sc->vtnet_pfil)) {
2190 pfil_return_t pfil;
2191
2192 pfil = pfil_mbuf_in(sc->vtnet_pfil, &m, ifp, NULL);
2193 switch (pfil) {
2194 case PFIL_DROPPED:
2195 case PFIL_CONSUMED:
2196 continue;
2197 default:
2198 KASSERT(pfil == PFIL_PASS,
2199 ("Filter returned %d!", pfil));
2200 }
2201 }
2202
2203 vtnet_rxq_input(rxq, m, &lhdr);
2204 }
2205
2206 if (deq > 0) {
2207 #if defined(INET) || defined(INET6)
2208 if (vtnet_software_lro(sc))
2209 tcp_lro_flush_all(&rxq->vtnrx_lro);
2210 #endif
2211 virtqueue_notify(vq);
2212 }
2213 CURVNET_RESTORE();
2214
2215 return (count > 0 ? 0 : EAGAIN);
2216 }
2217
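/*
 * Common body of the receive interrupt and taskqueue handlers: drain the
 * receive virtqueue and try to re-enable its interrupt, retrying a few
 * times on races before deferring further work to the taskqueue.
 */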
2218 static void
2219 vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries)
2220 {
2221 struct vtnet_softc *sc;
2222 if_t ifp;
2223 u_int more;
2224 #ifdef DEV_NETMAP
2225 int nmirq;
2226 #endif /* DEV_NETMAP */
2227
2228 sc = rxq->vtnrx_sc;
2229 ifp = sc->vtnet_ifp;
2230
2231 if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
2232 /*
2233 * Ignore this interrupt. Either this is a spurious interrupt
2234 * or multiqueue without per-VQ MSIX so every queue needs to
2235 * be polled (a brain dead configuration we could try harder
2236 * to avoid).
2237 */
2238 vtnet_rxq_disable_intr(rxq);
2239 return;
2240 }
2241
2242 VTNET_RXQ_LOCK(rxq);
2243
2244 #ifdef DEV_NETMAP
2245 /*
2246 * We call netmap_rx_irq() under lock to prevent concurrent calls.
2247 * This is not necessary to serialize the access to the RX vq, but
2248 * rather to avoid races that may happen if this interface is
2249 * attached to a VALE switch, which would cause received packets
2250 * to stall in the RX queue (nm_kr_tryget() could find the kring
2251 * busy when called from netmap_bwrap_intr_notify()).
2252 */
2253 nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more);
2254 if (nmirq != NM_IRQ_PASS) {
2255 VTNET_RXQ_UNLOCK(rxq);
2256 if (nmirq == NM_IRQ_RESCHED) {
2257 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2258 }
2259 return;
2260 }
2261 #endif /* DEV_NETMAP */
2262
2263 again:
2264 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
2265 VTNET_RXQ_UNLOCK(rxq);
2266 return;
2267 }
2268
2269 more = vtnet_rxq_eof(rxq);
2270 if (more || vtnet_rxq_enable_intr(rxq) != 0) {
2271 if (!more)
2272 vtnet_rxq_disable_intr(rxq);
2273 /*
2274 * This is an occasional condition or race (when !more),
2275 * so retry a few times before scheduling the taskqueue.
2276 */
2277 if (tries-- > 0)
2278 goto again;
2279
2280 rxq->vtnrx_stats.vrxs_rescheduled++;
2281 VTNET_RXQ_UNLOCK(rxq);
2282 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
2283 } else
2284 VTNET_RXQ_UNLOCK(rxq);
2285 }
2286
2287 static void
2288 vtnet_rx_vq_intr(void *xrxq)
2289 {
2290 struct vtnet_rxq *rxq;
2291
2292 rxq = xrxq;
2293 vtnet_rx_vq_process(rxq, VTNET_INTR_DISABLE_RETRIES);
2294 }
2295
2296 static void
2297 vtnet_rxq_tq_intr(void *xrxq, int pending __unused)
2298 {
2299 struct vtnet_rxq *rxq;
2300
2301 rxq = xrxq;
2302 vtnet_rx_vq_process(rxq, 0);
2303 }
2304
2305 static int
2306 vtnet_txq_intr_threshold(struct vtnet_txq *txq)
2307 {
2308 struct vtnet_softc *sc;
2309 int threshold;
2310
2311 sc = txq->vtntx_sc;
2312
2313 /*
2314 * The Tx interrupt is disabled until the queue free count falls
2315 * below our threshold. Completed frames are drained from the Tx
2316 * virtqueue before transmitting new frames and in the watchdog
2317 * callout, so the frequency of Tx interrupts is greatly reduced,
2318 * at the cost of not freeing mbufs as quickly as they otherwise
2319 * would be.
2320 */
2321 threshold = virtqueue_size(txq->vtntx_vq) / 4;
2322
2323 /*
2324 * Without indirect descriptors, leave enough room for the most
2325 * segments we handle.
2326 */
2327 if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 &&
2328 threshold < sc->vtnet_tx_nsegs)
2329 threshold = sc->vtnet_tx_nsegs;
2330
2331 return (threshold);
2332 }
2333
2334 static int
2335 vtnet_txq_below_threshold(struct vtnet_txq *txq)
2336 {
2337 struct virtqueue *vq;
2338
2339 vq = txq->vtntx_vq;
2340
2341 return (virtqueue_nfree(vq) <= txq->vtntx_intr_threshold);
2342 }
2343
2344 static int
2345 vtnet_txq_notify(struct vtnet_txq *txq)
2346 {
2347 struct virtqueue *vq;
2348
2349 vq = txq->vtntx_vq;
2350
2351 txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
2352 virtqueue_notify(vq);
2353
2354 if (vtnet_txq_enable_intr(txq) == 0)
2355 return (0);
2356
2357 /*
2358 * Drain frames that were completed since last checked. If this
2359 * causes the queue to go above the threshold, the caller should
2360 * continue transmitting.
2361 */
2362 if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
2363 virtqueue_disable_intr(vq);
2364 return (1);
2365 }
2366
2367 return (0);
2368 }
2369
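/*
 * Drain all outstanding entries from the transmit virtqueue, freeing the
 * mbufs and transmit headers. When the queue is operating in netmap mode
 * the entries are drained but nothing is freed here.
 */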
2370 static void
2371 vtnet_txq_free_mbufs(struct vtnet_txq *txq)
2372 {
2373 struct virtqueue *vq;
2374 struct vtnet_tx_header *txhdr;
2375 int last;
2376 #ifdef DEV_NETMAP
2377 struct netmap_kring *kring = netmap_kring_on(NA(txq->vtntx_sc->vtnet_ifp),
2378 txq->vtntx_id, NR_TX);
2379 #else /* !DEV_NETMAP */
2380 void *kring = NULL;
2381 #endif /* !DEV_NETMAP */
2382
2383 vq = txq->vtntx_vq;
2384 last = 0;
2385
2386 while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
2387 if (kring == NULL) {
2388 m_freem(txhdr->vth_mbuf);
2389 uma_zfree(vtnet_tx_header_zone, txhdr);
2390 }
2391 }
2392
2393 KASSERT(virtqueue_empty(vq),
2394 ("%s: mbufs remaining in tx queue %p", __func__, txq));
2395 }
2396
2397 /*
2398 * BMV: This can go away once we finally have offsets in the mbuf header.
2399 */
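/*
 * Parse the Ethernet and IP/IPv6 headers of an outbound mbuf to recover
 * the ethertype, the IP protocol, and the offset of the transport header
 * needed by the checksum and TSO offload paths below.
 */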
2400 static int
2401 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype,
2402 int *proto, int *start)
2403 {
2404 struct vtnet_softc *sc;
2405 struct ether_vlan_header *evh;
2406 #if defined(INET) || defined(INET6)
2407 int offset;
2408 #endif
2409
2410 sc = txq->vtntx_sc;
2411
2412 evh = mtod(m, struct ether_vlan_header *);
2413 if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
2414 /* BMV: We should handle nested VLAN tags too. */
2415 *etype = ntohs(evh->evl_proto);
2416 #if defined(INET) || defined(INET6)
2417 offset = sizeof(struct ether_vlan_header);
2418 #endif
2419 } else {
2420 *etype = ntohs(evh->evl_encap_proto);
2421 #if defined(INET) || defined(INET6)
2422 offset = sizeof(struct ether_header);
2423 #endif
2424 }
2425
2426 switch (*etype) {
2427 #if defined(INET)
2428 case ETHERTYPE_IP: {
2429 struct ip *ip, iphdr;
2430 if (__predict_false(m->m_len < offset + sizeof(struct ip))) {
2431 m_copydata(m, offset, sizeof(struct ip),
2432 (caddr_t) &iphdr);
2433 ip = &iphdr;
2434 } else
2435 ip = (struct ip *)(m->m_data + offset);
2436 *proto = ip->ip_p;
2437 *start = offset + (ip->ip_hl << 2);
2438 break;
2439 }
2440 #endif
2441 #if defined(INET6)
2442 case ETHERTYPE_IPV6:
2443 *proto = -1;
2444 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto);
2445 /* Assert the network stack sent us a valid packet. */
2446 KASSERT(*start > offset,
2447 ("%s: mbuf %p start %d offset %d proto %d", __func__, m,
2448 *start, offset, *proto));
2449 break;
2450 #endif
2451 default:
2452 sc->vtnet_stats.tx_csum_unknown_ethtype++;
2453 return (EINVAL);
2454 }
2455
2456 return (0);
2457 }
2458
2459 static int
2460 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type,
2461 int offset, struct virtio_net_hdr *hdr)
2462 {
2463 static struct timeval lastecn;
2464 static int curecn;
2465 struct vtnet_softc *sc;
2466 struct tcphdr *tcp, tcphdr;
2467
2468 sc = txq->vtntx_sc;
2469
2470 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) {
2471 m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr);
2472 tcp = &tcphdr;
2473 } else
2474 tcp = (struct tcphdr *)(m->m_data + offset);
2475
2476 hdr->hdr_len = vtnet_gtoh16(sc, offset + (tcp->th_off << 2));
2477 hdr->gso_size = vtnet_gtoh16(sc, m->m_pkthdr.tso_segsz);
2478 hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 :
2479 VIRTIO_NET_HDR_GSO_TCPV6;
2480
2481 if (__predict_false(tcp_get_flags(tcp) & TH_CWR)) {
2482 /*
2483 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In
2484 * FreeBSD, ECN support is not on a per-interface basis,
2485 * but globally via the net.inet.tcp.ecn.enable sysctl
2486 * knob. The default is off.
2487 */
2488 if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) {
2489 if (ppsratecheck(&lastecn, &curecn, 1))
2490 if_printf(sc->vtnet_ifp,
2491 "TSO with ECN not negotiated with host\n");
2492 return (ENOTSUP);
2493 }
2494 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN;
2495 }
2496
2497 txq->vtntx_stats.vtxs_tso++;
2498
2499 return (0);
2500 }
2501
2502 static struct mbuf *
2503 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m,
2504 struct virtio_net_hdr *hdr)
2505 {
2506 struct vtnet_softc *sc;
2507 int flags, etype, csum_start, proto, error;
2508
2509 sc = txq->vtntx_sc;
2510 flags = m->m_pkthdr.csum_flags;
2511
2512 error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start);
2513 if (error)
2514 goto drop;
2515
2516 if (flags & (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6)) {
2517 /* Sanity check the parsed mbuf matches the offload flags. */
2518 if (__predict_false((flags & VTNET_CSUM_OFFLOAD &&
2519 etype != ETHERTYPE_IP) || (flags & VTNET_CSUM_OFFLOAD_IPV6
2520 && etype != ETHERTYPE_IPV6))) {
2521 sc->vtnet_stats.tx_csum_proto_mismatch++;
2522 goto drop;
2523 }
2524
2525 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
2526 hdr->csum_start = vtnet_gtoh16(sc, csum_start);
2527 hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data);
2528 txq->vtntx_stats.vtxs_csum++;
2529 }
2530
2531 if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) {
2532 /*
2533 		 * Sanity check that the parsed mbuf's IP protocol is TCP and that
2534 		 * the checksum offload above was requested; VirtIO TSO requires it.
2535 */
2536 if (__predict_false(proto != IPPROTO_TCP)) {
2537 sc->vtnet_stats.tx_tso_not_tcp++;
2538 goto drop;
2539 } else if (__predict_false((hdr->flags &
2540 VIRTIO_NET_HDR_F_NEEDS_CSUM) == 0)) {
2541 sc->vtnet_stats.tx_tso_without_csum++;
2542 goto drop;
2543 }
2544
2545 error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
2546 if (error)
2547 goto drop;
2548 }
2549
2550 return (m);
2551
2552 drop:
2553 m_freem(m);
2554 return (NULL);
2555 }
2556
2557 static int
2558 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
2559 struct vtnet_tx_header *txhdr)
2560 {
2561 struct vtnet_softc *sc;
2562 struct virtqueue *vq;
2563 struct sglist *sg;
2564 struct mbuf *m;
2565 int error;
2566
2567 sc = txq->vtntx_sc;
2568 vq = txq->vtntx_vq;
2569 sg = txq->vtntx_sg;
2570 m = *m_head;
2571
2572 sglist_reset(sg);
2573 error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
2574 if (error != 0 || sg->sg_nseg != 1) {
2575 KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d",
2576 __func__, error, sg->sg_nseg));
2577 goto fail;
2578 }
2579
2580 error = sglist_append_mbuf(sg, m);
2581 if (error) {
2582 m = m_defrag(m, M_NOWAIT);
2583 if (m == NULL) {
2584 sc->vtnet_stats.tx_defrag_failed++;
2585 goto fail;
2586 }
2587
2588 *m_head = m;
2589 sc->vtnet_stats.tx_defragged++;
2590
2591 error = sglist_append_mbuf(sg, m);
2592 if (error)
2593 goto fail;
2594 }
2595
2596 txhdr->vth_mbuf = m;
2597 error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);
2598
2599 return (error);
2600
2601 fail:
2602 m_freem(*m_head);
2603 *m_head = NULL;
2604
2605 return (ENOBUFS);
2606 }
2607
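/*
 * Prepare a frame for transmission: allocate a transmit header from the
 * UMA zone, encapsulate the 802.1Q tag in software when M_VLANTAG is set,
 * fill in the virtio-net header for any requested checksum/TSO offload,
 * and hand the header plus mbuf chain to vtnet_txq_enqueue_buf().
 */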
2608 static int
2609 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
2610 {
2611 struct vtnet_tx_header *txhdr;
2612 struct virtio_net_hdr *hdr;
2613 struct mbuf *m;
2614 int error;
2615
2616 m = *m_head;
2617 M_ASSERTPKTHDR(m);
2618
2619 txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO);
2620 if (txhdr == NULL) {
2621 m_freem(m);
2622 *m_head = NULL;
2623 return (ENOMEM);
2624 }
2625
2626 /*
2627 	 * Always use the non-mergeable header, regardless of whether mergeable
2628 	 * headers were negotiated, because for transmit num_buffers is always
2629 	 * zero. The vtnet_hdr_size is used to enqueue the correctly sized segment.
2630 */
2631 hdr = &txhdr->vth_uhdr.hdr;
2632
2633 if (m->m_flags & M_VLANTAG) {
2634 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag);
2635 if ((*m_head = m) == NULL) {
2636 error = ENOBUFS;
2637 goto fail;
2638 }
2639 m->m_flags &= ~M_VLANTAG;
2640 }
2641
2642 if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) {
2643 m = vtnet_txq_offload(txq, m, hdr);
2644 if ((*m_head = m) == NULL) {
2645 error = ENOBUFS;
2646 goto fail;
2647 }
2648 }
2649
2650 error = vtnet_txq_enqueue_buf(txq, m_head, txhdr);
2651 fail:
2652 if (error)
2653 uma_zfree(vtnet_tx_header_zone, txhdr);
2654
2655 return (error);
2656 }
2657
2658
2659 static void
2660 vtnet_start_locked(struct vtnet_txq *txq, if_t ifp)
2661 {
2662 struct vtnet_softc *sc;
2663 struct virtqueue *vq;
2664 struct mbuf *m0;
2665 int tries, enq;
2666
2667 sc = txq->vtntx_sc;
2668 vq = txq->vtntx_vq;
2669 tries = 0;
2670
2671 VTNET_TXQ_LOCK_ASSERT(txq);
2672
2673 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
2674 sc->vtnet_link_active == 0)
2675 return;
2676
2677 vtnet_txq_eof(txq);
2678
2679 again:
2680 enq = 0;
2681
2682 while (!if_sendq_empty(ifp)) {
2683 if (virtqueue_full(vq))
2684 break;
2685
2686 m0 = if_dequeue(ifp);
2687 if (m0 == NULL)
2688 break;
2689
2690 if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) {
2691 if (m0 != NULL)
2692 if_sendq_prepend(ifp, m0);
2693 break;
2694 }
2695
2696 enq++;
2697 ETHER_BPF_MTAP(ifp, m0);
2698 }
2699
2700 if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2701 if (tries++ < VTNET_NOTIFY_RETRIES)
2702 goto again;
2703
2704 txq->vtntx_stats.vtxs_rescheduled++;
2705 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2706 }
2707 }
2708
2709 static void
2710 vtnet_start(if_t ifp)
2711 {
2712 struct vtnet_softc *sc;
2713 struct vtnet_txq *txq;
2714
2715 sc = if_getsoftc(ifp);
2716 txq = &sc->vtnet_txqs[0];
2717
2718 VTNET_TXQ_LOCK(txq);
2719 vtnet_start_locked(txq, ifp);
2720 VTNET_TXQ_UNLOCK(txq);
2721 }
2722
2723
2724 static int
2725 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m)
2726 {
2727 struct vtnet_softc *sc;
2728 struct virtqueue *vq;
2729 struct buf_ring *br;
2730 if_t ifp;
2731 int enq, tries, error;
2732
2733 sc = txq->vtntx_sc;
2734 vq = txq->vtntx_vq;
2735 br = txq->vtntx_br;
2736 ifp = sc->vtnet_ifp;
2737 tries = 0;
2738 error = 0;
2739
2740 VTNET_TXQ_LOCK_ASSERT(txq);
2741
2742 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 ||
2743 sc->vtnet_link_active == 0) {
2744 if (m != NULL)
2745 error = drbr_enqueue(ifp, br, m);
2746 return (error);
2747 }
2748
2749 if (m != NULL) {
2750 error = drbr_enqueue(ifp, br, m);
2751 if (error)
2752 return (error);
2753 }
2754
2755 vtnet_txq_eof(txq);
2756
2757 again:
2758 enq = 0;
2759
2760 while ((m = drbr_peek(ifp, br)) != NULL) {
2761 if (virtqueue_full(vq)) {
2762 drbr_putback(ifp, br, m);
2763 break;
2764 }
2765
2766 if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) {
2767 if (m != NULL)
2768 drbr_putback(ifp, br, m);
2769 else
2770 drbr_advance(ifp, br);
2771 break;
2772 }
2773 drbr_advance(ifp, br);
2774
2775 enq++;
2776 ETHER_BPF_MTAP(ifp, m);
2777 }
2778
2779 if (enq > 0 && vtnet_txq_notify(txq) != 0) {
2780 if (tries++ < VTNET_NOTIFY_RETRIES)
2781 goto again;
2782
2783 txq->vtntx_stats.vtxs_rescheduled++;
2784 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask);
2785 }
2786
2787 return (0);
2788 }
2789
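/*
 * Multiqueue transmit entry point: select a transmit queue from the mbuf
 * flowid (or the current CPU) and either transmit directly if the queue
 * lock is available, or defer the mbuf to that queue's buf_ring and
 * taskqueue.
 */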
2790 static int
2791 vtnet_txq_mq_start(if_t ifp, struct mbuf *m)
2792 {
2793 struct vtnet_softc *sc;
2794 struct vtnet_txq *txq;
2795 int i, npairs, error;
2796
2797 sc = if_getsoftc(ifp);
2798 npairs = sc->vtnet_act_vq_pairs;
2799
2800 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE)
2801 i = m->m_pkthdr.flowid % npairs;
2802 else
2803 i = curcpu % npairs;
2804
2805 txq = &sc->vtnet_txqs[i];
2806
2807 if (VTNET_TXQ_TRYLOCK(txq) != 0) {
2808 error = vtnet_txq_mq_start_locked(txq, m);
2809 VTNET_TXQ_UNLOCK(txq);
2810 } else {
2811 error = drbr_enqueue(ifp, txq->vtntx_br, m);
2812 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask);
2813 }
2814
2815 return (error);
2816 }
2817
2818 static void
2819 vtnet_txq_tq_deferred(void *xtxq, int pending __unused)
2820 {
2821 struct vtnet_softc *sc;
2822 struct vtnet_txq *txq;
2823
2824 txq = xtxq;
2825 sc = txq->vtntx_sc;
2826
2827 VTNET_TXQ_LOCK(txq);
2828 if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br))
2829 vtnet_txq_mq_start_locked(txq, NULL);
2830 VTNET_TXQ_UNLOCK(txq);
2831 }
2832
2833
2834 static void
2835 vtnet_txq_start(struct vtnet_txq *txq)
2836 {
2837 struct vtnet_softc *sc;
2838 if_t ifp;
2839
2840 sc = txq->vtntx_sc;
2841 ifp = sc->vtnet_ifp;
2842
2843 if (!VTNET_ALTQ_ENABLED) {
2844 if (!drbr_empty(ifp, txq->vtntx_br))
2845 vtnet_txq_mq_start_locked(txq, NULL);
2846 } else {
2847 if (!if_sendq_empty(ifp))
2848 vtnet_start_locked(txq, ifp);
2849
2850 }
2851 }
2852
2853 static void
2854 vtnet_txq_tq_intr(void *xtxq, int pending __unused)
2855 {
2856 struct vtnet_softc *sc;
2857 struct vtnet_txq *txq;
2858 if_t ifp;
2859
2860 txq = xtxq;
2861 sc = txq->vtntx_sc;
2862 ifp = sc->vtnet_ifp;
2863
2864 VTNET_TXQ_LOCK(txq);
2865
2866 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
2867 VTNET_TXQ_UNLOCK(txq);
2868 return;
2869 }
2870
2871 vtnet_txq_eof(txq);
2872 vtnet_txq_start(txq);
2873
2874 VTNET_TXQ_UNLOCK(txq);
2875 }
2876
2877 static int
2878 vtnet_txq_eof(struct vtnet_txq *txq)
2879 {
2880 struct virtqueue *vq;
2881 struct vtnet_tx_header *txhdr;
2882 struct mbuf *m;
2883 int deq;
2884
2885 vq = txq->vtntx_vq;
2886 deq = 0;
2887 VTNET_TXQ_LOCK_ASSERT(txq);
2888
2889 while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) {
2890 m = txhdr->vth_mbuf;
2891 deq++;
2892
2893 txq->vtntx_stats.vtxs_opackets++;
2894 txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len;
2895 if (m->m_flags & M_MCAST)
2896 txq->vtntx_stats.vtxs_omcasts++;
2897
2898 m_freem(m);
2899 uma_zfree(vtnet_tx_header_zone, txhdr);
2900 }
2901
2902 if (virtqueue_empty(vq))
2903 txq->vtntx_watchdog = 0;
2904
2905 return (deq);
2906 }
2907
2908 static void
2909 vtnet_tx_vq_intr(void *xtxq)
2910 {
2911 struct vtnet_softc *sc;
2912 struct vtnet_txq *txq;
2913 if_t ifp;
2914
2915 txq = xtxq;
2916 sc = txq->vtntx_sc;
2917 ifp = sc->vtnet_ifp;
2918
2919 if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) {
2920 /*
2921 * Ignore this interrupt. Either this is a spurious interrupt
2922 * or multiqueue without per-VQ MSIX so every queue needs to
2923 * be polled (a brain dead configuration we could try harder
2924 * to avoid).
2925 */
2926 vtnet_txq_disable_intr(txq);
2927 return;
2928 }
2929
2930 #ifdef DEV_NETMAP
2931 if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS)
2932 return;
2933 #endif /* DEV_NETMAP */
2934
2935 VTNET_TXQ_LOCK(txq);
2936
2937 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) {
2938 VTNET_TXQ_UNLOCK(txq);
2939 return;
2940 }
2941
2942 vtnet_txq_eof(txq);
2943 vtnet_txq_start(txq);
2944
2945 VTNET_TXQ_UNLOCK(txq);
2946 }
2947
2948 static void
2949 vtnet_tx_start_all(struct vtnet_softc *sc)
2950 {
2951 struct vtnet_txq *txq;
2952 int i;
2953
2954 VTNET_CORE_LOCK_ASSERT(sc);
2955
2956 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2957 txq = &sc->vtnet_txqs[i];
2958
2959 VTNET_TXQ_LOCK(txq);
2960 vtnet_txq_start(txq);
2961 VTNET_TXQ_UNLOCK(txq);
2962 }
2963 }
2964
2965 static void
2966 vtnet_qflush(if_t ifp)
2967 {
2968 struct vtnet_softc *sc;
2969 struct vtnet_txq *txq;
2970 struct mbuf *m;
2971 int i;
2972
2973 sc = if_getsoftc(ifp);
2974
2975 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
2976 txq = &sc->vtnet_txqs[i];
2977
2978 VTNET_TXQ_LOCK(txq);
2979 while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
2980 m_freem(m);
2981 VTNET_TXQ_UNLOCK(txq);
2982 }
2983
2984 if_qflush(ifp);
2985 }
2986
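/*
 * Per-queue transmit watchdog, driven from vtnet_tick(). The counter is
 * armed in vtnet_txq_notify() and cleared by vtnet_txq_eof() once the
 * virtqueue is empty; returns nonzero on expiry so the caller can
 * reinitialize the interface.
 */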
2987 static int
2988 vtnet_watchdog(struct vtnet_txq *txq)
2989 {
2990 if_t ifp;
2991
2992 ifp = txq->vtntx_sc->vtnet_ifp;
2993
2994 VTNET_TXQ_LOCK(txq);
2995 if (txq->vtntx_watchdog == 1) {
2996 /*
2997 * Only drain completed frames if the watchdog is about to
2998 * expire. If any frames were drained, there may be enough
2999 * free descriptors now available to transmit queued frames.
3000 * In that case, the timer will immediately be decremented
3001 		 * below, but the timeout is generous enough that this should
3002 		 * not be a problem.
3003 */
3004 if (vtnet_txq_eof(txq) != 0)
3005 vtnet_txq_start(txq);
3006 }
3007
3008 if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
3009 VTNET_TXQ_UNLOCK(txq);
3010 return (0);
3011 }
3012 VTNET_TXQ_UNLOCK(txq);
3013
3014 if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
3015 return (1);
3016 }
3017
3018 static void
3019 vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc,
3020 struct vtnet_txq_stats *txacc)
3021 {
3022
3023 bzero(rxacc, sizeof(struct vtnet_rxq_stats));
3024 bzero(txacc, sizeof(struct vtnet_txq_stats));
3025
3026 for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3027 struct vtnet_rxq_stats *rxst;
3028 struct vtnet_txq_stats *txst;
3029
3030 rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
3031 rxacc->vrxs_ipackets += rxst->vrxs_ipackets;
3032 rxacc->vrxs_ibytes += rxst->vrxs_ibytes;
3033 rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops;
3034 rxacc->vrxs_csum += rxst->vrxs_csum;
3035 rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed;
3036 rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled;
3037
3038 txst = &sc->vtnet_txqs[i].vtntx_stats;
3039 txacc->vtxs_opackets += txst->vtxs_opackets;
3040 txacc->vtxs_obytes += txst->vtxs_obytes;
3041 txacc->vtxs_csum += txst->vtxs_csum;
3042 txacc->vtxs_tso += txst->vtxs_tso;
3043 txacc->vtxs_rescheduled += txst->vtxs_rescheduled;
3044 }
3045 }
3046
3047 static uint64_t
3048 vtnet_get_counter(if_t ifp, ift_counter cnt)
3049 {
3050 struct vtnet_softc *sc;
3051 struct vtnet_rxq_stats rxaccum;
3052 struct vtnet_txq_stats txaccum;
3053
3054 sc = if_getsoftc(ifp);
3055 vtnet_accum_stats(sc, &rxaccum, &txaccum);
3056
3057 switch (cnt) {
3058 case IFCOUNTER_IPACKETS:
3059 return (rxaccum.vrxs_ipackets);
3060 case IFCOUNTER_IQDROPS:
3061 return (rxaccum.vrxs_iqdrops);
3062 case IFCOUNTER_IERRORS:
3063 return (rxaccum.vrxs_ierrors);
3064 case IFCOUNTER_IBYTES:
3065 return (rxaccum.vrxs_ibytes);
3066 case IFCOUNTER_OPACKETS:
3067 return (txaccum.vtxs_opackets);
3068 case IFCOUNTER_OBYTES:
3069 return (txaccum.vtxs_obytes);
3070 case IFCOUNTER_OMCASTS:
3071 return (txaccum.vtxs_omcasts);
3072 default:
3073 return (if_get_counter_default(ifp, cnt));
3074 }
3075 }
3076
3077 static void
3078 vtnet_tick(void *xsc)
3079 {
3080 struct vtnet_softc *sc;
3081 if_t ifp;
3082 int i, timedout;
3083
3084 sc = xsc;
3085 ifp = sc->vtnet_ifp;
3086 timedout = 0;
3087
3088 VTNET_CORE_LOCK_ASSERT(sc);
3089
3090 for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
3091 timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]);
3092
3093 if (timedout != 0) {
3094 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3095 vtnet_init_locked(sc, 0);
3096 } else
3097 callout_schedule(&sc->vtnet_tick_ch, hz);
3098 }
3099
3100 static void
3101 vtnet_start_taskqueues(struct vtnet_softc *sc)
3102 {
3103 device_t dev;
3104 struct vtnet_rxq *rxq;
3105 struct vtnet_txq *txq;
3106 int i, error;
3107
3108 dev = sc->vtnet_dev;
3109
3110 /*
3111 * Errors here are very difficult to recover from - we cannot
3112 * easily fail because, if this is during boot, we will hang
3113 * when freeing any successfully started taskqueues because
3114 * the scheduler isn't up yet.
3115 *
3116 * Most drivers just ignore the return value - it only fails
3117 * with ENOMEM so an error is not likely.
3118 */
3119 for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
3120 rxq = &sc->vtnet_rxqs[i];
3121 error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
3122 "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
3123 if (error) {
3124 device_printf(dev, "failed to start rx taskq %d\n",
3125 rxq->vtnrx_id);
3126 }
3127
3128 txq = &sc->vtnet_txqs[i];
3129 error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
3130 "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
3131 if (error) {
3132 device_printf(dev, "failed to start tx taskq %d\n",
3133 txq->vtntx_id);
3134 }
3135 }
3136 }
3137
3138 static void
3139 vtnet_free_taskqueues(struct vtnet_softc *sc)
3140 {
3141 struct vtnet_rxq *rxq;
3142 struct vtnet_txq *txq;
3143 int i;
3144
3145 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3146 rxq = &sc->vtnet_rxqs[i];
3147 if (rxq->vtnrx_tq != NULL) {
3148 taskqueue_free(rxq->vtnrx_tq);
3149 rxq->vtnrx_tq = NULL;
3150 }
3151
3152 txq = &sc->vtnet_txqs[i];
3153 if (txq->vtntx_tq != NULL) {
3154 taskqueue_free(txq->vtntx_tq);
3155 txq->vtntx_tq = NULL;
3156 }
3157 }
3158 }
3159
3160 static void
3161 vtnet_drain_taskqueues(struct vtnet_softc *sc)
3162 {
3163 struct vtnet_rxq *rxq;
3164 struct vtnet_txq *txq;
3165 int i;
3166
3167 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3168 rxq = &sc->vtnet_rxqs[i];
3169 if (rxq->vtnrx_tq != NULL)
3170 taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
3171
3172 txq = &sc->vtnet_txqs[i];
3173 if (txq->vtntx_tq != NULL) {
3174 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
3175 if (!VTNET_ALTQ_ENABLED)
3176 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
3177 }
3178 }
3179 }
3180
3181 static void
3182 vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
3183 {
3184 struct vtnet_rxq *rxq;
3185 struct vtnet_txq *txq;
3186 int i;
3187
3188 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3189 rxq = &sc->vtnet_rxqs[i];
3190 vtnet_rxq_free_mbufs(rxq);
3191
3192 txq = &sc->vtnet_txqs[i];
3193 vtnet_txq_free_mbufs(txq);
3194 }
3195 }
3196
3197 static void
3198 vtnet_stop_rendezvous(struct vtnet_softc *sc)
3199 {
3200 struct vtnet_rxq *rxq;
3201 struct vtnet_txq *txq;
3202 int i;
3203
3204 VTNET_CORE_LOCK_ASSERT(sc);
3205
3206 /*
3207 	 * Lock and unlock the per-queue mutex so we know the stop
3208 * state is visible. Doing only the active queues should be
3209 * sufficient, but it does not cost much extra to do all the
3210 * queues.
3211 */
3212 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
3213 rxq = &sc->vtnet_rxqs[i];
3214 VTNET_RXQ_LOCK(rxq);
3215 VTNET_RXQ_UNLOCK(rxq);
3216
3217 txq = &sc->vtnet_txqs[i];
3218 VTNET_TXQ_LOCK(txq);
3219 VTNET_TXQ_UNLOCK(txq);
3220 }
3221 }
3222
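/*
 * Stop the interface with the core lock held: mark it down, stop the tick
 * callout, disable the virtqueue interrupts (advisory only), reset the
 * device via virtio_stop(), synchronize with the per-queue locks, and
 * free any mbufs still held by the virtqueues.
 */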
3223 static void
3224 vtnet_stop(struct vtnet_softc *sc)
3225 {
3226 device_t dev;
3227 if_t ifp;
3228
3229 dev = sc->vtnet_dev;
3230 ifp = sc->vtnet_ifp;
3231
3232 VTNET_CORE_LOCK_ASSERT(sc);
3233
3234 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3235 sc->vtnet_link_active = 0;
3236 callout_stop(&sc->vtnet_tick_ch);
3237
3238 /* Only advisory. */
3239 vtnet_disable_interrupts(sc);
3240
3241 #ifdef DEV_NETMAP
3242 /* Stop any pending txsync/rxsync and disable them. */
3243 netmap_disable_all_rings(ifp);
3244 #endif /* DEV_NETMAP */
3245
3246 /*
3247 * Stop the host adapter. This resets it to the pre-initialized
3248 * state. It will not generate any interrupts until after it is
3249 * reinitialized.
3250 */
3251 virtio_stop(dev);
3252 vtnet_stop_rendezvous(sc);
3253
3254 vtnet_drain_rxtx_queues(sc);
3255 sc->vtnet_act_vq_pairs = 1;
3256 }
3257
3258 static int
3259 vtnet_virtio_reinit(struct vtnet_softc *sc)
3260 {
3261 device_t dev;
3262 if_t ifp;
3263 uint64_t features;
3264 int error;
3265
3266 dev = sc->vtnet_dev;
3267 ifp = sc->vtnet_ifp;
3268 features = sc->vtnet_negotiated_features;
3269
3270 /*
3271 * Re-negotiate with the host, removing any disabled receive
3272 * features. Transmit features are disabled only on our side
3273 * via if_capenable and if_hwassist.
3274 */
3275
3276 if ((if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0)
3277 features &= ~(VIRTIO_NET_F_GUEST_CSUM | VTNET_LRO_FEATURES);
3278
3279 if ((if_getcapenable(ifp) & IFCAP_LRO) == 0)
3280 features &= ~VTNET_LRO_FEATURES;
3281
3282 if ((if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) == 0)
3283 features &= ~VIRTIO_NET_F_CTRL_VLAN;
3284
3285 error = virtio_reinit(dev, features);
3286 if (error) {
3287 device_printf(dev, "virtio reinit error %d\n", error);
3288 return (error);
3289 }
3290
3291 sc->vtnet_features = features;
3292 virtio_reinit_complete(dev);
3293
3294 return (0);
3295 }
3296
3297 static void
3298 vtnet_init_rx_filters(struct vtnet_softc *sc)
3299 {
3300 if_t ifp;
3301
3302 ifp = sc->vtnet_ifp;
3303
3304 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
3305 vtnet_rx_filter(sc);
3306 vtnet_rx_filter_mac(sc);
3307 }
3308
3309 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER)
3310 vtnet_rx_filter_vlan(sc);
3311 }
3312
3313 static int
3314 vtnet_init_rx_queues(struct vtnet_softc *sc)
3315 {
3316 device_t dev;
3317 if_t ifp;
3318 struct vtnet_rxq *rxq;
3319 int i, clustersz, error;
3320
3321 dev = sc->vtnet_dev;
3322 ifp = sc->vtnet_ifp;
3323
3324 clustersz = vtnet_rx_cluster_size(sc, if_getmtu(ifp));
3325 sc->vtnet_rx_clustersz = clustersz;
3326
3327 if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) {
3328 sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) +
3329 VTNET_MAX_RX_SIZE, clustersz);
3330 KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
3331 ("%s: too many rx mbufs %d for %d segments", __func__,
3332 sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));
3333 } else
3334 sc->vtnet_rx_nmbufs = 1;
3335
3336 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3337 rxq = &sc->vtnet_rxqs[i];
3338
3339 /* Hold the lock to satisfy asserts. */
3340 VTNET_RXQ_LOCK(rxq);
3341 error = vtnet_rxq_populate(rxq);
3342 VTNET_RXQ_UNLOCK(rxq);
3343
3344 if (error) {
3345 device_printf(dev, "cannot populate Rx queue %d\n", i);
3346 return (error);
3347 }
3348 }
3349
3350 return (0);
3351 }
3352
3353 static int
3354 vtnet_init_tx_queues(struct vtnet_softc *sc)
3355 {
3356 struct vtnet_txq *txq;
3357 int i;
3358
3359 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
3360 txq = &sc->vtnet_txqs[i];
3361 txq->vtntx_watchdog = 0;
3362 txq->vtntx_intr_threshold = vtnet_txq_intr_threshold(txq);
3363 #ifdef DEV_NETMAP
3364 netmap_reset(NA(sc->vtnet_ifp), NR_TX, i, 0);
3365 #endif /* DEV_NETMAP */
3366 }
3367
3368 return (0);
3369 }
3370
3371 static int
3372 vtnet_init_rxtx_queues(struct vtnet_softc *sc)
3373 {
3374 int error;
3375
3376 error = vtnet_init_rx_queues(sc);
3377 if (error)
3378 return (error);
3379
3380 error = vtnet_init_tx_queues(sc);
3381 if (error)
3382 return (error);
3383
3384 return (0);
3385 }
3386
3387 static void
3388 vtnet_set_active_vq_pairs(struct vtnet_softc *sc)
3389 {
3390 device_t dev;
3391 int npairs;
3392
3393 dev = sc->vtnet_dev;
3394
3395 if ((sc->vtnet_flags & VTNET_FLAG_MQ) == 0) {
3396 sc->vtnet_act_vq_pairs = 1;
3397 return;
3398 }
3399
3400 npairs = sc->vtnet_req_vq_pairs;
3401
3402 if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) {
3403 device_printf(dev, "cannot set active queue pairs to %d, "
3404 "falling back to 1 queue pair\n", npairs);
3405 npairs = 1;
3406 }
3407
3408 sc->vtnet_act_vq_pairs = npairs;
3409 }
3410
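/*
 * Reprogram the host's guest offload set via the control virtqueue after
 * the user toggles IFCAP_RXCSUM/IFCAP_RXCSUM_IPV6 or IFCAP_LRO, falling
 * back to a full reinit if the control command fails.
 */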
3411 static void
3412 vtnet_update_rx_offloads(struct vtnet_softc *sc)
3413 {
3414 if_t ifp;
3415 uint64_t features;
3416 int error;
3417
3418 ifp = sc->vtnet_ifp;
3419 features = sc->vtnet_features;
3420
3421 VTNET_CORE_LOCK_ASSERT(sc);
3422
3423 if (if_getcapabilities(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
3424 if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6))
3425 features |= VIRTIO_NET_F_GUEST_CSUM;
3426 else
3427 features &= ~VIRTIO_NET_F_GUEST_CSUM;
3428 }
3429
3430 if (if_getcapabilities(ifp) & IFCAP_LRO && !vtnet_software_lro(sc)) {
3431 if (if_getcapenable(ifp) & IFCAP_LRO)
3432 features |= VTNET_LRO_FEATURES;
3433 else
3434 features &= ~VTNET_LRO_FEATURES;
3435 }
3436
3437 error = vtnet_ctrl_guest_offloads(sc,
3438 features & (VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 |
3439 VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN |
3440 VIRTIO_NET_F_GUEST_UFO));
3441 if (error) {
3442 device_printf(sc->vtnet_dev,
3443 "%s: cannot update Rx features\n", __func__);
3444 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
3445 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
3446 vtnet_init_locked(sc, 0);
3447 }
3448 } else
3449 sc->vtnet_features = features;
3450 }
3451
3452 static int
3453 vtnet_reinit(struct vtnet_softc *sc)
3454 {
3455 if_t ifp;
3456 int error;
3457
3458 ifp = sc->vtnet_ifp;
3459
3460 bcopy(if_getlladdr(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN);
3461
3462 error = vtnet_virtio_reinit(sc);
3463 if (error)
3464 return (error);
3465
3466 vtnet_set_macaddr(sc);
3467 vtnet_set_active_vq_pairs(sc);
3468
3469 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
3470 vtnet_init_rx_filters(sc);
3471
3472 if_sethwassist(ifp, 0);
3473 if (if_getcapenable(ifp) & IFCAP_TXCSUM)
3474 if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD, 0);
3475 if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6)
3476 if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD_IPV6, 0);
3477 if (if_getcapenable(ifp) & IFCAP_TSO4)
3478 if_sethwassistbits(ifp, CSUM_IP_TSO, 0);
3479 if (if_getcapenable(ifp) & IFCAP_TSO6)
3480 if_sethwassistbits(ifp, CSUM_IP6_TSO, 0);
3481
3482 error = vtnet_init_rxtx_queues(sc);
3483 if (error)
3484 return (error);
3485
3486 return (0);
3487 }
3488
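/*
 * Bring the interface up with the core lock held: stop the device, apply
 * any pending netmap mode change, renegotiate features and repopulate the
 * queues via vtnet_reinit(), then mark the interface running and restart
 * the tick callout.
 */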
3489 static void
3490 vtnet_init_locked(struct vtnet_softc *sc, int init_mode)
3491 {
3492 if_t ifp;
3493
3494 ifp = sc->vtnet_ifp;
3495
3496 VTNET_CORE_LOCK_ASSERT(sc);
3497
3498 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING)
3499 return;
3500
3501 vtnet_stop(sc);
3502
3503 #ifdef DEV_NETMAP
3504 /* Once stopped we can update the netmap flags, if necessary. */
3505 switch (init_mode) {
3506 case VTNET_INIT_NETMAP_ENTER:
3507 nm_set_native_flags(NA(ifp));
3508 break;
3509 case VTNET_INIT_NETMAP_EXIT:
3510 nm_clear_native_flags(NA(ifp));
3511 break;
3512 }
3513 #endif /* DEV_NETMAP */
3514
3515 if (vtnet_reinit(sc) != 0) {
3516 vtnet_stop(sc);
3517 return;
3518 }
3519
3520 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0);
3521 vtnet_update_link_status(sc);
3522 vtnet_enable_interrupts(sc);
3523 callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc);
3524
3525 #ifdef DEV_NETMAP
3526 /* Re-enable txsync/rxsync. */
3527 netmap_enable_all_rings(ifp);
3528 #endif /* DEV_NETMAP */
3529 }
3530
3531 static void
3532 vtnet_init(void *xsc)
3533 {
3534 struct vtnet_softc *sc;
3535
3536 sc = xsc;
3537
3538 VTNET_CORE_LOCK(sc);
3539 vtnet_init_locked(sc, 0);
3540 VTNET_CORE_UNLOCK(sc);
3541 }
3542
3543 static void
3544 vtnet_free_ctrl_vq(struct vtnet_softc *sc)
3545 {
3546
3547 /*
3548 * The control virtqueue is only polled and therefore it should
3549 * already be empty.
3550 */
3551 KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq),
3552 ("%s: ctrl vq %p not empty", __func__, sc->vtnet_ctrl_vq));
3553 }
3554
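/*
 * Execute a command on the control virtqueue: enqueue the caller's
 * readable and writable segments, notify the host, and poll until the
 * buffer is returned. Callers check the ack byte for VIRTIO_NET_OK.
 */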
3555 static void
3556 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie,
3557 struct sglist *sg, int readable, int writable)
3558 {
3559 struct virtqueue *vq;
3560
3561 vq = sc->vtnet_ctrl_vq;
3562
3563 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ);
3564 VTNET_CORE_LOCK_ASSERT(sc);
3565
3566 if (!virtqueue_empty(vq))
3567 return;
3568
3569 /*
3570 * Poll for the response, but the command is likely completed before
3571 * returning from the notify.
3572 */
3573 if (virtqueue_enqueue(vq, cookie, sg, readable, writable) == 0) {
3574 virtqueue_notify(vq);
3575 virtqueue_poll(vq, NULL);
3576 }
3577 }
3578
3579 static int
3580 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr)
3581 {
3582 struct sglist_seg segs[3];
3583 struct sglist sg;
3584 struct {
3585 struct virtio_net_ctrl_hdr hdr __aligned(2);
3586 uint8_t pad1;
3587 uint8_t addr[ETHER_ADDR_LEN] __aligned(8);
3588 uint8_t pad2;
3589 uint8_t ack;
3590 } s;
3591 int error;
3592
3593 error = 0;
3594 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC);
3595
3596 s.hdr.class = VIRTIO_NET_CTRL_MAC;
3597 s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET;
3598 bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN);
3599 s.ack = VIRTIO_NET_ERR;
3600
3601 sglist_init(&sg, nitems(segs), segs);
3602 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3603 error |= sglist_append(&sg, &s.addr[0], ETHER_ADDR_LEN);
3604 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3605 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3606
3607 if (error == 0)
3608 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3609
3610 return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3611 }
3612
3613 static int
3614 vtnet_ctrl_guest_offloads(struct vtnet_softc *sc, uint64_t offloads)
3615 {
3616 struct sglist_seg segs[3];
3617 struct sglist sg;
3618 struct {
3619 struct virtio_net_ctrl_hdr hdr __aligned(2);
3620 uint8_t pad1;
3621 uint64_t offloads __aligned(8);
3622 uint8_t pad2;
3623 uint8_t ack;
3624 } s;
3625 int error;
3626
3627 error = 0;
3628 MPASS(sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS);
3629
3630 s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS;
3631 s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET;
3632 s.offloads = vtnet_gtoh64(sc, offloads);
3633 s.ack = VIRTIO_NET_ERR;
3634
3635 sglist_init(&sg, nitems(segs), segs);
3636 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3637 error |= sglist_append(&sg, &s.offloads, sizeof(uint64_t));
3638 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3639 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3640
3641 if (error == 0)
3642 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3643
3644 return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3645 }
3646
3647 static int
3648 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs)
3649 {
3650 struct sglist_seg segs[3];
3651 struct sglist sg;
3652 struct {
3653 struct virtio_net_ctrl_hdr hdr __aligned(2);
3654 uint8_t pad1;
3655 struct virtio_net_ctrl_mq mq __aligned(2);
3656 uint8_t pad2;
3657 uint8_t ack;
3658 } s;
3659 int error;
3660
3661 error = 0;
3662 MPASS(sc->vtnet_flags & VTNET_FLAG_MQ);
3663
3664 s.hdr.class = VIRTIO_NET_CTRL_MQ;
3665 s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET;
3666 s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs);
3667 s.ack = VIRTIO_NET_ERR;
3668
3669 sglist_init(&sg, nitems(segs), segs);
3670 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3671 error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq));
3672 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3673 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3674
3675 if (error == 0)
3676 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3677
3678 return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3679 }
3680
3681 static int
3682 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, bool on)
3683 {
3684 struct sglist_seg segs[3];
3685 struct sglist sg;
3686 struct {
3687 struct virtio_net_ctrl_hdr hdr __aligned(2);
3688 uint8_t pad1;
3689 uint8_t onoff;
3690 uint8_t pad2;
3691 uint8_t ack;
3692 } s;
3693 int error;
3694
3695 error = 0;
3696 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
3697
3698 s.hdr.class = VIRTIO_NET_CTRL_RX;
3699 s.hdr.cmd = cmd;
3700 s.onoff = on;
3701 s.ack = VIRTIO_NET_ERR;
3702
3703 sglist_init(&sg, nitems(segs), segs);
3704 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3705 error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t));
3706 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3707 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3708
3709 if (error == 0)
3710 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3711
3712 return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3713 }
3714
3715 static int
3716 vtnet_set_promisc(struct vtnet_softc *sc, bool on)
3717 {
3718 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on));
3719 }
3720
3721 static int
3722 vtnet_set_allmulti(struct vtnet_softc *sc, bool on)
3723 {
3724 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on));
3725 }
3726
3727 static void
3728 vtnet_rx_filter(struct vtnet_softc *sc)
3729 {
3730 device_t dev;
3731 if_t ifp;
3732
3733 dev = sc->vtnet_dev;
3734 ifp = sc->vtnet_ifp;
3735
3736 VTNET_CORE_LOCK_ASSERT(sc);
3737
3738 if (vtnet_set_promisc(sc, if_getflags(ifp) & IFF_PROMISC) != 0) {
3739 device_printf(dev, "cannot %s promiscuous mode\n",
3740 if_getflags(ifp) & IFF_PROMISC ? "enable" : "disable");
3741 }
3742
3743 if (vtnet_set_allmulti(sc, if_getflags(ifp) & IFF_ALLMULTI) != 0) {
3744 device_printf(dev, "cannot %s all-multicast mode\n",
3745 if_getflags(ifp) & IFF_ALLMULTI ? "enable" : "disable");
3746 }
3747 }
3748
3749 static u_int
3750 vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt)
3751 {
3752 struct vtnet_softc *sc = arg;
3753
3754 if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0)
3755 return (0);
3756
3757 if (ucnt < VTNET_MAX_MAC_ENTRIES)
3758 bcopy(LLADDR(sdl),
3759 &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt],
3760 ETHER_ADDR_LEN);
3761
3762 return (1);
3763 }
3764
3765 static u_int
3766 vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt)
3767 {
3768 struct vtnet_mac_filter *filter = arg;
3769
3770 if (mcnt < VTNET_MAX_MAC_ENTRIES)
3771 bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt],
3772 ETHER_ADDR_LEN);
3773
3774 return (1);
3775 }
3776
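/*
 * Program the host MAC filter table with the interface's unicast and
 * multicast addresses. If either list exceeds VTNET_MAX_MAC_ENTRIES,
 * fall back to promiscuous or all-multicast mode, respectively.
 */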
3777 static void
3778 vtnet_rx_filter_mac(struct vtnet_softc *sc)
3779 {
3780 struct virtio_net_ctrl_hdr hdr __aligned(2);
3781 struct vtnet_mac_filter *filter;
3782 struct sglist_seg segs[4];
3783 struct sglist sg;
3784 if_t ifp;
3785 bool promisc, allmulti;
3786 u_int ucnt, mcnt;
3787 int error;
3788 uint8_t ack;
3789
3790 ifp = sc->vtnet_ifp;
3791 filter = sc->vtnet_mac_filter;
3792 error = 0;
3793
3794 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX);
3795 VTNET_CORE_LOCK_ASSERT(sc);
3796
3797 /* Unicast MAC addresses: */
3798 ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc);
3799 promisc = (ucnt > VTNET_MAX_MAC_ENTRIES);
3800
3801 if (promisc) {
3802 ucnt = 0;
3803 if_printf(ifp, "more than %d MAC addresses assigned, "
3804 "falling back to promiscuous mode\n",
3805 VTNET_MAX_MAC_ENTRIES);
3806 }
3807
3808 /* Multicast MAC addresses: */
3809 mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter);
3810 allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES);
3811
3812 if (allmulti) {
3813 mcnt = 0;
3814 if_printf(ifp, "more than %d multicast MAC addresses "
3815 "assigned, falling back to all-multicast mode\n",
3816 VTNET_MAX_MAC_ENTRIES);
3817 }
3818
3819 if (promisc && allmulti)
3820 goto out;
3821
3822 filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt);
3823 filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt);
3824
3825 hdr.class = VIRTIO_NET_CTRL_MAC;
3826 hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET;
3827 ack = VIRTIO_NET_ERR;
3828
3829 sglist_init(&sg, nitems(segs), segs);
3830 error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr));
3831 error |= sglist_append(&sg, &filter->vmf_unicast,
3832 sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN);
3833 error |= sglist_append(&sg, &filter->vmf_multicast,
3834 sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN);
3835 error |= sglist_append(&sg, &ack, sizeof(uint8_t));
3836 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3837
3838 if (error == 0)
3839 vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1);
3840 if (ack != VIRTIO_NET_OK)
3841 if_printf(ifp, "error setting host MAC filter table\n");
3842
3843 out:
3844 if (promisc && vtnet_set_promisc(sc, true) != 0)
3845 if_printf(ifp, "cannot enable promiscuous mode\n");
3846 if (allmulti && vtnet_set_allmulti(sc, true) != 0)
3847 if_printf(ifp, "cannot enable all-multicast mode\n");
3848 }
3849
3850 static int
3851 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3852 {
3853 struct sglist_seg segs[3];
3854 struct sglist sg;
3855 struct {
3856 struct virtio_net_ctrl_hdr hdr __aligned(2);
3857 uint8_t pad1;
3858 uint16_t tag __aligned(2);
3859 uint8_t pad2;
3860 uint8_t ack;
3861 } s;
3862 int error;
3863
3864 error = 0;
3865 MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
3866
3867 s.hdr.class = VIRTIO_NET_CTRL_VLAN;
3868 s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL;
3869 s.tag = vtnet_gtoh16(sc, tag);
3870 s.ack = VIRTIO_NET_ERR;
3871
3872 sglist_init(&sg, nitems(segs), segs);
3873 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr));
3874 error |= sglist_append(&sg, &s.tag, sizeof(uint16_t));
3875 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t));
3876 MPASS(error == 0 && sg.sg_nseg == nitems(segs));
3877
3878 if (error == 0)
3879 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1);
3880
3881 return (s.ack == VIRTIO_NET_OK ? 0 : EIO);
3882 }
3883
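/*
 * Reprogram the host VLAN filter from the shadow bitmap of configured
 * VLANs.
 */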
3884 static void
3885 vtnet_rx_filter_vlan(struct vtnet_softc *sc)
3886 {
3887 int i, bit;
3888 uint32_t w;
3889 uint16_t tag;
3890
3891 MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER);
3892 VTNET_CORE_LOCK_ASSERT(sc);
3893
3894 /* Enable the filter for each configured VLAN. */
3895 for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) {
3896 w = sc->vtnet_vlan_filter[i];
3897
3898 while ((bit = ffs(w) - 1) != -1) {
3899 w &= ~(1 << bit);
3900 tag = sizeof(w) * CHAR_BIT * i + bit;
3901
3902 if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) {
3903 device_printf(sc->vtnet_dev,
3904 "cannot enable VLAN %d filter\n", tag);
3905 }
3906 }
3907 }
3908 }
3909
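/*
 * Record the VLAN tag in the shadow bitmap (32 tags per word) and, when
 * hardware VLAN filtering is enabled and the interface is running, update
 * the host filter table as well.
 */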
3910 static void
3911 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag)
3912 {
3913 if_t ifp;
3914 int idx, bit;
3915
3916 ifp = sc->vtnet_ifp;
3917 idx = (tag >> 5) & 0x7F;
3918 bit = tag & 0x1F;
3919
3920 if (tag == 0 || tag > 4095)
3921 return;
3922
3923 VTNET_CORE_LOCK(sc);
3924
3925 if (add)
3926 sc->vtnet_vlan_filter[idx] |= (1 << bit);
3927 else
3928 sc->vtnet_vlan_filter[idx] &= ~(1 << bit);
3929
3930 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER &&
3931 if_getdrvflags(ifp) & IFF_DRV_RUNNING &&
3932 vtnet_exec_vlan_filter(sc, add, tag) != 0) {
3933 device_printf(sc->vtnet_dev,
3934 "cannot %s VLAN %d %s the host filter table\n",
3935 add ? "add" : "remove", tag, add ? "to" : "from");
3936 }
3937
3938 VTNET_CORE_UNLOCK(sc);
3939 }
3940
3941 static void
3942 vtnet_register_vlan(void *arg, if_t ifp, uint16_t tag)
3943 {
3944
3945 if (if_getsoftc(ifp) != arg)
3946 return;
3947
3948 vtnet_update_vlan_filter(arg, 1, tag);
3949 }
3950
3951 static void
3952 vtnet_unregister_vlan(void *arg, if_t ifp, uint16_t tag)
3953 {
3954
3955 if (if_getsoftc(ifp) != arg)
3956 return;
3957
3958 vtnet_update_vlan_filter(arg, 0, tag);
3959 }
3960
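/*
 * If VIRTIO_NET_F_SPEED_DUPLEX was negotiated, update the interface
 * baudrate from the device-reported speed (duplex is ignored).
 */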
3961 static void
3962 vtnet_update_speed_duplex(struct vtnet_softc *sc)
3963 {
3964 if_t ifp;
3965 uint32_t speed;
3966
3967 ifp = sc->vtnet_ifp;
3968
3969 if ((sc->vtnet_features & VIRTIO_NET_F_SPEED_DUPLEX) == 0)
3970 return;
3971
3972 /* BMV: Ignore duplex. */
3973 speed = virtio_read_dev_config_4(sc->vtnet_dev,
3974 offsetof(struct virtio_net_config, speed));
3975 if (speed != UINT32_MAX)
3976 if_setbaudrate(ifp, IF_Mbps(speed));
3977 }
3978
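/*
 * Without VIRTIO_NET_F_STATUS, the link is always reported as up.
 */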
3979 static int
3980 vtnet_is_link_up(struct vtnet_softc *sc)
3981 {
3982 uint16_t status;
3983
3984 if ((sc->vtnet_features & VIRTIO_NET_F_STATUS) == 0)
3985 return (1);
3986
3987 status = virtio_read_dev_config_2(sc->vtnet_dev,
3988 offsetof(struct virtio_net_config, status));
3989
3990 return ((status & VIRTIO_NET_S_LINK_UP) != 0);
3991 }
3992
3993 static void
3994 vtnet_update_link_status(struct vtnet_softc *sc)
3995 {
3996 if_t ifp;
3997 int link;
3998
3999 ifp = sc->vtnet_ifp;
4000 VTNET_CORE_LOCK_ASSERT(sc);
4001 link = vtnet_is_link_up(sc);
4002
4003 /* Notify if the link status has changed. */
4004 if (link != 0 && sc->vtnet_link_active == 0) {
4005 vtnet_update_speed_duplex(sc);
4006 sc->vtnet_link_active = 1;
4007 if_link_state_change(ifp, LINK_STATE_UP);
4008 } else if (link == 0 && sc->vtnet_link_active != 0) {
4009 sc->vtnet_link_active = 0;
4010 if_link_state_change(ifp, LINK_STATE_DOWN);
4011 }
4012 }
4013
4014 static int
4015 vtnet_ifmedia_upd(if_t ifp __unused)
4016 {
4017 return (EOPNOTSUPP);
4018 }
4019
4020 static void
4021 vtnet_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr)
4022 {
4023 struct vtnet_softc *sc;
4024
4025 sc = if_getsoftc(ifp);
4026
4027 ifmr->ifm_status = IFM_AVALID;
4028 ifmr->ifm_active = IFM_ETHER;
4029
4030 VTNET_CORE_LOCK(sc);
4031 if (vtnet_is_link_up(sc) != 0) {
4032 ifmr->ifm_status |= IFM_ACTIVE;
4033 ifmr->ifm_active |= IFM_10G_T | IFM_FDX;
4034 } else
4035 ifmr->ifm_active |= IFM_NONE;
4036 VTNET_CORE_UNLOCK(sc);
4037 }
4038
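/*
 * Read the MAC address from the device configuration space, or generate a
 * random locally administered address if the device does not provide one.
 */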
4039 static void
4040 vtnet_get_macaddr(struct vtnet_softc *sc)
4041 {
4042
4043 if (sc->vtnet_flags & VTNET_FLAG_MAC) {
4044 virtio_read_device_config_array(sc->vtnet_dev,
4045 offsetof(struct virtio_net_config, mac),
4046 &sc->vtnet_hwaddr[0], sizeof(uint8_t), ETHER_ADDR_LEN);
4047 } else {
4048 /* Generate a random locally administered unicast address. */
4049 sc->vtnet_hwaddr[0] = 0xB2;
4050 arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0);
4051 }
4052 }
4053
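/*
 * Push the current MAC address to the device, preferring the control
 * virtqueue command when available; otherwise, legacy (non-modern) devices
 * are updated by writing the config space MAC field directly.
 */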
4054 static void
4055 vtnet_set_macaddr(struct vtnet_softc *sc)
4056 {
4057 device_t dev;
4058 int error;
4059
4060 dev = sc->vtnet_dev;
4061
4062 if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) {
4063 error = vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr);
4064 if (error)
4065 device_printf(dev, "unable to set MAC address\n");
4066 return;
4067 }
4068
4069 /* MAC in config is read-only in modern VirtIO. */
4070 if (!vtnet_modern(sc) && sc->vtnet_flags & VTNET_FLAG_MAC) {
4071 for (int i = 0; i < ETHER_ADDR_LEN; i++) {
4072 virtio_write_dev_config_1(dev,
4073 offsetof(struct virtio_net_config, mac) + i,
4074 sc->vtnet_hwaddr[i]);
4075 }
4076 }
4077 }
4078
4079 static void
4080 vtnet_attached_set_macaddr(struct vtnet_softc *sc)
4081 {
4082
4083 /* Assign MAC address if it was generated. */
4084 if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0)
4085 vtnet_set_macaddr(sc);
4086 }
4087
4088 static void
4089 vtnet_vlan_tag_remove(struct mbuf *m)
4090 {
4091 struct ether_vlan_header *evh;
4092
4093 evh = mtod(m, struct ether_vlan_header *);
4094 m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag);
4095 m->m_flags |= M_VLANTAG;
4096
4097 /* Strip the 802.1Q header. */
4098 bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN,
4099 ETHER_HDR_LEN - ETHER_TYPE_LEN);
4100 m_adj(m, ETHER_VLAN_ENCAP_LEN);
4101 }
4102
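/*
 * Set the per-interrupt Rx processing limit; a negative tunable value
 * means no limit.
 */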
4103 static void
4104 vtnet_set_rx_process_limit(struct vtnet_softc *sc)
4105 {
4106 int limit;
4107
4108 limit = vtnet_tunable_int(sc, "rx_process_limit",
4109 vtnet_rx_process_limit);
4110 if (limit < 0)
4111 limit = INT_MAX;
4112 sc->vtnet_rx_process_limit = limit;
4113 }
4114
4115 static void
4116 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx,
4117 struct sysctl_oid_list *child, struct vtnet_rxq *rxq)
4118 {
4119 struct sysctl_oid *node;
4120 struct sysctl_oid_list *list;
4121 struct vtnet_rxq_stats *stats;
4122 char namebuf[16];
4123
4124 snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id);
4125 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
4126 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue");
4127 list = SYSCTL_CHILDREN(node);
4128
4129 stats = &rxq->vtnrx_stats;
4130
4131 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets",
4132 CTLFLAG_RD | CTLFLAG_STATS,
4133 &stats->vrxs_ipackets, "Receive packets");
4134 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes",
4135 CTLFLAG_RD | CTLFLAG_STATS,
4136 &stats->vrxs_ibytes, "Receive bytes");
4137 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops",
4138 CTLFLAG_RD | CTLFLAG_STATS,
4139 &stats->vrxs_iqdrops, "Receive drops");
4140 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors",
4141 CTLFLAG_RD | CTLFLAG_STATS,
4142 &stats->vrxs_ierrors, "Receive errors");
4143 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum",
4144 CTLFLAG_RD | CTLFLAG_STATS,
4145 &stats->vrxs_csum, "Receive checksum offloaded");
4146 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed",
4147 CTLFLAG_RD | CTLFLAG_STATS,
4148 &stats->vrxs_csum_failed, "Receive checksum offload failed");
4149 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro",
4150 CTLFLAG_RD | CTLFLAG_STATS,
4151 &stats->vrxs_host_lro, "Receive host segmentation offloaded");
4152 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled",
4153 CTLFLAG_RD | CTLFLAG_STATS,
4154 &stats->vrxs_rescheduled,
4155 "Receive interrupt handler rescheduled");
4156 }
4157
4158 static void
4159 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx,
4160 struct sysctl_oid_list *child, struct vtnet_txq *txq)
4161 {
4162 struct sysctl_oid *node;
4163 struct sysctl_oid_list *list;
4164 struct vtnet_txq_stats *stats;
4165 char namebuf[16];
4166
4167 snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id);
4168 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf,
4169 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue");
4170 list = SYSCTL_CHILDREN(node);
4171
4172 stats = &txq->vtntx_stats;
4173
4174 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets",
4175 CTLFLAG_RD | CTLFLAG_STATS,
4176 &stats->vtxs_opackets, "Transmit packets");
4177 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes",
4178 CTLFLAG_RD | CTLFLAG_STATS,
4179 &stats->vtxs_obytes, "Transmit bytes");
4180 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts",
4181 CTLFLAG_RD | CTLFLAG_STATS,
4182 &stats->vtxs_omcasts, "Transmit multicasts");
4183 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum",
4184 CTLFLAG_RD | CTLFLAG_STATS,
4185 &stats->vtxs_csum, "Transmit checksum offloaded");
4186 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso",
4187 CTLFLAG_RD | CTLFLAG_STATS,
4188 &stats->vtxs_tso, "Transmit TCP segmentation offloaded");
4189 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled",
4190 CTLFLAG_RD | CTLFLAG_STATS,
4191 &stats->vtxs_rescheduled,
4192 "Transmit interrupt handler rescheduled");
4193 }
4194
4195 static void
4196 vtnet_setup_queue_sysctl(struct vtnet_softc *sc)
4197 {
4198 device_t dev;
4199 struct sysctl_ctx_list *ctx;
4200 struct sysctl_oid *tree;
4201 struct sysctl_oid_list *child;
4202 int i;
4203
4204 dev = sc->vtnet_dev;
4205 ctx = device_get_sysctl_ctx(dev);
4206 tree = device_get_sysctl_tree(dev);
4207 child = SYSCTL_CHILDREN(tree);
4208
4209 for (i = 0; i < sc->vtnet_req_vq_pairs; i++) {
4210 vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]);
4211 vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]);
4212 }
4213 }
4214
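/*
 * The sysctl handlers below aggregate the per-queue counters into the
 * corresponding softc statistic each time they are read.
 */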
4215 static int
4216 vtnet_sysctl_rx_csum_failed(SYSCTL_HANDLER_ARGS)
4217 {
4218 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4219 struct vtnet_statistics *stats = &sc->vtnet_stats;
4220 struct vtnet_rxq_stats *rxst;
4221 int i;
4222
4223 stats->rx_csum_failed = 0;
4224 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4225 rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
4226 stats->rx_csum_failed += rxst->vrxs_csum_failed;
4227 }
4228 return (sysctl_handle_64(oidp, NULL, stats->rx_csum_failed, req));
4229 }
4230
4231 static int
4232 vtnet_sysctl_rx_csum_offloaded(SYSCTL_HANDLER_ARGS)
4233 {
4234 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4235 struct vtnet_statistics *stats = &sc->vtnet_stats;
4236 struct vtnet_rxq_stats *rxst;
4237 int i;
4238
4239 stats->rx_csum_offloaded = 0;
4240 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4241 rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
4242 stats->rx_csum_offloaded += rxst->vrxs_csum;
4243 }
4244 return (sysctl_handle_64(oidp, NULL, stats->rx_csum_offloaded, req));
4245 }
4246
4247 static int
4248 vtnet_sysctl_rx_task_rescheduled(SYSCTL_HANDLER_ARGS)
4249 {
4250 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4251 struct vtnet_statistics *stats = &sc->vtnet_stats;
4252 struct vtnet_rxq_stats *rxst;
4253 int i;
4254
4255 stats->rx_task_rescheduled = 0;
4256 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4257 rxst = &sc->vtnet_rxqs[i].vtnrx_stats;
4258 stats->rx_task_rescheduled += rxst->vrxs_rescheduled;
4259 }
4260 return (sysctl_handle_64(oidp, NULL, stats->rx_task_rescheduled, req));
4261 }
4262
4263 static int
4264 vtnet_sysctl_tx_csum_offloaded(SYSCTL_HANDLER_ARGS)
4265 {
4266 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4267 struct vtnet_statistics *stats = &sc->vtnet_stats;
4268 struct vtnet_txq_stats *txst;
4269 int i;
4270
4271 stats->tx_csum_offloaded = 0;
4272 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4273 txst = &sc->vtnet_txqs[i].vtntx_stats;
4274 stats->tx_csum_offloaded += txst->vtxs_csum;
4275 }
4276 return (sysctl_handle_64(oidp, NULL, stats->tx_csum_offloaded, req));
4277 }
4278
4279 static int
4280 vtnet_sysctl_tx_tso_offloaded(SYSCTL_HANDLER_ARGS)
4281 {
4282 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4283 struct vtnet_statistics *stats = &sc->vtnet_stats;
4284 struct vtnet_txq_stats *txst;
4285 int i;
4286
4287 stats->tx_tso_offloaded = 0;
4288 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4289 txst = &sc->vtnet_txqs[i].vtntx_stats;
4290 stats->tx_tso_offloaded += txst->vtxs_tso;
4291 }
4292 return (sysctl_handle_64(oidp, NULL, stats->tx_tso_offloaded, req));
4293 }
4294
4295 static int
4296 vtnet_sysctl_tx_task_rescheduled(SYSCTL_HANDLER_ARGS)
4297 {
4298 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4299 struct vtnet_statistics *stats = &sc->vtnet_stats;
4300 struct vtnet_txq_stats *txst;
4301 int i;
4302
4303 stats->tx_task_rescheduled = 0;
4304 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
4305 txst = &sc->vtnet_txqs[i].vtntx_stats;
4306 stats->tx_task_rescheduled += txst->vtxs_rescheduled;
4307 }
4308 return (sysctl_handle_64(oidp, NULL, stats->tx_task_rescheduled, req));
4309 }
4310
4311 static void
4312 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx,
4313 struct sysctl_oid_list *child, struct vtnet_softc *sc)
4314 {
4315 struct vtnet_statistics *stats;
4316 struct vtnet_rxq_stats rxaccum;
4317 struct vtnet_txq_stats txaccum;
4318
4319 vtnet_accum_stats(sc, &rxaccum, &txaccum);
4320
4321 stats = &sc->vtnet_stats;
4322 stats->rx_csum_offloaded = rxaccum.vrxs_csum;
4323 stats->rx_csum_failed = rxaccum.vrxs_csum_failed;
4324 stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled;
4325 stats->tx_csum_offloaded = txaccum.vtxs_csum;
4326 stats->tx_tso_offloaded = txaccum.vtxs_tso;
4327 stats->tx_task_rescheduled = txaccum.vtxs_rescheduled;
4328
4329 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed",
4330 CTLFLAG_RD | CTLFLAG_STATS, &stats->mbuf_alloc_failed,
4331 "Mbuf cluster allocation failures");
4332
4333 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large",
4334 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_frame_too_large,
4335 "Received frame larger than the mbuf chain");
4336 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed",
4337 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_enq_replacement_failed,
4338 "Enqueuing the replacement receive mbuf failed");
4339 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed",
4340 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_mergeable_failed,
4341 "Mergeable buffers receive failures");
4342 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype",
4343 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_ethtype,
4344 "Received checksum offloaded buffer with unsupported "
4345 "Ethernet type");
4346 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto",
4347 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_ipproto,
4348 "Received checksum offloaded buffer with incorrect IP protocol");
4349 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset",
4350 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_offset,
4351 "Received checksum offloaded buffer with incorrect offset");
4352 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_inaccessible_ipproto",
4353 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_inaccessible_ipproto,
4354 "Received checksum offloaded buffer with inaccessible IP protocol");
4355 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_csum_failed",
4356 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4357 sc, 0, vtnet_sysctl_rx_csum_failed, "QU",
4358 "Received buffer checksum offload failed");
4359 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_csum_offloaded",
4360 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4361 sc, 0, vtnet_sysctl_rx_csum_offloaded, "QU",
4362 "Received buffer checksum offload succeeded");
4363 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_task_rescheduled",
4364 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4365 sc, 0, vtnet_sysctl_rx_task_rescheduled, "QU",
4366 "Times the receive interrupt task rescheduled itself");
4367
4368 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype",
4369 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_csum_unknown_ethtype,
4370 "Aborted transmit of checksum offloaded buffer with unknown "
4371 "Ethernet type");
4372 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch",
4373 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_csum_proto_mismatch,
4374 "Aborted transmit of checksum offloaded buffer because of "
4375 "mismatched protocols");
4376 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
4377 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_tso_not_tcp,
4378 "Aborted transmit of TSO buffer with non-TCP protocol");
4379 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum",
4380 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_tso_without_csum,
4381 "Aborted transmit of TSO buffer without TCP checksum offload");
4382 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
4383 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_defragged,
4384 "Transmit mbufs defragged");
4385 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
4386 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_defrag_failed,
4387 "Aborted transmit of buffer because defrag failed");
4388 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_csum_offloaded",
4389 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4390 sc, 0, vtnet_sysctl_tx_csum_offloaded, "QU",
4391 "Offloaded checksum of transmitted buffer");
4392 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_tso_offloaded",
4393 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4394 sc, 0, vtnet_sysctl_tx_tso_offloaded, "QU",
4395 "Segmentation offload of transmitted buffer");
4396 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_task_rescheduled",
4397 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS,
4398 sc, 0, vtnet_sysctl_tx_task_rescheduled, "QU",
4399 "Times the transmit interrupt task rescheduled itself");
4400 }
4401
4402 static int
4403 vtnet_sysctl_features(SYSCTL_HANDLER_ARGS)
4404 {
4405 struct sbuf sb;
4406 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4407 int error;
4408
4409 sbuf_new_for_sysctl(&sb, NULL, 0, req);
4410 sbuf_printf(&sb, "%b", (uint32_t)sc->vtnet_features,
4411 VIRTIO_NET_FEATURE_BITS);
4412 error = sbuf_finish(&sb);
4413 sbuf_delete(&sb);
4414 return (error);
4415 }
4416
4417 static int
4418 vtnet_sysctl_flags(SYSCTL_HANDLER_ARGS)
4419 {
4420 struct sbuf sb;
4421 struct vtnet_softc *sc = (struct vtnet_softc *)arg1;
4422 int error;
4423
4424 sbuf_new_for_sysctl(&sb, NULL, 0, req);
4425 sbuf_printf(&sb, "%b", sc->vtnet_flags, VTNET_FLAGS_BITS);
4426 error = sbuf_finish(&sb);
4427 sbuf_delete(&sb);
4428 return (error);
4429 }
4430
4431 static void
4432 vtnet_setup_sysctl(struct vtnet_softc *sc)
4433 {
4434 device_t dev;
4435 struct sysctl_ctx_list *ctx;
4436 struct sysctl_oid *tree;
4437 struct sysctl_oid_list *child;
4438
4439 dev = sc->vtnet_dev;
4440 ctx = device_get_sysctl_ctx(dev);
4441 tree = device_get_sysctl_tree(dev);
4442 child = SYSCTL_CHILDREN(tree);
4443
4444 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
4445 CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
4446 "Number of maximum supported virtqueue pairs");
4447 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "req_vq_pairs",
4448 CTLFLAG_RD, &sc->vtnet_req_vq_pairs, 0,
4449 "Number of requested virtqueue pairs");
4450 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
4451 CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
4452 "Number of active virtqueue pairs");
4453 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "features",
4454 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
4455 vtnet_sysctl_features, "A", "Features");
4456 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "flags",
4457 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0,
4458 vtnet_sysctl_flags, "A", "Flags");
4459
4460 vtnet_setup_stat_sysctl(ctx, child, sc);
4461 }
4462
4463 static void
4464 vtnet_load_tunables(struct vtnet_softc *sc)
4465 {
4466
4467 sc->vtnet_lro_entry_count = vtnet_tunable_int(sc,
4468 "lro_entry_count", vtnet_lro_entry_count);
4469 if (sc->vtnet_lro_entry_count < TCP_LRO_ENTRIES)
4470 sc->vtnet_lro_entry_count = TCP_LRO_ENTRIES;
4471
4472 sc->vtnet_lro_mbufq_depth = vtnet_tunable_int(sc,
4473 "lro_mbufq_depth", vtnet_lro_mbufq_depth);
4474 }
4475
4476 static int
4477 vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
4478 {
4479
4480 return (virtqueue_enable_intr(rxq->vtnrx_vq));
4481 }
4482
4483 static void
4484 vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
4485 {
4486
4487 virtqueue_disable_intr(rxq->vtnrx_vq);
4488 }
4489
4490 static int
4491 vtnet_txq_enable_intr(struct vtnet_txq *txq)
4492 {
4493 struct virtqueue *vq;
4494
4495 vq = txq->vtntx_vq;
4496
4497 if (vtnet_txq_below_threshold(txq) != 0)
4498 return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));
4499
4500 /*
4501 * The free count is above our threshold. Keep the Tx interrupt
4502 * disabled until the queue is fuller.
4503 */
4504 return (0);
4505 }
4506
4507 static void
4508 vtnet_txq_disable_intr(struct vtnet_txq *txq)
4509 {
4510
4511 virtqueue_disable_intr(txq->vtntx_vq);
4512 }
4513
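/*
 * Enable the Rx interrupt on each active queue; if the virtqueue reports
 * work still pending, reschedule the Rx task so it is not missed.
 */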
4514 static void
4515 vtnet_enable_rx_interrupts(struct vtnet_softc *sc)
4516 {
4517 struct vtnet_rxq *rxq;
4518 int i;
4519
4520 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
4521 rxq = &sc->vtnet_rxqs[i];
4522 if (vtnet_rxq_enable_intr(rxq) != 0)
4523 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
4524 }
4525 }
4526
4527 static void
4528 vtnet_enable_tx_interrupts(struct vtnet_softc *sc)
4529 {
4530 int i;
4531
4532 for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4533 vtnet_txq_enable_intr(&sc->vtnet_txqs[i]);
4534 }
4535
4536 static void
4537 vtnet_enable_interrupts(struct vtnet_softc *sc)
4538 {
4539
4540 vtnet_enable_rx_interrupts(sc);
4541 vtnet_enable_tx_interrupts(sc);
4542 }
4543
4544 static void
4545 vtnet_disable_rx_interrupts(struct vtnet_softc *sc)
4546 {
4547 int i;
4548
4549 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
4550 vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]);
4551 }
4552
4553 static void
4554 vtnet_disable_tx_interrupts(struct vtnet_softc *sc)
4555 {
4556 int i;
4557
4558 for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
4559 vtnet_txq_disable_intr(&sc->vtnet_txqs[i]);
4560 }
4561
4562 static void
4563 vtnet_disable_interrupts(struct vtnet_softc *sc)
4564 {
4565
4566 vtnet_disable_rx_interrupts(sc);
4567 vtnet_disable_tx_interrupts(sc);
4568 }
4569
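/*
 * Fetch a per-device tunable, hw.vtnet.<unit>.<knob>, falling back to the
 * supplied default when it is not set.
 */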
4570 static int
4571 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def)
4572 {
4573 char path[64];
4574
4575 snprintf(path, sizeof(path),
4576 "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob);
4577 TUNABLE_INT_FETCH(path, &def);
4578
4579 return (def);
4580 }
4581
4582 #ifdef DEBUGNET
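/*
 * Debugnet (netdump) support: vtnet_debugnet_init() reports the Rx queue
 * count and cluster size used while dumping over the network.
 */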
4583 static void
4584 vtnet_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize)
4585 {
4586 struct vtnet_softc *sc;
4587
4588 sc = if_getsoftc(ifp);
4589
4590 VTNET_CORE_LOCK(sc);
4591 *nrxr = sc->vtnet_req_vq_pairs;
4592 *ncl = DEBUGNET_MAX_IN_FLIGHT;
4593 *clsize = sc->vtnet_rx_clustersz;
4594 VTNET_CORE_UNLOCK(sc);
4595 }
4596
4597 static void
4598 vtnet_debugnet_event(if_t ifp __unused, enum debugnet_ev event)
4599 {
4600 struct vtnet_softc *sc;
4601 static bool sw_lro_enabled = false;
4602
4603 /*
4604 * Disable software LRO, since it would require entering the network
4605 * epoch when calling vtnet_txq_eof() in vtnet_debugnet_poll().
4606 */
4607 sc = if_getsoftc(ifp);
4608 switch (event) {
4609 case DEBUGNET_START:
4610 sw_lro_enabled = (sc->vtnet_flags & VTNET_FLAG_SW_LRO) != 0;
4611 if (sw_lro_enabled)
4612 sc->vtnet_flags &= ~VTNET_FLAG_SW_LRO;
4613 break;
4614 case DEBUGNET_END:
4615 if (sw_lro_enabled)
4616 sc->vtnet_flags |= VTNET_FLAG_SW_LRO;
4617 break;
4618 }
4619 }
4620
4621 static int
4622 vtnet_debugnet_transmit(if_t ifp, struct mbuf *m)
4623 {
4624 struct vtnet_softc *sc;
4625 struct vtnet_txq *txq;
4626 int error;
4627
4628 sc = if_getsoftc(ifp);
4629 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4630 IFF_DRV_RUNNING)
4631 return (EBUSY);
4632
4633 txq = &sc->vtnet_txqs[0];
4634 error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE);
4635 if (error == 0)
4636 (void)vtnet_txq_notify(txq);
4637 return (error);
4638 }
4639
4640 static int
4641 vtnet_debugnet_poll(if_t ifp, int count)
4642 {
4643 struct vtnet_softc *sc;
4644 int i;
4645
4646 sc = if_getsoftc(ifp);
4647 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) !=
4648 IFF_DRV_RUNNING)
4649 return (EBUSY);
4650
4651 (void)vtnet_txq_eof(&sc->vtnet_txqs[0]);
4652 for (i = 0; i < sc->vtnet_act_vq_pairs; i++)
4653 (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]);
4654 return (0);
4655 }
4656 #endif /* DEBUGNET */
4657