/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO network devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sockio.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/msan.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/random.h>
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/taskqueue.h>
#include <sys/smp.h>
#include <machine/smp.h>

#include <vm/uma.h>

#include <net/debugnet.h>
#include <net/ethernet.h>
#include <net/pfil.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_media.h>
#include <net/if_vlan_var.h>

#include <net/bpf.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/network/virtio_net.h>
#include <dev/virtio/network/if_vtnetvar.h>
#include "virtio_if.h"

#include "opt_inet.h"
#include "opt_inet6.h"

#if defined(INET) || defined(INET6)
#include <machine/in_cksum.h>
#endif

static int	vtnet_modevent(module_t, int, void *);

static int	vtnet_probe(device_t);
static int	vtnet_attach(device_t);
static int	vtnet_detach(device_t);
static int	vtnet_suspend(device_t);
static int	vtnet_resume(device_t);
static int	vtnet_shutdown(device_t);
static int	vtnet_attach_completed(device_t);
static int	vtnet_config_change(device_t);

static int	vtnet_negotiate_features(struct vtnet_softc *);
static int	vtnet_setup_features(struct vtnet_softc *);
static int	vtnet_init_rxq(struct vtnet_softc *, int);
static int	vtnet_init_txq(struct vtnet_softc *, int);
static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
static void	vtnet_free_rx_filters(struct vtnet_softc *);
static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
static int	vtnet_alloc_interface(struct vtnet_softc *);
static int	vtnet_setup_interface(struct vtnet_softc *);
static int	vtnet_ioctl_mtu(struct vtnet_softc *, u_int);
static int	vtnet_ioctl_ifflags(struct vtnet_softc *);
static int	vtnet_ioctl_multi(struct vtnet_softc *);
static int	vtnet_ioctl_ifcap(struct vtnet_softc *, struct ifreq *);
static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t);
static uint64_t	vtnet_get_counter(struct ifnet *, ift_counter);

static int	vtnet_rxq_populate(struct vtnet_rxq *);
static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
static struct mbuf *
		vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
static int	vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
		    struct mbuf *, int);
static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
static int	vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
		    uint16_t, int, struct virtio_net_hdr *);
static int	vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
		    uint16_t, int, struct virtio_net_hdr *);
static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
		    struct virtio_net_hdr *);
static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
static void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
static int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
static void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
		    struct virtio_net_hdr *);
static int	vtnet_rxq_eof(struct vtnet_rxq *);
static void	vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries);
static void	vtnet_rx_vq_intr(void *);
static void	vtnet_rxq_tq_intr(void *, int);

static int	vtnet_txq_intr_threshold(struct vtnet_txq *);
static int	vtnet_txq_below_threshold(struct vtnet_txq *);
static int	vtnet_txq_notify(struct vtnet_txq *);
static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
		    int *, int *, int *);
static int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
		    int, struct virtio_net_hdr *);
static struct mbuf *
		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
		    struct virtio_net_hdr *);
static int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
		    struct vtnet_tx_header *);
static int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int);
#ifdef VTNET_LEGACY_TX
static void	vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
static void	vtnet_start(struct ifnet *);
#else
static int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
static int	vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
static void	vtnet_txq_tq_deferred(void *, int);
#endif
static void	vtnet_txq_start(struct vtnet_txq *);
static void	vtnet_txq_tq_intr(void *, int);
static int	vtnet_txq_eof(struct vtnet_txq *);
static void	vtnet_tx_vq_intr(void *);
static void	vtnet_tx_start_all(struct vtnet_softc *);

#ifndef VTNET_LEGACY_TX
static void	vtnet_qflush(struct ifnet *);
#endif

static int	vtnet_watchdog(struct vtnet_txq *);
static void	vtnet_accum_stats(struct vtnet_softc *,
		    struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
static void	vtnet_tick(void *);

static void	vtnet_start_taskqueues(struct vtnet_softc *);
static void	vtnet_free_taskqueues(struct vtnet_softc *);
static void	vtnet_drain_taskqueues(struct vtnet_softc *);

static void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
static void	vtnet_stop_rendezvous(struct vtnet_softc *);
static void	vtnet_stop(struct vtnet_softc *);
static int	vtnet_virtio_reinit(struct vtnet_softc *);
static void	vtnet_init_rx_filters(struct vtnet_softc *);
static int	vtnet_init_rx_queues(struct vtnet_softc *);
static int	vtnet_init_tx_queues(struct vtnet_softc *);
static int	vtnet_init_rxtx_queues(struct vtnet_softc *);
static void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
static void	vtnet_update_rx_offloads(struct vtnet_softc *);
static int	vtnet_reinit(struct vtnet_softc *);
static void	vtnet_init_locked(struct vtnet_softc *, int);
static void	vtnet_init(void *);

static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
		    struct sglist *, int, int);
static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
static int	vtnet_ctrl_guest_offloads(struct vtnet_softc *, uint64_t);
static int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, uint8_t, bool);
static int	vtnet_set_promisc(struct vtnet_softc *, bool);
static int	vtnet_set_allmulti(struct vtnet_softc *, bool);
static void	vtnet_rx_filter(struct vtnet_softc *);
static void	vtnet_rx_filter_mac(struct vtnet_softc *);
static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
static void	vtnet_register_vlan(void *, struct ifnet *, uint16_t);
static void	vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);

static void	vtnet_update_speed_duplex(struct vtnet_softc *);
static int	vtnet_is_link_up(struct vtnet_softc *);
static void	vtnet_update_link_status(struct vtnet_softc *);
static int	vtnet_ifmedia_upd(struct ifnet *);
static void	vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
static void	vtnet_get_macaddr(struct vtnet_softc *);
static void	vtnet_set_macaddr(struct vtnet_softc *);
static void	vtnet_attached_set_macaddr(struct vtnet_softc *);
static void	vtnet_vlan_tag_remove(struct mbuf *);
static void	vtnet_set_rx_process_limit(struct vtnet_softc *);

static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
		    struct sysctl_oid_list *, struct vtnet_rxq *);
static void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
		    struct sysctl_oid_list *, struct vtnet_txq *);
static void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
static void	vtnet_load_tunables(struct vtnet_softc *);
static void	vtnet_setup_sysctl(struct vtnet_softc *);

static int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
static void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
static int	vtnet_txq_enable_intr(struct vtnet_txq *);
static void	vtnet_txq_disable_intr(struct vtnet_txq *);
static void	vtnet_enable_rx_interrupts(struct vtnet_softc *);
static void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
static void	vtnet_enable_interrupts(struct vtnet_softc *);
static void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
static void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
static void	vtnet_disable_interrupts(struct vtnet_softc *);

static int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);

DEBUGNET_DEFINE(vtnet);

#define vtnet_htog16(_sc, _val)	virtio_htog16(vtnet_modern(_sc), _val)
#define vtnet_htog32(_sc, _val)	virtio_htog32(vtnet_modern(_sc), _val)
#define vtnet_htog64(_sc, _val)	virtio_htog64(vtnet_modern(_sc), _val)
#define vtnet_gtoh16(_sc, _val)	virtio_gtoh16(vtnet_modern(_sc), _val)
#define vtnet_gtoh32(_sc, _val)	virtio_gtoh32(vtnet_modern(_sc), _val)
#define vtnet_gtoh64(_sc, _val)	virtio_gtoh64(vtnet_modern(_sc), _val)

/* Tunables. */
static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "VirtIO Net driver parameters");

static int vtnet_csum_disable = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
    &vtnet_csum_disable, 0, "Disables receive and send checksum offload");

static int vtnet_fixup_needs_csum = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, fixup_needs_csum, CTLFLAG_RDTUN,
    &vtnet_fixup_needs_csum, 0,
    "Calculate valid checksum for NEEDS_CSUM packets");

static int vtnet_tso_disable = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN,
    &vtnet_tso_disable, 0, "Disables TSO");

static int vtnet_lro_disable = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN,
    &vtnet_lro_disable, 0, "Disables hardware LRO");

static int vtnet_mq_disable = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN,
    &vtnet_mq_disable, 0, "Disables multiqueue support");

static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
    &vtnet_mq_max_pairs, 0, "Maximum number of multiqueue pairs");

static int vtnet_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &vtnet_tso_maxlen, 0, "TSO burst limit");

static int vtnet_rx_process_limit = 1024;
SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &vtnet_rx_process_limit, 0,
    "Number of RX segments processed in one pass");

static int vtnet_lro_entry_count = 128;
SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &vtnet_lro_entry_count, 0, "Software LRO entry count");
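/*
 * These knobs are loader tunables (CTLFLAG_RDTUN) and are read once at
 * attach.  As a purely illustrative example (values are hypothetical), a
 * /boot/loader.conf entry such as the following could be used to disable
 * checksum offload and cap the number of queue pairs:
 *
 *	hw.vtnet.csum_disable=1
 *	hw.vtnet.mq_max_pairs=4
 */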
/* Enable sorted LRO, and the depth of the mbuf queue. */
static int vtnet_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_vtnet, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &vtnet_lro_mbufq_depth, 0, "Depth of software LRO mbuf queue");

static uma_zone_t vtnet_tx_header_zone;

static struct virtio_feature_desc vtnet_feature_desc[] = {
	{ VIRTIO_NET_F_CSUM,			"TxChecksum"	},
	{ VIRTIO_NET_F_GUEST_CSUM,		"RxChecksum"	},
	{ VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,	"CtrlRxOffloads" },
	{ VIRTIO_NET_F_MAC,			"MAC"		},
	{ VIRTIO_NET_F_GSO,			"TxGSO"		},
	{ VIRTIO_NET_F_GUEST_TSO4,		"RxLROv4"	},
	{ VIRTIO_NET_F_GUEST_TSO6,		"RxLROv6"	},
	{ VIRTIO_NET_F_GUEST_ECN,		"RxLROECN"	},
	{ VIRTIO_NET_F_GUEST_UFO,		"RxUFO"		},
	{ VIRTIO_NET_F_HOST_TSO4,		"TxTSOv4"	},
	{ VIRTIO_NET_F_HOST_TSO6,		"TxTSOv6"	},
	{ VIRTIO_NET_F_HOST_ECN,		"TxTSOECN"	},
	{ VIRTIO_NET_F_HOST_UFO,		"TxUFO"		},
	{ VIRTIO_NET_F_MRG_RXBUF,		"MrgRxBuf"	},
	{ VIRTIO_NET_F_STATUS,			"Status"	},
	{ VIRTIO_NET_F_CTRL_VQ,			"CtrlVq"	},
	{ VIRTIO_NET_F_CTRL_RX,			"CtrlRxMode"	},
	{ VIRTIO_NET_F_CTRL_VLAN,		"CtrlVLANFilter" },
	{ VIRTIO_NET_F_CTRL_RX_EXTRA,		"CtrlRxModeExtra" },
	{ VIRTIO_NET_F_GUEST_ANNOUNCE,		"GuestAnnounce"	},
	{ VIRTIO_NET_F_MQ,			"Multiqueue"	},
	{ VIRTIO_NET_F_CTRL_MAC_ADDR,		"CtrlMacAddr"	},
	{ VIRTIO_NET_F_SPEED_DUPLEX,		"SpeedDuplex"	},

	{ 0, NULL }
};

static device_method_t vtnet_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtnet_probe),
	DEVMETHOD(device_attach,	vtnet_attach),
	DEVMETHOD(device_detach,	vtnet_detach),
	DEVMETHOD(device_suspend,	vtnet_suspend),
	DEVMETHOD(device_resume,	vtnet_resume),
	DEVMETHOD(device_shutdown,	vtnet_shutdown),

	/* VirtIO methods. */
	DEVMETHOD(virtio_attach_completed, vtnet_attach_completed),
	DEVMETHOD(virtio_config_change,	vtnet_config_change),

	DEVMETHOD_END
};

#ifdef DEV_NETMAP
#include <dev/netmap/if_vtnet_netmap.h>
#endif

static driver_t vtnet_driver = {
	.name = "vtnet",
	.methods = vtnet_methods,
	.size = sizeof(struct vtnet_softc)
};
VIRTIO_DRIVER_MODULE(vtnet, vtnet_driver, vtnet_modevent, NULL);
MODULE_VERSION(vtnet, 1);
MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(vtnet, netmap, 1, 1, 1);
#endif

VIRTIO_SIMPLE_PNPINFO(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter");

static int
vtnet_modevent(module_t mod __unused, int type, void *unused __unused)
{
	int error = 0;
	static int loaded = 0;

	switch (type) {
	case MOD_LOAD:
		if (loaded++ == 0) {
			vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
			    sizeof(struct vtnet_tx_header),
			    NULL, NULL, NULL, NULL, 0, 0);
#ifdef DEBUGNET
			/*
			 * We need to allocate from this zone in the transmit
			 * path, so ensure that we have at least one item per
			 * header available.
			 * XXX add a separate zone like we do for mbufs?
			 * otherwise we may alloc buckets
			 */
			uma_zone_reserve(vtnet_tx_header_zone,
			    DEBUGNET_MAX_IN_FLIGHT * 2);
			uma_prealloc(vtnet_tx_header_zone,
			    DEBUGNET_MAX_IN_FLIGHT * 2);
#endif
		}
		break;
	case MOD_QUIESCE:
		if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
			error = EBUSY;
		break;
	case MOD_UNLOAD:
		if (--loaded == 0) {
			uma_zdestroy(vtnet_tx_header_zone);
			vtnet_tx_header_zone = NULL;
		}
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

static int
vtnet_probe(device_t dev)
{
	return (VIRTIO_SIMPLE_PROBE(dev, vtnet));
}

static int
vtnet_attach(device_t dev)
{
	struct vtnet_softc *sc;
	int error;

	sc = device_get_softc(dev);
	sc->vtnet_dev = dev;
	virtio_set_feature_desc(dev, vtnet_feature_desc);

	VTNET_CORE_LOCK_INIT(sc);
	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
	vtnet_load_tunables(sc);

	error = vtnet_alloc_interface(sc);
	if (error) {
		device_printf(dev, "cannot allocate interface\n");
		goto fail;
	}

	vtnet_setup_sysctl(sc);

	error = vtnet_setup_features(sc);
	if (error) {
		device_printf(dev, "cannot setup features\n");
		goto fail;
	}

	error = vtnet_alloc_rx_filters(sc);
	if (error) {
		device_printf(dev, "cannot allocate Rx filters\n");
		goto fail;
	}

	error = vtnet_alloc_rxtx_queues(sc);
	if (error) {
		device_printf(dev, "cannot allocate queues\n");
		goto fail;
	}

	error = vtnet_alloc_virtqueues(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueues\n");
		goto fail;
	}

	error = vtnet_setup_interface(sc);
	if (error) {
		device_printf(dev, "cannot setup interface\n");
		goto fail;
	}

	error = virtio_setup_intr(dev, INTR_TYPE_NET);
	if (error) {
		device_printf(dev, "cannot setup interrupts\n");
		ether_ifdetach(sc->vtnet_ifp);
		goto fail;
	}

#ifdef DEV_NETMAP
	vtnet_netmap_attach(sc);
#endif
	vtnet_start_taskqueues(sc);

fail:
	if (error)
		vtnet_detach(dev);

	return (error);
}

static int
vtnet_detach(device_t dev)
{
	struct vtnet_softc *sc;
	struct ifnet *ifp;

	sc = device_get_softc(dev);
	ifp = sc->vtnet_ifp;

	if (device_is_attached(dev)) {
		VTNET_CORE_LOCK(sc);
		vtnet_stop(sc);
		VTNET_CORE_UNLOCK(sc);

		callout_drain(&sc->vtnet_tick_ch);
		vtnet_drain_taskqueues(sc);

		ether_ifdetach(ifp);
	}

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif

	if (sc->vtnet_pfil != NULL) {
		pfil_head_unregister(sc->vtnet_pfil);
		sc->vtnet_pfil = NULL;
	}

	vtnet_free_taskqueues(sc);

	if (sc->vtnet_vlan_attach != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
		sc->vtnet_vlan_attach = NULL;
	}
	if (sc->vtnet_vlan_detach != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
		sc->vtnet_vlan_detach = NULL;
	}

	ifmedia_removeall(&sc->vtnet_media);

	if (ifp != NULL) {
		if_free(ifp);
		sc->vtnet_ifp = NULL;
	}

	vtnet_free_rxtx_queues(sc);
	vtnet_free_rx_filters(sc);

	if (sc->vtnet_ctrl_vq != NULL)
		vtnet_free_ctrl_vq(sc);

	VTNET_CORE_LOCK_DESTROY(sc);

	return (0);
}

static int
vtnet_suspend(device_t dev)
{
	struct vtnet_softc *sc;

	sc = device_get_softc(dev);
	VTNET_CORE_LOCK(sc);
	vtnet_stop(sc);
	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
	VTNET_CORE_UNLOCK(sc);

	return (0);
}

static int
vtnet_resume(device_t dev)
{
	struct vtnet_softc *sc;
	struct ifnet *ifp;

	sc = device_get_softc(dev);
	ifp = sc->vtnet_ifp;

	VTNET_CORE_LOCK(sc);
	if (ifp->if_flags & IFF_UP)
		vtnet_init_locked(sc, 0);
	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
	VTNET_CORE_UNLOCK(sc);

	return (0);
}

static int
vtnet_shutdown(device_t dev)
{
	/*
	 * Suspend already does all of what we need to
	 * do here; we just never expect to be resumed.
	 */
	return (vtnet_suspend(dev));
}

static int
vtnet_attach_completed(device_t dev)
{
	struct vtnet_softc *sc;

	sc = device_get_softc(dev);

	VTNET_CORE_LOCK(sc);
	vtnet_attached_set_macaddr(sc);
	VTNET_CORE_UNLOCK(sc);

	return (0);
}

static int
vtnet_config_change(device_t dev)
{
	struct vtnet_softc *sc;

	sc = device_get_softc(dev);

	VTNET_CORE_LOCK(sc);
	vtnet_update_link_status(sc);
	if (sc->vtnet_link_active != 0)
		vtnet_tx_start_all(sc);
	VTNET_CORE_UNLOCK(sc);

	return (0);
}

static int
vtnet_negotiate_features(struct vtnet_softc *sc)
{
	device_t dev;
	uint64_t features, negotiated_features;
	int no_csum;

	dev = sc->vtnet_dev;
	features = virtio_bus_is_modern(dev) ? VTNET_MODERN_FEATURES :
	    VTNET_LEGACY_FEATURES;

	/*
	 * TSO and LRO are only available when their corresponding checksum
	 * offload feature is also negotiated.
	 */
	no_csum = vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable);
	if (no_csum)
		features &= ~(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM);
	if (no_csum || vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
		features &= ~VTNET_TSO_FEATURES;
	if (no_csum || vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
		features &= ~VTNET_LRO_FEATURES;

#ifndef VTNET_LEGACY_TX
	if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
		features &= ~VIRTIO_NET_F_MQ;
#else
	features &= ~VIRTIO_NET_F_MQ;
#endif

	negotiated_features = virtio_negotiate_features(dev, features);

	if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
		uint16_t mtu;

		mtu = virtio_read_dev_config_2(dev,
		    offsetof(struct virtio_net_config, mtu));
		if (mtu < VTNET_MIN_MTU /* || mtu > VTNET_MAX_MTU */) {
			device_printf(dev, "Invalid MTU value: %d. "
			    "MTU feature disabled.\n", mtu);
			features &= ~VIRTIO_NET_F_MTU;
			negotiated_features =
			    virtio_negotiate_features(dev, features);
		}
	}

	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
		uint16_t npairs;

		npairs = virtio_read_dev_config_2(dev,
		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
		if (npairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
		    npairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) {
			device_printf(dev, "Invalid max_virtqueue_pairs value: "
			    "%d. Multiqueue feature disabled.\n", npairs);
			features &= ~VIRTIO_NET_F_MQ;
			negotiated_features =
			    virtio_negotiate_features(dev, features);
		}
	}

	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
	    virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
		/*
		 * LRO without mergeable buffers requires special care. This
		 * is not ideal because every receive buffer must be large
		 * enough to hold the maximum TCP packet, the Ethernet header,
		 * and the virtio-net header. This requires up to 34
		 * descriptors with MCLBYTES clusters. If we do not have
		 * indirect descriptors, LRO is disabled since the virtqueue
		 * will not contain very many receive buffers.
		 */
		if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
			device_printf(dev,
			    "Host LRO disabled since both mergeable buffers "
			    "and indirect descriptors were not negotiated\n");
			features &= ~VTNET_LRO_FEATURES;
			negotiated_features =
			    virtio_negotiate_features(dev, features);
		} else
			sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
	}

	sc->vtnet_features = negotiated_features;
	sc->vtnet_negotiated_features = negotiated_features;

	return (virtio_finalize_features(dev));
}

static int
vtnet_setup_features(struct vtnet_softc *sc)
{
	device_t dev;
	int error;

	dev = sc->vtnet_dev;

	error = vtnet_negotiate_features(sc);
	if (error)
		return (error);

	if (virtio_with_feature(dev, VIRTIO_F_VERSION_1))
		sc->vtnet_flags |= VTNET_FLAG_MODERN;
	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
	if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
		sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;

	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
		/* This feature should always be negotiated. */
		sc->vtnet_flags |= VTNET_FLAG_MAC;
	}

	if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
		sc->vtnet_max_mtu = virtio_read_dev_config_2(dev,
		    offsetof(struct virtio_net_config, mtu));
	} else
		sc->vtnet_max_mtu = VTNET_MAX_MTU;

	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	} else if (vtnet_modern(sc)) {
		/* This is identical to the mergeable header. */
		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1);
	} else
		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);

	if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE;
	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG;
	else
		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE;

	/*
	 * Favor "hardware" LRO if negotiated, but support software LRO as
	 * a fallback; there is usually little benefit (or worse) with both.
	 */
	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) == 0 &&
	    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) == 0)
		sc->vtnet_flags |= VTNET_FLAG_SW_LRO;

	if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
		sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX;
	else
		sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN;

	sc->vtnet_req_vq_pairs = 1;
	sc->vtnet_max_vq_pairs = 1;

	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;

		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;

		if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
			sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
			    offsetof(struct virtio_net_config,
			    max_virtqueue_pairs));
		}
	}

	if (sc->vtnet_max_vq_pairs > 1) {
		int req;

		/*
		 * Limit the maximum number of requested queue pairs to the
		 * number of CPUs and the configured maximum.
		 */
		req = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
		if (req < 0)
			req = 1;
		if (req == 0)
			req = mp_ncpus;
		if (req > sc->vtnet_max_vq_pairs)
			req = sc->vtnet_max_vq_pairs;
		if (req > mp_ncpus)
			req = mp_ncpus;
		if (req > 1) {
			sc->vtnet_req_vq_pairs = req;
			sc->vtnet_flags |= VTNET_FLAG_MQ;
		}
	}

	return (0);
}

static int
vtnet_init_rxq(struct vtnet_softc *sc, int id)
{
	struct vtnet_rxq *rxq;

	rxq = &sc->vtnet_rxqs[id];

	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
	    device_get_nameunit(sc->vtnet_dev), id);
	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);

	rxq->vtnrx_sc = sc;
	rxq->vtnrx_id = id;

	rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
	if (rxq->vtnrx_sg == NULL)
		return (ENOMEM);

#if defined(INET) || defined(INET6)
	if (vtnet_software_lro(sc)) {
		if (tcp_lro_init_args(&rxq->vtnrx_lro, sc->vtnet_ifp,
		    sc->vtnet_lro_entry_count, sc->vtnet_lro_mbufq_depth) != 0)
			return (ENOMEM);
	}
#endif

	NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);

	return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
}
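/*
 * The transmit queue setup below mirrors vtnet_init_rxq(): each queue gets
 * its own name, mutex, sglist, and taskqueue.  When multiqueue transmit is
 * compiled in (!VTNET_LEGACY_TX), a buf_ring and a deferred-start task are
 * allocated as well.
 */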
static int
vtnet_init_txq(struct vtnet_softc *sc, int id)
{
	struct vtnet_txq *txq;

	txq = &sc->vtnet_txqs[id];

	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
	    device_get_nameunit(sc->vtnet_dev), id);
	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);

	txq->vtntx_sc = sc;
	txq->vtntx_id = id;

	txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
	if (txq->vtntx_sg == NULL)
		return (ENOMEM);

#ifndef VTNET_LEGACY_TX
	txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
	    M_NOWAIT, &txq->vtntx_mtx);
	if (txq->vtntx_br == NULL)
		return (ENOMEM);

	TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
#endif
	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
	    taskqueue_thread_enqueue, &txq->vtntx_tq);
	if (txq->vtntx_tq == NULL)
		return (ENOMEM);

	return (0);
}

static int
vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
{
	int i, npairs, error;

	npairs = sc->vtnet_max_vq_pairs;

	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
	    M_NOWAIT | M_ZERO);
	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
	    M_NOWAIT | M_ZERO);
	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
		return (ENOMEM);

	for (i = 0; i < npairs; i++) {
		error = vtnet_init_rxq(sc, i);
		if (error)
			return (error);
		error = vtnet_init_txq(sc, i);
		if (error)
			return (error);
	}

	vtnet_set_rx_process_limit(sc);
	vtnet_setup_queue_sysctl(sc);

	return (0);
}

static void
vtnet_destroy_rxq(struct vtnet_rxq *rxq)
{

	rxq->vtnrx_sc = NULL;
	rxq->vtnrx_id = -1;

#if defined(INET) || defined(INET6)
	tcp_lro_free(&rxq->vtnrx_lro);
#endif

	if (rxq->vtnrx_sg != NULL) {
		sglist_free(rxq->vtnrx_sg);
		rxq->vtnrx_sg = NULL;
	}

	if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
		mtx_destroy(&rxq->vtnrx_mtx);
}

static void
vtnet_destroy_txq(struct vtnet_txq *txq)
{

	txq->vtntx_sc = NULL;
	txq->vtntx_id = -1;

	if (txq->vtntx_sg != NULL) {
		sglist_free(txq->vtntx_sg);
		txq->vtntx_sg = NULL;
	}

#ifndef VTNET_LEGACY_TX
	if (txq->vtntx_br != NULL) {
		buf_ring_free(txq->vtntx_br, M_DEVBUF);
		txq->vtntx_br = NULL;
	}
#endif

	if (mtx_initialized(&txq->vtntx_mtx) != 0)
		mtx_destroy(&txq->vtntx_mtx);
}

static void
vtnet_free_rxtx_queues(struct vtnet_softc *sc)
{
	int i;

	if (sc->vtnet_rxqs != NULL) {
		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
			vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
		free(sc->vtnet_rxqs, M_DEVBUF);
		sc->vtnet_rxqs = NULL;
	}

	if (sc->vtnet_txqs != NULL) {
		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
			vtnet_destroy_txq(&sc->vtnet_txqs[i]);
		free(sc->vtnet_txqs, M_DEVBUF);
		sc->vtnet_txqs = NULL;
	}
}

static int
vtnet_alloc_rx_filters(struct vtnet_softc *sc)
{

	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
		    M_DEVBUF, M_NOWAIT | M_ZERO);
		if (sc->vtnet_mac_filter == NULL)
			return (ENOMEM);
	}

	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
		if (sc->vtnet_vlan_filter == NULL)
			return (ENOMEM);
	}

	return (0);
}

static void
vtnet_free_rx_filters(struct vtnet_softc *sc)
{

	if (sc->vtnet_mac_filter != NULL) {
		free(sc->vtnet_mac_filter, M_DEVBUF);
		sc->vtnet_mac_filter = NULL;
	}

	if (sc->vtnet_vlan_filter != NULL) {
		free(sc->vtnet_vlan_filter, M_DEVBUF);
		sc->vtnet_vlan_filter = NULL;
	}
}

static int
vtnet_alloc_virtqueues(struct vtnet_softc *sc)
{
	device_t dev;
	struct vq_alloc_info *info;
	struct vtnet_rxq *rxq;
	struct vtnet_txq *txq;
	int i, idx, flags, nvqs, error;

	dev = sc->vtnet_dev;
	flags = 0;

	nvqs = sc->vtnet_max_vq_pairs * 2;
	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
		nvqs++;

	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
	if (info == NULL)
		return (ENOMEM);

	for (i = 0, idx = 0; i < sc->vtnet_req_vq_pairs; i++, idx += 2) {
		rxq = &sc->vtnet_rxqs[i];
		VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
		    vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
		    "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);

		txq = &sc->vtnet_txqs[i];
		VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
		    vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
		    "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
	}

	/* These queues will not be used so allocate the minimum resources. */
	for (/**/; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
		rxq = &sc->vtnet_rxqs[i];
		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, rxq, &rxq->vtnrx_vq,
		    "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);

		txq = &sc->vtnet_txqs[i];
		VQ_ALLOC_INFO_INIT(&info[idx+1], 0, NULL, txq, &txq->vtntx_vq,
		    "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
	}

	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
		    &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
	}

	/*
	 * TODO: Enable interrupt binding if this is multiqueue. This will
	 * only matter when per-virtqueue MSIX is available.
	 */
	if (sc->vtnet_flags & VTNET_FLAG_MQ)
		flags |= 0;

	error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
	free(info, M_TEMP);

	return (error);
}
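/*
 * Resulting virtqueue layout: the receive and transmit queues of pair i are
 * at indices 2*i and 2*i + 1 of the info array, with the optional control
 * virtqueue allocated last.
 */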
static int
vtnet_alloc_interface(struct vtnet_softc *sc)
{
	device_t dev;
	struct ifnet *ifp;

	dev = sc->vtnet_dev;

	ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL)
		return (ENOMEM);

	sc->vtnet_ifp = ifp;
	ifp->if_softc = sc;
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	return (0);
}

static int
vtnet_setup_interface(struct vtnet_softc *sc)
{
	device_t dev;
	struct pfil_head_args pa;
	struct ifnet *ifp;

	dev = sc->vtnet_dev;
	ifp = sc->vtnet_ifp;

	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST |
	    IFF_KNOWSEPOCH;
	ifp->if_baudrate = IF_Gbps(10);
	ifp->if_init = vtnet_init;
	ifp->if_ioctl = vtnet_ioctl;
	ifp->if_get_counter = vtnet_get_counter;
#ifndef VTNET_LEGACY_TX
	ifp->if_transmit = vtnet_txq_mq_start;
	ifp->if_qflush = vtnet_qflush;
#else
	struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
	ifp->if_start = vtnet_start;
	IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
	ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
	IFQ_SET_READY(&ifp->if_snd);
#endif

	vtnet_get_macaddr(sc);

	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
		ifp->if_capabilities |= IFCAP_LINKSTATE;

	ifmedia_init(&sc->vtnet_media, 0, vtnet_ifmedia_upd, vtnet_ifmedia_sts);
	ifmedia_add(&sc->vtnet_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->vtnet_media, IFM_ETHER | IFM_AUTO);

	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
		int gso;

		ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;

		gso = virtio_with_feature(dev, VIRTIO_NET_F_GSO);
		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
			ifp->if_capabilities |= IFCAP_TSO4;
		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
			ifp->if_capabilities |= IFCAP_TSO6;
		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;

		if (ifp->if_capabilities & (IFCAP_TSO4 | IFCAP_TSO6)) {
			int tso_maxlen;

			ifp->if_capabilities |= IFCAP_VLAN_HWTSO;

			tso_maxlen = vtnet_tunable_int(sc, "tso_maxlen",
			    vtnet_tso_maxlen);
			ifp->if_hw_tsomax = tso_maxlen -
			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN);
			ifp->if_hw_tsomaxsegcount = sc->vtnet_tx_nsegs - 1;
			ifp->if_hw_tsomaxsegsize = PAGE_SIZE;
		}
	}

	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
		ifp->if_capabilities |= IFCAP_RXCSUM;
#ifdef notyet
		/* BMV: Rx checksums not distinguished between IPv4 and IPv6. */
		ifp->if_capabilities |= IFCAP_RXCSUM_IPV6;
#endif

		if (vtnet_tunable_int(sc, "fixup_needs_csum",
		    vtnet_fixup_needs_csum) != 0)
			sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM;

		/* Support either "hardware" or software LRO. */
		ifp->if_capabilities |= IFCAP_LRO;
	}

	if (ifp->if_capabilities & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6)) {
		/*
		 * VirtIO does not support VLAN tagging, but we can fake
		 * it by inserting and removing the 802.1Q header during
		 * transmit and receive. We are then able to do checksum
		 * offloading of VLAN frames.
		 */
		ifp->if_capabilities |=
		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
	}

	if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO)
		ifp->if_capabilities |= IFCAP_JUMBO_MTU;
	ifp->if_capabilities |= IFCAP_VLAN_MTU;

	/*
	 * Capabilities after here are not enabled by default.
	 */
	ifp->if_capenable = ifp->if_capabilities;

	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
		ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;

		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
	}

	ether_ifattach(ifp, sc->vtnet_hwaddr);

	/* Tell the upper layer(s) we support long frames. */
	ifp->if_hdrlen = sizeof(struct ether_vlan_header);

	DEBUGNET_SET(ifp, vtnet);

	pa.pa_version = PFIL_VERSION;
	pa.pa_flags = PFIL_IN;
	pa.pa_type = PFIL_TYPE_ETHERNET;
	pa.pa_headname = ifp->if_xname;
	sc->vtnet_pfil = pfil_head_register(&pa);

	return (0);
}

static int
vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu)
{
	int framesz;

	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
		return (MJUMPAGESIZE);
	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
		return (MCLBYTES);

	/*
	 * Try to scale the receive mbuf cluster size from the MTU. We
	 * could also use the VQ size to influence the selected size,
	 * but that would only matter for very small queues.
	 */
	if (vtnet_modern(sc)) {
		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1));
		framesz = sizeof(struct virtio_net_hdr_v1);
	} else
		framesz = sizeof(struct vtnet_rx_header);
	framesz += sizeof(struct ether_vlan_header) + mtu;

	if (framesz <= MCLBYTES)
		return (MCLBYTES);
	else if (framesz <= MJUMPAGESIZE)
		return (MJUMPAGESIZE);
	else if (framesz <= MJUM9BYTES)
		return (MJUM9BYTES);

	/* Sane default; avoid 16KB clusters. */
	return (MCLBYTES);
}
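/*
 * Worked example (assuming a modern device, no mergeable buffers, 4 KB
 * pages): with a 1500 byte MTU, framesz = 12 (virtio_net_hdr_v1) +
 * 18 (ether_vlan_header) + 1500 = 1530 bytes, which fits in an MCLBYTES
 * (2048 byte) cluster; a 9000 byte jumbo MTU yields 9030 bytes and selects
 * MJUM9BYTES.
 */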
static int
vtnet_ioctl_mtu(struct vtnet_softc *sc, u_int mtu)
{
	struct ifnet *ifp;
	int clustersz;

	ifp = sc->vtnet_ifp;
	VTNET_CORE_LOCK_ASSERT(sc);

	if (ifp->if_mtu == mtu)
		return (0);
	else if (mtu < ETHERMIN || mtu > sc->vtnet_max_mtu)
		return (EINVAL);

	ifp->if_mtu = mtu;
	clustersz = vtnet_rx_cluster_size(sc, mtu);

	if (clustersz != sc->vtnet_rx_clustersz &&
	    ifp->if_drv_flags & IFF_DRV_RUNNING) {
		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		vtnet_init_locked(sc, 0);
	}

	return (0);
}

static int
vtnet_ioctl_ifflags(struct vtnet_softc *sc)
{
	struct ifnet *ifp;
	int drv_running;

	ifp = sc->vtnet_ifp;
	drv_running = (ifp->if_drv_flags & IFF_DRV_RUNNING) != 0;

	VTNET_CORE_LOCK_ASSERT(sc);

	if ((ifp->if_flags & IFF_UP) == 0) {
		if (drv_running)
			vtnet_stop(sc);
		goto out;
	}

	if (!drv_running) {
		vtnet_init_locked(sc, 0);
		goto out;
	}

	if ((ifp->if_flags ^ sc->vtnet_if_flags) &
	    (IFF_PROMISC | IFF_ALLMULTI)) {
		if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
			vtnet_rx_filter(sc);
		else {
			if ((ifp->if_flags ^ sc->vtnet_if_flags) & IFF_ALLMULTI)
				return (ENOTSUP);
			ifp->if_flags |= IFF_PROMISC;
		}
	}

out:
	sc->vtnet_if_flags = ifp->if_flags;
	return (0);
}

static int
vtnet_ioctl_multi(struct vtnet_softc *sc)
{
	struct ifnet *ifp;

	ifp = sc->vtnet_ifp;

	VTNET_CORE_LOCK_ASSERT(sc);

	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX &&
	    ifp->if_drv_flags & IFF_DRV_RUNNING)
		vtnet_rx_filter_mac(sc);

	return (0);
}

static int
vtnet_ioctl_ifcap(struct vtnet_softc *sc, struct ifreq *ifr)
{
	struct ifnet *ifp;
	int mask, reinit, update;

	ifp = sc->vtnet_ifp;
	mask = (ifr->ifr_reqcap & ifp->if_capabilities) ^ ifp->if_capenable;
	reinit = update = 0;

	VTNET_CORE_LOCK_ASSERT(sc);

	if (mask & IFCAP_TXCSUM)
		ifp->if_capenable ^= IFCAP_TXCSUM;
	if (mask & IFCAP_TXCSUM_IPV6)
		ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
	if (mask & IFCAP_TSO4)
		ifp->if_capenable ^= IFCAP_TSO4;
	if (mask & IFCAP_TSO6)
		ifp->if_capenable ^= IFCAP_TSO6;

	if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) {
		/*
		 * These Rx features require the negotiated features to
		 * be updated. Avoid a full reinit if possible.
		 */
		if (sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
			update = 1;
		else
			reinit = 1;

		/* BMV: Avoid needless renegotiation for just software LRO. */
		if ((mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) ==
		    IFCAP_LRO && vtnet_software_lro(sc))
			reinit = update = 0;

		if (mask & IFCAP_RXCSUM)
			ifp->if_capenable ^= IFCAP_RXCSUM;
		if (mask & IFCAP_RXCSUM_IPV6)
			ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
		if (mask & IFCAP_LRO)
			ifp->if_capenable ^= IFCAP_LRO;

		/*
		 * VirtIO does not distinguish between IPv4 and IPv6 checksums
		 * so treat them as a pair. Guest TSO (LRO) requires receive
		 * checksums.
		 */
		if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
			ifp->if_capenable |= IFCAP_RXCSUM;
#ifdef notyet
			ifp->if_capenable |= IFCAP_RXCSUM_IPV6;
#endif
		} else
			ifp->if_capenable &=
			    ~(IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO);
	}

	if (mask & IFCAP_VLAN_HWFILTER) {
		/* These Rx features require renegotiation. */
		reinit = 1;

		if (mask & IFCAP_VLAN_HWFILTER)
			ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
	}

	if (mask & IFCAP_VLAN_HWTSO)
		ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
	if (mask & IFCAP_VLAN_HWTAGGING)
		ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		if (reinit) {
			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
			vtnet_init_locked(sc, 0);
		} else if (update)
			vtnet_update_rx_offloads(sc);
	}

	return (0);
}

static int
vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct vtnet_softc *sc;
	struct ifreq *ifr;
	int error;

	sc = ifp->if_softc;
	ifr = (struct ifreq *) data;
	error = 0;

	switch (cmd) {
	case SIOCSIFMTU:
		VTNET_CORE_LOCK(sc);
		error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu);
		VTNET_CORE_UNLOCK(sc);
		break;

	case SIOCSIFFLAGS:
		VTNET_CORE_LOCK(sc);
		error = vtnet_ioctl_ifflags(sc);
		VTNET_CORE_UNLOCK(sc);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		VTNET_CORE_LOCK(sc);
		error = vtnet_ioctl_multi(sc);
		VTNET_CORE_UNLOCK(sc);
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
		break;

	case SIOCSIFCAP:
		VTNET_CORE_LOCK(sc);
		error = vtnet_ioctl_ifcap(sc, ifr);
		VTNET_CORE_UNLOCK(sc);
		VLAN_CAPABILITIES(ifp);
		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}

	VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);

	return (error);
}

static int
vtnet_rxq_populate(struct vtnet_rxq *rxq)
{
	struct virtqueue *vq;
	int nbufs, error;

#ifdef DEV_NETMAP
	error = vtnet_netmap_rxq_populate(rxq);
	if (error >= 0)
		return (error);
#endif /* DEV_NETMAP */

	vq = rxq->vtnrx_vq;
	error = ENOSPC;

	for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
		error = vtnet_rxq_new_buf(rxq);
		if (error)
			break;
	}

	if (nbufs > 0) {
		virtqueue_notify(vq);
		/*
		 * EMSGSIZE signifies the virtqueue did not have enough
		 * entries available to hold the last mbuf. This is not
		 * an error.
		 */
		if (error == EMSGSIZE)
			error = 0;
	}

	return (error);
}

static void
vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
{
	struct virtqueue *vq;
	struct mbuf *m;
	int last;
#ifdef DEV_NETMAP
	struct netmap_kring *kring = netmap_kring_on(NA(rxq->vtnrx_sc->vtnet_ifp),
	    rxq->vtnrx_id, NR_RX);
#else  /* !DEV_NETMAP */
	void *kring = NULL;
#endif /* !DEV_NETMAP */

	vq = rxq->vtnrx_vq;
	last = 0;

	while ((m = virtqueue_drain(vq, &last)) != NULL) {
		if (kring == NULL)
			m_freem(m);
	}

	KASSERT(virtqueue_empty(vq),
	    ("%s: mbufs remaining in rx queue %p", __func__, rxq));
}

static struct mbuf *
vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
{
	struct mbuf *m_head, *m_tail, *m;
	int i, size;

	m_head = NULL;
	size = sc->vtnet_rx_clustersz;

	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
	    ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs));

	for (i = 0; i < nbufs; i++) {
		m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size);
		if (m == NULL) {
			sc->vtnet_stats.mbuf_alloc_failed++;
			m_freem(m_head);
			return (NULL);
		}

		m->m_len = size;
		if (m_head != NULL) {
			m_tail->m_next = m;
			m_tail = m;
		} else
			m_head = m_tail = m;
	}

	if (m_tailp != NULL)
		*m_tailp = m_tail;

	return (m_head);
}

/*
 * Slow path for when LRO without mergeable buffers is negotiated.
 */
static int
vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
    int len0)
{
	struct vtnet_softc *sc;
	struct mbuf *m, *m_prev, *m_new, *m_tail;
	int len, clustersz, nreplace, error;

	sc = rxq->vtnrx_sc;
	clustersz = sc->vtnet_rx_clustersz;

	m_prev = NULL;
	m_tail = NULL;
	nreplace = 0;

	m = m0;
	len = len0;

	/*
	 * Since these mbuf chains are so large, avoid allocating a complete
	 * replacement when the received frame did not consume the entire
	 * chain. Unused mbufs are moved to the tail of the replacement mbuf.
	 */
	while (len > 0) {
		if (m == NULL) {
			sc->vtnet_stats.rx_frame_too_large++;
			return (EMSGSIZE);
		}

		/*
		 * Every mbuf should have the expected cluster size since that
		 * is also used to allocate the replacements.
		 */
		KASSERT(m->m_len == clustersz,
		    ("%s: mbuf size %d not expected cluster size %d", __func__,
		    m->m_len, clustersz));

		m->m_len = MIN(m->m_len, len);
		len -= m->m_len;

		m_prev = m;
		m = m->m_next;
		nreplace++;
	}

	KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
	    ("%s: invalid replacement mbuf count %d max %d", __func__,
	    nreplace, sc->vtnet_rx_nmbufs));

	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
	if (m_new == NULL) {
		m_prev->m_len = clustersz;
		return (ENOBUFS);
	}

	/*
	 * Move any unused mbufs from the received mbuf chain onto the
	 * end of the replacement chain.
	 */
	if (m_prev->m_next != NULL) {
		m_tail->m_next = m_prev->m_next;
		m_prev->m_next = NULL;
	}

	error = vtnet_rxq_enqueue_buf(rxq, m_new);
	if (error) {
		/*
		 * The replacement is supposed to be a copy of the one
		 * dequeued, so this is a very unexpected error.
		 *
		 * Restore the m0 chain to the original state if it was
		 * modified so we can then discard it.
		 */
		if (m_tail->m_next != NULL) {
			m_prev->m_next = m_tail->m_next;
			m_tail->m_next = NULL;
		}
		m_prev->m_len = clustersz;
		sc->vtnet_stats.rx_enq_replacement_failed++;
		m_freem(m_new);
	}

	return (error);
}

static int
vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
{
	struct vtnet_softc *sc;
	struct mbuf *m_new;
	int error;

	sc = rxq->vtnrx_sc;

	if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
		return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len));

	MPASS(m->m_next == NULL);
	if (m->m_len < len)
		return (EMSGSIZE);

	m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
	if (m_new == NULL)
		return (ENOBUFS);

	error = vtnet_rxq_enqueue_buf(rxq, m_new);
	if (error) {
		sc->vtnet_stats.rx_enq_replacement_failed++;
		m_freem(m_new);
	} else
		m->m_len = len;

	return (error);
}

static int
vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
{
	struct vtnet_softc *sc;
	struct sglist *sg;
	int header_inlined, error;

	sc = rxq->vtnrx_sc;
	sg = rxq->vtnrx_sg;

	KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
	    ("%s: mbuf chain without LRO_NOMRG", __func__));
	VTNET_RXQ_LOCK_ASSERT(rxq);

	sglist_reset(sg);
	header_inlined = vtnet_modern(sc) ||
	    (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */

	if (header_inlined)
		error = sglist_append_mbuf(sg, m);
	else {
		struct vtnet_rx_header *rxhdr =
		    mtod(m, struct vtnet_rx_header *);
		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));

		/* Append the header and remaining mbuf data. */
		error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
		if (error)
			return (error);
		error = sglist_append(sg, &rxhdr[1],
		    m->m_len - sizeof(struct vtnet_rx_header));
		if (error)
			return (error);

		if (m->m_next != NULL)
			error = sglist_append_mbuf(sg, m->m_next);
	}

	if (error)
		return (error);

	return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg));
}

static int
vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
{
	struct vtnet_softc *sc;
	struct mbuf *m;
	int error;

	sc = rxq->vtnrx_sc;

	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
	if (m == NULL)
		return (ENOBUFS);

	error = vtnet_rxq_enqueue_buf(rxq, m);
	if (error)
		m_freem(m);

	return (error);
}

static int
vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype,
    int hoff, struct virtio_net_hdr *hdr)
{
	struct vtnet_softc *sc;
	int error;

	sc = rxq->vtnrx_sc;

	/*
	 * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does
	 * not have an analogous CSUM flag. The checksum has been validated,
	 * but is incomplete (TCP/UDP pseudo header).
	 *
	 * The packet is likely from another VM on the same host that itself
	 * performed checksum offloading so Tx/Rx is basically a memcpy and
	 * the checksum has little value.
	 *
	 * Default to receiving the packet as-is for performance reasons, but
	 * this can cause issues if the packet is to be forwarded because it
	 * does not contain a valid checksum. This patch may be helpful:
	 * https://reviews.freebsd.org/D6611. In the meantime, have the driver
	 * compute the checksum if requested.
	 *
	 * BMV: Need to add a CSUM_PARTIAL flag?
	 */
	if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
		error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr);
		return (error);
	}

	/*
	 * Compute the checksum in the driver so the packet will contain a
	 * valid checksum. The checksum is at csum_offset from csum_start.
	 */
	switch (etype) {
#if defined(INET) || defined(INET6)
	case ETHERTYPE_IP:
	case ETHERTYPE_IPV6: {
		int csum_off, csum_end;
		uint16_t csum;

		csum_off = hdr->csum_start + hdr->csum_offset;
		csum_end = csum_off + sizeof(uint16_t);

		/* Assume checksum will be in the first mbuf. */
		if (m->m_len < csum_end || m->m_pkthdr.len < csum_end)
			return (1);

		/*
		 * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
		 * checksum and write it at the specified offset. We could
		 * try to verify the packet: csum_start should probably
		 * correspond to the start of the TCP/UDP header.
		 *
		 * BMV: Need to properly handle UDP with zero checksum. Is
		 * the IPv4 header checksum implicitly validated?
		 */
		csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
		*(uint16_t *)(mtodo(m, csum_off)) = csum;
		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		m->m_pkthdr.csum_data = 0xFFFF;
		break;
	}
#endif
	default:
		sc->vtnet_stats.rx_csum_bad_ethtype++;
		return (1);
	}

	return (0);
}

static int
vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m,
    uint16_t etype, int hoff, struct virtio_net_hdr *hdr __unused)
{
#if 0
	struct vtnet_softc *sc;
#endif
	int protocol;

#if 0
	sc = rxq->vtnrx_sc;
#endif

	switch (etype) {
#if defined(INET)
	case ETHERTYPE_IP:
		if (__predict_false(m->m_len < hoff + sizeof(struct ip)))
			protocol = IPPROTO_DONE;
		else {
			struct ip *ip = (struct ip *)(m->m_data + hoff);
			protocol = ip->ip_p;
		}
		break;
#endif
#if defined(INET6)
	case ETHERTYPE_IPV6:
		if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
		    || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0)
			protocol = IPPROTO_DONE;
		break;
#endif
	default:
		protocol = IPPROTO_DONE;
		break;
	}

	switch (protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		m->m_pkthdr.csum_data = 0xFFFF;
		break;
	default:
		/*
		 * FreeBSD does not support checksum offloading of this
		 * protocol. Let the stack re-verify the checksum later
		 * if the protocol is supported.
		 */
#if 0
		if_printf(sc->vtnet_ifp,
		    "%s: checksum offload of unsupported protocol "
		    "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n",
		    __func__, etype, protocol, hdr->csum_start,
		    hdr->csum_offset);
#endif
		break;
	}

	return (0);
}

static int
vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
    struct virtio_net_hdr *hdr)
{
	const struct ether_header *eh;
	int hoff;
	uint16_t etype;

	eh = mtod(m, const struct ether_header *);
	etype = ntohs(eh->ether_type);
	if (etype == ETHERTYPE_VLAN) {
		/* TODO BMV: Handle QinQ. */
		const struct ether_vlan_header *evh =
		    mtod(m, const struct ether_vlan_header *);
		etype = ntohs(evh->evl_proto);
		hoff = sizeof(struct ether_vlan_header);
	} else
		hoff = sizeof(struct ether_header);

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
		return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr));
	else /* VIRTIO_NET_HDR_F_DATA_VALID */
		return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr));
}

static void
vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
{
	struct mbuf *m;

	while (--nbufs > 0) {
		m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
		if (m == NULL)
			break;
		vtnet_rxq_discard_buf(rxq, m);
	}
}

static void
vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
{
	int error __diagused;

	/*
	 * Requeue the discarded mbuf. This should always be successful
	 * since it was just dequeued.
	 */
	error = vtnet_rxq_enqueue_buf(rxq, m);
	KASSERT(error == 0,
	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
}

static int
vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
{
	struct vtnet_softc *sc;
	struct virtqueue *vq;
	struct mbuf *m_tail;

	sc = rxq->vtnrx_sc;
	vq = rxq->vtnrx_vq;
	m_tail = m_head;

	while (--nbufs > 0) {
		struct mbuf *m;
		uint32_t len;

		m = virtqueue_dequeue(vq, &len);
		if (m == NULL) {
			rxq->vtnrx_stats.vrxs_ierrors++;
			goto fail;
		}

		if (vtnet_rxq_new_buf(rxq) != 0) {
			rxq->vtnrx_stats.vrxs_iqdrops++;
			vtnet_rxq_discard_buf(rxq, m);
			if (nbufs > 1)
				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
			goto fail;
		}

		if (m->m_len < len)
			len = m->m_len;

		m->m_len = len;
		m->m_flags &= ~M_PKTHDR;

		m_head->m_pkthdr.len += len;
		m_tail->m_next = m;
		m_tail = m;
	}

	return (0);

fail:
	sc->vtnet_stats.rx_mergeable_failed++;
	m_freem(m_head);

	return (1);
}

#if defined(INET) || defined(INET6)
static int
vtnet_lro_rx(struct vtnet_rxq *rxq, struct mbuf *m)
{
	struct lro_ctrl *lro;

	lro = &rxq->vtnrx_lro;

	if (lro->lro_mbuf_max != 0) {
		tcp_lro_queue_mbuf(lro, m);
		return (0);
	}

	return (tcp_lro_rx(lro, m, 0));
}
#endif

static void
vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
    struct virtio_net_hdr *hdr)
{
	struct vtnet_softc *sc;
	struct ifnet *ifp;

	sc = rxq->vtnrx_sc;
	ifp = sc->vtnet_ifp;

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		struct ether_header *eh = mtod(m, struct ether_header *);
		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
vtnet_vlan_tag_remove(m); 2009 /* 2010 * With the 802.1Q header removed, update the 2011 * checksum starting location accordingly. 2012 */ 2013 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) 2014 hdr->csum_start -= ETHER_VLAN_ENCAP_LEN; 2015 } 2016 } 2017 2018 m->m_pkthdr.flowid = rxq->vtnrx_id; 2019 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2020 2021 if (hdr->flags & 2022 (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) { 2023 if (vtnet_rxq_csum(rxq, m, hdr) == 0) 2024 rxq->vtnrx_stats.vrxs_csum++; 2025 else 2026 rxq->vtnrx_stats.vrxs_csum_failed++; 2027 } 2028 2029 if (hdr->gso_size != 0) { 2030 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2031 case VIRTIO_NET_HDR_GSO_TCPV4: 2032 case VIRTIO_NET_HDR_GSO_TCPV6: 2033 m->m_pkthdr.lro_nsegs = 2034 howmany(m->m_pkthdr.len, hdr->gso_size); 2035 rxq->vtnrx_stats.vrxs_host_lro++; 2036 break; 2037 } 2038 } 2039 2040 rxq->vtnrx_stats.vrxs_ipackets++; 2041 rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len; 2042 2043 #if defined(INET) || defined(INET6) 2044 if (vtnet_software_lro(sc) && ifp->if_capenable & IFCAP_LRO) { 2045 if (vtnet_lro_rx(rxq, m) == 0) 2046 return; 2047 } 2048 #endif 2049 2050 (*ifp->if_input)(ifp, m); 2051 } 2052 2053 static int 2054 vtnet_rxq_eof(struct vtnet_rxq *rxq) 2055 { 2056 struct virtio_net_hdr lhdr, *hdr; 2057 struct vtnet_softc *sc; 2058 struct ifnet *ifp; 2059 struct virtqueue *vq; 2060 int deq, count; 2061 2062 sc = rxq->vtnrx_sc; 2063 vq = rxq->vtnrx_vq; 2064 ifp = sc->vtnet_ifp; 2065 deq = 0; 2066 count = sc->vtnet_rx_process_limit; 2067 2068 VTNET_RXQ_LOCK_ASSERT(rxq); 2069 2070 while (count-- > 0) { 2071 struct mbuf *m; 2072 uint32_t len, nbufs, adjsz; 2073 2074 m = virtqueue_dequeue(vq, &len); 2075 if (m == NULL) 2076 break; 2077 deq++; 2078 2079 if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) { 2080 rxq->vtnrx_stats.vrxs_ierrors++; 2081 vtnet_rxq_discard_buf(rxq, m); 2082 continue; 2083 } 2084 2085 if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) { 2086 struct virtio_net_hdr_mrg_rxbuf *mhdr = 2087 mtod(m, struct virtio_net_hdr_mrg_rxbuf *); 2088 kmsan_mark(mhdr, sizeof(*mhdr), KMSAN_STATE_INITED); 2089 nbufs = vtnet_htog16(sc, mhdr->num_buffers); 2090 adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); 2091 } else if (vtnet_modern(sc)) { 2092 nbufs = 1; /* num_buffers is always 1 */ 2093 adjsz = sizeof(struct virtio_net_hdr_v1); 2094 } else { 2095 nbufs = 1; 2096 adjsz = sizeof(struct vtnet_rx_header); 2097 /* 2098 * Account for our gap between the header and start of 2099 * data to keep the segments separated. 2100 */ 2101 len += VTNET_RX_HEADER_PAD; 2102 } 2103 2104 if (vtnet_rxq_replace_buf(rxq, m, len) != 0) { 2105 rxq->vtnrx_stats.vrxs_iqdrops++; 2106 vtnet_rxq_discard_buf(rxq, m); 2107 if (nbufs > 1) 2108 vtnet_rxq_discard_merged_bufs(rxq, nbufs); 2109 continue; 2110 } 2111 2112 m->m_pkthdr.len = len; 2113 m->m_pkthdr.rcvif = ifp; 2114 m->m_pkthdr.csum_flags = 0; 2115 2116 if (nbufs > 1) { 2117 /* Dequeue the rest of chain. */ 2118 if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0) 2119 continue; 2120 } 2121 2122 kmsan_mark_mbuf(m, KMSAN_STATE_INITED); 2123 2124 /* 2125 * Save an endian swapped version of the header prior to it 2126 * being stripped. The header is always at the start of the 2127 * mbuf data. num_buffers was already saved (and not needed) 2128 * so use the standard header. 
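 * (The copy must be taken before the m_adj() below strips adjsz bytes
 * of header from the front of the mbuf.)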
2129 */ 2130 hdr = mtod(m, struct virtio_net_hdr *); 2131 lhdr.flags = hdr->flags; 2132 lhdr.gso_type = hdr->gso_type; 2133 lhdr.hdr_len = vtnet_htog16(sc, hdr->hdr_len); 2134 lhdr.gso_size = vtnet_htog16(sc, hdr->gso_size); 2135 lhdr.csum_start = vtnet_htog16(sc, hdr->csum_start); 2136 lhdr.csum_offset = vtnet_htog16(sc, hdr->csum_offset); 2137 m_adj(m, adjsz); 2138 2139 if (PFIL_HOOKED_IN(sc->vtnet_pfil)) { 2140 pfil_return_t pfil; 2141 2142 pfil = pfil_run_hooks(sc->vtnet_pfil, &m, ifp, PFIL_IN, 2143 NULL); 2144 switch (pfil) { 2145 case PFIL_REALLOCED: 2146 m = pfil_mem2mbuf(m->m_data); 2147 break; 2148 case PFIL_DROPPED: 2149 case PFIL_CONSUMED: 2150 continue; 2151 default: 2152 KASSERT(pfil == PFIL_PASS, 2153 ("Filter returned %d!", pfil)); 2154 } 2155 } 2156 2157 vtnet_rxq_input(rxq, m, &lhdr); 2158 } 2159 2160 if (deq > 0) { 2161 #if defined(INET) || defined(INET6) 2162 if (vtnet_software_lro(sc)) 2163 tcp_lro_flush_all(&rxq->vtnrx_lro); 2164 #endif 2165 virtqueue_notify(vq); 2166 } 2167 2168 return (count > 0 ? 0 : EAGAIN); 2169 } 2170 2171 static void 2172 vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries) 2173 { 2174 struct vtnet_softc *sc; 2175 struct ifnet *ifp; 2176 u_int more; 2177 #ifdef DEV_NETMAP 2178 int nmirq; 2179 #endif /* DEV_NETMAP */ 2180 2181 sc = rxq->vtnrx_sc; 2182 ifp = sc->vtnet_ifp; 2183 2184 if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) { 2185 /* 2186 * Ignore this interrupt. Either this is a spurious interrupt 2187 * or multiqueue without per-VQ MSIX so every queue needs to 2188 * be polled (a brain dead configuration we could try harder 2189 * to avoid). 2190 */ 2191 vtnet_rxq_disable_intr(rxq); 2192 return; 2193 } 2194 2195 VTNET_RXQ_LOCK(rxq); 2196 2197 #ifdef DEV_NETMAP 2198 /* 2199 * We call netmap_rx_irq() under lock to prevent concurrent calls. 2200 * This is not necessary to serialize the access to the RX vq, but 2201 * rather to avoid races that may happen if this interface is 2202 * attached to a VALE switch, which would cause received packets 2203 * to stall in the RX queue (nm_kr_tryget() could find the kring 2204 * busy when called from netmap_bwrap_intr_notify()). 2205 */ 2206 nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more); 2207 if (nmirq != NM_IRQ_PASS) { 2208 VTNET_RXQ_UNLOCK(rxq); 2209 if (nmirq == NM_IRQ_RESCHED) { 2210 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 2211 } 2212 return; 2213 } 2214 #endif /* DEV_NETMAP */ 2215 2216 again: 2217 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2218 VTNET_RXQ_UNLOCK(rxq); 2219 return; 2220 } 2221 2222 more = vtnet_rxq_eof(rxq); 2223 if (more || vtnet_rxq_enable_intr(rxq) != 0) { 2224 if (!more) 2225 vtnet_rxq_disable_intr(rxq); 2226 /* 2227 * This is an occasional condition or race (when !more), 2228 * so retry a few times before scheduling the taskqueue. 
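 * The retry budget comes from the caller: the interrupt handler passes
 * VTNET_INTR_DISABLE_RETRIES, while the taskqueue handler passes 0 and
 * simply reschedules itself.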
2229 */ 2230 if (tries-- > 0) 2231 goto again; 2232 2233 rxq->vtnrx_stats.vrxs_rescheduled++; 2234 VTNET_RXQ_UNLOCK(rxq); 2235 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 2236 } else 2237 VTNET_RXQ_UNLOCK(rxq); 2238 } 2239 2240 static void 2241 vtnet_rx_vq_intr(void *xrxq) 2242 { 2243 struct vtnet_rxq *rxq; 2244 2245 rxq = xrxq; 2246 vtnet_rx_vq_process(rxq, VTNET_INTR_DISABLE_RETRIES); 2247 } 2248 2249 static void 2250 vtnet_rxq_tq_intr(void *xrxq, int pending __unused) 2251 { 2252 struct vtnet_rxq *rxq; 2253 2254 rxq = xrxq; 2255 vtnet_rx_vq_process(rxq, 0); 2256 } 2257 2258 static int 2259 vtnet_txq_intr_threshold(struct vtnet_txq *txq) 2260 { 2261 struct vtnet_softc *sc; 2262 int threshold; 2263 2264 sc = txq->vtntx_sc; 2265 2266 /* 2267 * The Tx interrupt is disabled until the queue free count falls 2268 * below our threshold. Completed frames are drained from the Tx 2269 * virtqueue before transmitting new frames and in the watchdog 2270 * callout, so the frequency of Tx interrupts is greatly reduced, 2271 * at the cost of not freeing mbufs as quickly as they otherwise 2272 * would be. 2273 */ 2274 threshold = virtqueue_size(txq->vtntx_vq) / 4; 2275 2276 /* 2277 * Without indirect descriptors, leave enough room for the most 2278 * segments we handle. 2279 */ 2280 if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 && 2281 threshold < sc->vtnet_tx_nsegs) 2282 threshold = sc->vtnet_tx_nsegs; 2283 2284 return (threshold); 2285 } 2286 2287 static int 2288 vtnet_txq_below_threshold(struct vtnet_txq *txq) 2289 { 2290 struct virtqueue *vq; 2291 2292 vq = txq->vtntx_vq; 2293 2294 return (virtqueue_nfree(vq) <= txq->vtntx_intr_threshold); 2295 } 2296 2297 static int 2298 vtnet_txq_notify(struct vtnet_txq *txq) 2299 { 2300 struct virtqueue *vq; 2301 2302 vq = txq->vtntx_vq; 2303 2304 txq->vtntx_watchdog = VTNET_TX_TIMEOUT; 2305 virtqueue_notify(vq); 2306 2307 if (vtnet_txq_enable_intr(txq) == 0) 2308 return (0); 2309 2310 /* 2311 * Drain frames that were completed since last checked. If this 2312 * causes the queue to go above the threshold, the caller should 2313 * continue transmitting. 2314 */ 2315 if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) { 2316 virtqueue_disable_intr(vq); 2317 return (1); 2318 } 2319 2320 return (0); 2321 } 2322 2323 static void 2324 vtnet_txq_free_mbufs(struct vtnet_txq *txq) 2325 { 2326 struct virtqueue *vq; 2327 struct vtnet_tx_header *txhdr; 2328 int last; 2329 #ifdef DEV_NETMAP 2330 struct netmap_kring *kring = netmap_kring_on(NA(txq->vtntx_sc->vtnet_ifp), 2331 txq->vtntx_id, NR_TX); 2332 #else /* !DEV_NETMAP */ 2333 void *kring = NULL; 2334 #endif /* !DEV_NETMAP */ 2335 2336 vq = txq->vtntx_vq; 2337 last = 0; 2338 2339 while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { 2340 if (kring == NULL) { 2341 m_freem(txhdr->vth_mbuf); 2342 uma_zfree(vtnet_tx_header_zone, txhdr); 2343 } 2344 } 2345 2346 KASSERT(virtqueue_empty(vq), 2347 ("%s: mbufs remaining in tx queue %p", __func__, txq)); 2348 } 2349 2350 /* 2351 * BMV: This can go away once we finally have offsets in the mbuf header. 
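 *
 * vtnet_txq_offload_ctx() parses the Ethernet and IP headers of the
 * outgoing frame and reports the ethertype, the IP protocol, and the
 * offset of the L4 header, which later becomes csum_start. For example,
 * an untagged IPv4/TCP frame yields *etype = ETHERTYPE_IP, *proto =
 * IPPROTO_TCP and *start = sizeof(struct ether_header) + (ip_hl << 2).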
2352 */ 2353 static int 2354 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype, 2355 int *proto, int *start) 2356 { 2357 struct vtnet_softc *sc; 2358 struct ether_vlan_header *evh; 2359 #if defined(INET) || defined(INET6) 2360 int offset; 2361 #endif 2362 2363 sc = txq->vtntx_sc; 2364 2365 evh = mtod(m, struct ether_vlan_header *); 2366 if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 2367 /* BMV: We should handle nested VLAN tags too. */ 2368 *etype = ntohs(evh->evl_proto); 2369 #if defined(INET) || defined(INET6) 2370 offset = sizeof(struct ether_vlan_header); 2371 #endif 2372 } else { 2373 *etype = ntohs(evh->evl_encap_proto); 2374 #if defined(INET) || defined(INET6) 2375 offset = sizeof(struct ether_header); 2376 #endif 2377 } 2378 2379 switch (*etype) { 2380 #if defined(INET) 2381 case ETHERTYPE_IP: { 2382 struct ip *ip, iphdr; 2383 if (__predict_false(m->m_len < offset + sizeof(struct ip))) { 2384 m_copydata(m, offset, sizeof(struct ip), 2385 (caddr_t) &iphdr); 2386 ip = &iphdr; 2387 } else 2388 ip = (struct ip *)(m->m_data + offset); 2389 *proto = ip->ip_p; 2390 *start = offset + (ip->ip_hl << 2); 2391 break; 2392 } 2393 #endif 2394 #if defined(INET6) 2395 case ETHERTYPE_IPV6: 2396 *proto = -1; 2397 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); 2398 /* Assert the network stack sent us a valid packet. */ 2399 KASSERT(*start > offset, 2400 ("%s: mbuf %p start %d offset %d proto %d", __func__, m, 2401 *start, offset, *proto)); 2402 break; 2403 #endif 2404 default: 2405 sc->vtnet_stats.tx_csum_unknown_ethtype++; 2406 return (EINVAL); 2407 } 2408 2409 return (0); 2410 } 2411 2412 static int 2413 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type, 2414 int offset, struct virtio_net_hdr *hdr) 2415 { 2416 static struct timeval lastecn; 2417 static int curecn; 2418 struct vtnet_softc *sc; 2419 struct tcphdr *tcp, tcphdr; 2420 2421 sc = txq->vtntx_sc; 2422 2423 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { 2424 m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); 2425 tcp = &tcphdr; 2426 } else 2427 tcp = (struct tcphdr *)(m->m_data + offset); 2428 2429 hdr->hdr_len = vtnet_gtoh16(sc, offset + (tcp->th_off << 2)); 2430 hdr->gso_size = vtnet_gtoh16(sc, m->m_pkthdr.tso_segsz); 2431 hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 : 2432 VIRTIO_NET_HDR_GSO_TCPV6; 2433 2434 if (__predict_false(tcp->th_flags & TH_CWR)) { 2435 /* 2436 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In 2437 * FreeBSD, ECN support is not on a per-interface basis, 2438 * but globally via the net.inet.tcp.ecn.enable sysctl 2439 * knob. The default is off. 2440 */ 2441 if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { 2442 if (ppsratecheck(&lastecn, &curecn, 1)) 2443 if_printf(sc->vtnet_ifp, 2444 "TSO with ECN not negotiated with host\n"); 2445 return (ENOTSUP); 2446 } 2447 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; 2448 } 2449 2450 txq->vtntx_stats.vtxs_tso++; 2451 2452 return (0); 2453 } 2454 2455 static struct mbuf * 2456 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m, 2457 struct virtio_net_hdr *hdr) 2458 { 2459 struct vtnet_softc *sc; 2460 int flags, etype, csum_start, proto, error; 2461 2462 sc = txq->vtntx_sc; 2463 flags = m->m_pkthdr.csum_flags; 2464 2465 error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start); 2466 if (error) 2467 goto drop; 2468 2469 if (flags & (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6)) { 2470 /* Sanity check the parsed mbuf matches the offload flags. 
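 * For example, a frame that parsed as ETHERTYPE_IPV6 must not carry the
 * IPv4 VTNET_CSUM_OFFLOAD flags, and vice versa.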
*/ 2471 if (__predict_false((flags & VTNET_CSUM_OFFLOAD && 2472 etype != ETHERTYPE_IP) || (flags & VTNET_CSUM_OFFLOAD_IPV6 2473 && etype != ETHERTYPE_IPV6))) { 2474 sc->vtnet_stats.tx_csum_proto_mismatch++; 2475 goto drop; 2476 } 2477 2478 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; 2479 hdr->csum_start = vtnet_gtoh16(sc, csum_start); 2480 hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data); 2481 txq->vtntx_stats.vtxs_csum++; 2482 } 2483 2484 if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) { 2485 /* 2486 * Sanity check that the parsed IP protocol is TCP and that the 2487 * checksum offload above was requested; VirtIO TSO requires it. 2488 */ 2489 if (__predict_false(proto != IPPROTO_TCP)) { 2490 sc->vtnet_stats.tx_tso_not_tcp++; 2491 goto drop; 2492 } else if (__predict_false((hdr->flags & 2493 VIRTIO_NET_HDR_F_NEEDS_CSUM) == 0)) { 2494 sc->vtnet_stats.tx_tso_without_csum++; 2495 goto drop; 2496 } 2497 2498 error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr); 2499 if (error) 2500 goto drop; 2501 } 2502 2503 return (m); 2504 2505 drop: 2506 m_freem(m); 2507 return (NULL); 2508 } 2509 2510 static int 2511 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, 2512 struct vtnet_tx_header *txhdr) 2513 { 2514 struct vtnet_softc *sc; 2515 struct virtqueue *vq; 2516 struct sglist *sg; 2517 struct mbuf *m; 2518 int error; 2519 2520 sc = txq->vtntx_sc; 2521 vq = txq->vtntx_vq; 2522 sg = txq->vtntx_sg; 2523 m = *m_head; 2524 2525 sglist_reset(sg); 2526 error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); 2527 if (error != 0 || sg->sg_nseg != 1) { 2528 KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d", 2529 __func__, error, sg->sg_nseg)); 2530 goto fail; 2531 } 2532 2533 error = sglist_append_mbuf(sg, m); 2534 if (error) { 2535 m = m_defrag(m, M_NOWAIT); 2536 if (m == NULL) 2537 goto fail; 2538 2539 *m_head = m; 2540 sc->vtnet_stats.tx_defragged++; 2541 2542 error = sglist_append_mbuf(sg, m); 2543 if (error) 2544 goto fail; 2545 } 2546 2547 txhdr->vth_mbuf = m; 2548 error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0); 2549 2550 return (error); 2551 2552 fail: 2553 sc->vtnet_stats.tx_defrag_failed++; 2554 m_freem(*m_head); 2555 *m_head = NULL; 2556 2557 return (ENOBUFS); 2558 } 2559 2560 static int 2561 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags) 2562 { 2563 struct vtnet_tx_header *txhdr; 2564 struct virtio_net_hdr *hdr; 2565 struct mbuf *m; 2566 int error; 2567 2568 m = *m_head; 2569 M_ASSERTPKTHDR(m); 2570 2571 txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO); 2572 if (txhdr == NULL) { 2573 m_freem(m); 2574 *m_head = NULL; 2575 return (ENOMEM); 2576 } 2577 2578 /* 2579 * Always use the non-mergeable header, regardless of whether mergeable 2580 * headers were negotiated, because for transmit num_buffers is always zero. 2581 * The vtnet_hdr_size is used to enqueue the right header size segment.
2582 */ 2583 hdr = &txhdr->vth_uhdr.hdr; 2584 2585 if (m->m_flags & M_VLANTAG) { 2586 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); 2587 if ((*m_head = m) == NULL) { 2588 error = ENOBUFS; 2589 goto fail; 2590 } 2591 m->m_flags &= ~M_VLANTAG; 2592 } 2593 2594 if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) { 2595 m = vtnet_txq_offload(txq, m, hdr); 2596 if ((*m_head = m) == NULL) { 2597 error = ENOBUFS; 2598 goto fail; 2599 } 2600 } 2601 2602 error = vtnet_txq_enqueue_buf(txq, m_head, txhdr); 2603 fail: 2604 if (error) 2605 uma_zfree(vtnet_tx_header_zone, txhdr); 2606 2607 return (error); 2608 } 2609 2610 #ifdef VTNET_LEGACY_TX 2611 2612 static void 2613 vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp) 2614 { 2615 struct vtnet_softc *sc; 2616 struct virtqueue *vq; 2617 struct mbuf *m0; 2618 int tries, enq; 2619 2620 sc = txq->vtntx_sc; 2621 vq = txq->vtntx_vq; 2622 tries = 0; 2623 2624 VTNET_TXQ_LOCK_ASSERT(txq); 2625 2626 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2627 sc->vtnet_link_active == 0) 2628 return; 2629 2630 vtnet_txq_eof(txq); 2631 2632 again: 2633 enq = 0; 2634 2635 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 2636 if (virtqueue_full(vq)) 2637 break; 2638 2639 IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); 2640 if (m0 == NULL) 2641 break; 2642 2643 if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) { 2644 if (m0 != NULL) 2645 IFQ_DRV_PREPEND(&ifp->if_snd, m0); 2646 break; 2647 } 2648 2649 enq++; 2650 ETHER_BPF_MTAP(ifp, m0); 2651 } 2652 2653 if (enq > 0 && vtnet_txq_notify(txq) != 0) { 2654 if (tries++ < VTNET_NOTIFY_RETRIES) 2655 goto again; 2656 2657 txq->vtntx_stats.vtxs_rescheduled++; 2658 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); 2659 } 2660 } 2661 2662 static void 2663 vtnet_start(struct ifnet *ifp) 2664 { 2665 struct vtnet_softc *sc; 2666 struct vtnet_txq *txq; 2667 2668 sc = ifp->if_softc; 2669 txq = &sc->vtnet_txqs[0]; 2670 2671 VTNET_TXQ_LOCK(txq); 2672 vtnet_start_locked(txq, ifp); 2673 VTNET_TXQ_UNLOCK(txq); 2674 } 2675 2676 #else /* !VTNET_LEGACY_TX */ 2677 2678 static int 2679 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) 2680 { 2681 struct vtnet_softc *sc; 2682 struct virtqueue *vq; 2683 struct buf_ring *br; 2684 struct ifnet *ifp; 2685 int enq, tries, error; 2686 2687 sc = txq->vtntx_sc; 2688 vq = txq->vtntx_vq; 2689 br = txq->vtntx_br; 2690 ifp = sc->vtnet_ifp; 2691 tries = 0; 2692 error = 0; 2693 2694 VTNET_TXQ_LOCK_ASSERT(txq); 2695 2696 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2697 sc->vtnet_link_active == 0) { 2698 if (m != NULL) 2699 error = drbr_enqueue(ifp, br, m); 2700 return (error); 2701 } 2702 2703 if (m != NULL) { 2704 error = drbr_enqueue(ifp, br, m); 2705 if (error) 2706 return (error); 2707 } 2708 2709 vtnet_txq_eof(txq); 2710 2711 again: 2712 enq = 0; 2713 2714 while ((m = drbr_peek(ifp, br)) != NULL) { 2715 if (virtqueue_full(vq)) { 2716 drbr_putback(ifp, br, m); 2717 break; 2718 } 2719 2720 if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) { 2721 if (m != NULL) 2722 drbr_putback(ifp, br, m); 2723 else 2724 drbr_advance(ifp, br); 2725 break; 2726 } 2727 drbr_advance(ifp, br); 2728 2729 enq++; 2730 ETHER_BPF_MTAP(ifp, m); 2731 } 2732 2733 if (enq > 0 && vtnet_txq_notify(txq) != 0) { 2734 if (tries++ < VTNET_NOTIFY_RETRIES) 2735 goto again; 2736 2737 txq->vtntx_stats.vtxs_rescheduled++; 2738 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); 2739 } 2740 2741 return (0); 2742 } 2743 2744 static int 2745 vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m) 2746 { 2747 struct vtnet_softc *sc; 2748 
struct vtnet_txq *txq; 2749 int i, npairs, error; 2750 2751 sc = ifp->if_softc; 2752 npairs = sc->vtnet_act_vq_pairs; 2753 2754 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 2755 i = m->m_pkthdr.flowid % npairs; 2756 else 2757 i = curcpu % npairs; 2758 2759 txq = &sc->vtnet_txqs[i]; 2760 2761 if (VTNET_TXQ_TRYLOCK(txq) != 0) { 2762 error = vtnet_txq_mq_start_locked(txq, m); 2763 VTNET_TXQ_UNLOCK(txq); 2764 } else { 2765 error = drbr_enqueue(ifp, txq->vtntx_br, m); 2766 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask); 2767 } 2768 2769 return (error); 2770 } 2771 2772 static void 2773 vtnet_txq_tq_deferred(void *xtxq, int pending __unused) 2774 { 2775 struct vtnet_softc *sc; 2776 struct vtnet_txq *txq; 2777 2778 txq = xtxq; 2779 sc = txq->vtntx_sc; 2780 2781 VTNET_TXQ_LOCK(txq); 2782 if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br)) 2783 vtnet_txq_mq_start_locked(txq, NULL); 2784 VTNET_TXQ_UNLOCK(txq); 2785 } 2786 2787 #endif /* VTNET_LEGACY_TX */ 2788 2789 static void 2790 vtnet_txq_start(struct vtnet_txq *txq) 2791 { 2792 struct vtnet_softc *sc; 2793 struct ifnet *ifp; 2794 2795 sc = txq->vtntx_sc; 2796 ifp = sc->vtnet_ifp; 2797 2798 #ifdef VTNET_LEGACY_TX 2799 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) 2800 vtnet_start_locked(txq, ifp); 2801 #else 2802 if (!drbr_empty(ifp, txq->vtntx_br)) 2803 vtnet_txq_mq_start_locked(txq, NULL); 2804 #endif 2805 } 2806 2807 static void 2808 vtnet_txq_tq_intr(void *xtxq, int pending __unused) 2809 { 2810 struct vtnet_softc *sc; 2811 struct vtnet_txq *txq; 2812 struct ifnet *ifp; 2813 2814 txq = xtxq; 2815 sc = txq->vtntx_sc; 2816 ifp = sc->vtnet_ifp; 2817 2818 VTNET_TXQ_LOCK(txq); 2819 2820 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2821 VTNET_TXQ_UNLOCK(txq); 2822 return; 2823 } 2824 2825 vtnet_txq_eof(txq); 2826 vtnet_txq_start(txq); 2827 2828 VTNET_TXQ_UNLOCK(txq); 2829 } 2830 2831 static int 2832 vtnet_txq_eof(struct vtnet_txq *txq) 2833 { 2834 struct virtqueue *vq; 2835 struct vtnet_tx_header *txhdr; 2836 struct mbuf *m; 2837 int deq; 2838 2839 vq = txq->vtntx_vq; 2840 deq = 0; 2841 VTNET_TXQ_LOCK_ASSERT(txq); 2842 2843 while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { 2844 m = txhdr->vth_mbuf; 2845 deq++; 2846 2847 txq->vtntx_stats.vtxs_opackets++; 2848 txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len; 2849 if (m->m_flags & M_MCAST) 2850 txq->vtntx_stats.vtxs_omcasts++; 2851 2852 m_freem(m); 2853 uma_zfree(vtnet_tx_header_zone, txhdr); 2854 } 2855 2856 if (virtqueue_empty(vq)) 2857 txq->vtntx_watchdog = 0; 2858 2859 return (deq); 2860 } 2861 2862 static void 2863 vtnet_tx_vq_intr(void *xtxq) 2864 { 2865 struct vtnet_softc *sc; 2866 struct vtnet_txq *txq; 2867 struct ifnet *ifp; 2868 2869 txq = xtxq; 2870 sc = txq->vtntx_sc; 2871 ifp = sc->vtnet_ifp; 2872 2873 if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) { 2874 /* 2875 * Ignore this interrupt. Either this is a spurious interrupt 2876 * or multiqueue without per-VQ MSIX so every queue needs to 2877 * be polled (a brain dead configuration we could try harder 2878 * to avoid). 
2879 */ 2880 vtnet_txq_disable_intr(txq); 2881 return; 2882 } 2883 2884 #ifdef DEV_NETMAP 2885 if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS) 2886 return; 2887 #endif /* DEV_NETMAP */ 2888 2889 VTNET_TXQ_LOCK(txq); 2890 2891 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2892 VTNET_TXQ_UNLOCK(txq); 2893 return; 2894 } 2895 2896 vtnet_txq_eof(txq); 2897 vtnet_txq_start(txq); 2898 2899 VTNET_TXQ_UNLOCK(txq); 2900 } 2901 2902 static void 2903 vtnet_tx_start_all(struct vtnet_softc *sc) 2904 { 2905 struct vtnet_txq *txq; 2906 int i; 2907 2908 VTNET_CORE_LOCK_ASSERT(sc); 2909 2910 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2911 txq = &sc->vtnet_txqs[i]; 2912 2913 VTNET_TXQ_LOCK(txq); 2914 vtnet_txq_start(txq); 2915 VTNET_TXQ_UNLOCK(txq); 2916 } 2917 } 2918 2919 #ifndef VTNET_LEGACY_TX 2920 static void 2921 vtnet_qflush(struct ifnet *ifp) 2922 { 2923 struct vtnet_softc *sc; 2924 struct vtnet_txq *txq; 2925 struct mbuf *m; 2926 int i; 2927 2928 sc = ifp->if_softc; 2929 2930 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2931 txq = &sc->vtnet_txqs[i]; 2932 2933 VTNET_TXQ_LOCK(txq); 2934 while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL) 2935 m_freem(m); 2936 VTNET_TXQ_UNLOCK(txq); 2937 } 2938 2939 if_qflush(ifp); 2940 } 2941 #endif 2942 2943 static int 2944 vtnet_watchdog(struct vtnet_txq *txq) 2945 { 2946 struct ifnet *ifp; 2947 2948 ifp = txq->vtntx_sc->vtnet_ifp; 2949 2950 VTNET_TXQ_LOCK(txq); 2951 if (txq->vtntx_watchdog == 1) { 2952 /* 2953 * Only drain completed frames if the watchdog is about to 2954 * expire. If any frames were drained, there may be enough 2955 * free descriptors now available to transmit queued frames. 2956 * In that case, the timer will immediately be decremented 2957 * below, but the timeout is generous enough that this should not 2958 * be a problem.
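 * The counter is armed to VTNET_TX_TIMEOUT by vtnet_txq_notify() and is
 * decremented here once per second from the vtnet_tick() callout.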
2959 */ 2960 if (vtnet_txq_eof(txq) != 0) 2961 vtnet_txq_start(txq); 2962 } 2963 2964 if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) { 2965 VTNET_TXQ_UNLOCK(txq); 2966 return (0); 2967 } 2968 VTNET_TXQ_UNLOCK(txq); 2969 2970 if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id); 2971 return (1); 2972 } 2973 2974 static void 2975 vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc, 2976 struct vtnet_txq_stats *txacc) 2977 { 2978 2979 bzero(rxacc, sizeof(struct vtnet_rxq_stats)); 2980 bzero(txacc, sizeof(struct vtnet_txq_stats)); 2981 2982 for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) { 2983 struct vtnet_rxq_stats *rxst; 2984 struct vtnet_txq_stats *txst; 2985 2986 rxst = &sc->vtnet_rxqs[i].vtnrx_stats; 2987 rxacc->vrxs_ipackets += rxst->vrxs_ipackets; 2988 rxacc->vrxs_ibytes += rxst->vrxs_ibytes; 2989 rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops; 2990 rxacc->vrxs_csum += rxst->vrxs_csum; 2991 rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed; 2992 rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled; 2993 2994 txst = &sc->vtnet_txqs[i].vtntx_stats; 2995 txacc->vtxs_opackets += txst->vtxs_opackets; 2996 txacc->vtxs_obytes += txst->vtxs_obytes; 2997 txacc->vtxs_csum += txst->vtxs_csum; 2998 txacc->vtxs_tso += txst->vtxs_tso; 2999 txacc->vtxs_rescheduled += txst->vtxs_rescheduled; 3000 } 3001 } 3002 3003 static uint64_t 3004 vtnet_get_counter(if_t ifp, ift_counter cnt) 3005 { 3006 struct vtnet_softc *sc; 3007 struct vtnet_rxq_stats rxaccum; 3008 struct vtnet_txq_stats txaccum; 3009 3010 sc = if_getsoftc(ifp); 3011 vtnet_accum_stats(sc, &rxaccum, &txaccum); 3012 3013 switch (cnt) { 3014 case IFCOUNTER_IPACKETS: 3015 return (rxaccum.vrxs_ipackets); 3016 case IFCOUNTER_IQDROPS: 3017 return (rxaccum.vrxs_iqdrops); 3018 case IFCOUNTER_IERRORS: 3019 return (rxaccum.vrxs_ierrors); 3020 case IFCOUNTER_OPACKETS: 3021 return (txaccum.vtxs_opackets); 3022 #ifndef VTNET_LEGACY_TX 3023 case IFCOUNTER_OBYTES: 3024 return (txaccum.vtxs_obytes); 3025 case IFCOUNTER_OMCASTS: 3026 return (txaccum.vtxs_omcasts); 3027 #endif 3028 default: 3029 return (if_get_counter_default(ifp, cnt)); 3030 } 3031 } 3032 3033 static void 3034 vtnet_tick(void *xsc) 3035 { 3036 struct vtnet_softc *sc; 3037 struct ifnet *ifp; 3038 int i, timedout; 3039 3040 sc = xsc; 3041 ifp = sc->vtnet_ifp; 3042 timedout = 0; 3043 3044 VTNET_CORE_LOCK_ASSERT(sc); 3045 3046 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 3047 timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]); 3048 3049 if (timedout != 0) { 3050 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3051 vtnet_init_locked(sc, 0); 3052 } else 3053 callout_schedule(&sc->vtnet_tick_ch, hz); 3054 } 3055 3056 static void 3057 vtnet_start_taskqueues(struct vtnet_softc *sc) 3058 { 3059 device_t dev; 3060 struct vtnet_rxq *rxq; 3061 struct vtnet_txq *txq; 3062 int i, error; 3063 3064 dev = sc->vtnet_dev; 3065 3066 /* 3067 * Errors here are very difficult to recover from - we cannot 3068 * easily fail because, if this is during boot, we will hang 3069 * when freeing any successfully started taskqueues because 3070 * the scheduler isn't up yet. 3071 * 3072 * Most drivers just ignore the return value - it only fails 3073 * with ENOMEM so an error is not likely. 
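 * A diagnostic is still printed below so a queue whose thread failed to
 * start is at least visible.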
3074 */ 3075 for (i = 0; i < sc->vtnet_req_vq_pairs; i++) { 3076 rxq = &sc->vtnet_rxqs[i]; 3077 error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET, 3078 "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id); 3079 if (error) { 3080 device_printf(dev, "failed to start rx taskq %d\n", 3081 rxq->vtnrx_id); 3082 } 3083 3084 txq = &sc->vtnet_txqs[i]; 3085 error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET, 3086 "%s txq %d", device_get_nameunit(dev), txq->vtntx_id); 3087 if (error) { 3088 device_printf(dev, "failed to start tx taskq %d\n", 3089 txq->vtntx_id); 3090 } 3091 } 3092 } 3093 3094 static void 3095 vtnet_free_taskqueues(struct vtnet_softc *sc) 3096 { 3097 struct vtnet_rxq *rxq; 3098 struct vtnet_txq *txq; 3099 int i; 3100 3101 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 3102 rxq = &sc->vtnet_rxqs[i]; 3103 if (rxq->vtnrx_tq != NULL) { 3104 taskqueue_free(rxq->vtnrx_tq); 3105 rxq->vtnrx_tq = NULL; 3106 } 3107 3108 txq = &sc->vtnet_txqs[i]; 3109 if (txq->vtntx_tq != NULL) { 3110 taskqueue_free(txq->vtntx_tq); 3111 txq->vtntx_tq = NULL; 3112 } 3113 } 3114 } 3115 3116 static void 3117 vtnet_drain_taskqueues(struct vtnet_softc *sc) 3118 { 3119 struct vtnet_rxq *rxq; 3120 struct vtnet_txq *txq; 3121 int i; 3122 3123 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 3124 rxq = &sc->vtnet_rxqs[i]; 3125 if (rxq->vtnrx_tq != NULL) 3126 taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 3127 3128 txq = &sc->vtnet_txqs[i]; 3129 if (txq->vtntx_tq != NULL) { 3130 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask); 3131 #ifndef VTNET_LEGACY_TX 3132 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask); 3133 #endif 3134 } 3135 } 3136 } 3137 3138 static void 3139 vtnet_drain_rxtx_queues(struct vtnet_softc *sc) 3140 { 3141 struct vtnet_rxq *rxq; 3142 struct vtnet_txq *txq; 3143 int i; 3144 3145 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 3146 rxq = &sc->vtnet_rxqs[i]; 3147 vtnet_rxq_free_mbufs(rxq); 3148 3149 txq = &sc->vtnet_txqs[i]; 3150 vtnet_txq_free_mbufs(txq); 3151 } 3152 } 3153 3154 static void 3155 vtnet_stop_rendezvous(struct vtnet_softc *sc) 3156 { 3157 struct vtnet_rxq *rxq; 3158 struct vtnet_txq *txq; 3159 int i; 3160 3161 VTNET_CORE_LOCK_ASSERT(sc); 3162 3163 /* 3164 * Lock and unlock the per-queue mutex so we know the stop 3165 * state is visible. Doing only the active queues should be 3166 * sufficient, but it does not cost much extra to do all the 3167 * queues. 3168 */ 3169 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 3170 rxq = &sc->vtnet_rxqs[i]; 3171 VTNET_RXQ_LOCK(rxq); 3172 VTNET_RXQ_UNLOCK(rxq); 3173 3174 txq = &sc->vtnet_txqs[i]; 3175 VTNET_TXQ_LOCK(txq); 3176 VTNET_TXQ_UNLOCK(txq); 3177 } 3178 } 3179 3180 static void 3181 vtnet_stop(struct vtnet_softc *sc) 3182 { 3183 device_t dev; 3184 struct ifnet *ifp; 3185 3186 dev = sc->vtnet_dev; 3187 ifp = sc->vtnet_ifp; 3188 3189 VTNET_CORE_LOCK_ASSERT(sc); 3190 3191 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3192 sc->vtnet_link_active = 0; 3193 callout_stop(&sc->vtnet_tick_ch); 3194 3195 /* Only advisory. */ 3196 vtnet_disable_interrupts(sc); 3197 3198 #ifdef DEV_NETMAP 3199 /* Stop any pending txsync/rxsync and disable them. */ 3200 netmap_disable_all_rings(ifp); 3201 #endif /* DEV_NETMAP */ 3202 3203 /* 3204 * Stop the host adapter. This resets it to the pre-initialized 3205 * state. It will not generate any interrupts until after it is 3206 * reinitialized.
3207 */ 3208 virtio_stop(dev); 3209 vtnet_stop_rendezvous(sc); 3210 3211 vtnet_drain_rxtx_queues(sc); 3212 sc->vtnet_act_vq_pairs = 1; 3213 } 3214 3215 static int 3216 vtnet_virtio_reinit(struct vtnet_softc *sc) 3217 { 3218 device_t dev; 3219 struct ifnet *ifp; 3220 uint64_t features; 3221 int error; 3222 3223 dev = sc->vtnet_dev; 3224 ifp = sc->vtnet_ifp; 3225 features = sc->vtnet_negotiated_features; 3226 3227 /* 3228 * Re-negotiate with the host, removing any disabled receive 3229 * features. Transmit features are disabled only on our side 3230 * via if_capenable and if_hwassist. 3231 */ 3232 3233 if ((ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0) 3234 features &= ~(VIRTIO_NET_F_GUEST_CSUM | VTNET_LRO_FEATURES); 3235 3236 if ((ifp->if_capenable & IFCAP_LRO) == 0) 3237 features &= ~VTNET_LRO_FEATURES; 3238 3239 if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) 3240 features &= ~VIRTIO_NET_F_CTRL_VLAN; 3241 3242 error = virtio_reinit(dev, features); 3243 if (error) { 3244 device_printf(dev, "virtio reinit error %d\n", error); 3245 return (error); 3246 } 3247 3248 sc->vtnet_features = features; 3249 virtio_reinit_complete(dev); 3250 3251 return (0); 3252 } 3253 3254 static void 3255 vtnet_init_rx_filters(struct vtnet_softc *sc) 3256 { 3257 struct ifnet *ifp; 3258 3259 ifp = sc->vtnet_ifp; 3260 3261 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { 3262 vtnet_rx_filter(sc); 3263 vtnet_rx_filter_mac(sc); 3264 } 3265 3266 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) 3267 vtnet_rx_filter_vlan(sc); 3268 } 3269 3270 static int 3271 vtnet_init_rx_queues(struct vtnet_softc *sc) 3272 { 3273 device_t dev; 3274 struct ifnet *ifp; 3275 struct vtnet_rxq *rxq; 3276 int i, clustersz, error; 3277 3278 dev = sc->vtnet_dev; 3279 ifp = sc->vtnet_ifp; 3280 3281 clustersz = vtnet_rx_cluster_size(sc, ifp->if_mtu); 3282 sc->vtnet_rx_clustersz = clustersz; 3283 3284 if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) { 3285 sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) + 3286 VTNET_MAX_RX_SIZE, clustersz); 3287 KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, 3288 ("%s: too many rx mbufs %d for %d segments", __func__, 3289 sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); 3290 } else 3291 sc->vtnet_rx_nmbufs = 1; 3292 3293 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 3294 rxq = &sc->vtnet_rxqs[i]; 3295 3296 /* Hold the lock to satisfy asserts. 
*/ 3297 VTNET_RXQ_LOCK(rxq); 3298 error = vtnet_rxq_populate(rxq); 3299 VTNET_RXQ_UNLOCK(rxq); 3300 3301 if (error) { 3302 device_printf(dev, "cannot populate Rx queue %d\n", i); 3303 return (error); 3304 } 3305 } 3306 3307 return (0); 3308 } 3309 3310 static int 3311 vtnet_init_tx_queues(struct vtnet_softc *sc) 3312 { 3313 struct vtnet_txq *txq; 3314 int i; 3315 3316 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 3317 txq = &sc->vtnet_txqs[i]; 3318 txq->vtntx_watchdog = 0; 3319 txq->vtntx_intr_threshold = vtnet_txq_intr_threshold(txq); 3320 #ifdef DEV_NETMAP 3321 netmap_reset(NA(sc->vtnet_ifp), NR_TX, i, 0); 3322 #endif /* DEV_NETMAP */ 3323 } 3324 3325 return (0); 3326 } 3327 3328 static int 3329 vtnet_init_rxtx_queues(struct vtnet_softc *sc) 3330 { 3331 int error; 3332 3333 error = vtnet_init_rx_queues(sc); 3334 if (error) 3335 return (error); 3336 3337 error = vtnet_init_tx_queues(sc); 3338 if (error) 3339 return (error); 3340 3341 return (0); 3342 } 3343 3344 static void 3345 vtnet_set_active_vq_pairs(struct vtnet_softc *sc) 3346 { 3347 device_t dev; 3348 int npairs; 3349 3350 dev = sc->vtnet_dev; 3351 3352 if ((sc->vtnet_flags & VTNET_FLAG_MQ) == 0) { 3353 sc->vtnet_act_vq_pairs = 1; 3354 return; 3355 } 3356 3357 npairs = sc->vtnet_req_vq_pairs; 3358 3359 if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { 3360 device_printf(dev, "cannot set active queue pairs to %d, " 3361 "falling back to 1 queue pair\n", npairs); 3362 npairs = 1; 3363 } 3364 3365 sc->vtnet_act_vq_pairs = npairs; 3366 } 3367 3368 static void 3369 vtnet_update_rx_offloads(struct vtnet_softc *sc) 3370 { 3371 struct ifnet *ifp; 3372 uint64_t features; 3373 int error; 3374 3375 ifp = sc->vtnet_ifp; 3376 features = sc->vtnet_features; 3377 3378 VTNET_CORE_LOCK_ASSERT(sc); 3379 3380 if (ifp->if_capabilities & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { 3381 if (ifp->if_capenable & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) 3382 features |= VIRTIO_NET_F_GUEST_CSUM; 3383 else 3384 features &= ~VIRTIO_NET_F_GUEST_CSUM; 3385 } 3386 3387 if (ifp->if_capabilities & IFCAP_LRO && !vtnet_software_lro(sc)) { 3388 if (ifp->if_capenable & IFCAP_LRO) 3389 features |= VTNET_LRO_FEATURES; 3390 else 3391 features &= ~VTNET_LRO_FEATURES; 3392 } 3393 3394 error = vtnet_ctrl_guest_offloads(sc, 3395 features & (VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | 3396 VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN | 3397 VIRTIO_NET_F_GUEST_UFO)); 3398 if (error) { 3399 device_printf(sc->vtnet_dev, 3400 "%s: cannot update Rx features\n", __func__); 3401 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 3402 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 3403 vtnet_init_locked(sc, 0); 3404 } 3405 } else 3406 sc->vtnet_features = features; 3407 } 3408 3409 static int 3410 vtnet_reinit(struct vtnet_softc *sc) 3411 { 3412 struct ifnet *ifp; 3413 int error; 3414 3415 ifp = sc->vtnet_ifp; 3416 3417 bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); 3418 3419 error = vtnet_virtio_reinit(sc); 3420 if (error) 3421 return (error); 3422 3423 vtnet_set_macaddr(sc); 3424 vtnet_set_active_vq_pairs(sc); 3425 3426 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) 3427 vtnet_init_rx_filters(sc); 3428 3429 ifp->if_hwassist = 0; 3430 if (ifp->if_capenable & IFCAP_TXCSUM) 3431 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD; 3432 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 3433 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6; 3434 if (ifp->if_capenable & IFCAP_TSO4) 3435 ifp->if_hwassist |= CSUM_IP_TSO; 3436 if (ifp->if_capenable & IFCAP_TSO6) 3437 ifp->if_hwassist |= CSUM_IP6_TSO; 3438 3439 error = 
vtnet_init_rxtx_queues(sc); 3440 if (error) 3441 return (error); 3442 3443 return (0); 3444 } 3445 3446 static void 3447 vtnet_init_locked(struct vtnet_softc *sc, int init_mode) 3448 { 3449 struct ifnet *ifp; 3450 3451 ifp = sc->vtnet_ifp; 3452 3453 VTNET_CORE_LOCK_ASSERT(sc); 3454 3455 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3456 return; 3457 3458 vtnet_stop(sc); 3459 3460 #ifdef DEV_NETMAP 3461 /* Once stopped we can update the netmap flags, if necessary. */ 3462 switch (init_mode) { 3463 case VTNET_INIT_NETMAP_ENTER: 3464 nm_set_native_flags(NA(ifp)); 3465 break; 3466 case VTNET_INIT_NETMAP_EXIT: 3467 nm_clear_native_flags(NA(ifp)); 3468 break; 3469 } 3470 #endif /* DEV_NETMAP */ 3471 3472 if (vtnet_reinit(sc) != 0) { 3473 vtnet_stop(sc); 3474 return; 3475 } 3476 3477 ifp->if_drv_flags |= IFF_DRV_RUNNING; 3478 vtnet_update_link_status(sc); 3479 vtnet_enable_interrupts(sc); 3480 callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); 3481 3482 #ifdef DEV_NETMAP 3483 /* Re-enable txsync/rxsync. */ 3484 netmap_enable_all_rings(ifp); 3485 #endif /* DEV_NETMAP */ 3486 } 3487 3488 static void 3489 vtnet_init(void *xsc) 3490 { 3491 struct vtnet_softc *sc; 3492 3493 sc = xsc; 3494 3495 VTNET_CORE_LOCK(sc); 3496 vtnet_init_locked(sc, 0); 3497 VTNET_CORE_UNLOCK(sc); 3498 } 3499 3500 static void 3501 vtnet_free_ctrl_vq(struct vtnet_softc *sc) 3502 { 3503 3504 /* 3505 * The control virtqueue is only polled and therefore it should 3506 * already be empty. 3507 */ 3508 KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq), 3509 ("%s: ctrl vq %p not empty", __func__, sc->vtnet_ctrl_vq)); 3510 } 3511 3512 static void 3513 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie, 3514 struct sglist *sg, int readable, int writable) 3515 { 3516 struct virtqueue *vq; 3517 3518 vq = sc->vtnet_ctrl_vq; 3519 3520 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ); 3521 VTNET_CORE_LOCK_ASSERT(sc); 3522 3523 if (!virtqueue_empty(vq)) 3524 return; 3525 3526 /* 3527 * Poll for the response, but the command is likely completed before 3528 * returning from the notify. 3529 */ 3530 if (virtqueue_enqueue(vq, cookie, sg, readable, writable) == 0) { 3531 virtqueue_notify(vq); 3532 virtqueue_poll(vq, NULL); 3533 } 3534 } 3535 3536 static int 3537 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) 3538 { 3539 struct sglist_seg segs[3]; 3540 struct sglist sg; 3541 struct { 3542 struct virtio_net_ctrl_hdr hdr __aligned(2); 3543 uint8_t pad1; 3544 uint8_t addr[ETHER_ADDR_LEN] __aligned(8); 3545 uint8_t pad2; 3546 uint8_t ack; 3547 } s; 3548 int error; 3549 3550 error = 0; 3551 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC); 3552 3553 s.hdr.class = VIRTIO_NET_CTRL_MAC; 3554 s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; 3555 bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN); 3556 s.ack = VIRTIO_NET_ERR; 3557 3558 sglist_init(&sg, nitems(segs), segs); 3559 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3560 error |= sglist_append(&sg, &s.addr[0], ETHER_ADDR_LEN); 3561 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3562 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3563 3564 if (error == 0) 3565 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3566 3567 return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); 3568 } 3569 3570 static int 3571 vtnet_ctrl_guest_offloads(struct vtnet_softc *sc, uint64_t offloads) 3572 { 3573 struct sglist_seg segs[3]; 3574 struct sglist sg; 3575 struct { 3576 struct virtio_net_ctrl_hdr hdr __aligned(2); 3577 uint8_t pad1; 3578 uint64_t offloads __aligned(8); 3579 uint8_t pad2; 3580 uint8_t ack; 3581 } s; 3582 int error; 3583 3584 error = 0; 3585 MPASS(sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 3586 3587 s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; 3588 s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; 3589 s.offloads = vtnet_gtoh64(sc, offloads); 3590 s.ack = VIRTIO_NET_ERR; 3591 3592 sglist_init(&sg, nitems(segs), segs); 3593 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3594 error |= sglist_append(&sg, &s.offloads, sizeof(uint64_t)); 3595 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3596 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3597 3598 if (error == 0) 3599 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3600 3601 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3602 } 3603 3604 static int 3605 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs) 3606 { 3607 struct sglist_seg segs[3]; 3608 struct sglist sg; 3609 struct { 3610 struct virtio_net_ctrl_hdr hdr __aligned(2); 3611 uint8_t pad1; 3612 struct virtio_net_ctrl_mq mq __aligned(2); 3613 uint8_t pad2; 3614 uint8_t ack; 3615 } s; 3616 int error; 3617 3618 error = 0; 3619 MPASS(sc->vtnet_flags & VTNET_FLAG_MQ); 3620 3621 s.hdr.class = VIRTIO_NET_CTRL_MQ; 3622 s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; 3623 s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs); 3624 s.ack = VIRTIO_NET_ERR; 3625 3626 sglist_init(&sg, nitems(segs), segs); 3627 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3628 error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); 3629 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3630 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3631 3632 if (error == 0) 3633 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3634 3635 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3636 } 3637 3638 static int 3639 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, bool on) 3640 { 3641 struct sglist_seg segs[3]; 3642 struct sglist sg; 3643 struct { 3644 struct virtio_net_ctrl_hdr hdr __aligned(2); 3645 uint8_t pad1; 3646 uint8_t onoff; 3647 uint8_t pad2; 3648 uint8_t ack; 3649 } s; 3650 int error; 3651 3652 error = 0; 3653 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX); 3654 3655 s.hdr.class = VIRTIO_NET_CTRL_RX; 3656 s.hdr.cmd = cmd; 3657 s.onoff = on; 3658 s.ack = VIRTIO_NET_ERR; 3659 3660 sglist_init(&sg, nitems(segs), segs); 3661 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3662 error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); 3663 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3664 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3665 3666 if (error == 0) 3667 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3668 3669 return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); 3670 } 3671 3672 static int 3673 vtnet_set_promisc(struct vtnet_softc *sc, bool on) 3674 { 3675 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on)); 3676 } 3677 3678 static int 3679 vtnet_set_allmulti(struct vtnet_softc *sc, bool on) 3680 { 3681 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); 3682 } 3683 3684 static void 3685 vtnet_rx_filter(struct vtnet_softc *sc) 3686 { 3687 device_t dev; 3688 struct ifnet *ifp; 3689 3690 dev = sc->vtnet_dev; 3691 ifp = sc->vtnet_ifp; 3692 3693 VTNET_CORE_LOCK_ASSERT(sc); 3694 3695 if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) { 3696 device_printf(dev, "cannot %s promiscuous mode\n", 3697 ifp->if_flags & IFF_PROMISC ? "enable" : "disable"); 3698 } 3699 3700 if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) { 3701 device_printf(dev, "cannot %s all-multicast mode\n", 3702 ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable"); 3703 } 3704 } 3705 3706 static u_int 3707 vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt) 3708 { 3709 struct vtnet_softc *sc = arg; 3710 3711 if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0) 3712 return (0); 3713 3714 if (ucnt < VTNET_MAX_MAC_ENTRIES) 3715 bcopy(LLADDR(sdl), 3716 &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt], 3717 ETHER_ADDR_LEN); 3718 3719 return (1); 3720 } 3721 3722 static u_int 3723 vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt) 3724 { 3725 struct vtnet_mac_filter *filter = arg; 3726 3727 if (mcnt < VTNET_MAX_MAC_ENTRIES) 3728 bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt], 3729 ETHER_ADDR_LEN); 3730 3731 return (1); 3732 } 3733 3734 static void 3735 vtnet_rx_filter_mac(struct vtnet_softc *sc) 3736 { 3737 struct virtio_net_ctrl_hdr hdr __aligned(2); 3738 struct vtnet_mac_filter *filter; 3739 struct sglist_seg segs[4]; 3740 struct sglist sg; 3741 struct ifnet *ifp; 3742 bool promisc, allmulti; 3743 u_int ucnt, mcnt; 3744 int error; 3745 uint8_t ack; 3746 3747 ifp = sc->vtnet_ifp; 3748 filter = sc->vtnet_mac_filter; 3749 error = 0; 3750 3751 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX); 3752 VTNET_CORE_LOCK_ASSERT(sc); 3753 3754 /* Unicast MAC addresses: */ 3755 ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc); 3756 promisc = (ucnt > VTNET_MAX_MAC_ENTRIES); 3757 3758 if (promisc) { 3759 ucnt = 0; 3760 if_printf(ifp, "more than %d MAC addresses assigned, " 3761 "falling back to promiscuous mode\n", 3762 VTNET_MAX_MAC_ENTRIES); 3763 } 3764 3765 /* Multicast MAC addresses: */ 3766 mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter); 3767 allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES); 3768 3769 if (allmulti) { 3770 mcnt = 0; 3771 if_printf(ifp, "more than %d multicast MAC addresses " 3772 "assigned, falling back to all-multicast mode\n", 3773 VTNET_MAX_MAC_ENTRIES); 3774 } 3775 3776 if (promisc && allmulti) 3777 goto out; 3778 3779 filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt); 3780 filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt); 3781 3782 hdr.class = VIRTIO_NET_CTRL_MAC; 3783 hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; 3784 ack = VIRTIO_NET_ERR; 3785 3786 sglist_init(&sg, nitems(segs), segs); 3787 error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); 3788 error |= sglist_append(&sg, &filter->vmf_unicast, 3789 sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN); 3790 error |= sglist_append(&sg, &filter->vmf_multicast, 3791 sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN); 3792 error |= sglist_append(&sg, &ack, sizeof(uint8_t)); 3793 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 
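/*
 * Each filter table is laid out as a 32-bit entry count followed by the
 * packed MAC addresses, which is why sizeof(uint32_t) plus
 * count * ETHER_ADDR_LEN bytes of each table are appended to the sglist
 * above.
 */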
3794 3795 if (error == 0) 3796 vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); 3797 if (ack != VIRTIO_NET_OK) 3798 if_printf(ifp, "error setting host MAC filter table\n"); 3799 3800 out: 3801 if (promisc != 0 && vtnet_set_promisc(sc, true) != 0) 3802 if_printf(ifp, "cannot enable promiscuous mode\n"); 3803 if (allmulti != 0 && vtnet_set_allmulti(sc, true) != 0) 3804 if_printf(ifp, "cannot enable all-multicast mode\n"); 3805 } 3806 3807 static int 3808 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) 3809 { 3810 struct sglist_seg segs[3]; 3811 struct sglist sg; 3812 struct { 3813 struct virtio_net_ctrl_hdr hdr __aligned(2); 3814 uint8_t pad1; 3815 uint16_t tag __aligned(2); 3816 uint8_t pad2; 3817 uint8_t ack; 3818 } s; 3819 int error; 3820 3821 error = 0; 3822 MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER); 3823 3824 s.hdr.class = VIRTIO_NET_CTRL_VLAN; 3825 s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; 3826 s.tag = vtnet_gtoh16(sc, tag); 3827 s.ack = VIRTIO_NET_ERR; 3828 3829 sglist_init(&sg, nitems(segs), segs); 3830 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3831 error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); 3832 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3833 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3834 3835 if (error == 0) 3836 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3837 3838 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3839 } 3840 3841 static void 3842 vtnet_rx_filter_vlan(struct vtnet_softc *sc) 3843 { 3844 int i, bit; 3845 uint32_t w; 3846 uint16_t tag; 3847 3848 MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER); 3849 VTNET_CORE_LOCK_ASSERT(sc); 3850 3851 /* Enable the filter for each configured VLAN. */ 3852 for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { 3853 w = sc->vtnet_vlan_filter[i]; 3854 3855 while ((bit = ffs(w) - 1) != -1) { 3856 w &= ~(1 << bit); 3857 tag = sizeof(w) * CHAR_BIT * i + bit; 3858 3859 if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) { 3860 device_printf(sc->vtnet_dev, 3861 "cannot enable VLAN %d filter\n", tag); 3862 } 3863 } 3864 } 3865 } 3866 3867 static void 3868 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) 3869 { 3870 struct ifnet *ifp; 3871 int idx, bit; 3872 3873 ifp = sc->vtnet_ifp; 3874 idx = (tag >> 5) & 0x7F; 3875 bit = tag & 0x1F; 3876 3877 if (tag == 0 || tag > 4095) 3878 return; 3879 3880 VTNET_CORE_LOCK(sc); 3881 3882 if (add) 3883 sc->vtnet_vlan_filter[idx] |= (1 << bit); 3884 else 3885 sc->vtnet_vlan_filter[idx] &= ~(1 << bit); 3886 3887 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER && 3888 ifp->if_drv_flags & IFF_DRV_RUNNING && 3889 vtnet_exec_vlan_filter(sc, add, tag) != 0) { 3890 device_printf(sc->vtnet_dev, 3891 "cannot %s VLAN %d %s the host filter table\n", 3892 add ? "add" : "remove", tag, add ? 
"to" : "from"); 3893 } 3894 3895 VTNET_CORE_UNLOCK(sc); 3896 } 3897 3898 static void 3899 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag) 3900 { 3901 3902 if (ifp->if_softc != arg) 3903 return; 3904 3905 vtnet_update_vlan_filter(arg, 1, tag); 3906 } 3907 3908 static void 3909 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag) 3910 { 3911 3912 if (ifp->if_softc != arg) 3913 return; 3914 3915 vtnet_update_vlan_filter(arg, 0, tag); 3916 } 3917 3918 static void 3919 vtnet_update_speed_duplex(struct vtnet_softc *sc) 3920 { 3921 struct ifnet *ifp; 3922 uint32_t speed; 3923 3924 ifp = sc->vtnet_ifp; 3925 3926 if ((sc->vtnet_features & VIRTIO_NET_F_SPEED_DUPLEX) == 0) 3927 return; 3928 3929 /* BMV: Ignore duplex. */ 3930 speed = virtio_read_dev_config_4(sc->vtnet_dev, 3931 offsetof(struct virtio_net_config, speed)); 3932 if (speed != UINT32_MAX) 3933 ifp->if_baudrate = IF_Mbps(speed); 3934 } 3935 3936 static int 3937 vtnet_is_link_up(struct vtnet_softc *sc) 3938 { 3939 uint16_t status; 3940 3941 if ((sc->vtnet_features & VIRTIO_NET_F_STATUS) == 0) 3942 return (1); 3943 3944 status = virtio_read_dev_config_2(sc->vtnet_dev, 3945 offsetof(struct virtio_net_config, status)); 3946 3947 return ((status & VIRTIO_NET_S_LINK_UP) != 0); 3948 } 3949 3950 static void 3951 vtnet_update_link_status(struct vtnet_softc *sc) 3952 { 3953 struct ifnet *ifp; 3954 int link; 3955 3956 ifp = sc->vtnet_ifp; 3957 VTNET_CORE_LOCK_ASSERT(sc); 3958 link = vtnet_is_link_up(sc); 3959 3960 /* Notify if the link status has changed. */ 3961 if (link != 0 && sc->vtnet_link_active == 0) { 3962 vtnet_update_speed_duplex(sc); 3963 sc->vtnet_link_active = 1; 3964 if_link_state_change(ifp, LINK_STATE_UP); 3965 } else if (link == 0 && sc->vtnet_link_active != 0) { 3966 sc->vtnet_link_active = 0; 3967 if_link_state_change(ifp, LINK_STATE_DOWN); 3968 } 3969 } 3970 3971 static int 3972 vtnet_ifmedia_upd(struct ifnet *ifp __unused) 3973 { 3974 return (EOPNOTSUPP); 3975 } 3976 3977 static void 3978 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 3979 { 3980 struct vtnet_softc *sc; 3981 3982 sc = ifp->if_softc; 3983 3984 ifmr->ifm_status = IFM_AVALID; 3985 ifmr->ifm_active = IFM_ETHER; 3986 3987 VTNET_CORE_LOCK(sc); 3988 if (vtnet_is_link_up(sc) != 0) { 3989 ifmr->ifm_status |= IFM_ACTIVE; 3990 ifmr->ifm_active |= IFM_10G_T | IFM_FDX; 3991 } else 3992 ifmr->ifm_active |= IFM_NONE; 3993 VTNET_CORE_UNLOCK(sc); 3994 } 3995 3996 static void 3997 vtnet_get_macaddr(struct vtnet_softc *sc) 3998 { 3999 4000 if (sc->vtnet_flags & VTNET_FLAG_MAC) { 4001 virtio_read_device_config_array(sc->vtnet_dev, 4002 offsetof(struct virtio_net_config, mac), 4003 &sc->vtnet_hwaddr[0], sizeof(uint8_t), ETHER_ADDR_LEN); 4004 } else { 4005 /* Generate a random locally administered unicast address. */ 4006 sc->vtnet_hwaddr[0] = 0xB2; 4007 arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); 4008 } 4009 } 4010 4011 static void 4012 vtnet_set_macaddr(struct vtnet_softc *sc) 4013 { 4014 device_t dev; 4015 int error; 4016 4017 dev = sc->vtnet_dev; 4018 4019 if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { 4020 error = vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr); 4021 if (error) 4022 device_printf(dev, "unable to set MAC address\n"); 4023 return; 4024 } 4025 4026 /* MAC in config is read-only in modern VirtIO. 
*/ 4027 if (!vtnet_modern(sc) && sc->vtnet_flags & VTNET_FLAG_MAC) { 4028 for (int i = 0; i < ETHER_ADDR_LEN; i++) { 4029 virtio_write_dev_config_1(dev, 4030 offsetof(struct virtio_net_config, mac) + i, 4031 sc->vtnet_hwaddr[i]); 4032 } 4033 } 4034 } 4035 4036 static void 4037 vtnet_attached_set_macaddr(struct vtnet_softc *sc) 4038 { 4039 4040 /* Assign MAC address if it was generated. */ 4041 if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) 4042 vtnet_set_macaddr(sc); 4043 } 4044 4045 static void 4046 vtnet_vlan_tag_remove(struct mbuf *m) 4047 { 4048 struct ether_vlan_header *evh; 4049 4050 evh = mtod(m, struct ether_vlan_header *); 4051 m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); 4052 m->m_flags |= M_VLANTAG; 4053 4054 /* Strip the 802.1Q header. */ 4055 bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, 4056 ETHER_HDR_LEN - ETHER_TYPE_LEN); 4057 m_adj(m, ETHER_VLAN_ENCAP_LEN); 4058 } 4059 4060 static void 4061 vtnet_set_rx_process_limit(struct vtnet_softc *sc) 4062 { 4063 int limit; 4064 4065 limit = vtnet_tunable_int(sc, "rx_process_limit", 4066 vtnet_rx_process_limit); 4067 if (limit < 0) 4068 limit = INT_MAX; 4069 sc->vtnet_rx_process_limit = limit; 4070 } 4071 4072 static void 4073 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, 4074 struct sysctl_oid_list *child, struct vtnet_rxq *rxq) 4075 { 4076 struct sysctl_oid *node; 4077 struct sysctl_oid_list *list; 4078 struct vtnet_rxq_stats *stats; 4079 char namebuf[16]; 4080 4081 snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id); 4082 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 4083 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue"); 4084 list = SYSCTL_CHILDREN(node); 4085 4086 stats = &rxq->vtnrx_stats; 4087 4088 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, 4089 &stats->vrxs_ipackets, "Receive packets"); 4090 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, 4091 &stats->vrxs_ibytes, "Receive bytes"); 4092 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, 4093 &stats->vrxs_iqdrops, "Receive drops"); 4094 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, 4095 &stats->vrxs_ierrors, "Receive errors"); 4096 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, 4097 &stats->vrxs_csum, "Receive checksum offloaded"); 4098 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, 4099 &stats->vrxs_csum_failed, "Receive checksum offload failed"); 4100 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro", CTLFLAG_RD, 4101 &stats->vrxs_host_lro, "Receive host segmentation offloaded"); 4102 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, 4103 &stats->vrxs_rescheduled, 4104 "Receive interrupt handler rescheduled"); 4105 } 4106 4107 static void 4108 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx, 4109 struct sysctl_oid_list *child, struct vtnet_txq *txq) 4110 { 4111 struct sysctl_oid *node; 4112 struct sysctl_oid_list *list; 4113 struct vtnet_txq_stats *stats; 4114 char namebuf[16]; 4115 4116 snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id); 4117 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 4118 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue"); 4119 list = SYSCTL_CHILDREN(node); 4120 4121 stats = &txq->vtntx_stats; 4122 4123 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, 4124 &stats->vtxs_opackets, "Transmit packets"); 4125 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, 4126 &stats->vtxs_obytes, "Transmit bytes"); 4127 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, 4128 
&stats->vtxs_omcasts, "Transmit multicasts"); 4129 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, 4130 &stats->vtxs_csum, "Transmit checksum offloaded"); 4131 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, 4132 &stats->vtxs_tso, "Transmit TCP segmentation offloaded"); 4133 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, 4134 &stats->vtxs_rescheduled, 4135 "Transmit interrupt handler rescheduled"); 4136 } 4137 4138 static void 4139 vtnet_setup_queue_sysctl(struct vtnet_softc *sc) 4140 { 4141 device_t dev; 4142 struct sysctl_ctx_list *ctx; 4143 struct sysctl_oid *tree; 4144 struct sysctl_oid_list *child; 4145 int i; 4146 4147 dev = sc->vtnet_dev; 4148 ctx = device_get_sysctl_ctx(dev); 4149 tree = device_get_sysctl_tree(dev); 4150 child = SYSCTL_CHILDREN(tree); 4151 4152 for (i = 0; i < sc->vtnet_req_vq_pairs; i++) { 4153 vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]); 4154 vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); 4155 } 4156 } 4157 4158 static void 4159 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, 4160 struct sysctl_oid_list *child, struct vtnet_softc *sc) 4161 { 4162 struct vtnet_statistics *stats; 4163 struct vtnet_rxq_stats rxaccum; 4164 struct vtnet_txq_stats txaccum; 4165 4166 vtnet_accum_stats(sc, &rxaccum, &txaccum); 4167 4168 stats = &sc->vtnet_stats; 4169 stats->rx_csum_offloaded = rxaccum.vrxs_csum; 4170 stats->rx_csum_failed = rxaccum.vrxs_csum_failed; 4171 stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled; 4172 stats->tx_csum_offloaded = txaccum.vtxs_csum; 4173 stats->tx_tso_offloaded = txaccum.vtxs_tso; 4174 stats->tx_task_rescheduled = txaccum.vtxs_rescheduled; 4175 4176 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed", 4177 CTLFLAG_RD, &stats->mbuf_alloc_failed, 4178 "Mbuf cluster allocation failures"); 4179 4180 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large", 4181 CTLFLAG_RD, &stats->rx_frame_too_large, 4182 "Received frame larger than the mbuf chain"); 4183 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed", 4184 CTLFLAG_RD, &stats->rx_enq_replacement_failed, 4185 "Enqueuing the replacement receive mbuf failed"); 4186 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed", 4187 CTLFLAG_RD, &stats->rx_mergeable_failed, 4188 "Mergeable buffers receive failures"); 4189 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", 4190 CTLFLAG_RD, &stats->rx_csum_bad_ethtype, 4191 "Received checksum offloaded buffer with unsupported " 4192 "Ethernet type"); 4193 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", 4194 CTLFLAG_RD, &stats->rx_csum_bad_ipproto, 4195 "Received checksum offloaded buffer with incorrect IP protocol"); 4196 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset", 4197 CTLFLAG_RD, &stats->rx_csum_bad_offset, 4198 "Received checksum offloaded buffer with incorrect offset"); 4199 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto", 4200 CTLFLAG_RD, &stats->rx_csum_bad_proto, 4201 "Received checksum offloaded buffer with incorrect protocol"); 4202 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed", 4203 CTLFLAG_RD, &stats->rx_csum_failed, 4204 "Received buffer checksum offload failed"); 4205 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded", 4206 CTLFLAG_RD, &stats->rx_csum_offloaded, 4207 "Received buffer checksum offload succeeded"); 4208 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled", 4209 CTLFLAG_RD, &stats->rx_task_rescheduled, 4210 "Times the receive interrupt task rescheduled itself"); 
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype",
	    CTLFLAG_RD, &stats->tx_csum_unknown_ethtype,
	    "Aborted transmit of checksum offloaded buffer with unknown "
	    "Ethernet type");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch",
	    CTLFLAG_RD, &stats->tx_csum_proto_mismatch,
	    "Aborted transmit of checksum offloaded buffer because of "
	    "mismatched protocols");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
	    CTLFLAG_RD, &stats->tx_tso_not_tcp,
	    "Aborted transmit of TSO buffer with non-TCP protocol");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum",
	    CTLFLAG_RD, &stats->tx_tso_without_csum,
	    "Aborted transmit of TSO buffer without TCP checksum offload");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
	    CTLFLAG_RD, &stats->tx_defragged,
	    "Transmit mbufs defragged");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
	    CTLFLAG_RD, &stats->tx_defrag_failed,
	    "Aborted transmit of buffer because defrag failed");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
	    CTLFLAG_RD, &stats->tx_csum_offloaded,
	    "Offloaded checksum of transmitted buffer");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
	    CTLFLAG_RD, &stats->tx_tso_offloaded,
	    "Segmentation offload of transmitted buffer");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
	    CTLFLAG_RD, &stats->tx_task_rescheduled,
	    "Times the transmit interrupt task rescheduled itself");
}

static void
vtnet_setup_sysctl(struct vtnet_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vtnet_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
	    CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
	    "Maximum number of supported virtqueue pairs");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "req_vq_pairs",
	    CTLFLAG_RD, &sc->vtnet_req_vq_pairs, 0,
	    "Number of requested virtqueue pairs");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
	    CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
	    "Number of active virtqueue pairs");

	vtnet_setup_stat_sysctl(ctx, child, sc);
}

static void
vtnet_load_tunables(struct vtnet_softc *sc)
{

	sc->vtnet_lro_entry_count = vtnet_tunable_int(sc,
	    "lro_entry_count", vtnet_lro_entry_count);
	if (sc->vtnet_lro_entry_count < TCP_LRO_ENTRIES)
		sc->vtnet_lro_entry_count = TCP_LRO_ENTRIES;

	sc->vtnet_lro_mbufq_depth = vtnet_tunable_int(sc,
	    "lro_mbufq_depth", vtnet_lro_mbufq_depth);
}

static int
vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
{

	return (virtqueue_enable_intr(rxq->vtnrx_vq));
}

static void
vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
{

	virtqueue_disable_intr(rxq->vtnrx_vq);
}

static int
vtnet_txq_enable_intr(struct vtnet_txq *txq)
{
	struct virtqueue *vq;

	vq = txq->vtntx_vq;

	if (vtnet_txq_below_threshold(txq) != 0)
		return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));

	/*
	 * The free count is above our threshold. Keep the Tx interrupt
	 * disabled until the queue is fuller.
4309 */ 4310 return (0); 4311 } 4312 4313 static void 4314 vtnet_txq_disable_intr(struct vtnet_txq *txq) 4315 { 4316 4317 virtqueue_disable_intr(txq->vtntx_vq); 4318 } 4319 4320 static void 4321 vtnet_enable_rx_interrupts(struct vtnet_softc *sc) 4322 { 4323 struct vtnet_rxq *rxq; 4324 int i; 4325 4326 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 4327 rxq = &sc->vtnet_rxqs[i]; 4328 if (vtnet_rxq_enable_intr(rxq) != 0) 4329 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 4330 } 4331 } 4332 4333 static void 4334 vtnet_enable_tx_interrupts(struct vtnet_softc *sc) 4335 { 4336 int i; 4337 4338 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 4339 vtnet_txq_enable_intr(&sc->vtnet_txqs[i]); 4340 } 4341 4342 static void 4343 vtnet_enable_interrupts(struct vtnet_softc *sc) 4344 { 4345 4346 vtnet_enable_rx_interrupts(sc); 4347 vtnet_enable_tx_interrupts(sc); 4348 } 4349 4350 static void 4351 vtnet_disable_rx_interrupts(struct vtnet_softc *sc) 4352 { 4353 int i; 4354 4355 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) 4356 vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); 4357 } 4358 4359 static void 4360 vtnet_disable_tx_interrupts(struct vtnet_softc *sc) 4361 { 4362 int i; 4363 4364 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) 4365 vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); 4366 } 4367 4368 static void 4369 vtnet_disable_interrupts(struct vtnet_softc *sc) 4370 { 4371 4372 vtnet_disable_rx_interrupts(sc); 4373 vtnet_disable_tx_interrupts(sc); 4374 } 4375 4376 static int 4377 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) 4378 { 4379 char path[64]; 4380 4381 snprintf(path, sizeof(path), 4382 "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob); 4383 TUNABLE_INT_FETCH(path, &def); 4384 4385 return (def); 4386 } 4387 4388 #ifdef DEBUGNET 4389 static void 4390 vtnet_debugnet_init(struct ifnet *ifp, int *nrxr, int *ncl, int *clsize) 4391 { 4392 struct vtnet_softc *sc; 4393 4394 sc = if_getsoftc(ifp); 4395 4396 VTNET_CORE_LOCK(sc); 4397 *nrxr = sc->vtnet_req_vq_pairs; 4398 *ncl = DEBUGNET_MAX_IN_FLIGHT; 4399 *clsize = sc->vtnet_rx_clustersz; 4400 VTNET_CORE_UNLOCK(sc); 4401 } 4402 4403 static void 4404 vtnet_debugnet_event(struct ifnet *ifp, enum debugnet_ev event) 4405 { 4406 struct vtnet_softc *sc; 4407 static bool sw_lro_enabled = false; 4408 4409 /* 4410 * Disable software LRO, since it would require entering the network 4411 * epoch when calling vtnet_txq_eof() in vtnet_debugnet_poll(). 
4412 */ 4413 sc = if_getsoftc(ifp); 4414 switch (event) { 4415 case DEBUGNET_START: 4416 sw_lro_enabled = (sc->vtnet_flags & VTNET_FLAG_SW_LRO) != 0; 4417 if (sw_lro_enabled) 4418 sc->vtnet_flags &= ~VTNET_FLAG_SW_LRO; 4419 break; 4420 case DEBUGNET_END: 4421 if (sw_lro_enabled) 4422 sc->vtnet_flags |= VTNET_FLAG_SW_LRO; 4423 break; 4424 } 4425 } 4426 4427 static int 4428 vtnet_debugnet_transmit(struct ifnet *ifp, struct mbuf *m) 4429 { 4430 struct vtnet_softc *sc; 4431 struct vtnet_txq *txq; 4432 int error; 4433 4434 sc = if_getsoftc(ifp); 4435 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 4436 IFF_DRV_RUNNING) 4437 return (EBUSY); 4438 4439 txq = &sc->vtnet_txqs[0]; 4440 error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE); 4441 if (error == 0) 4442 (void)vtnet_txq_notify(txq); 4443 return (error); 4444 } 4445 4446 static int 4447 vtnet_debugnet_poll(struct ifnet *ifp, int count) 4448 { 4449 struct vtnet_softc *sc; 4450 int i; 4451 4452 sc = if_getsoftc(ifp); 4453 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 4454 IFF_DRV_RUNNING) 4455 return (EBUSY); 4456 4457 (void)vtnet_txq_eof(&sc->vtnet_txqs[0]); 4458 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 4459 (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]); 4460 return (0); 4461 } 4462 #endif /* DEBUGNET */ 4463