/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO network devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sockio.h>
#include <sys/malloc.h>
#include <sys/mbuf.h>
#include <sys/module.h>
#include <sys/msan.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/random.h>
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/taskqueue.h>
#include <sys/smp.h>
#include <machine/smp.h>

#include <vm/uma.h>

#include <net/debugnet.h>
#include <net/ethernet.h>
#include <net/pfil.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_media.h>
#include <net/if_vlan_var.h>

#include <net/bpf.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/tcp_lro.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/network/virtio_net.h>
#include <dev/virtio/network/if_vtnetvar.h>
#include "virtio_if.h"

#include "opt_inet.h"
#include "opt_inet6.h"

#if defined(INET) || defined(INET6)
#include <machine/in_cksum.h>
#endif

static int	vtnet_modevent(module_t, int, void *);

static int	vtnet_probe(device_t);
static int	vtnet_attach(device_t);
static int	vtnet_detach(device_t);
static int	vtnet_suspend(device_t);
static int	vtnet_resume(device_t);
static int	vtnet_shutdown(device_t);
static int	vtnet_attach_completed(device_t);
static int	vtnet_config_change(device_t);

static int	vtnet_negotiate_features(struct vtnet_softc *);
static int	vtnet_setup_features(struct vtnet_softc
		    *);
static int	vtnet_init_rxq(struct vtnet_softc *, int);
static int	vtnet_init_txq(struct vtnet_softc *, int);
static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
static void	vtnet_free_rx_filters(struct vtnet_softc *);
static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
static int	vtnet_alloc_interface(struct vtnet_softc *);
static int	vtnet_setup_interface(struct vtnet_softc *);
static int	vtnet_ioctl_mtu(struct vtnet_softc *, u_int);
static int	vtnet_ioctl_ifflags(struct vtnet_softc *);
static int	vtnet_ioctl_multi(struct vtnet_softc *);
static int	vtnet_ioctl_ifcap(struct vtnet_softc *, struct ifreq *);
static int	vtnet_ioctl(if_t, u_long, caddr_t);
static uint64_t	vtnet_get_counter(if_t, ift_counter);

static int	vtnet_rxq_populate(struct vtnet_rxq *);
static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
static struct mbuf *
		vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
static int	vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *,
		    struct mbuf *, int);
static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
static int	vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *,
		    uint16_t, int, struct virtio_net_hdr *);
static int	vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *,
		    uint16_t, int, struct virtio_net_hdr *);
static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
		    struct virtio_net_hdr *);
static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
static void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
static int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
static void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
		    struct virtio_net_hdr *);
static int	vtnet_rxq_eof(struct vtnet_rxq *);
static void	vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries);
static void	vtnet_rx_vq_intr(void *);
static void	vtnet_rxq_tq_intr(void *, int);

static int	vtnet_txq_intr_threshold(struct vtnet_txq *);
static int	vtnet_txq_below_threshold(struct vtnet_txq *);
static int	vtnet_txq_notify(struct vtnet_txq *);
static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
		    int *, int *, int *);
static int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
		    int, struct virtio_net_hdr *);
static struct mbuf *
		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
		    struct virtio_net_hdr *);
static int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
		    struct vtnet_tx_header *);
static int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int);
#ifdef VTNET_LEGACY_TX
static void	vtnet_start_locked(struct vtnet_txq *, if_t);
static void	vtnet_start(if_t);
#else
static int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
static int	vtnet_txq_mq_start(if_t, struct mbuf *);
static void	vtnet_txq_tq_deferred(void *, int);
#endif
static void	vtnet_txq_start(struct vtnet_txq *);
static void	vtnet_txq_tq_intr(void *, int);
static int	vtnet_txq_eof(struct vtnet_txq *);
static void	vtnet_tx_vq_intr(void *);
static void	vtnet_tx_start_all(struct vtnet_softc *);
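/*
 * Note: the driver carries two transmit paths, selected at compile time by
 * VTNET_LEGACY_TX: the legacy if_start path (vtnet_start) and the default
 * multiqueue if_transmit path (vtnet_txq_mq_start), which uses a per-queue
 * buf_ring and a deferred-start task.
 */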

#ifndef VTNET_LEGACY_TX
static void	vtnet_qflush(if_t);
#endif

static int	vtnet_watchdog(struct vtnet_txq *);
static void	vtnet_accum_stats(struct vtnet_softc *,
		    struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
static void	vtnet_tick(void *);

static void	vtnet_start_taskqueues(struct vtnet_softc *);
static void	vtnet_free_taskqueues(struct vtnet_softc *);
static void	vtnet_drain_taskqueues(struct vtnet_softc *);

static void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
static void	vtnet_stop_rendezvous(struct vtnet_softc *);
static void	vtnet_stop(struct vtnet_softc *);
static int	vtnet_virtio_reinit(struct vtnet_softc *);
static void	vtnet_init_rx_filters(struct vtnet_softc *);
static int	vtnet_init_rx_queues(struct vtnet_softc *);
static int	vtnet_init_tx_queues(struct vtnet_softc *);
static int	vtnet_init_rxtx_queues(struct vtnet_softc *);
static void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
static void	vtnet_update_rx_offloads(struct vtnet_softc *);
static int	vtnet_reinit(struct vtnet_softc *);
static void	vtnet_init_locked(struct vtnet_softc *, int);
static void	vtnet_init(void *);

static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
		    struct sglist *, int, int);
static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
static int	vtnet_ctrl_guest_offloads(struct vtnet_softc *, uint64_t);
static int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, uint8_t, bool);
static int	vtnet_set_promisc(struct vtnet_softc *, bool);
static int	vtnet_set_allmulti(struct vtnet_softc *, bool);
static void	vtnet_rx_filter(struct vtnet_softc *);
static void	vtnet_rx_filter_mac(struct vtnet_softc *);
static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
static void	vtnet_register_vlan(void *, if_t, uint16_t);
static void	vtnet_unregister_vlan(void *, if_t, uint16_t);

static void	vtnet_update_speed_duplex(struct vtnet_softc *);
static int	vtnet_is_link_up(struct vtnet_softc *);
static void	vtnet_update_link_status(struct vtnet_softc *);
static int	vtnet_ifmedia_upd(if_t);
static void	vtnet_ifmedia_sts(if_t, struct ifmediareq *);
static void	vtnet_get_macaddr(struct vtnet_softc *);
static void	vtnet_set_macaddr(struct vtnet_softc *);
static void	vtnet_attached_set_macaddr(struct vtnet_softc *);
static void	vtnet_vlan_tag_remove(struct mbuf *);
static void	vtnet_set_rx_process_limit(struct vtnet_softc *);

static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
		    struct sysctl_oid_list *, struct vtnet_rxq *);
static void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
		    struct sysctl_oid_list *, struct vtnet_txq *);
static void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
static void	vtnet_load_tunables(struct vtnet_softc *);
static void	vtnet_setup_sysctl(struct vtnet_softc *);

static int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
static void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
static int	vtnet_txq_enable_intr(struct vtnet_txq *);
static void	vtnet_txq_disable_intr(struct vtnet_txq *);
static void
		vtnet_enable_rx_interrupts(struct vtnet_softc *);
static void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
static void	vtnet_enable_interrupts(struct vtnet_softc *);
static void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
static void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
static void	vtnet_disable_interrupts(struct vtnet_softc *);

static int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);

DEBUGNET_DEFINE(vtnet);

#define vtnet_htog16(_sc, _val)	virtio_htog16(vtnet_modern(_sc), _val)
#define vtnet_htog32(_sc, _val)	virtio_htog32(vtnet_modern(_sc), _val)
#define vtnet_htog64(_sc, _val)	virtio_htog64(vtnet_modern(_sc), _val)
#define vtnet_gtoh16(_sc, _val)	virtio_gtoh16(vtnet_modern(_sc), _val)
#define vtnet_gtoh32(_sc, _val)	virtio_gtoh32(vtnet_modern(_sc), _val)
#define vtnet_gtoh64(_sc, _val)	virtio_gtoh64(vtnet_modern(_sc), _val)

/* Tunables. */
static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0,
    "VirtIO Net driver parameters");

static int vtnet_csum_disable = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN,
    &vtnet_csum_disable, 0, "Disables receive and send checksum offload");

static int vtnet_fixup_needs_csum = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, fixup_needs_csum, CTLFLAG_RDTUN,
    &vtnet_fixup_needs_csum, 0,
    "Calculate valid checksum for NEEDS_CSUM packets");

static int vtnet_tso_disable = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN,
    &vtnet_tso_disable, 0, "Disables TSO");

static int vtnet_lro_disable = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN,
    &vtnet_lro_disable, 0, "Disables hardware LRO");

static int vtnet_mq_disable = 0;
SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN,
    &vtnet_mq_disable, 0, "Disables multiqueue support");

static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS;
SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN,
    &vtnet_mq_max_pairs, 0, "Maximum number of multiqueue pairs");

static int vtnet_tso_maxlen = IP_MAXPACKET;
SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN,
    &vtnet_tso_maxlen, 0, "TSO burst limit");

static int vtnet_rx_process_limit = 1024;
SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN,
    &vtnet_rx_process_limit, 0,
    "Number of RX segments processed in one pass");

static int vtnet_lro_entry_count = 128;
SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN,
    &vtnet_lro_entry_count, 0, "Software LRO entry count");

/*
 * Depth of the software LRO mbuf queue; a nonzero depth enables sorted
 * (queued) LRO.
 */
static int vtnet_lro_mbufq_depth = 0;
SYSCTL_UINT(_hw_vtnet, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN,
    &vtnet_lro_mbufq_depth, 0, "Depth of software LRO mbuf queue");
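/*
 * Illustrative only: the tunables above are CTLFLAG_RDTUN, so they are
 * typically set from /boot/loader.conf before the driver attaches, e.g.:
 *
 *	hw.vtnet.csum_disable=1
 *	hw.vtnet.mq_max_pairs=4
 *	hw.vtnet.lro_mbufq_depth=128
 *
 * The values shown are examples, not recommendations.
 */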
static uma_zone_t vtnet_tx_header_zone;

static struct virtio_feature_desc vtnet_feature_desc[] = {
	{ VIRTIO_NET_F_CSUM,			"TxChecksum"		},
	{ VIRTIO_NET_F_GUEST_CSUM,		"RxChecksum"		},
	{ VIRTIO_NET_F_CTRL_GUEST_OFFLOADS,	"CtrlRxOffloads"	},
	{ VIRTIO_NET_F_MAC,			"MAC"			},
	{ VIRTIO_NET_F_GSO,			"TxGSO"			},
	{ VIRTIO_NET_F_GUEST_TSO4,		"RxLROv4"		},
	{ VIRTIO_NET_F_GUEST_TSO6,		"RxLROv6"		},
	{ VIRTIO_NET_F_GUEST_ECN,		"RxLROECN"		},
	{ VIRTIO_NET_F_GUEST_UFO,		"RxUFO"			},
	{ VIRTIO_NET_F_HOST_TSO4,		"TxTSOv4"		},
	{ VIRTIO_NET_F_HOST_TSO6,		"TxTSOv6"		},
	{ VIRTIO_NET_F_HOST_ECN,		"TxTSOECN"		},
	{ VIRTIO_NET_F_HOST_UFO,		"TxUFO"			},
	{ VIRTIO_NET_F_MRG_RXBUF,		"MrgRxBuf"		},
	{ VIRTIO_NET_F_STATUS,			"Status"		},
	{ VIRTIO_NET_F_CTRL_VQ,			"CtrlVq"		},
	{ VIRTIO_NET_F_CTRL_RX,			"CtrlRxMode"		},
	{ VIRTIO_NET_F_CTRL_VLAN,		"CtrlVLANFilter"	},
	{ VIRTIO_NET_F_CTRL_RX_EXTRA,		"CtrlRxModeExtra"	},
	{ VIRTIO_NET_F_GUEST_ANNOUNCE,		"GuestAnnounce"		},
	{ VIRTIO_NET_F_MQ,			"Multiqueue"		},
	{ VIRTIO_NET_F_CTRL_MAC_ADDR,		"CtrlMacAddr"		},
	{ VIRTIO_NET_F_SPEED_DUPLEX,		"SpeedDuplex"		},

	{ 0, NULL }
};

static device_method_t vtnet_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,			vtnet_probe),
	DEVMETHOD(device_attach,		vtnet_attach),
	DEVMETHOD(device_detach,		vtnet_detach),
	DEVMETHOD(device_suspend,		vtnet_suspend),
	DEVMETHOD(device_resume,		vtnet_resume),
	DEVMETHOD(device_shutdown,		vtnet_shutdown),

	/* VirtIO methods. */
	DEVMETHOD(virtio_attach_completed,	vtnet_attach_completed),
	DEVMETHOD(virtio_config_change,		vtnet_config_change),

	DEVMETHOD_END
};

#ifdef DEV_NETMAP
#include <dev/netmap/if_vtnet_netmap.h>
#endif

static driver_t vtnet_driver = {
	.name = "vtnet",
	.methods = vtnet_methods,
	.size = sizeof(struct vtnet_softc)
};
VIRTIO_DRIVER_MODULE(vtnet, vtnet_driver, vtnet_modevent, NULL);
MODULE_VERSION(vtnet, 1);
MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(vtnet, netmap, 1, 1, 1);
#endif

VIRTIO_SIMPLE_PNPINFO(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter");

static int
vtnet_modevent(module_t mod __unused, int type, void *unused __unused)
{
	int error = 0;
	static int loaded = 0;

	switch (type) {
	case MOD_LOAD:
		if (loaded++ == 0) {
			vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
				sizeof(struct vtnet_tx_header),
				NULL, NULL, NULL, NULL, 0, 0);
#ifdef DEBUGNET
			/*
			 * We need to allocate from this zone in the transmit
			 * path, so ensure that we have at least one item per
			 * header available.
			 * XXX add a separate zone like we do for mbufs?
			 * otherwise we may alloc
			 * buckets
			 */
			uma_zone_reserve(vtnet_tx_header_zone,
			    DEBUGNET_MAX_IN_FLIGHT * 2);
			uma_prealloc(vtnet_tx_header_zone,
			    DEBUGNET_MAX_IN_FLIGHT * 2);
#endif
		}
		break;
	case MOD_QUIESCE:
		if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
			error = EBUSY;
		break;
	case MOD_UNLOAD:
		if (--loaded == 0) {
			uma_zdestroy(vtnet_tx_header_zone);
			vtnet_tx_header_zone = NULL;
		}
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

static int
vtnet_probe(device_t dev)
{
	return (VIRTIO_SIMPLE_PROBE(dev, vtnet));
}

static int
vtnet_attach(device_t dev)
{
	struct vtnet_softc *sc;
	int error;

	sc = device_get_softc(dev);
	sc->vtnet_dev = dev;
	virtio_set_feature_desc(dev, vtnet_feature_desc);

	VTNET_CORE_LOCK_INIT(sc);
	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);
	vtnet_load_tunables(sc);

	error = vtnet_alloc_interface(sc);
	if (error) {
		device_printf(dev, "cannot allocate interface\n");
		goto fail;
	}

	vtnet_setup_sysctl(sc);

	error = vtnet_setup_features(sc);
	if (error) {
		device_printf(dev, "cannot setup features\n");
		goto fail;
	}

	error = vtnet_alloc_rx_filters(sc);
	if (error) {
		device_printf(dev, "cannot allocate Rx filters\n");
		goto fail;
	}

	error = vtnet_alloc_rxtx_queues(sc);
	if (error) {
		device_printf(dev, "cannot allocate queues\n");
		goto fail;
	}

	error = vtnet_alloc_virtqueues(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueues\n");
		goto fail;
	}

	error = vtnet_setup_interface(sc);
	if (error) {
		device_printf(dev, "cannot setup interface\n");
		goto fail;
	}

	error = virtio_setup_intr(dev, INTR_TYPE_NET);
	if (error) {
		device_printf(dev, "cannot setup interrupts\n");
		ether_ifdetach(sc->vtnet_ifp);
		goto fail;
	}

#ifdef DEV_NETMAP
	vtnet_netmap_attach(sc);
#endif
	vtnet_start_taskqueues(sc);

fail:
	if (error)
		vtnet_detach(dev);

	return (error);
}

static int
vtnet_detach(device_t dev)
{
	struct vtnet_softc *sc;
	if_t ifp;

	sc = device_get_softc(dev);
	ifp = sc->vtnet_ifp;

	if (device_is_attached(dev)) {
		VTNET_CORE_LOCK(sc);
		vtnet_stop(sc);
		VTNET_CORE_UNLOCK(sc);

		callout_drain(&sc->vtnet_tick_ch);
		vtnet_drain_taskqueues(sc);

		ether_ifdetach(ifp);
	}

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif

	if (sc->vtnet_pfil != NULL) {
		pfil_head_unregister(sc->vtnet_pfil);
		sc->vtnet_pfil = NULL;
	}

	vtnet_free_taskqueues(sc);

	if (sc->vtnet_vlan_attach != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
		sc->vtnet_vlan_attach = NULL;
	}
	if (sc->vtnet_vlan_detach != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
		sc->vtnet_vlan_detach = NULL;
	}

	ifmedia_removeall(&sc->vtnet_media);

	if (ifp != NULL) {
		if_free(ifp);
		sc->vtnet_ifp = NULL;
	}

	vtnet_free_rxtx_queues(sc);
	vtnet_free_rx_filters(sc);

	if (sc->vtnet_ctrl_vq != NULL)
		vtnet_free_ctrl_vq(sc);

	VTNET_CORE_LOCK_DESTROY(sc);

	return (0);
}

static int
vtnet_suspend(device_t dev)
{
	struct vtnet_softc *sc;

	sc = device_get_softc(dev);

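	/*
	 * Quiesce the interface while the device is suspended;
	 * vtnet_resume() re-runs vtnet_init_locked() if the interface
	 * was up.
	 */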
	VTNET_CORE_LOCK(sc);
	vtnet_stop(sc);
	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
	VTNET_CORE_UNLOCK(sc);

	return (0);
}

static int
vtnet_resume(device_t dev)
{
	struct vtnet_softc *sc;
	if_t ifp;

	sc = device_get_softc(dev);
	ifp = sc->vtnet_ifp;

	VTNET_CORE_LOCK(sc);
	if (if_getflags(ifp) & IFF_UP)
		vtnet_init_locked(sc, 0);
	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
	VTNET_CORE_UNLOCK(sc);

	return (0);
}

static int
vtnet_shutdown(device_t dev)
{
	/*
	 * Suspend already does all of what we need to
	 * do here; we just never expect to be resumed.
	 */
	return (vtnet_suspend(dev));
}

static int
vtnet_attach_completed(device_t dev)
{
	struct vtnet_softc *sc;

	sc = device_get_softc(dev);

	VTNET_CORE_LOCK(sc);
	vtnet_attached_set_macaddr(sc);
	VTNET_CORE_UNLOCK(sc);

	return (0);
}

static int
vtnet_config_change(device_t dev)
{
	struct vtnet_softc *sc;

	sc = device_get_softc(dev);

	VTNET_CORE_LOCK(sc);
	vtnet_update_link_status(sc);
	if (sc->vtnet_link_active != 0)
		vtnet_tx_start_all(sc);
	VTNET_CORE_UNLOCK(sc);

	return (0);
}

static int
vtnet_negotiate_features(struct vtnet_softc *sc)
{
	device_t dev;
	uint64_t features, negotiated_features;
	int no_csum;

	dev = sc->vtnet_dev;
	features = virtio_bus_is_modern(dev) ? VTNET_MODERN_FEATURES :
	    VTNET_LEGACY_FEATURES;

	/*
	 * TSO and LRO are only available when their corresponding checksum
	 * offload feature is also negotiated.
	 */
	no_csum = vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable);
	if (no_csum)
		features &= ~(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM);
	if (no_csum || vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
		features &= ~VTNET_TSO_FEATURES;
	if (no_csum || vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
		features &= ~VTNET_LRO_FEATURES;

#ifndef VTNET_LEGACY_TX
	if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
		features &= ~VIRTIO_NET_F_MQ;
#else
	features &= ~VIRTIO_NET_F_MQ;
#endif

	negotiated_features = virtio_negotiate_features(dev, features);

	if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
		uint16_t mtu;

		mtu = virtio_read_dev_config_2(dev,
		    offsetof(struct virtio_net_config, mtu));
		if (mtu < VTNET_MIN_MTU /* || mtu > VTNET_MAX_MTU */) {
			device_printf(dev, "Invalid MTU value: %d. "
			    "MTU feature disabled.\n", mtu);
			features &= ~VIRTIO_NET_F_MTU;
			negotiated_features =
			    virtio_negotiate_features(dev, features);
		}
	}

	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
		uint16_t npairs;

		npairs = virtio_read_dev_config_2(dev,
		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
		if (npairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
		    npairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) {
			device_printf(dev, "Invalid max_virtqueue_pairs value: "
			    "%d. Multiqueue feature disabled.\n", npairs);
			features &= ~VIRTIO_NET_F_MQ;
			negotiated_features =
			    virtio_negotiate_features(dev, features);
		}
	}

	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
	    virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
		/*
		 * LRO without mergeable buffers requires special care.
		 * This is not ideal because every receive buffer must be
		 * large enough to hold the maximum TCP packet, the Ethernet
		 * header, and the virtio-net header. This requires up to 34
		 * descriptors with MCLBYTES clusters. If we do not have
		 * indirect descriptors, LRO is disabled since the virtqueue
		 * will not contain very many receive buffers.
		 */
		if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
			device_printf(dev,
			    "Host LRO disabled since both mergeable buffers "
			    "and indirect descriptors were not negotiated\n");
			features &= ~VTNET_LRO_FEATURES;
			negotiated_features =
			    virtio_negotiate_features(dev, features);
		} else
			sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
	}

	sc->vtnet_features = negotiated_features;
	sc->vtnet_negotiated_features = negotiated_features;

	return (virtio_finalize_features(dev));
}

static int
vtnet_setup_features(struct vtnet_softc *sc)
{
	device_t dev;
	int error;

	dev = sc->vtnet_dev;

	error = vtnet_negotiate_features(sc);
	if (error)
		return (error);

	if (virtio_with_feature(dev, VIRTIO_F_VERSION_1))
		sc->vtnet_flags |= VTNET_FLAG_MODERN;
	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
	if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
		sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;

	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
		/* This feature should always be negotiated. */
		sc->vtnet_flags |= VTNET_FLAG_MAC;
	}

	if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) {
		sc->vtnet_max_mtu = virtio_read_dev_config_2(dev,
		    offsetof(struct virtio_net_config, mtu));
	} else
		sc->vtnet_max_mtu = VTNET_MAX_MTU;

	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	} else if (vtnet_modern(sc)) {
		/* This is identical to the mergeable header. */
		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1);
	} else
		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);

	if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE;
	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG;
	else
		sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE;

	/*
	 * Favor "hardware" LRO if negotiated, but support software LRO as
	 * a fallback; there is usually little benefit (or worse) with both.
	 */
	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) == 0 &&
	    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) == 0)
		sc->vtnet_flags |= VTNET_FLAG_SW_LRO;

	if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
		sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX;
	else
		sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN;

	sc->vtnet_req_vq_pairs = 1;
	sc->vtnet_max_vq_pairs = 1;

	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;

		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;

		if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) {
			sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev,
			    offsetof(struct virtio_net_config,
			    max_virtqueue_pairs));
		}
	}

	if (sc->vtnet_max_vq_pairs > 1) {
		int req;

		/*
		 * Limit the maximum number of requested queue pairs to the
		 * number of CPUs and the configured maximum.
		 */
		req = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
		if (req < 0)
			req = 1;
		if (req == 0)
			req = mp_ncpus;
		if (req > sc->vtnet_max_vq_pairs)
			req = sc->vtnet_max_vq_pairs;
		if (req > mp_ncpus)
			req = mp_ncpus;
		if (req > 1) {
			sc->vtnet_req_vq_pairs = req;
			sc->vtnet_flags |= VTNET_FLAG_MQ;
		}
	}

	return (0);
}

static int
vtnet_init_rxq(struct vtnet_softc *sc, int id)
{
	struct vtnet_rxq *rxq;

	rxq = &sc->vtnet_rxqs[id];

	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
	    device_get_nameunit(sc->vtnet_dev), id);
	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);

	rxq->vtnrx_sc = sc;
	rxq->vtnrx_id = id;

	rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
	if (rxq->vtnrx_sg == NULL)
		return (ENOMEM);

#if defined(INET) || defined(INET6)
	if (vtnet_software_lro(sc)) {
		if (tcp_lro_init_args(&rxq->vtnrx_lro, sc->vtnet_ifp,
		    sc->vtnet_lro_entry_count, sc->vtnet_lro_mbufq_depth) != 0)
			return (ENOMEM);
	}
#endif

	NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);

	return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
}
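/*
 * Per-queue transmit setup: each txq gets its own mutex, an sglist sized to
 * vtnet_tx_nsegs, a buf_ring and deferred-start task when multiqueue transmit
 * is compiled in, and a taskqueue for completion processing.
 */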
static int
vtnet_init_txq(struct vtnet_softc *sc, int id)
{
	struct vtnet_txq *txq;

	txq = &sc->vtnet_txqs[id];

	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
	    device_get_nameunit(sc->vtnet_dev), id);
	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);

	txq->vtntx_sc = sc;
	txq->vtntx_id = id;

	txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
	if (txq->vtntx_sg == NULL)
		return (ENOMEM);

#ifndef VTNET_LEGACY_TX
	txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
	    M_NOWAIT, &txq->vtntx_mtx);
	if (txq->vtntx_br == NULL)
		return (ENOMEM);

	TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
#endif
	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
	    taskqueue_thread_enqueue, &txq->vtntx_tq);
	if (txq->vtntx_tq == NULL)
		return (ENOMEM);

	return (0);
}

static int
vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
{
	int i, npairs, error;

	npairs = sc->vtnet_max_vq_pairs;

	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
	    M_NOWAIT | M_ZERO);
	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
	    M_NOWAIT | M_ZERO);
	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
		return (ENOMEM);

	for (i = 0; i < npairs; i++) {
		error = vtnet_init_rxq(sc, i);
		if (error)
			return (error);
		error = vtnet_init_txq(sc, i);
		if (error)
			return (error);
	}

	vtnet_set_rx_process_limit(sc);
	vtnet_setup_queue_sysctl(sc);

	return (0);
}

static void
vtnet_destroy_rxq(struct vtnet_rxq *rxq)
{

	rxq->vtnrx_sc = NULL;
	rxq->vtnrx_id = -1;

#if defined(INET) || defined(INET6)
	tcp_lro_free(&rxq->vtnrx_lro);
#endif

	if (rxq->vtnrx_sg != NULL) {
		sglist_free(rxq->vtnrx_sg);
		rxq->vtnrx_sg = NULL;
	}

	if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
		mtx_destroy(&rxq->vtnrx_mtx);
}

static void
vtnet_destroy_txq(struct vtnet_txq *txq)
{

	txq->vtntx_sc = NULL;
	txq->vtntx_id = -1;

	if (txq->vtntx_sg != NULL) {
		sglist_free(txq->vtntx_sg);
		txq->vtntx_sg = NULL;
	}

#ifndef VTNET_LEGACY_TX
	if (txq->vtntx_br != NULL) {
		buf_ring_free(txq->vtntx_br, M_DEVBUF);
		txq->vtntx_br = NULL;
	}
#endif

	if (mtx_initialized(&txq->vtntx_mtx) != 0)
		mtx_destroy(&txq->vtntx_mtx);
}

static void
vtnet_free_rxtx_queues(struct vtnet_softc *sc)
{
	int i;

	if (sc->vtnet_rxqs != NULL) {
		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
			vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
		free(sc->vtnet_rxqs, M_DEVBUF);
		sc->vtnet_rxqs = NULL;
	}

	if (sc->vtnet_txqs != NULL) {
		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
			vtnet_destroy_txq(&sc->vtnet_txqs[i]);
		free(sc->vtnet_txqs, M_DEVBUF);
		sc->vtnet_txqs = NULL;
	}
}

static int
vtnet_alloc_rx_filters(struct vtnet_softc *sc)
{

	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
		    M_DEVBUF, M_NOWAIT | M_ZERO);
		if (sc->vtnet_mac_filter == NULL)
			return (ENOMEM);
	}

	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF,
		    M_NOWAIT | M_ZERO);
		if (sc->vtnet_vlan_filter == NULL)
			return (ENOMEM);
	}

	return (0);
}

static void
vtnet_free_rx_filters(struct vtnet_softc *sc)
{

	if (sc->vtnet_mac_filter != NULL) {
		free(sc->vtnet_mac_filter, M_DEVBUF);
		sc->vtnet_mac_filter = NULL;
	}

	if (sc->vtnet_vlan_filter != NULL) {
		free(sc->vtnet_vlan_filter, M_DEVBUF);
		sc->vtnet_vlan_filter = NULL;
	}
}

static int
vtnet_alloc_virtqueues(struct vtnet_softc *sc)
{
	device_t dev;
	struct vq_alloc_info *info;
	struct vtnet_rxq *rxq;
	struct vtnet_txq *txq;
	int i, idx, flags, nvqs, error;

	dev = sc->vtnet_dev;
	flags = 0;

	nvqs = sc->vtnet_max_vq_pairs * 2;
	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
		nvqs++;

	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
	if (info == NULL)
		return (ENOMEM);

	for (i = 0, idx = 0; i < sc->vtnet_req_vq_pairs; i++, idx += 2) {
		rxq = &sc->vtnet_rxqs[i];
		VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
		    vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
		    "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);

		txq = &sc->vtnet_txqs[i];
		VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
		    vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
		    "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
	}

	/* These queues will not be used so allocate the minimum resources. */
	for (/**/; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
		rxq = &sc->vtnet_rxqs[i];
		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, rxq, &rxq->vtnrx_vq,
		    "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id);

		txq = &sc->vtnet_txqs[i];
		VQ_ALLOC_INFO_INIT(&info[idx+1], 0, NULL, txq, &txq->vtntx_vq,
		    "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id);
	}

	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
		    &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
	}

	/*
	 * TODO: Enable interrupt binding if this is multiqueue. This will
	 * only matter when per-virtqueue MSIX is available.
	 */
	if (sc->vtnet_flags & VTNET_FLAG_MQ)
		flags |= 0;

	error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
	free(info, M_TEMP);

	return (error);
}

static int
vtnet_alloc_interface(struct vtnet_softc *sc)
{
	device_t dev;
	if_t ifp;

	dev = sc->vtnet_dev;

	ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL)
		return (ENOMEM);

	sc->vtnet_ifp = ifp;
	if_setsoftc(ifp, sc);
	if_initname(ifp, device_get_name(dev), device_get_unit(dev));

	return (0);
}

static int
vtnet_setup_interface(struct vtnet_softc *sc)
{
	device_t dev;
	struct pfil_head_args pa;
	if_t ifp;

	dev = sc->vtnet_dev;
	ifp = sc->vtnet_ifp;

	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
	if_setbaudrate(ifp, IF_Gbps(10));
	if_setinitfn(ifp, vtnet_init);
	if_setioctlfn(ifp, vtnet_ioctl);
	if_setgetcounterfn(ifp, vtnet_get_counter);
#ifndef VTNET_LEGACY_TX
	if_settransmitfn(ifp, vtnet_txq_mq_start);
	if_setqflushfn(ifp, vtnet_qflush);
#else
	struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
	if_setstartfn(ifp, vtnet_start);
	if_setsendqlen(ifp, virtqueue_size(vq) - 1);
	if_setsendqready(ifp);
#endif

	vtnet_get_macaddr(sc);

	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
		if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0);

	ifmedia_init(&sc->vtnet_media, 0, vtnet_ifmedia_upd, vtnet_ifmedia_sts);
	ifmedia_add(&sc->vtnet_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->vtnet_media, IFM_ETHER | IFM_AUTO);

	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
		int gso;

		if_setcapabilitiesbit(ifp, IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6, 0);

		gso = virtio_with_feature(dev, VIRTIO_NET_F_GSO);
		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
			if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
			if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;

		if (if_getcapabilities(ifp) & (IFCAP_TSO4 | IFCAP_TSO6)) {
			int tso_maxlen;

			if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTSO, 0);

			tso_maxlen = vtnet_tunable_int(sc, "tso_maxlen",
			    vtnet_tso_maxlen);
			if_sethwtsomax(ifp, tso_maxlen -
			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
			if_sethwtsomaxsegcount(ifp, sc->vtnet_tx_nsegs - 1);
			if_sethwtsomaxsegsize(ifp, PAGE_SIZE);
		}
	}

	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
		if_setcapabilitiesbit(ifp, IFCAP_RXCSUM, 0);
#ifdef notyet
		/* BMV: Rx checksums not distinguished between IPv4 and IPv6. */
		if_setcapabilitiesbit(ifp, IFCAP_RXCSUM_IPV6, 0);
#endif

		if (vtnet_tunable_int(sc, "fixup_needs_csum",
		    vtnet_fixup_needs_csum) != 0)
			sc->vtnet_flags |= VTNET_FLAG_FIXUP_NEEDS_CSUM;

		/* Support either "hardware" or software LRO. */
		if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
	}

	if (if_getcapabilities(ifp) & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6)) {
		/*
		 * VirtIO does not support VLAN tagging, but we can fake
		 * it by inserting and removing the 802.1Q header during
		 * transmit and receive. We are then able to do checksum
		 * offloading of VLAN frames.
		 */
		if_setcapabilitiesbit(ifp,
		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM, 0);
	}

	if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO)
		if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
	if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);

	/*
	 * Capabilities after here are not enabled by default.
	 */
	if_setcapenable(ifp, if_getcapabilities(ifp));

	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
		if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);

		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
	}

	ether_ifattach(ifp, sc->vtnet_hwaddr);

	/* Tell the upper layer(s) we support long frames. */
	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));

	DEBUGNET_SET(ifp, vtnet);

	pa.pa_version = PFIL_VERSION;
	pa.pa_flags = PFIL_IN;
	pa.pa_type = PFIL_TYPE_ETHERNET;
	pa.pa_headname = if_name(ifp);
	sc->vtnet_pfil = pfil_head_register(&pa);

	return (0);
}

static int
vtnet_rx_cluster_size(struct vtnet_softc *sc, int mtu)
{
	int framesz;

	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
		return (MJUMPAGESIZE);
	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
		return (MCLBYTES);

	/*
	 * Try to scale the receive mbuf cluster size from the MTU. We
	 * could also use the VQ size to influence the selected size,
	 * but that would only matter for very small queues.
	 */
	if (vtnet_modern(sc)) {
		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1));
		framesz = sizeof(struct virtio_net_hdr_v1);
	} else
		framesz = sizeof(struct vtnet_rx_header);
	framesz += sizeof(struct ether_vlan_header) + mtu;

	if (framesz <= MCLBYTES)
		return (MCLBYTES);
	else if (framesz <= MJUMPAGESIZE)
		return (MJUMPAGESIZE);
	else if (framesz <= MJUM9BYTES)
		return (MJUM9BYTES);

	/* Sane default; avoid 16KB clusters. */
	return (MCLBYTES);
}

static int
vtnet_ioctl_mtu(struct vtnet_softc *sc, u_int mtu)
{
	if_t ifp;
	int clustersz;

	ifp = sc->vtnet_ifp;
	VTNET_CORE_LOCK_ASSERT(sc);

	if (if_getmtu(ifp) == mtu)
		return (0);
	else if (mtu < ETHERMIN || mtu > sc->vtnet_max_mtu)
		return (EINVAL);

	if_setmtu(ifp, mtu);
	clustersz = vtnet_rx_cluster_size(sc, mtu);

	if (clustersz != sc->vtnet_rx_clustersz &&
	    if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
		vtnet_init_locked(sc, 0);
	}

	return (0);
}

static int
vtnet_ioctl_ifflags(struct vtnet_softc *sc)
{
	if_t ifp;
	int drv_running;

	ifp = sc->vtnet_ifp;
	drv_running = (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0;

	VTNET_CORE_LOCK_ASSERT(sc);

	if ((if_getflags(ifp) & IFF_UP) == 0) {
		if (drv_running)
			vtnet_stop(sc);
		goto out;
	}

	if (!drv_running) {
		vtnet_init_locked(sc, 0);
		goto out;
	}

	if ((if_getflags(ifp) ^ sc->vtnet_if_flags) &
	    (IFF_PROMISC | IFF_ALLMULTI)) {
		if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
			vtnet_rx_filter(sc);
		else {
			/*
			 * We don't support filtering out multicast, so
			 * ALLMULTI is always set.
			 */
			if_setflagbits(ifp, IFF_ALLMULTI, 0);
			if_setflagbits(ifp, IFF_PROMISC, 0);
		}
	}

out:
	sc->vtnet_if_flags = if_getflags(ifp);
	return (0);
}

static int
vtnet_ioctl_multi(struct vtnet_softc *sc)
{
	if_t ifp;

	ifp = sc->vtnet_ifp;

	VTNET_CORE_LOCK_ASSERT(sc);

	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX &&
	    if_getdrvflags(ifp) & IFF_DRV_RUNNING)
		vtnet_rx_filter_mac(sc);

	return (0);
}

static int
vtnet_ioctl_ifcap(struct vtnet_softc *sc, struct ifreq *ifr)
{
	if_t ifp;
	int mask, reinit, update;

	ifp = sc->vtnet_ifp;
	mask = (ifr->ifr_reqcap & if_getcapabilities(ifp)) ^ if_getcapenable(ifp);
	reinit = update = 0;

	VTNET_CORE_LOCK_ASSERT(sc);

	if (mask & IFCAP_TXCSUM)
		if_togglecapenable(ifp, IFCAP_TXCSUM);
	if (mask & IFCAP_TXCSUM_IPV6)
		if_togglecapenable(ifp, IFCAP_TXCSUM_IPV6);
	if (mask & IFCAP_TSO4)
		if_togglecapenable(ifp, IFCAP_TSO4);
	if (mask & IFCAP_TSO6)
		if_togglecapenable(ifp, IFCAP_TSO6);

	if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) {
		/*
		 * These Rx features require the negotiated features to
		 * be updated. Avoid a full reinit if possible.
		 */
		if (sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS)
			update = 1;
		else
			reinit = 1;

		/* BMV: Avoid needless renegotiation for just software LRO. */
		if ((mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) ==
		    IFCAP_LRO && vtnet_software_lro(sc))
			reinit = update = 0;

		if (mask & IFCAP_RXCSUM)
			if_togglecapenable(ifp, IFCAP_RXCSUM);
		if (mask & IFCAP_RXCSUM_IPV6)
			if_togglecapenable(ifp, IFCAP_RXCSUM_IPV6);
		if (mask & IFCAP_LRO)
			if_togglecapenable(ifp, IFCAP_LRO);

		/*
		 * VirtIO does not distinguish between IPv4 and IPv6 checksums
		 * so treat them as a pair. Guest TSO (LRO) requires receive
		 * checksums.
		 */
		if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) {
			if_setcapenablebit(ifp, IFCAP_RXCSUM, 0);
#ifdef notyet
			if_setcapenablebit(ifp, IFCAP_RXCSUM_IPV6, 0);
#endif
		} else
			if_setcapenablebit(ifp, 0,
			    (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO));
	}

	if (mask & IFCAP_VLAN_HWFILTER) {
		/* These Rx features require renegotiation.
		 */
		reinit = 1;

		if (mask & IFCAP_VLAN_HWFILTER)
			if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER);
	}

	if (mask & IFCAP_VLAN_HWTSO)
		if_togglecapenable(ifp, IFCAP_VLAN_HWTSO);
	if (mask & IFCAP_VLAN_HWTAGGING)
		if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING);

	if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) {
		if (reinit) {
			if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING);
			vtnet_init_locked(sc, 0);
		} else if (update)
			vtnet_update_rx_offloads(sc);
	}

	return (0);
}

static int
vtnet_ioctl(if_t ifp, u_long cmd, caddr_t data)
{
	struct vtnet_softc *sc;
	struct ifreq *ifr;
	int error;

	sc = if_getsoftc(ifp);
	ifr = (struct ifreq *) data;
	error = 0;

	switch (cmd) {
	case SIOCSIFMTU:
		VTNET_CORE_LOCK(sc);
		error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu);
		VTNET_CORE_UNLOCK(sc);
		break;

	case SIOCSIFFLAGS:
		VTNET_CORE_LOCK(sc);
		error = vtnet_ioctl_ifflags(sc);
		VTNET_CORE_UNLOCK(sc);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		VTNET_CORE_LOCK(sc);
		error = vtnet_ioctl_multi(sc);
		VTNET_CORE_UNLOCK(sc);
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
		break;

	case SIOCSIFCAP:
		VTNET_CORE_LOCK(sc);
		error = vtnet_ioctl_ifcap(sc, ifr);
		VTNET_CORE_UNLOCK(sc);
		VLAN_CAPABILITIES(ifp);
		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}

	VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);

	return (error);
}

static int
vtnet_rxq_populate(struct vtnet_rxq *rxq)
{
	struct virtqueue *vq;
	int nbufs, error;

#ifdef DEV_NETMAP
	error = vtnet_netmap_rxq_populate(rxq);
	if (error >= 0)
		return (error);
#endif  /* DEV_NETMAP */

	vq = rxq->vtnrx_vq;
	error = ENOSPC;

	for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
		error = vtnet_rxq_new_buf(rxq);
		if (error)
			break;
	}

	if (nbufs > 0) {
		virtqueue_notify(vq);
		/*
		 * EMSGSIZE signifies the virtqueue did not have enough
		 * entries available to hold the last mbuf. This is not
		 * an error.
		 */
		if (error == EMSGSIZE)
			error = 0;
	}

	return (error);
}

static void
vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
{
	struct virtqueue *vq;
	struct mbuf *m;
	int last;
#ifdef DEV_NETMAP
	struct netmap_kring *kring = netmap_kring_on(NA(rxq->vtnrx_sc->vtnet_ifp),
	    rxq->vtnrx_id, NR_RX);
#else  /* !DEV_NETMAP */
	void *kring = NULL;
#endif /* !DEV_NETMAP */

	vq = rxq->vtnrx_vq;
	last = 0;

	while ((m = virtqueue_drain(vq, &last)) != NULL) {
		if (kring == NULL)
			m_freem(m);
	}

	KASSERT(virtqueue_empty(vq),
	    ("%s: mbufs remaining in rx queue %p", __func__, rxq));
}

static struct mbuf *
vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
{
	struct mbuf *m_head, *m_tail, *m;
	int i, size;

	m_head = NULL;
	size = sc->vtnet_rx_clustersz;

	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
	    ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs));

	for (i = 0; i < nbufs; i++) {
		m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ?
		    M_PKTHDR : 0, size);
		if (m == NULL) {
			sc->vtnet_stats.mbuf_alloc_failed++;
			m_freem(m_head);
			return (NULL);
		}

		m->m_len = size;
		if (m_head != NULL) {
			m_tail->m_next = m;
			m_tail = m;
		} else
			m_head = m_tail = m;
	}

	if (m_tailp != NULL)
		*m_tailp = m_tail;

	return (m_head);
}

/*
 * Slow path for when LRO without mergeable buffers is negotiated.
 */
static int
vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
    int len0)
{
	struct vtnet_softc *sc;
	struct mbuf *m, *m_prev, *m_new, *m_tail;
	int len, clustersz, nreplace, error;

	sc = rxq->vtnrx_sc;
	clustersz = sc->vtnet_rx_clustersz;

	m_prev = NULL;
	m_tail = NULL;
	nreplace = 0;

	m = m0;
	len = len0;

	/*
	 * Since these mbuf chains are so large, avoid allocating a complete
	 * replacement when the received frame did not consume the entire
	 * chain. Unused mbufs are moved to the tail of the replacement mbuf.
	 */
	while (len > 0) {
		if (m == NULL) {
			sc->vtnet_stats.rx_frame_too_large++;
			return (EMSGSIZE);
		}

		/*
		 * Every mbuf should have the expected cluster size since that
		 * is also used to allocate the replacements.
		 */
		KASSERT(m->m_len == clustersz,
		    ("%s: mbuf size %d not expected cluster size %d", __func__,
		    m->m_len, clustersz));

		m->m_len = MIN(m->m_len, len);
		len -= m->m_len;

		m_prev = m;
		m = m->m_next;
		nreplace++;
	}

	KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
	    ("%s: invalid replacement mbuf count %d max %d", __func__,
	    nreplace, sc->vtnet_rx_nmbufs));

	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
	if (m_new == NULL) {
		m_prev->m_len = clustersz;
		return (ENOBUFS);
	}

	/*
	 * Move any unused mbufs from the received mbuf chain onto the
	 * end of the replacement chain.
	 */
	if (m_prev->m_next != NULL) {
		m_tail->m_next = m_prev->m_next;
		m_prev->m_next = NULL;
	}

	error = vtnet_rxq_enqueue_buf(rxq, m_new);
	if (error) {
		/*
		 * The replacement is supposed to be a copy of the one
		 * dequeued, so this is a very unexpected error.
		 *
		 * Restore the m0 chain to the original state if it was
		 * modified so we can then discard it.
		 */
		if (m_tail->m_next != NULL) {
			m_prev->m_next = m_tail->m_next;
			m_tail->m_next = NULL;
		}
		m_prev->m_len = clustersz;
		sc->vtnet_stats.rx_enq_replacement_failed++;
		m_freem(m_new);
	}

	return (error);
}

static int
vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
{
	struct vtnet_softc *sc;
	struct mbuf *m_new;
	int error;

	sc = rxq->vtnrx_sc;

	if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
		return (vtnet_rxq_replace_lro_nomrg_buf(rxq, m, len));

	MPASS(m->m_next == NULL);
	if (m->m_len < len)
		return (EMSGSIZE);

	m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
	if (m_new == NULL)
		return (ENOBUFS);

	error = vtnet_rxq_enqueue_buf(rxq, m_new);
	if (error) {
		sc->vtnet_stats.rx_enq_replacement_failed++;
		m_freem(m_new);
	} else
		m->m_len = len;

	return (error);
}

static int
vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
{
	struct vtnet_softc *sc;
	struct sglist *sg;
	int header_inlined, error;

	sc = rxq->vtnrx_sc;
	sg = rxq->vtnrx_sg;

	KASSERT(m->m_next == NULL || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
	    ("%s: mbuf chain without LRO_NOMRG", __func__));
	VTNET_RXQ_LOCK_ASSERT(rxq);

	sglist_reset(sg);
	header_inlined = vtnet_modern(sc) ||
	    (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) != 0; /* TODO: ANY_LAYOUT */

	if (header_inlined)
		error = sglist_append_mbuf(sg, m);
	else {
		struct vtnet_rx_header *rxhdr =
		    mtod(m, struct vtnet_rx_header *);
		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));

		/* Append the header and remaining mbuf data. */
		error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
		if (error)
			return (error);
		error = sglist_append(sg, &rxhdr[1],
		    m->m_len - sizeof(struct vtnet_rx_header));
		if (error)
			return (error);

		if (m->m_next != NULL)
			error = sglist_append_mbuf(sg, m->m_next);
	}

	if (error)
		return (error);

	return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg));
}

static int
vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
{
	struct vtnet_softc *sc;
	struct mbuf *m;
	int error;

	sc = rxq->vtnrx_sc;

	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
	if (m == NULL)
		return (ENOBUFS);

	error = vtnet_rxq_enqueue_buf(rxq, m);
	if (error)
		m_freem(m);

	return (error);
}

static int
vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, uint16_t etype,
    int hoff, struct virtio_net_hdr *hdr)
{
	struct vtnet_softc *sc;
	int error;

	sc = rxq->vtnrx_sc;

	/*
	 * NEEDS_CSUM corresponds to Linux's CHECKSUM_PARTIAL, but FreeBSD does
	 * not have an analogous CSUM flag. The checksum has been validated,
	 * but is incomplete (TCP/UDP pseudo header).
	 *
	 * The packet is likely from another VM on the same host that itself
	 * performed checksum offloading so Tx/Rx is basically a memcpy and
	 * the checksum has little value.
	 *
	 * Default to receiving the packet as-is for performance reasons, but
	 * this can cause issues if the packet is to be forwarded because it
	 * does not contain a valid checksum. This patch may be helpful:
	 * https://reviews.freebsd.org/D6611.
	 * In the meantime, have the driver
	 * compute the checksum if requested.
	 *
	 * BMV: Need to add an CSUM_PARTIAL flag?
	 */
	if ((sc->vtnet_flags & VTNET_FLAG_FIXUP_NEEDS_CSUM) == 0) {
		error = vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr);
		return (error);
	}

	/*
	 * Compute the checksum in the driver so the packet will contain a
	 * valid checksum. The checksum is at csum_offset from csum_start.
	 */
	switch (etype) {
#if defined(INET) || defined(INET6)
	case ETHERTYPE_IP:
	case ETHERTYPE_IPV6: {
		int csum_off, csum_end;
		uint16_t csum;

		csum_off = hdr->csum_start + hdr->csum_offset;
		csum_end = csum_off + sizeof(uint16_t);

		/* Assume checksum will be in the first mbuf. */
		if (m->m_len < csum_end || m->m_pkthdr.len < csum_end)
			return (1);

		/*
		 * Like in_delayed_cksum()/in6_delayed_cksum(), compute the
		 * checksum and write it at the specified offset. We could
		 * try to verify the packet: csum_start should probably
		 * correspond to the start of the TCP/UDP header.
		 *
		 * BMV: Need to properly handle UDP with zero checksum. Is
		 * the IPv4 header checksum implicitly validated?
		 */
		csum = in_cksum_skip(m, m->m_pkthdr.len, hdr->csum_start);
		*(uint16_t *)(mtodo(m, csum_off)) = csum;
		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		m->m_pkthdr.csum_data = 0xFFFF;
		break;
	}
#endif
	default:
		sc->vtnet_stats.rx_csum_bad_ethtype++;
		return (1);
	}

	return (0);
}

static int
vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m,
    uint16_t etype, int hoff, struct virtio_net_hdr *hdr __unused)
{
#if 0
	struct vtnet_softc *sc;
#endif
	int protocol;

#if 0
	sc = rxq->vtnrx_sc;
#endif

	switch (etype) {
#if defined(INET)
	case ETHERTYPE_IP:
		if (__predict_false(m->m_len < hoff + sizeof(struct ip)))
			protocol = IPPROTO_DONE;
		else {
			struct ip *ip = (struct ip *)(m->m_data + hoff);
			protocol = ip->ip_p;
		}
		break;
#endif
#if defined(INET6)
	case ETHERTYPE_IPV6:
		if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr))
		    || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0)
			protocol = IPPROTO_DONE;
		break;
#endif
	default:
		protocol = IPPROTO_DONE;
		break;
	}

	switch (protocol) {
	case IPPROTO_TCP:
	case IPPROTO_UDP:
		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		m->m_pkthdr.csum_data = 0xFFFF;
		break;
	default:
		/*
		 * FreeBSD does not support checksum offloading of this
		 * protocol. Let the stack re-verify the checksum later
		 * if the protocol is supported.
		 */
#if 0
		if_printf(sc->vtnet_ifp,
		    "%s: checksum offload of unsupported protocol "
		    "etype=%#x protocol=%d csum_start=%d csum_offset=%d\n",
		    __func__, etype, protocol, hdr->csum_start,
		    hdr->csum_offset);
#endif
		break;
	}

	return (0);
}

static int
vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
    struct virtio_net_hdr *hdr)
{
	const struct ether_header *eh;
	int hoff;
	uint16_t etype;

	eh = mtod(m, const struct ether_header *);
	etype = ntohs(eh->ether_type);
	if (etype == ETHERTYPE_VLAN) {
		/* TODO BMV: Handle QinQ.
		 */
		const struct ether_vlan_header *evh =
		    mtod(m, const struct ether_vlan_header *);
		etype = ntohs(evh->evl_proto);
		hoff = sizeof(struct ether_vlan_header);
	} else
		hoff = sizeof(struct ether_header);

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
		return (vtnet_rxq_csum_needs_csum(rxq, m, etype, hoff, hdr));
	else /* VIRTIO_NET_HDR_F_DATA_VALID */
		return (vtnet_rxq_csum_data_valid(rxq, m, etype, hoff, hdr));
}

static void
vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
{
	struct mbuf *m;

	while (--nbufs > 0) {
		m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
		if (m == NULL)
			break;
		vtnet_rxq_discard_buf(rxq, m);
	}
}

static void
vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
{
	int error __diagused;

	/*
	 * Requeue the discarded mbuf. This should always be successful
	 * since it was just dequeued.
	 */
	error = vtnet_rxq_enqueue_buf(rxq, m);
	KASSERT(error == 0,
	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
}

static int
vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
{
	struct vtnet_softc *sc;
	struct virtqueue *vq;
	struct mbuf *m_tail;

	sc = rxq->vtnrx_sc;
	vq = rxq->vtnrx_vq;
	m_tail = m_head;

	while (--nbufs > 0) {
		struct mbuf *m;
		uint32_t len;

		m = virtqueue_dequeue(vq, &len);
		if (m == NULL) {
			rxq->vtnrx_stats.vrxs_ierrors++;
			goto fail;
		}

		if (vtnet_rxq_new_buf(rxq) != 0) {
			rxq->vtnrx_stats.vrxs_iqdrops++;
			vtnet_rxq_discard_buf(rxq, m);
			if (nbufs > 1)
				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
			goto fail;
		}

		if (m->m_len < len)
			len = m->m_len;

		m->m_len = len;
		m->m_flags &= ~M_PKTHDR;

		m_head->m_pkthdr.len += len;
		m_tail->m_next = m;
		m_tail = m;
	}

	return (0);

fail:
	sc->vtnet_stats.rx_mergeable_failed++;
	m_freem(m_head);

	return (1);
}

#if defined(INET) || defined(INET6)
static int
vtnet_lro_rx(struct vtnet_rxq *rxq, struct mbuf *m)
{
	struct lro_ctrl *lro;

	lro = &rxq->vtnrx_lro;

	if (lro->lro_mbuf_max != 0) {
		tcp_lro_queue_mbuf(lro, m);
		return (0);
	}

	return (tcp_lro_rx(lro, m, 0));
}
#endif

static void
vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
    struct virtio_net_hdr *hdr)
{
	struct vtnet_softc *sc;
	if_t ifp;

	sc = rxq->vtnrx_sc;
	ifp = sc->vtnet_ifp;

	if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) {
		struct ether_header *eh = mtod(m, struct ether_header *);
		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
			vtnet_vlan_tag_remove(m);
			/*
			 * With the 802.1Q header removed, update the
			 * checksum starting location accordingly.
2012 */ 2013 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) 2014 hdr->csum_start -= ETHER_VLAN_ENCAP_LEN; 2015 } 2016 } 2017 2018 m->m_pkthdr.flowid = rxq->vtnrx_id; 2019 M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE); 2020 2021 if (hdr->flags & 2022 (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID)) { 2023 if (vtnet_rxq_csum(rxq, m, hdr) == 0) 2024 rxq->vtnrx_stats.vrxs_csum++; 2025 else 2026 rxq->vtnrx_stats.vrxs_csum_failed++; 2027 } 2028 2029 if (hdr->gso_size != 0) { 2030 switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { 2031 case VIRTIO_NET_HDR_GSO_TCPV4: 2032 case VIRTIO_NET_HDR_GSO_TCPV6: 2033 m->m_pkthdr.lro_nsegs = 2034 howmany(m->m_pkthdr.len, hdr->gso_size); 2035 rxq->vtnrx_stats.vrxs_host_lro++; 2036 break; 2037 } 2038 } 2039 2040 rxq->vtnrx_stats.vrxs_ipackets++; 2041 rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len; 2042 2043 #if defined(INET) || defined(INET6) 2044 if (vtnet_software_lro(sc) && if_getcapenable(ifp) & IFCAP_LRO) { 2045 if (vtnet_lro_rx(rxq, m) == 0) 2046 return; 2047 } 2048 #endif 2049 2050 if_input(ifp, m); 2051 } 2052 2053 static int 2054 vtnet_rxq_eof(struct vtnet_rxq *rxq) 2055 { 2056 struct virtio_net_hdr lhdr, *hdr; 2057 struct vtnet_softc *sc; 2058 if_t ifp; 2059 struct virtqueue *vq; 2060 int deq, count; 2061 2062 sc = rxq->vtnrx_sc; 2063 vq = rxq->vtnrx_vq; 2064 ifp = sc->vtnet_ifp; 2065 deq = 0; 2066 count = sc->vtnet_rx_process_limit; 2067 2068 VTNET_RXQ_LOCK_ASSERT(rxq); 2069 2070 while (count-- > 0) { 2071 struct mbuf *m; 2072 uint32_t len, nbufs, adjsz; 2073 2074 m = virtqueue_dequeue(vq, &len); 2075 if (m == NULL) 2076 break; 2077 deq++; 2078 2079 if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) { 2080 rxq->vtnrx_stats.vrxs_ierrors++; 2081 vtnet_rxq_discard_buf(rxq, m); 2082 continue; 2083 } 2084 2085 if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) { 2086 struct virtio_net_hdr_mrg_rxbuf *mhdr = 2087 mtod(m, struct virtio_net_hdr_mrg_rxbuf *); 2088 kmsan_mark(mhdr, sizeof(*mhdr), KMSAN_STATE_INITED); 2089 nbufs = vtnet_htog16(sc, mhdr->num_buffers); 2090 adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); 2091 } else if (vtnet_modern(sc)) { 2092 nbufs = 1; /* num_buffers is always 1 */ 2093 adjsz = sizeof(struct virtio_net_hdr_v1); 2094 } else { 2095 nbufs = 1; 2096 adjsz = sizeof(struct vtnet_rx_header); 2097 /* 2098 * Account for our gap between the header and start of 2099 * data to keep the segments separated. 2100 */ 2101 len += VTNET_RX_HEADER_PAD; 2102 } 2103 2104 if (vtnet_rxq_replace_buf(rxq, m, len) != 0) { 2105 rxq->vtnrx_stats.vrxs_iqdrops++; 2106 vtnet_rxq_discard_buf(rxq, m); 2107 if (nbufs > 1) 2108 vtnet_rxq_discard_merged_bufs(rxq, nbufs); 2109 continue; 2110 } 2111 2112 m->m_pkthdr.len = len; 2113 m->m_pkthdr.rcvif = ifp; 2114 m->m_pkthdr.csum_flags = 0; 2115 2116 if (nbufs > 1) { 2117 /* Dequeue the rest of chain. */ 2118 if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0) 2119 continue; 2120 } 2121 2122 kmsan_mark_mbuf(m, KMSAN_STATE_INITED); 2123 2124 /* 2125 * Save an endian swapped version of the header prior to it 2126 * being stripped. The header is always at the start of the 2127 * mbuf data. num_buffers was already saved (and not needed) 2128 * so use the standard header. 
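 * The local copy (lhdr) must be taken before m_adj() strips adjsz bytes
 * from the front of the mbuf, since vtnet_rxq_input() still needs the
 * offload fields afterwards.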
2129 */ 2130 hdr = mtod(m, struct virtio_net_hdr *); 2131 lhdr.flags = hdr->flags; 2132 lhdr.gso_type = hdr->gso_type; 2133 lhdr.hdr_len = vtnet_htog16(sc, hdr->hdr_len); 2134 lhdr.gso_size = vtnet_htog16(sc, hdr->gso_size); 2135 lhdr.csum_start = vtnet_htog16(sc, hdr->csum_start); 2136 lhdr.csum_offset = vtnet_htog16(sc, hdr->csum_offset); 2137 m_adj(m, adjsz); 2138 2139 if (PFIL_HOOKED_IN(sc->vtnet_pfil)) { 2140 pfil_return_t pfil; 2141 2142 pfil = pfil_mbuf_in(sc->vtnet_pfil, &m, ifp, NULL); 2143 switch (pfil) { 2144 case PFIL_DROPPED: 2145 case PFIL_CONSUMED: 2146 continue; 2147 default: 2148 KASSERT(pfil == PFIL_PASS, 2149 ("Filter returned %d!", pfil)); 2150 } 2151 } 2152 2153 vtnet_rxq_input(rxq, m, &lhdr); 2154 } 2155 2156 if (deq > 0) { 2157 #if defined(INET) || defined(INET6) 2158 if (vtnet_software_lro(sc)) 2159 tcp_lro_flush_all(&rxq->vtnrx_lro); 2160 #endif 2161 virtqueue_notify(vq); 2162 } 2163 2164 return (count > 0 ? 0 : EAGAIN); 2165 } 2166 2167 static void 2168 vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries) 2169 { 2170 struct vtnet_softc *sc; 2171 if_t ifp; 2172 u_int more; 2173 #ifdef DEV_NETMAP 2174 int nmirq; 2175 #endif /* DEV_NETMAP */ 2176 2177 sc = rxq->vtnrx_sc; 2178 ifp = sc->vtnet_ifp; 2179 2180 if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) { 2181 /* 2182 * Ignore this interrupt. Either this is a spurious interrupt 2183 * or multiqueue without per-VQ MSIX so every queue needs to 2184 * be polled (a brain dead configuration we could try harder 2185 * to avoid). 2186 */ 2187 vtnet_rxq_disable_intr(rxq); 2188 return; 2189 } 2190 2191 VTNET_RXQ_LOCK(rxq); 2192 2193 #ifdef DEV_NETMAP 2194 /* 2195 * We call netmap_rx_irq() under lock to prevent concurrent calls. 2196 * This is not necessary to serialize the access to the RX vq, but 2197 * rather to avoid races that may happen if this interface is 2198 * attached to a VALE switch, which would cause received packets 2199 * to stall in the RX queue (nm_kr_tryget() could find the kring 2200 * busy when called from netmap_bwrap_intr_notify()). 2201 */ 2202 nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more); 2203 if (nmirq != NM_IRQ_PASS) { 2204 VTNET_RXQ_UNLOCK(rxq); 2205 if (nmirq == NM_IRQ_RESCHED) { 2206 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 2207 } 2208 return; 2209 } 2210 #endif /* DEV_NETMAP */ 2211 2212 again: 2213 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { 2214 VTNET_RXQ_UNLOCK(rxq); 2215 return; 2216 } 2217 2218 more = vtnet_rxq_eof(rxq); 2219 if (more || vtnet_rxq_enable_intr(rxq) != 0) { 2220 if (!more) 2221 vtnet_rxq_disable_intr(rxq); 2222 /* 2223 * This is an occasional condition or race (when !more), 2224 * so retry a few times before scheduling the taskqueue. 
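 * Once the retry budget is exhausted, the remaining work is handed off
 * to the per-queue taskqueue and vrxs_rescheduled is incremented.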
2225 */ 2226 if (tries-- > 0) 2227 goto again; 2228 2229 rxq->vtnrx_stats.vrxs_rescheduled++; 2230 VTNET_RXQ_UNLOCK(rxq); 2231 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 2232 } else 2233 VTNET_RXQ_UNLOCK(rxq); 2234 } 2235 2236 static void 2237 vtnet_rx_vq_intr(void *xrxq) 2238 { 2239 struct vtnet_rxq *rxq; 2240 2241 rxq = xrxq; 2242 vtnet_rx_vq_process(rxq, VTNET_INTR_DISABLE_RETRIES); 2243 } 2244 2245 static void 2246 vtnet_rxq_tq_intr(void *xrxq, int pending __unused) 2247 { 2248 struct vtnet_rxq *rxq; 2249 2250 rxq = xrxq; 2251 vtnet_rx_vq_process(rxq, 0); 2252 } 2253 2254 static int 2255 vtnet_txq_intr_threshold(struct vtnet_txq *txq) 2256 { 2257 struct vtnet_softc *sc; 2258 int threshold; 2259 2260 sc = txq->vtntx_sc; 2261 2262 /* 2263 * The Tx interrupt is disabled until the queue free count falls 2264 * below our threshold. Completed frames are drained from the Tx 2265 * virtqueue before transmitting new frames and in the watchdog 2266 * callout, so the frequency of Tx interrupts is greatly reduced, 2267 * at the cost of not freeing mbufs as quickly as they otherwise 2268 * would be. 2269 */ 2270 threshold = virtqueue_size(txq->vtntx_vq) / 4; 2271 2272 /* 2273 * Without indirect descriptors, leave enough room for the most 2274 * segments we handle. 2275 */ 2276 if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 && 2277 threshold < sc->vtnet_tx_nsegs) 2278 threshold = sc->vtnet_tx_nsegs; 2279 2280 return (threshold); 2281 } 2282 2283 static int 2284 vtnet_txq_below_threshold(struct vtnet_txq *txq) 2285 { 2286 struct virtqueue *vq; 2287 2288 vq = txq->vtntx_vq; 2289 2290 return (virtqueue_nfree(vq) <= txq->vtntx_intr_threshold); 2291 } 2292 2293 static int 2294 vtnet_txq_notify(struct vtnet_txq *txq) 2295 { 2296 struct virtqueue *vq; 2297 2298 vq = txq->vtntx_vq; 2299 2300 txq->vtntx_watchdog = VTNET_TX_TIMEOUT; 2301 virtqueue_notify(vq); 2302 2303 if (vtnet_txq_enable_intr(txq) == 0) 2304 return (0); 2305 2306 /* 2307 * Drain frames that were completed since last checked. If this 2308 * causes the queue to go above the threshold, the caller should 2309 * continue transmitting. 2310 */ 2311 if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) { 2312 virtqueue_disable_intr(vq); 2313 return (1); 2314 } 2315 2316 return (0); 2317 } 2318 2319 static void 2320 vtnet_txq_free_mbufs(struct vtnet_txq *txq) 2321 { 2322 struct virtqueue *vq; 2323 struct vtnet_tx_header *txhdr; 2324 int last; 2325 #ifdef DEV_NETMAP 2326 struct netmap_kring *kring = netmap_kring_on(NA(txq->vtntx_sc->vtnet_ifp), 2327 txq->vtntx_id, NR_TX); 2328 #else /* !DEV_NETMAP */ 2329 void *kring = NULL; 2330 #endif /* !DEV_NETMAP */ 2331 2332 vq = txq->vtntx_vq; 2333 last = 0; 2334 2335 while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { 2336 if (kring == NULL) { 2337 m_freem(txhdr->vth_mbuf); 2338 uma_zfree(vtnet_tx_header_zone, txhdr); 2339 } 2340 } 2341 2342 KASSERT(virtqueue_empty(vq), 2343 ("%s: mbufs remaining in tx queue %p", __func__, txq)); 2344 } 2345 2346 /* 2347 * BMV: This can go away once we finally have offsets in the mbuf header. 
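 * Until then, the Ethernet and IP headers are parsed by hand here to
 * recover the EtherType, the IP protocol, and the offset of the L4 header.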
2348 */ 2349 static int 2350 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype, 2351 int *proto, int *start) 2352 { 2353 struct vtnet_softc *sc; 2354 struct ether_vlan_header *evh; 2355 #if defined(INET) || defined(INET6) 2356 int offset; 2357 #endif 2358 2359 sc = txq->vtntx_sc; 2360 2361 evh = mtod(m, struct ether_vlan_header *); 2362 if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 2363 /* BMV: We should handle nested VLAN tags too. */ 2364 *etype = ntohs(evh->evl_proto); 2365 #if defined(INET) || defined(INET6) 2366 offset = sizeof(struct ether_vlan_header); 2367 #endif 2368 } else { 2369 *etype = ntohs(evh->evl_encap_proto); 2370 #if defined(INET) || defined(INET6) 2371 offset = sizeof(struct ether_header); 2372 #endif 2373 } 2374 2375 switch (*etype) { 2376 #if defined(INET) 2377 case ETHERTYPE_IP: { 2378 struct ip *ip, iphdr; 2379 if (__predict_false(m->m_len < offset + sizeof(struct ip))) { 2380 m_copydata(m, offset, sizeof(struct ip), 2381 (caddr_t) &iphdr); 2382 ip = &iphdr; 2383 } else 2384 ip = (struct ip *)(m->m_data + offset); 2385 *proto = ip->ip_p; 2386 *start = offset + (ip->ip_hl << 2); 2387 break; 2388 } 2389 #endif 2390 #if defined(INET6) 2391 case ETHERTYPE_IPV6: 2392 *proto = -1; 2393 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); 2394 /* Assert the network stack sent us a valid packet. */ 2395 KASSERT(*start > offset, 2396 ("%s: mbuf %p start %d offset %d proto %d", __func__, m, 2397 *start, offset, *proto)); 2398 break; 2399 #endif 2400 default: 2401 sc->vtnet_stats.tx_csum_unknown_ethtype++; 2402 return (EINVAL); 2403 } 2404 2405 return (0); 2406 } 2407 2408 static int 2409 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type, 2410 int offset, struct virtio_net_hdr *hdr) 2411 { 2412 static struct timeval lastecn; 2413 static int curecn; 2414 struct vtnet_softc *sc; 2415 struct tcphdr *tcp, tcphdr; 2416 2417 sc = txq->vtntx_sc; 2418 2419 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { 2420 m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); 2421 tcp = &tcphdr; 2422 } else 2423 tcp = (struct tcphdr *)(m->m_data + offset); 2424 2425 hdr->hdr_len = vtnet_gtoh16(sc, offset + (tcp->th_off << 2)); 2426 hdr->gso_size = vtnet_gtoh16(sc, m->m_pkthdr.tso_segsz); 2427 hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 : 2428 VIRTIO_NET_HDR_GSO_TCPV6; 2429 2430 if (__predict_false(tcp->th_flags & TH_CWR)) { 2431 /* 2432 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In 2433 * FreeBSD, ECN support is not on a per-interface basis, 2434 * but globally via the net.inet.tcp.ecn.enable sysctl 2435 * knob. The default is off. 2436 */ 2437 if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { 2438 if (ppsratecheck(&lastecn, &curecn, 1)) 2439 if_printf(sc->vtnet_ifp, 2440 "TSO with ECN not negotiated with host\n"); 2441 return (ENOTSUP); 2442 } 2443 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; 2444 } 2445 2446 txq->vtntx_stats.vtxs_tso++; 2447 2448 return (0); 2449 } 2450 2451 static struct mbuf * 2452 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m, 2453 struct virtio_net_hdr *hdr) 2454 { 2455 struct vtnet_softc *sc; 2456 int flags, etype, csum_start, proto, error; 2457 2458 sc = txq->vtntx_sc; 2459 flags = m->m_pkthdr.csum_flags; 2460 2461 error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start); 2462 if (error) 2463 goto drop; 2464 2465 if (flags & (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6)) { 2466 /* Sanity check the parsed mbuf matches the offload flags. 
 */
		if (__predict_false((flags & VTNET_CSUM_OFFLOAD &&
		    etype != ETHERTYPE_IP) || (flags & VTNET_CSUM_OFFLOAD_IPV6
		    && etype != ETHERTYPE_IPV6))) {
			sc->vtnet_stats.tx_csum_proto_mismatch++;
			goto drop;
		}

		hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM;
		hdr->csum_start = vtnet_gtoh16(sc, csum_start);
		hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data);
		txq->vtntx_stats.vtxs_csum++;
	}

	if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) {
		/*
		 * Sanity check that the parsed mbuf IP protocol is TCP, as
		 * VirtIO TSO requires the checksum offloading above.
		 */
		if (__predict_false(proto != IPPROTO_TCP)) {
			sc->vtnet_stats.tx_tso_not_tcp++;
			goto drop;
		} else if (__predict_false((hdr->flags &
		    VIRTIO_NET_HDR_F_NEEDS_CSUM) == 0)) {
			sc->vtnet_stats.tx_tso_without_csum++;
			goto drop;
		}

		error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr);
		if (error)
			goto drop;
	}

	return (m);

drop:
	m_freem(m);
	return (NULL);
}

static int
vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head,
    struct vtnet_tx_header *txhdr)
{
	struct vtnet_softc *sc;
	struct virtqueue *vq;
	struct sglist *sg;
	struct mbuf *m;
	int error;

	sc = txq->vtntx_sc;
	vq = txq->vtntx_vq;
	sg = txq->vtntx_sg;
	m = *m_head;

	sglist_reset(sg);
	error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size);
	if (error != 0 || sg->sg_nseg != 1) {
		KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d",
		    __func__, error, sg->sg_nseg));
		goto fail;
	}

	error = sglist_append_mbuf(sg, m);
	if (error) {
		m = m_defrag(m, M_NOWAIT);
		if (m == NULL)
			goto fail;

		*m_head = m;
		sc->vtnet_stats.tx_defragged++;

		error = sglist_append_mbuf(sg, m);
		if (error)
			goto fail;
	}

	txhdr->vth_mbuf = m;
	error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0);

	return (error);

fail:
	sc->vtnet_stats.tx_defrag_failed++;
	m_freem(*m_head);
	*m_head = NULL;

	return (ENOBUFS);
}

static int
vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags)
{
	struct vtnet_tx_header *txhdr;
	struct virtio_net_hdr *hdr;
	struct mbuf *m;
	int error;

	m = *m_head;
	M_ASSERTPKTHDR(m);

	txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO);
	if (txhdr == NULL) {
		m_freem(m);
		*m_head = NULL;
		return (ENOMEM);
	}

	/*
	 * Always use the non-mergeable header, regardless of whether mergeable
	 * headers were negotiated, because for transmit num_buffers is always
	 * zero. The vtnet_hdr_size is used to enqueue the right header size
	 * segment.
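	 * The header and the mbuf chain are placed in the same sglist by
	 * vtnet_txq_enqueue_buf(), with the header always occupying the first
	 * segment.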
2578 */ 2579 hdr = &txhdr->vth_uhdr.hdr; 2580 2581 if (m->m_flags & M_VLANTAG) { 2582 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); 2583 if ((*m_head = m) == NULL) { 2584 error = ENOBUFS; 2585 goto fail; 2586 } 2587 m->m_flags &= ~M_VLANTAG; 2588 } 2589 2590 if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) { 2591 m = vtnet_txq_offload(txq, m, hdr); 2592 if ((*m_head = m) == NULL) { 2593 error = ENOBUFS; 2594 goto fail; 2595 } 2596 } 2597 2598 error = vtnet_txq_enqueue_buf(txq, m_head, txhdr); 2599 fail: 2600 if (error) 2601 uma_zfree(vtnet_tx_header_zone, txhdr); 2602 2603 return (error); 2604 } 2605 2606 #ifdef VTNET_LEGACY_TX 2607 2608 static void 2609 vtnet_start_locked(struct vtnet_txq *txq, if_t ifp) 2610 { 2611 struct vtnet_softc *sc; 2612 struct virtqueue *vq; 2613 struct mbuf *m0; 2614 int tries, enq; 2615 2616 sc = txq->vtntx_sc; 2617 vq = txq->vtntx_vq; 2618 tries = 0; 2619 2620 VTNET_TXQ_LOCK_ASSERT(txq); 2621 2622 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || 2623 sc->vtnet_link_active == 0) 2624 return; 2625 2626 vtnet_txq_eof(txq); 2627 2628 again: 2629 enq = 0; 2630 2631 while (!if_sendq_empty(ifp)) { 2632 if (virtqueue_full(vq)) 2633 break; 2634 2635 m0 = if_dequeue(ifp); 2636 if (m0 == NULL) 2637 break; 2638 2639 if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) { 2640 if (m0 != NULL) 2641 if_sendq_prepend(ifp, m0); 2642 break; 2643 } 2644 2645 enq++; 2646 ETHER_BPF_MTAP(ifp, m0); 2647 } 2648 2649 if (enq > 0 && vtnet_txq_notify(txq) != 0) { 2650 if (tries++ < VTNET_NOTIFY_RETRIES) 2651 goto again; 2652 2653 txq->vtntx_stats.vtxs_rescheduled++; 2654 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); 2655 } 2656 } 2657 2658 static void 2659 vtnet_start(if_t ifp) 2660 { 2661 struct vtnet_softc *sc; 2662 struct vtnet_txq *txq; 2663 2664 sc = if_getsoftc(ifp); 2665 txq = &sc->vtnet_txqs[0]; 2666 2667 VTNET_TXQ_LOCK(txq); 2668 vtnet_start_locked(txq, ifp); 2669 VTNET_TXQ_UNLOCK(txq); 2670 } 2671 2672 #else /* !VTNET_LEGACY_TX */ 2673 2674 static int 2675 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) 2676 { 2677 struct vtnet_softc *sc; 2678 struct virtqueue *vq; 2679 struct buf_ring *br; 2680 if_t ifp; 2681 int enq, tries, error; 2682 2683 sc = txq->vtntx_sc; 2684 vq = txq->vtntx_vq; 2685 br = txq->vtntx_br; 2686 ifp = sc->vtnet_ifp; 2687 tries = 0; 2688 error = 0; 2689 2690 VTNET_TXQ_LOCK_ASSERT(txq); 2691 2692 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || 2693 sc->vtnet_link_active == 0) { 2694 if (m != NULL) 2695 error = drbr_enqueue(ifp, br, m); 2696 return (error); 2697 } 2698 2699 if (m != NULL) { 2700 error = drbr_enqueue(ifp, br, m); 2701 if (error) 2702 return (error); 2703 } 2704 2705 vtnet_txq_eof(txq); 2706 2707 again: 2708 enq = 0; 2709 2710 while ((m = drbr_peek(ifp, br)) != NULL) { 2711 if (virtqueue_full(vq)) { 2712 drbr_putback(ifp, br, m); 2713 break; 2714 } 2715 2716 if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) { 2717 if (m != NULL) 2718 drbr_putback(ifp, br, m); 2719 else 2720 drbr_advance(ifp, br); 2721 break; 2722 } 2723 drbr_advance(ifp, br); 2724 2725 enq++; 2726 ETHER_BPF_MTAP(ifp, m); 2727 } 2728 2729 if (enq > 0 && vtnet_txq_notify(txq) != 0) { 2730 if (tries++ < VTNET_NOTIFY_RETRIES) 2731 goto again; 2732 2733 txq->vtntx_stats.vtxs_rescheduled++; 2734 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); 2735 } 2736 2737 return (0); 2738 } 2739 2740 static int 2741 vtnet_txq_mq_start(if_t ifp, struct mbuf *m) 2742 { 2743 struct vtnet_softc *sc; 2744 struct vtnet_txq *txq; 2745 int i, npairs, error; 2746 2747 sc 
= if_getsoftc(ifp); 2748 npairs = sc->vtnet_act_vq_pairs; 2749 2750 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 2751 i = m->m_pkthdr.flowid % npairs; 2752 else 2753 i = curcpu % npairs; 2754 2755 txq = &sc->vtnet_txqs[i]; 2756 2757 if (VTNET_TXQ_TRYLOCK(txq) != 0) { 2758 error = vtnet_txq_mq_start_locked(txq, m); 2759 VTNET_TXQ_UNLOCK(txq); 2760 } else { 2761 error = drbr_enqueue(ifp, txq->vtntx_br, m); 2762 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask); 2763 } 2764 2765 return (error); 2766 } 2767 2768 static void 2769 vtnet_txq_tq_deferred(void *xtxq, int pending __unused) 2770 { 2771 struct vtnet_softc *sc; 2772 struct vtnet_txq *txq; 2773 2774 txq = xtxq; 2775 sc = txq->vtntx_sc; 2776 2777 VTNET_TXQ_LOCK(txq); 2778 if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br)) 2779 vtnet_txq_mq_start_locked(txq, NULL); 2780 VTNET_TXQ_UNLOCK(txq); 2781 } 2782 2783 #endif /* VTNET_LEGACY_TX */ 2784 2785 static void 2786 vtnet_txq_start(struct vtnet_txq *txq) 2787 { 2788 struct vtnet_softc *sc; 2789 if_t ifp; 2790 2791 sc = txq->vtntx_sc; 2792 ifp = sc->vtnet_ifp; 2793 2794 #ifdef VTNET_LEGACY_TX 2795 if (!if_sendq_empty(ifp)) 2796 vtnet_start_locked(txq, ifp); 2797 #else 2798 if (!drbr_empty(ifp, txq->vtntx_br)) 2799 vtnet_txq_mq_start_locked(txq, NULL); 2800 #endif 2801 } 2802 2803 static void 2804 vtnet_txq_tq_intr(void *xtxq, int pending __unused) 2805 { 2806 struct vtnet_softc *sc; 2807 struct vtnet_txq *txq; 2808 if_t ifp; 2809 2810 txq = xtxq; 2811 sc = txq->vtntx_sc; 2812 ifp = sc->vtnet_ifp; 2813 2814 VTNET_TXQ_LOCK(txq); 2815 2816 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { 2817 VTNET_TXQ_UNLOCK(txq); 2818 return; 2819 } 2820 2821 vtnet_txq_eof(txq); 2822 vtnet_txq_start(txq); 2823 2824 VTNET_TXQ_UNLOCK(txq); 2825 } 2826 2827 static int 2828 vtnet_txq_eof(struct vtnet_txq *txq) 2829 { 2830 struct virtqueue *vq; 2831 struct vtnet_tx_header *txhdr; 2832 struct mbuf *m; 2833 int deq; 2834 2835 vq = txq->vtntx_vq; 2836 deq = 0; 2837 VTNET_TXQ_LOCK_ASSERT(txq); 2838 2839 while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { 2840 m = txhdr->vth_mbuf; 2841 deq++; 2842 2843 txq->vtntx_stats.vtxs_opackets++; 2844 txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len; 2845 if (m->m_flags & M_MCAST) 2846 txq->vtntx_stats.vtxs_omcasts++; 2847 2848 m_freem(m); 2849 uma_zfree(vtnet_tx_header_zone, txhdr); 2850 } 2851 2852 if (virtqueue_empty(vq)) 2853 txq->vtntx_watchdog = 0; 2854 2855 return (deq); 2856 } 2857 2858 static void 2859 vtnet_tx_vq_intr(void *xtxq) 2860 { 2861 struct vtnet_softc *sc; 2862 struct vtnet_txq *txq; 2863 if_t ifp; 2864 2865 txq = xtxq; 2866 sc = txq->vtntx_sc; 2867 ifp = sc->vtnet_ifp; 2868 2869 if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) { 2870 /* 2871 * Ignore this interrupt. Either this is a spurious interrupt 2872 * or multiqueue without per-VQ MSIX so every queue needs to 2873 * be polled (a brain dead configuration we could try harder 2874 * to avoid). 
2875 */ 2876 vtnet_txq_disable_intr(txq); 2877 return; 2878 } 2879 2880 #ifdef DEV_NETMAP 2881 if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS) 2882 return; 2883 #endif /* DEV_NETMAP */ 2884 2885 VTNET_TXQ_LOCK(txq); 2886 2887 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { 2888 VTNET_TXQ_UNLOCK(txq); 2889 return; 2890 } 2891 2892 vtnet_txq_eof(txq); 2893 vtnet_txq_start(txq); 2894 2895 VTNET_TXQ_UNLOCK(txq); 2896 } 2897 2898 static void 2899 vtnet_tx_start_all(struct vtnet_softc *sc) 2900 { 2901 struct vtnet_txq *txq; 2902 int i; 2903 2904 VTNET_CORE_LOCK_ASSERT(sc); 2905 2906 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2907 txq = &sc->vtnet_txqs[i]; 2908 2909 VTNET_TXQ_LOCK(txq); 2910 vtnet_txq_start(txq); 2911 VTNET_TXQ_UNLOCK(txq); 2912 } 2913 } 2914 2915 #ifndef VTNET_LEGACY_TX 2916 static void 2917 vtnet_qflush(if_t ifp) 2918 { 2919 struct vtnet_softc *sc; 2920 struct vtnet_txq *txq; 2921 struct mbuf *m; 2922 int i; 2923 2924 sc = if_getsoftc(ifp); 2925 2926 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2927 txq = &sc->vtnet_txqs[i]; 2928 2929 VTNET_TXQ_LOCK(txq); 2930 while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL) 2931 m_freem(m); 2932 VTNET_TXQ_UNLOCK(txq); 2933 } 2934 2935 if_qflush(ifp); 2936 } 2937 #endif 2938 2939 static int 2940 vtnet_watchdog(struct vtnet_txq *txq) 2941 { 2942 if_t ifp; 2943 2944 ifp = txq->vtntx_sc->vtnet_ifp; 2945 2946 VTNET_TXQ_LOCK(txq); 2947 if (txq->vtntx_watchdog == 1) { 2948 /* 2949 * Only drain completed frames if the watchdog is about to 2950 * expire. If any frames were drained, there may be enough 2951 * free descriptors now available to transmit queued frames. 2952 * In that case, the timer will immediately be decremented 2953 * below, but the timeout is generous enough that should not 2954 * be a problem. 
2955 */ 2956 if (vtnet_txq_eof(txq) != 0) 2957 vtnet_txq_start(txq); 2958 } 2959 2960 if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) { 2961 VTNET_TXQ_UNLOCK(txq); 2962 return (0); 2963 } 2964 VTNET_TXQ_UNLOCK(txq); 2965 2966 if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id); 2967 return (1); 2968 } 2969 2970 static void 2971 vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc, 2972 struct vtnet_txq_stats *txacc) 2973 { 2974 2975 bzero(rxacc, sizeof(struct vtnet_rxq_stats)); 2976 bzero(txacc, sizeof(struct vtnet_txq_stats)); 2977 2978 for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) { 2979 struct vtnet_rxq_stats *rxst; 2980 struct vtnet_txq_stats *txst; 2981 2982 rxst = &sc->vtnet_rxqs[i].vtnrx_stats; 2983 rxacc->vrxs_ipackets += rxst->vrxs_ipackets; 2984 rxacc->vrxs_ibytes += rxst->vrxs_ibytes; 2985 rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops; 2986 rxacc->vrxs_csum += rxst->vrxs_csum; 2987 rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed; 2988 rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled; 2989 2990 txst = &sc->vtnet_txqs[i].vtntx_stats; 2991 txacc->vtxs_opackets += txst->vtxs_opackets; 2992 txacc->vtxs_obytes += txst->vtxs_obytes; 2993 txacc->vtxs_csum += txst->vtxs_csum; 2994 txacc->vtxs_tso += txst->vtxs_tso; 2995 txacc->vtxs_rescheduled += txst->vtxs_rescheduled; 2996 } 2997 } 2998 2999 static uint64_t 3000 vtnet_get_counter(if_t ifp, ift_counter cnt) 3001 { 3002 struct vtnet_softc *sc; 3003 struct vtnet_rxq_stats rxaccum; 3004 struct vtnet_txq_stats txaccum; 3005 3006 sc = if_getsoftc(ifp); 3007 vtnet_accum_stats(sc, &rxaccum, &txaccum); 3008 3009 switch (cnt) { 3010 case IFCOUNTER_IPACKETS: 3011 return (rxaccum.vrxs_ipackets); 3012 case IFCOUNTER_IQDROPS: 3013 return (rxaccum.vrxs_iqdrops); 3014 case IFCOUNTER_IERRORS: 3015 return (rxaccum.vrxs_ierrors); 3016 case IFCOUNTER_OPACKETS: 3017 return (txaccum.vtxs_opackets); 3018 #ifndef VTNET_LEGACY_TX 3019 case IFCOUNTER_OBYTES: 3020 return (txaccum.vtxs_obytes); 3021 case IFCOUNTER_OMCASTS: 3022 return (txaccum.vtxs_omcasts); 3023 #endif 3024 default: 3025 return (if_get_counter_default(ifp, cnt)); 3026 } 3027 } 3028 3029 static void 3030 vtnet_tick(void *xsc) 3031 { 3032 struct vtnet_softc *sc; 3033 if_t ifp; 3034 int i, timedout; 3035 3036 sc = xsc; 3037 ifp = sc->vtnet_ifp; 3038 timedout = 0; 3039 3040 VTNET_CORE_LOCK_ASSERT(sc); 3041 3042 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 3043 timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]); 3044 3045 if (timedout != 0) { 3046 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); 3047 vtnet_init_locked(sc, 0); 3048 } else 3049 callout_schedule(&sc->vtnet_tick_ch, hz); 3050 } 3051 3052 static void 3053 vtnet_start_taskqueues(struct vtnet_softc *sc) 3054 { 3055 device_t dev; 3056 struct vtnet_rxq *rxq; 3057 struct vtnet_txq *txq; 3058 int i, error; 3059 3060 dev = sc->vtnet_dev; 3061 3062 /* 3063 * Errors here are very difficult to recover from - we cannot 3064 * easily fail because, if this is during boot, we will hang 3065 * when freeing any successfully started taskqueues because 3066 * the scheduler isn't up yet. 3067 * 3068 * Most drivers just ignore the return value - it only fails 3069 * with ENOMEM so an error is not likely. 
3070 */ 3071 for (i = 0; i < sc->vtnet_req_vq_pairs; i++) { 3072 rxq = &sc->vtnet_rxqs[i]; 3073 error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET, 3074 "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id); 3075 if (error) { 3076 device_printf(dev, "failed to start rx taskq %d\n", 3077 rxq->vtnrx_id); 3078 } 3079 3080 txq = &sc->vtnet_txqs[i]; 3081 error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET, 3082 "%s txq %d", device_get_nameunit(dev), txq->vtntx_id); 3083 if (error) { 3084 device_printf(dev, "failed to start tx taskq %d\n", 3085 txq->vtntx_id); 3086 } 3087 } 3088 } 3089 3090 static void 3091 vtnet_free_taskqueues(struct vtnet_softc *sc) 3092 { 3093 struct vtnet_rxq *rxq; 3094 struct vtnet_txq *txq; 3095 int i; 3096 3097 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 3098 rxq = &sc->vtnet_rxqs[i]; 3099 if (rxq->vtnrx_tq != NULL) { 3100 taskqueue_free(rxq->vtnrx_tq); 3101 rxq->vtnrx_tq = NULL; 3102 } 3103 3104 txq = &sc->vtnet_txqs[i]; 3105 if (txq->vtntx_tq != NULL) { 3106 taskqueue_free(txq->vtntx_tq); 3107 txq->vtntx_tq = NULL; 3108 } 3109 } 3110 } 3111 3112 static void 3113 vtnet_drain_taskqueues(struct vtnet_softc *sc) 3114 { 3115 struct vtnet_rxq *rxq; 3116 struct vtnet_txq *txq; 3117 int i; 3118 3119 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 3120 rxq = &sc->vtnet_rxqs[i]; 3121 if (rxq->vtnrx_tq != NULL) 3122 taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 3123 3124 txq = &sc->vtnet_txqs[i]; 3125 if (txq->vtntx_tq != NULL) { 3126 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask); 3127 #ifndef VTNET_LEGACY_TX 3128 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask); 3129 #endif 3130 } 3131 } 3132 } 3133 3134 static void 3135 vtnet_drain_rxtx_queues(struct vtnet_softc *sc) 3136 { 3137 struct vtnet_rxq *rxq; 3138 struct vtnet_txq *txq; 3139 int i; 3140 3141 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 3142 rxq = &sc->vtnet_rxqs[i]; 3143 vtnet_rxq_free_mbufs(rxq); 3144 3145 txq = &sc->vtnet_txqs[i]; 3146 vtnet_txq_free_mbufs(txq); 3147 } 3148 } 3149 3150 static void 3151 vtnet_stop_rendezvous(struct vtnet_softc *sc) 3152 { 3153 struct vtnet_rxq *rxq; 3154 struct vtnet_txq *txq; 3155 int i; 3156 3157 VTNET_CORE_LOCK_ASSERT(sc); 3158 3159 /* 3160 * Lock and unlock the per-queue mutex so we known the stop 3161 * state is visible. Doing only the active queues should be 3162 * sufficient, but it does not cost much extra to do all the 3163 * queues. 3164 */ 3165 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 3166 rxq = &sc->vtnet_rxqs[i]; 3167 VTNET_RXQ_LOCK(rxq); 3168 VTNET_RXQ_UNLOCK(rxq); 3169 3170 txq = &sc->vtnet_txqs[i]; 3171 VTNET_TXQ_LOCK(txq); 3172 VTNET_TXQ_UNLOCK(txq); 3173 } 3174 } 3175 3176 static void 3177 vtnet_stop(struct vtnet_softc *sc) 3178 { 3179 device_t dev; 3180 if_t ifp; 3181 3182 dev = sc->vtnet_dev; 3183 ifp = sc->vtnet_ifp; 3184 3185 VTNET_CORE_LOCK_ASSERT(sc); 3186 3187 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); 3188 sc->vtnet_link_active = 0; 3189 callout_stop(&sc->vtnet_tick_ch); 3190 3191 /* Only advisory. */ 3192 vtnet_disable_interrupts(sc); 3193 3194 #ifdef DEV_NETMAP 3195 /* Stop any pending txsync/rxsync and disable them. */ 3196 netmap_disable_all_rings(ifp); 3197 #endif /* DEV_NETMAP */ 3198 3199 /* 3200 * Stop the host adapter. This resets it to the pre-initialized 3201 * state. It will not generate any interrupts until after it is 3202 * reinitialized. 
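 * Any mbufs still held by the virtqueues are reclaimed by
 * vtnet_drain_rxtx_queues() below.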
3203 */ 3204 virtio_stop(dev); 3205 vtnet_stop_rendezvous(sc); 3206 3207 vtnet_drain_rxtx_queues(sc); 3208 sc->vtnet_act_vq_pairs = 1; 3209 } 3210 3211 static int 3212 vtnet_virtio_reinit(struct vtnet_softc *sc) 3213 { 3214 device_t dev; 3215 if_t ifp; 3216 uint64_t features; 3217 int error; 3218 3219 dev = sc->vtnet_dev; 3220 ifp = sc->vtnet_ifp; 3221 features = sc->vtnet_negotiated_features; 3222 3223 /* 3224 * Re-negotiate with the host, removing any disabled receive 3225 * features. Transmit features are disabled only on our side 3226 * via if_capenable and if_hwassist. 3227 */ 3228 3229 if ((if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0) 3230 features &= ~(VIRTIO_NET_F_GUEST_CSUM | VTNET_LRO_FEATURES); 3231 3232 if ((if_getcapenable(ifp) & IFCAP_LRO) == 0) 3233 features &= ~VTNET_LRO_FEATURES; 3234 3235 if ((if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) == 0) 3236 features &= ~VIRTIO_NET_F_CTRL_VLAN; 3237 3238 error = virtio_reinit(dev, features); 3239 if (error) { 3240 device_printf(dev, "virtio reinit error %d\n", error); 3241 return (error); 3242 } 3243 3244 sc->vtnet_features = features; 3245 virtio_reinit_complete(dev); 3246 3247 return (0); 3248 } 3249 3250 static void 3251 vtnet_init_rx_filters(struct vtnet_softc *sc) 3252 { 3253 if_t ifp; 3254 3255 ifp = sc->vtnet_ifp; 3256 3257 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { 3258 vtnet_rx_filter(sc); 3259 vtnet_rx_filter_mac(sc); 3260 } 3261 3262 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) 3263 vtnet_rx_filter_vlan(sc); 3264 } 3265 3266 static int 3267 vtnet_init_rx_queues(struct vtnet_softc *sc) 3268 { 3269 device_t dev; 3270 if_t ifp; 3271 struct vtnet_rxq *rxq; 3272 int i, clustersz, error; 3273 3274 dev = sc->vtnet_dev; 3275 ifp = sc->vtnet_ifp; 3276 3277 clustersz = vtnet_rx_cluster_size(sc, if_getmtu(ifp)); 3278 sc->vtnet_rx_clustersz = clustersz; 3279 3280 if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) { 3281 sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) + 3282 VTNET_MAX_RX_SIZE, clustersz); 3283 KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, 3284 ("%s: too many rx mbufs %d for %d segments", __func__, 3285 sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); 3286 } else 3287 sc->vtnet_rx_nmbufs = 1; 3288 3289 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 3290 rxq = &sc->vtnet_rxqs[i]; 3291 3292 /* Hold the lock to satisfy asserts. 
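The queue is not yet active at this point, so there is no real contention; the lock is held only because the populate path asserts that it is.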
*/ 3293 VTNET_RXQ_LOCK(rxq); 3294 error = vtnet_rxq_populate(rxq); 3295 VTNET_RXQ_UNLOCK(rxq); 3296 3297 if (error) { 3298 device_printf(dev, "cannot populate Rx queue %d\n", i); 3299 return (error); 3300 } 3301 } 3302 3303 return (0); 3304 } 3305 3306 static int 3307 vtnet_init_tx_queues(struct vtnet_softc *sc) 3308 { 3309 struct vtnet_txq *txq; 3310 int i; 3311 3312 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 3313 txq = &sc->vtnet_txqs[i]; 3314 txq->vtntx_watchdog = 0; 3315 txq->vtntx_intr_threshold = vtnet_txq_intr_threshold(txq); 3316 #ifdef DEV_NETMAP 3317 netmap_reset(NA(sc->vtnet_ifp), NR_TX, i, 0); 3318 #endif /* DEV_NETMAP */ 3319 } 3320 3321 return (0); 3322 } 3323 3324 static int 3325 vtnet_init_rxtx_queues(struct vtnet_softc *sc) 3326 { 3327 int error; 3328 3329 error = vtnet_init_rx_queues(sc); 3330 if (error) 3331 return (error); 3332 3333 error = vtnet_init_tx_queues(sc); 3334 if (error) 3335 return (error); 3336 3337 return (0); 3338 } 3339 3340 static void 3341 vtnet_set_active_vq_pairs(struct vtnet_softc *sc) 3342 { 3343 device_t dev; 3344 int npairs; 3345 3346 dev = sc->vtnet_dev; 3347 3348 if ((sc->vtnet_flags & VTNET_FLAG_MQ) == 0) { 3349 sc->vtnet_act_vq_pairs = 1; 3350 return; 3351 } 3352 3353 npairs = sc->vtnet_req_vq_pairs; 3354 3355 if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { 3356 device_printf(dev, "cannot set active queue pairs to %d, " 3357 "falling back to 1 queue pair\n", npairs); 3358 npairs = 1; 3359 } 3360 3361 sc->vtnet_act_vq_pairs = npairs; 3362 } 3363 3364 static void 3365 vtnet_update_rx_offloads(struct vtnet_softc *sc) 3366 { 3367 if_t ifp; 3368 uint64_t features; 3369 int error; 3370 3371 ifp = sc->vtnet_ifp; 3372 features = sc->vtnet_features; 3373 3374 VTNET_CORE_LOCK_ASSERT(sc); 3375 3376 if (if_getcapabilities(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { 3377 if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) 3378 features |= VIRTIO_NET_F_GUEST_CSUM; 3379 else 3380 features &= ~VIRTIO_NET_F_GUEST_CSUM; 3381 } 3382 3383 if (if_getcapabilities(ifp) & IFCAP_LRO && !vtnet_software_lro(sc)) { 3384 if (if_getcapenable(ifp) & IFCAP_LRO) 3385 features |= VTNET_LRO_FEATURES; 3386 else 3387 features &= ~VTNET_LRO_FEATURES; 3388 } 3389 3390 error = vtnet_ctrl_guest_offloads(sc, 3391 features & (VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | 3392 VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN | 3393 VIRTIO_NET_F_GUEST_UFO)); 3394 if (error) { 3395 device_printf(sc->vtnet_dev, 3396 "%s: cannot update Rx features\n", __func__); 3397 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 3398 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); 3399 vtnet_init_locked(sc, 0); 3400 } 3401 } else 3402 sc->vtnet_features = features; 3403 } 3404 3405 static int 3406 vtnet_reinit(struct vtnet_softc *sc) 3407 { 3408 if_t ifp; 3409 int error; 3410 3411 ifp = sc->vtnet_ifp; 3412 3413 bcopy(if_getlladdr(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); 3414 3415 error = vtnet_virtio_reinit(sc); 3416 if (error) 3417 return (error); 3418 3419 vtnet_set_macaddr(sc); 3420 vtnet_set_active_vq_pairs(sc); 3421 3422 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) 3423 vtnet_init_rx_filters(sc); 3424 3425 if_sethwassist(ifp, 0); 3426 if (if_getcapenable(ifp) & IFCAP_TXCSUM) 3427 if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD, 0); 3428 if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) 3429 if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD_IPV6, 0); 3430 if (if_getcapenable(ifp) & IFCAP_TSO4) 3431 if_sethwassistbits(ifp, CSUM_IP_TSO, 0); 3432 if (if_getcapenable(ifp) & IFCAP_TSO6) 3433 if_sethwassistbits(ifp, 
CSUM_IP6_TSO, 0); 3434 3435 error = vtnet_init_rxtx_queues(sc); 3436 if (error) 3437 return (error); 3438 3439 return (0); 3440 } 3441 3442 static void 3443 vtnet_init_locked(struct vtnet_softc *sc, int init_mode) 3444 { 3445 if_t ifp; 3446 3447 ifp = sc->vtnet_ifp; 3448 3449 VTNET_CORE_LOCK_ASSERT(sc); 3450 3451 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) 3452 return; 3453 3454 vtnet_stop(sc); 3455 3456 #ifdef DEV_NETMAP 3457 /* Once stopped we can update the netmap flags, if necessary. */ 3458 switch (init_mode) { 3459 case VTNET_INIT_NETMAP_ENTER: 3460 nm_set_native_flags(NA(ifp)); 3461 break; 3462 case VTNET_INIT_NETMAP_EXIT: 3463 nm_clear_native_flags(NA(ifp)); 3464 break; 3465 } 3466 #endif /* DEV_NETMAP */ 3467 3468 if (vtnet_reinit(sc) != 0) { 3469 vtnet_stop(sc); 3470 return; 3471 } 3472 3473 if_setdrvflagbits(ifp, IFF_DRV_RUNNING, 0); 3474 vtnet_update_link_status(sc); 3475 vtnet_enable_interrupts(sc); 3476 callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); 3477 3478 #ifdef DEV_NETMAP 3479 /* Re-enable txsync/rxsync. */ 3480 netmap_enable_all_rings(ifp); 3481 #endif /* DEV_NETMAP */ 3482 } 3483 3484 static void 3485 vtnet_init(void *xsc) 3486 { 3487 struct vtnet_softc *sc; 3488 3489 sc = xsc; 3490 3491 VTNET_CORE_LOCK(sc); 3492 vtnet_init_locked(sc, 0); 3493 VTNET_CORE_UNLOCK(sc); 3494 } 3495 3496 static void 3497 vtnet_free_ctrl_vq(struct vtnet_softc *sc) 3498 { 3499 3500 /* 3501 * The control virtqueue is only polled and therefore it should 3502 * already be empty. 3503 */ 3504 KASSERT(virtqueue_empty(sc->vtnet_ctrl_vq), 3505 ("%s: ctrl vq %p not empty", __func__, sc->vtnet_ctrl_vq)); 3506 } 3507 3508 static void 3509 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie, 3510 struct sglist *sg, int readable, int writable) 3511 { 3512 struct virtqueue *vq; 3513 3514 vq = sc->vtnet_ctrl_vq; 3515 3516 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ); 3517 VTNET_CORE_LOCK_ASSERT(sc); 3518 3519 if (!virtqueue_empty(vq)) 3520 return; 3521 3522 /* 3523 * Poll for the response, but the command is likely completed before 3524 * returning from the notify. 3525 */ 3526 if (virtqueue_enqueue(vq, cookie, sg, readable, writable) == 0) { 3527 virtqueue_notify(vq); 3528 virtqueue_poll(vq, NULL); 3529 } 3530 } 3531 3532 static int 3533 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) 3534 { 3535 struct sglist_seg segs[3]; 3536 struct sglist sg; 3537 struct { 3538 struct virtio_net_ctrl_hdr hdr __aligned(2); 3539 uint8_t pad1; 3540 uint8_t addr[ETHER_ADDR_LEN] __aligned(8); 3541 uint8_t pad2; 3542 uint8_t ack; 3543 } s; 3544 int error; 3545 3546 error = 0; 3547 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC); 3548 3549 s.hdr.class = VIRTIO_NET_CTRL_MAC; 3550 s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; 3551 bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN); 3552 s.ack = VIRTIO_NET_ERR; 3553 3554 sglist_init(&sg, nitems(segs), segs); 3555 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3556 error |= sglist_append(&sg, &s.addr[0], ETHER_ADDR_LEN); 3557 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3558 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3559 3560 if (error == 0) 3561 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3562 3563 return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); 3564 } 3565 3566 static int 3567 vtnet_ctrl_guest_offloads(struct vtnet_softc *sc, uint64_t offloads) 3568 { 3569 struct sglist_seg segs[3]; 3570 struct sglist sg; 3571 struct { 3572 struct virtio_net_ctrl_hdr hdr __aligned(2); 3573 uint8_t pad1; 3574 uint64_t offloads __aligned(8); 3575 uint8_t pad2; 3576 uint8_t ack; 3577 } s; 3578 int error; 3579 3580 error = 0; 3581 MPASS(sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 3582 3583 s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; 3584 s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; 3585 s.offloads = vtnet_gtoh64(sc, offloads); 3586 s.ack = VIRTIO_NET_ERR; 3587 3588 sglist_init(&sg, nitems(segs), segs); 3589 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3590 error |= sglist_append(&sg, &s.offloads, sizeof(uint64_t)); 3591 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3592 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3593 3594 if (error == 0) 3595 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3596 3597 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3598 } 3599 3600 static int 3601 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs) 3602 { 3603 struct sglist_seg segs[3]; 3604 struct sglist sg; 3605 struct { 3606 struct virtio_net_ctrl_hdr hdr __aligned(2); 3607 uint8_t pad1; 3608 struct virtio_net_ctrl_mq mq __aligned(2); 3609 uint8_t pad2; 3610 uint8_t ack; 3611 } s; 3612 int error; 3613 3614 error = 0; 3615 MPASS(sc->vtnet_flags & VTNET_FLAG_MQ); 3616 3617 s.hdr.class = VIRTIO_NET_CTRL_MQ; 3618 s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; 3619 s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs); 3620 s.ack = VIRTIO_NET_ERR; 3621 3622 sglist_init(&sg, nitems(segs), segs); 3623 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3624 error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); 3625 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3626 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3627 3628 if (error == 0) 3629 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3630 3631 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3632 } 3633 3634 static int 3635 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, bool on) 3636 { 3637 struct sglist_seg segs[3]; 3638 struct sglist sg; 3639 struct { 3640 struct virtio_net_ctrl_hdr hdr __aligned(2); 3641 uint8_t pad1; 3642 uint8_t onoff; 3643 uint8_t pad2; 3644 uint8_t ack; 3645 } s; 3646 int error; 3647 3648 error = 0; 3649 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX); 3650 3651 s.hdr.class = VIRTIO_NET_CTRL_RX; 3652 s.hdr.cmd = cmd; 3653 s.onoff = on; 3654 s.ack = VIRTIO_NET_ERR; 3655 3656 sglist_init(&sg, nitems(segs), segs); 3657 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3658 error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); 3659 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3660 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3661 3662 if (error == 0) 3663 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3664 3665 return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); 3666 } 3667 3668 static int 3669 vtnet_set_promisc(struct vtnet_softc *sc, bool on) 3670 { 3671 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on)); 3672 } 3673 3674 static int 3675 vtnet_set_allmulti(struct vtnet_softc *sc, bool on) 3676 { 3677 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); 3678 } 3679 3680 static void 3681 vtnet_rx_filter(struct vtnet_softc *sc) 3682 { 3683 device_t dev; 3684 if_t ifp; 3685 3686 dev = sc->vtnet_dev; 3687 ifp = sc->vtnet_ifp; 3688 3689 VTNET_CORE_LOCK_ASSERT(sc); 3690 3691 if (vtnet_set_promisc(sc, if_getflags(ifp) & IFF_PROMISC) != 0) { 3692 device_printf(dev, "cannot %s promiscuous mode\n", 3693 if_getflags(ifp) & IFF_PROMISC ? "enable" : "disable"); 3694 } 3695 3696 if (vtnet_set_allmulti(sc, if_getflags(ifp) & IFF_ALLMULTI) != 0) { 3697 device_printf(dev, "cannot %s all-multicast mode\n", 3698 if_getflags(ifp) & IFF_ALLMULTI ? "enable" : "disable"); 3699 } 3700 } 3701 3702 static u_int 3703 vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt) 3704 { 3705 struct vtnet_softc *sc = arg; 3706 3707 if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0) 3708 return (0); 3709 3710 if (ucnt < VTNET_MAX_MAC_ENTRIES) 3711 bcopy(LLADDR(sdl), 3712 &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt], 3713 ETHER_ADDR_LEN); 3714 3715 return (1); 3716 } 3717 3718 static u_int 3719 vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt) 3720 { 3721 struct vtnet_mac_filter *filter = arg; 3722 3723 if (mcnt < VTNET_MAX_MAC_ENTRIES) 3724 bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt], 3725 ETHER_ADDR_LEN); 3726 3727 return (1); 3728 } 3729 3730 static void 3731 vtnet_rx_filter_mac(struct vtnet_softc *sc) 3732 { 3733 struct virtio_net_ctrl_hdr hdr __aligned(2); 3734 struct vtnet_mac_filter *filter; 3735 struct sglist_seg segs[4]; 3736 struct sglist sg; 3737 if_t ifp; 3738 bool promisc, allmulti; 3739 u_int ucnt, mcnt; 3740 int error; 3741 uint8_t ack; 3742 3743 ifp = sc->vtnet_ifp; 3744 filter = sc->vtnet_mac_filter; 3745 error = 0; 3746 3747 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX); 3748 VTNET_CORE_LOCK_ASSERT(sc); 3749 3750 /* Unicast MAC addresses: */ 3751 ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc); 3752 promisc = (ucnt > VTNET_MAX_MAC_ENTRIES); 3753 3754 if (promisc) { 3755 ucnt = 0; 3756 if_printf(ifp, "more than %d MAC addresses assigned, " 3757 "falling back to promiscuous mode\n", 3758 VTNET_MAX_MAC_ENTRIES); 3759 } 3760 3761 /* Multicast MAC addresses: */ 3762 mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter); 3763 allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES); 3764 3765 if (allmulti) { 3766 mcnt = 0; 3767 if_printf(ifp, "more than %d multicast MAC addresses " 3768 "assigned, falling back to all-multicast mode\n", 3769 VTNET_MAX_MAC_ENTRIES); 3770 } 3771 3772 if (promisc && allmulti) 3773 goto out; 3774 3775 filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt); 3776 filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt); 3777 3778 hdr.class = VIRTIO_NET_CTRL_MAC; 3779 hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; 3780 ack = VIRTIO_NET_ERR; 3781 3782 sglist_init(&sg, nitems(segs), segs); 3783 error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); 3784 error |= sglist_append(&sg, &filter->vmf_unicast, 3785 sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN); 3786 error |= sglist_append(&sg, &filter->vmf_multicast, 3787 sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN); 3788 error |= sglist_append(&sg, &ack, sizeof(uint8_t)); 3789 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3790 
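	/*
	 * The table-set command carries the control header, the unicast table,
	 * the multicast table (each table is an entry count followed by the
	 * MAC addresses), and a writable ack byte for the host's status.
	 */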
3791 if (error == 0) 3792 vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); 3793 if (ack != VIRTIO_NET_OK) 3794 if_printf(ifp, "error setting host MAC filter table\n"); 3795 3796 out: 3797 if (promisc != 0 && vtnet_set_promisc(sc, true) != 0) 3798 if_printf(ifp, "cannot enable promiscuous mode\n"); 3799 if (allmulti != 0 && vtnet_set_allmulti(sc, true) != 0) 3800 if_printf(ifp, "cannot enable all-multicast mode\n"); 3801 } 3802 3803 static int 3804 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) 3805 { 3806 struct sglist_seg segs[3]; 3807 struct sglist sg; 3808 struct { 3809 struct virtio_net_ctrl_hdr hdr __aligned(2); 3810 uint8_t pad1; 3811 uint16_t tag __aligned(2); 3812 uint8_t pad2; 3813 uint8_t ack; 3814 } s; 3815 int error; 3816 3817 error = 0; 3818 MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER); 3819 3820 s.hdr.class = VIRTIO_NET_CTRL_VLAN; 3821 s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; 3822 s.tag = vtnet_gtoh16(sc, tag); 3823 s.ack = VIRTIO_NET_ERR; 3824 3825 sglist_init(&sg, nitems(segs), segs); 3826 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3827 error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); 3828 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3829 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3830 3831 if (error == 0) 3832 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3833 3834 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3835 } 3836 3837 static void 3838 vtnet_rx_filter_vlan(struct vtnet_softc *sc) 3839 { 3840 int i, bit; 3841 uint32_t w; 3842 uint16_t tag; 3843 3844 MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER); 3845 VTNET_CORE_LOCK_ASSERT(sc); 3846 3847 /* Enable the filter for each configured VLAN. */ 3848 for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { 3849 w = sc->vtnet_vlan_filter[i]; 3850 3851 while ((bit = ffs(w) - 1) != -1) { 3852 w &= ~(1 << bit); 3853 tag = sizeof(w) * CHAR_BIT * i + bit; 3854 3855 if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) { 3856 device_printf(sc->vtnet_dev, 3857 "cannot enable VLAN %d filter\n", tag); 3858 } 3859 } 3860 } 3861 } 3862 3863 static void 3864 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) 3865 { 3866 if_t ifp; 3867 int idx, bit; 3868 3869 ifp = sc->vtnet_ifp; 3870 idx = (tag >> 5) & 0x7F; 3871 bit = tag & 0x1F; 3872 3873 if (tag == 0 || tag > 4095) 3874 return; 3875 3876 VTNET_CORE_LOCK(sc); 3877 3878 if (add) 3879 sc->vtnet_vlan_filter[idx] |= (1 << bit); 3880 else 3881 sc->vtnet_vlan_filter[idx] &= ~(1 << bit); 3882 3883 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER && 3884 if_getdrvflags(ifp) & IFF_DRV_RUNNING && 3885 vtnet_exec_vlan_filter(sc, add, tag) != 0) { 3886 device_printf(sc->vtnet_dev, 3887 "cannot %s VLAN %d %s the host filter table\n", 3888 add ? "add" : "remove", tag, add ? 
"to" : "from"); 3889 } 3890 3891 VTNET_CORE_UNLOCK(sc); 3892 } 3893 3894 static void 3895 vtnet_register_vlan(void *arg, if_t ifp, uint16_t tag) 3896 { 3897 3898 if (if_getsoftc(ifp) != arg) 3899 return; 3900 3901 vtnet_update_vlan_filter(arg, 1, tag); 3902 } 3903 3904 static void 3905 vtnet_unregister_vlan(void *arg, if_t ifp, uint16_t tag) 3906 { 3907 3908 if (if_getsoftc(ifp) != arg) 3909 return; 3910 3911 vtnet_update_vlan_filter(arg, 0, tag); 3912 } 3913 3914 static void 3915 vtnet_update_speed_duplex(struct vtnet_softc *sc) 3916 { 3917 if_t ifp; 3918 uint32_t speed; 3919 3920 ifp = sc->vtnet_ifp; 3921 3922 if ((sc->vtnet_features & VIRTIO_NET_F_SPEED_DUPLEX) == 0) 3923 return; 3924 3925 /* BMV: Ignore duplex. */ 3926 speed = virtio_read_dev_config_4(sc->vtnet_dev, 3927 offsetof(struct virtio_net_config, speed)); 3928 if (speed != UINT32_MAX) 3929 if_setbaudrate(ifp, IF_Mbps(speed)); 3930 } 3931 3932 static int 3933 vtnet_is_link_up(struct vtnet_softc *sc) 3934 { 3935 uint16_t status; 3936 3937 if ((sc->vtnet_features & VIRTIO_NET_F_STATUS) == 0) 3938 return (1); 3939 3940 status = virtio_read_dev_config_2(sc->vtnet_dev, 3941 offsetof(struct virtio_net_config, status)); 3942 3943 return ((status & VIRTIO_NET_S_LINK_UP) != 0); 3944 } 3945 3946 static void 3947 vtnet_update_link_status(struct vtnet_softc *sc) 3948 { 3949 if_t ifp; 3950 int link; 3951 3952 ifp = sc->vtnet_ifp; 3953 VTNET_CORE_LOCK_ASSERT(sc); 3954 link = vtnet_is_link_up(sc); 3955 3956 /* Notify if the link status has changed. */ 3957 if (link != 0 && sc->vtnet_link_active == 0) { 3958 vtnet_update_speed_duplex(sc); 3959 sc->vtnet_link_active = 1; 3960 if_link_state_change(ifp, LINK_STATE_UP); 3961 } else if (link == 0 && sc->vtnet_link_active != 0) { 3962 sc->vtnet_link_active = 0; 3963 if_link_state_change(ifp, LINK_STATE_DOWN); 3964 } 3965 } 3966 3967 static int 3968 vtnet_ifmedia_upd(if_t ifp __unused) 3969 { 3970 return (EOPNOTSUPP); 3971 } 3972 3973 static void 3974 vtnet_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr) 3975 { 3976 struct vtnet_softc *sc; 3977 3978 sc = if_getsoftc(ifp); 3979 3980 ifmr->ifm_status = IFM_AVALID; 3981 ifmr->ifm_active = IFM_ETHER; 3982 3983 VTNET_CORE_LOCK(sc); 3984 if (vtnet_is_link_up(sc) != 0) { 3985 ifmr->ifm_status |= IFM_ACTIVE; 3986 ifmr->ifm_active |= IFM_10G_T | IFM_FDX; 3987 } else 3988 ifmr->ifm_active |= IFM_NONE; 3989 VTNET_CORE_UNLOCK(sc); 3990 } 3991 3992 static void 3993 vtnet_get_macaddr(struct vtnet_softc *sc) 3994 { 3995 3996 if (sc->vtnet_flags & VTNET_FLAG_MAC) { 3997 virtio_read_device_config_array(sc->vtnet_dev, 3998 offsetof(struct virtio_net_config, mac), 3999 &sc->vtnet_hwaddr[0], sizeof(uint8_t), ETHER_ADDR_LEN); 4000 } else { 4001 /* Generate a random locally administered unicast address. */ 4002 sc->vtnet_hwaddr[0] = 0xB2; 4003 arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); 4004 } 4005 } 4006 4007 static void 4008 vtnet_set_macaddr(struct vtnet_softc *sc) 4009 { 4010 device_t dev; 4011 int error; 4012 4013 dev = sc->vtnet_dev; 4014 4015 if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { 4016 error = vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr); 4017 if (error) 4018 device_printf(dev, "unable to set MAC address\n"); 4019 return; 4020 } 4021 4022 /* MAC in config is read-only in modern VirtIO. 
*/ 4023 if (!vtnet_modern(sc) && sc->vtnet_flags & VTNET_FLAG_MAC) { 4024 for (int i = 0; i < ETHER_ADDR_LEN; i++) { 4025 virtio_write_dev_config_1(dev, 4026 offsetof(struct virtio_net_config, mac) + i, 4027 sc->vtnet_hwaddr[i]); 4028 } 4029 } 4030 } 4031 4032 static void 4033 vtnet_attached_set_macaddr(struct vtnet_softc *sc) 4034 { 4035 4036 /* Assign MAC address if it was generated. */ 4037 if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) 4038 vtnet_set_macaddr(sc); 4039 } 4040 4041 static void 4042 vtnet_vlan_tag_remove(struct mbuf *m) 4043 { 4044 struct ether_vlan_header *evh; 4045 4046 evh = mtod(m, struct ether_vlan_header *); 4047 m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); 4048 m->m_flags |= M_VLANTAG; 4049 4050 /* Strip the 802.1Q header. */ 4051 bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, 4052 ETHER_HDR_LEN - ETHER_TYPE_LEN); 4053 m_adj(m, ETHER_VLAN_ENCAP_LEN); 4054 } 4055 4056 static void 4057 vtnet_set_rx_process_limit(struct vtnet_softc *sc) 4058 { 4059 int limit; 4060 4061 limit = vtnet_tunable_int(sc, "rx_process_limit", 4062 vtnet_rx_process_limit); 4063 if (limit < 0) 4064 limit = INT_MAX; 4065 sc->vtnet_rx_process_limit = limit; 4066 } 4067 4068 static void 4069 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, 4070 struct sysctl_oid_list *child, struct vtnet_rxq *rxq) 4071 { 4072 struct sysctl_oid *node; 4073 struct sysctl_oid_list *list; 4074 struct vtnet_rxq_stats *stats; 4075 char namebuf[16]; 4076 4077 snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id); 4078 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 4079 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue"); 4080 list = SYSCTL_CHILDREN(node); 4081 4082 stats = &rxq->vtnrx_stats; 4083 4084 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, 4085 &stats->vrxs_ipackets, "Receive packets"); 4086 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, 4087 &stats->vrxs_ibytes, "Receive bytes"); 4088 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, 4089 &stats->vrxs_iqdrops, "Receive drops"); 4090 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, 4091 &stats->vrxs_ierrors, "Receive errors"); 4092 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, 4093 &stats->vrxs_csum, "Receive checksum offloaded"); 4094 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, 4095 &stats->vrxs_csum_failed, "Receive checksum offload failed"); 4096 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro", CTLFLAG_RD, 4097 &stats->vrxs_host_lro, "Receive host segmentation offloaded"); 4098 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, 4099 &stats->vrxs_rescheduled, 4100 "Receive interrupt handler rescheduled"); 4101 } 4102 4103 static void 4104 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx, 4105 struct sysctl_oid_list *child, struct vtnet_txq *txq) 4106 { 4107 struct sysctl_oid *node; 4108 struct sysctl_oid_list *list; 4109 struct vtnet_txq_stats *stats; 4110 char namebuf[16]; 4111 4112 snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id); 4113 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 4114 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue"); 4115 list = SYSCTL_CHILDREN(node); 4116 4117 stats = &txq->vtntx_stats; 4118 4119 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, 4120 &stats->vtxs_opackets, "Transmit packets"); 4121 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, 4122 &stats->vtxs_obytes, "Transmit bytes"); 4123 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, 4124 
&stats->vtxs_omcasts, "Transmit multicasts"); 4125 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, 4126 &stats->vtxs_csum, "Transmit checksum offloaded"); 4127 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, 4128 &stats->vtxs_tso, "Transmit TCP segmentation offloaded"); 4129 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, 4130 &stats->vtxs_rescheduled, 4131 "Transmit interrupt handler rescheduled"); 4132 } 4133 4134 static void 4135 vtnet_setup_queue_sysctl(struct vtnet_softc *sc) 4136 { 4137 device_t dev; 4138 struct sysctl_ctx_list *ctx; 4139 struct sysctl_oid *tree; 4140 struct sysctl_oid_list *child; 4141 int i; 4142 4143 dev = sc->vtnet_dev; 4144 ctx = device_get_sysctl_ctx(dev); 4145 tree = device_get_sysctl_tree(dev); 4146 child = SYSCTL_CHILDREN(tree); 4147 4148 for (i = 0; i < sc->vtnet_req_vq_pairs; i++) { 4149 vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]); 4150 vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); 4151 } 4152 } 4153 4154 static void 4155 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, 4156 struct sysctl_oid_list *child, struct vtnet_softc *sc) 4157 { 4158 struct vtnet_statistics *stats; 4159 struct vtnet_rxq_stats rxaccum; 4160 struct vtnet_txq_stats txaccum; 4161 4162 vtnet_accum_stats(sc, &rxaccum, &txaccum); 4163 4164 stats = &sc->vtnet_stats; 4165 stats->rx_csum_offloaded = rxaccum.vrxs_csum; 4166 stats->rx_csum_failed = rxaccum.vrxs_csum_failed; 4167 stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled; 4168 stats->tx_csum_offloaded = txaccum.vtxs_csum; 4169 stats->tx_tso_offloaded = txaccum.vtxs_tso; 4170 stats->tx_task_rescheduled = txaccum.vtxs_rescheduled; 4171 4172 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed", 4173 CTLFLAG_RD, &stats->mbuf_alloc_failed, 4174 "Mbuf cluster allocation failures"); 4175 4176 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large", 4177 CTLFLAG_RD, &stats->rx_frame_too_large, 4178 "Received frame larger than the mbuf chain"); 4179 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed", 4180 CTLFLAG_RD, &stats->rx_enq_replacement_failed, 4181 "Enqueuing the replacement receive mbuf failed"); 4182 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed", 4183 CTLFLAG_RD, &stats->rx_mergeable_failed, 4184 "Mergeable buffers receive failures"); 4185 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", 4186 CTLFLAG_RD, &stats->rx_csum_bad_ethtype, 4187 "Received checksum offloaded buffer with unsupported " 4188 "Ethernet type"); 4189 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", 4190 CTLFLAG_RD, &stats->rx_csum_bad_ipproto, 4191 "Received checksum offloaded buffer with incorrect IP protocol"); 4192 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset", 4193 CTLFLAG_RD, &stats->rx_csum_bad_offset, 4194 "Received checksum offloaded buffer with incorrect offset"); 4195 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto", 4196 CTLFLAG_RD, &stats->rx_csum_bad_proto, 4197 "Received checksum offloaded buffer with incorrect protocol"); 4198 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed", 4199 CTLFLAG_RD, &stats->rx_csum_failed, 4200 "Received buffer checksum offload failed"); 4201 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded", 4202 CTLFLAG_RD, &stats->rx_csum_offloaded, 4203 "Received buffer checksum offload succeeded"); 4204 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled", 4205 CTLFLAG_RD, &stats->rx_task_rescheduled, 4206 "Times the receive interrupt task rescheduled itself"); 
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_unknown_ethtype",
	    CTLFLAG_RD, &stats->tx_csum_unknown_ethtype,
	    "Aborted transmit of checksum offloaded buffer with unknown "
	    "Ethernet type");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch",
	    CTLFLAG_RD, &stats->tx_csum_proto_mismatch,
	    "Aborted transmit of checksum offloaded buffer because of "
	    "mismatched protocols");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp",
	    CTLFLAG_RD, &stats->tx_tso_not_tcp,
	    "Aborted transmit of TSO buffer with non-TCP protocol");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum",
	    CTLFLAG_RD, &stats->tx_tso_without_csum,
	    "Aborted transmit of TSO buffer without TCP checksum offload");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged",
	    CTLFLAG_RD, &stats->tx_defragged,
	    "Transmit mbufs defragged");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed",
	    CTLFLAG_RD, &stats->tx_defrag_failed,
	    "Aborted transmit of buffer because defrag failed");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded",
	    CTLFLAG_RD, &stats->tx_csum_offloaded,
	    "Offloaded checksum of transmitted buffer");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded",
	    CTLFLAG_RD, &stats->tx_tso_offloaded,
	    "Segmentation offload of transmitted buffer");
	SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled",
	    CTLFLAG_RD, &stats->tx_task_rescheduled,
	    "Times the transmit interrupt task rescheduled itself");
}

static void
vtnet_setup_sysctl(struct vtnet_softc *sc)
{
	device_t dev;
	struct sysctl_ctx_list *ctx;
	struct sysctl_oid *tree;
	struct sysctl_oid_list *child;

	dev = sc->vtnet_dev;
	ctx = device_get_sysctl_ctx(dev);
	tree = device_get_sysctl_tree(dev);
	child = SYSCTL_CHILDREN(tree);

	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs",
	    CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0,
	    "Maximum number of supported virtqueue pairs");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "req_vq_pairs",
	    CTLFLAG_RD, &sc->vtnet_req_vq_pairs, 0,
	    "Number of requested virtqueue pairs");
	SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs",
	    CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0,
	    "Number of active virtqueue pairs");

	vtnet_setup_stat_sysctl(ctx, child, sc);
}

static void
vtnet_load_tunables(struct vtnet_softc *sc)
{

	sc->vtnet_lro_entry_count = vtnet_tunable_int(sc,
	    "lro_entry_count", vtnet_lro_entry_count);
	if (sc->vtnet_lro_entry_count < TCP_LRO_ENTRIES)
		sc->vtnet_lro_entry_count = TCP_LRO_ENTRIES;

	sc->vtnet_lro_mbufq_depth = vtnet_tunable_int(sc,
	    "lro_mbufq_depth", vtnet_lro_mbufq_depth);
}

static int
vtnet_rxq_enable_intr(struct vtnet_rxq *rxq)
{

	return (virtqueue_enable_intr(rxq->vtnrx_vq));
}

static void
vtnet_rxq_disable_intr(struct vtnet_rxq *rxq)
{

	virtqueue_disable_intr(rxq->vtnrx_vq);
}

static int
vtnet_txq_enable_intr(struct vtnet_txq *txq)
{
	struct virtqueue *vq;

	vq = txq->vtntx_vq;

	if (vtnet_txq_below_threshold(txq) != 0)
		return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG));

	/*
	 * The free count is above our threshold. Keep the Tx interrupt
	 * disabled until the queue is fuller.
4305 */ 4306 return (0); 4307 } 4308 4309 static void 4310 vtnet_txq_disable_intr(struct vtnet_txq *txq) 4311 { 4312 4313 virtqueue_disable_intr(txq->vtntx_vq); 4314 } 4315 4316 static void 4317 vtnet_enable_rx_interrupts(struct vtnet_softc *sc) 4318 { 4319 struct vtnet_rxq *rxq; 4320 int i; 4321 4322 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 4323 rxq = &sc->vtnet_rxqs[i]; 4324 if (vtnet_rxq_enable_intr(rxq) != 0) 4325 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 4326 } 4327 } 4328 4329 static void 4330 vtnet_enable_tx_interrupts(struct vtnet_softc *sc) 4331 { 4332 int i; 4333 4334 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 4335 vtnet_txq_enable_intr(&sc->vtnet_txqs[i]); 4336 } 4337 4338 static void 4339 vtnet_enable_interrupts(struct vtnet_softc *sc) 4340 { 4341 4342 vtnet_enable_rx_interrupts(sc); 4343 vtnet_enable_tx_interrupts(sc); 4344 } 4345 4346 static void 4347 vtnet_disable_rx_interrupts(struct vtnet_softc *sc) 4348 { 4349 int i; 4350 4351 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) 4352 vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); 4353 } 4354 4355 static void 4356 vtnet_disable_tx_interrupts(struct vtnet_softc *sc) 4357 { 4358 int i; 4359 4360 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) 4361 vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); 4362 } 4363 4364 static void 4365 vtnet_disable_interrupts(struct vtnet_softc *sc) 4366 { 4367 4368 vtnet_disable_rx_interrupts(sc); 4369 vtnet_disable_tx_interrupts(sc); 4370 } 4371 4372 static int 4373 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) 4374 { 4375 char path[64]; 4376 4377 snprintf(path, sizeof(path), 4378 "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob); 4379 TUNABLE_INT_FETCH(path, &def); 4380 4381 return (def); 4382 } 4383 4384 #ifdef DEBUGNET 4385 static void 4386 vtnet_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize) 4387 { 4388 struct vtnet_softc *sc; 4389 4390 sc = if_getsoftc(ifp); 4391 4392 VTNET_CORE_LOCK(sc); 4393 *nrxr = sc->vtnet_req_vq_pairs; 4394 *ncl = DEBUGNET_MAX_IN_FLIGHT; 4395 *clsize = sc->vtnet_rx_clustersz; 4396 VTNET_CORE_UNLOCK(sc); 4397 } 4398 4399 static void 4400 vtnet_debugnet_event(if_t ifp __unused, enum debugnet_ev event) 4401 { 4402 struct vtnet_softc *sc; 4403 static bool sw_lro_enabled = false; 4404 4405 /* 4406 * Disable software LRO, since it would require entering the network 4407 * epoch when calling vtnet_txq_eof() in vtnet_debugnet_poll(). 
4408 */ 4409 sc = if_getsoftc(ifp); 4410 switch (event) { 4411 case DEBUGNET_START: 4412 sw_lro_enabled = (sc->vtnet_flags & VTNET_FLAG_SW_LRO) != 0; 4413 if (sw_lro_enabled) 4414 sc->vtnet_flags &= ~VTNET_FLAG_SW_LRO; 4415 break; 4416 case DEBUGNET_END: 4417 if (sw_lro_enabled) 4418 sc->vtnet_flags |= VTNET_FLAG_SW_LRO; 4419 break; 4420 } 4421 } 4422 4423 static int 4424 vtnet_debugnet_transmit(if_t ifp, struct mbuf *m) 4425 { 4426 struct vtnet_softc *sc; 4427 struct vtnet_txq *txq; 4428 int error; 4429 4430 sc = if_getsoftc(ifp); 4431 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 4432 IFF_DRV_RUNNING) 4433 return (EBUSY); 4434 4435 txq = &sc->vtnet_txqs[0]; 4436 error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE); 4437 if (error == 0) 4438 (void)vtnet_txq_notify(txq); 4439 return (error); 4440 } 4441 4442 static int 4443 vtnet_debugnet_poll(if_t ifp, int count) 4444 { 4445 struct vtnet_softc *sc; 4446 int i; 4447 4448 sc = if_getsoftc(ifp); 4449 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 4450 IFF_DRV_RUNNING) 4451 return (EBUSY); 4452 4453 (void)vtnet_txq_eof(&sc->vtnet_txqs[0]); 4454 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 4455 (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]); 4456 return (0); 4457 } 4458 #endif /* DEBUGNET */ 4459