/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO network devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/random.h>
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/taskqueue.h>
#include <sys/smp.h>
#include <machine/smp.h>

#include <vm/uma.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_media.h>
#include <net/if_vlan_var.h>

#include <net/bpf.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/sctp.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/network/virtio_net.h>
#include <dev/virtio/network/if_vtnetvar.h>

#include "virtio_if.h"

#include "opt_inet.h"
#include "opt_inet6.h"

static int	vtnet_modevent(module_t, int, void *);

static int	vtnet_probe(device_t);
static int	vtnet_attach(device_t);
static int	vtnet_detach(device_t);
static int	vtnet_suspend(device_t);
static int	vtnet_resume(device_t);
static int	vtnet_shutdown(device_t);
static int	vtnet_attach_completed(device_t);
static int	vtnet_config_change(device_t);

static void	vtnet_negotiate_features(struct vtnet_softc *);
static void	vtnet_setup_features(struct vtnet_softc *);
static int	vtnet_init_rxq(struct vtnet_softc *, int);
static int	vtnet_init_txq(struct vtnet_softc *, int);
static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
static void	vtnet_free_rx_filters(struct vtnet_softc *);
static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
static int	vtnet_setup_interface(struct vtnet_softc *);
static int	vtnet_change_mtu(struct vtnet_softc *, int);
static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t);
static uint64_t	vtnet_get_counter(struct ifnet *, ift_counter);

static int	vtnet_rxq_populate(struct vtnet_rxq *);
static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
static struct mbuf *
		vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
static int	vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
		    struct mbuf *, int);
static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
		    struct virtio_net_hdr *);
static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
static void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
static int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
static void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
		    struct virtio_net_hdr *);
static int	vtnet_rxq_eof(struct vtnet_rxq *);
static void	vtnet_rx_vq_intr(void *);
static void	vtnet_rxq_tq_intr(void *, int);

static int	vtnet_txq_below_threshold(struct vtnet_txq *);
static int	vtnet_txq_notify(struct vtnet_txq *);
static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
		    int *, int *, int *);
static int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
		    int, struct virtio_net_hdr *);
static struct mbuf *
		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
		    struct virtio_net_hdr *);
static int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
		    struct vtnet_tx_header *);
static int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **);
#ifdef VTNET_LEGACY_TX
static void	vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
static void	vtnet_start(struct ifnet *);
#else
static int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
static int	vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
static void	vtnet_txq_tq_deferred(void *, int);
#endif
static void	vtnet_txq_start(struct vtnet_txq *);
static void	vtnet_txq_tq_intr(void *, int);
static int	vtnet_txq_eof(struct vtnet_txq *);
static void	vtnet_tx_vq_intr(void *);
static void	vtnet_tx_start_all(struct vtnet_softc *);

#ifndef VTNET_LEGACY_TX
static void	vtnet_qflush(struct ifnet *);
#endif

static int	vtnet_watchdog(struct vtnet_txq *);
static void	vtnet_accum_stats(struct vtnet_softc *,
		    struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
static void	vtnet_tick(void *);

static void	vtnet_start_taskqueues(struct vtnet_softc *);
static void	vtnet_free_taskqueues(struct vtnet_softc *);
static void	vtnet_drain_taskqueues(struct vtnet_softc *);

static void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
static void	vtnet_stop_rendezvous(struct vtnet_softc *);
static void	vtnet_stop(struct vtnet_softc *);
static int	vtnet_virtio_reinit(struct vtnet_softc *);
static void	vtnet_init_rx_filters(struct vtnet_softc *);
static int	vtnet_init_rx_queues(struct vtnet_softc *);
static int	vtnet_init_tx_queues(struct vtnet_softc *);
static int	vtnet_init_rxtx_queues(struct vtnet_softc *);
static void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
static int	vtnet_reinit(struct vtnet_softc *);
static void	vtnet_init_locked(struct vtnet_softc *);
static void	vtnet_init(void *);

static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
		    struct sglist *, int, int);
static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
static int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
static int	vtnet_set_promisc(struct vtnet_softc *, int);
static int	vtnet_set_allmulti(struct vtnet_softc *, int);
static void	vtnet_attach_disable_promisc(struct vtnet_softc *);
static void	vtnet_rx_filter(struct vtnet_softc *);
static void	vtnet_rx_filter_mac(struct vtnet_softc *);
static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
static void	vtnet_register_vlan(void *, struct ifnet *, uint16_t);
static void	vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);

static int	vtnet_is_link_up(struct vtnet_softc *);
static void	vtnet_update_link_status(struct vtnet_softc *);
static int	vtnet_ifmedia_upd(struct ifnet *);
static void	vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
static void	vtnet_get_hwaddr(struct vtnet_softc *);
static void	vtnet_set_hwaddr(struct vtnet_softc *);
static void	vtnet_vlan_tag_remove(struct mbuf *);
static void	vtnet_set_rx_process_limit(struct vtnet_softc *);
static void	vtnet_set_tx_intr_threshold(struct vtnet_softc *);

static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
		    struct sysctl_oid_list *, struct vtnet_rxq *);
static void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
		    struct sysctl_oid_list *, struct vtnet_txq *);
static void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
static void	vtnet_setup_sysctl(struct vtnet_softc *);

static int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
static void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
static int	vtnet_txq_enable_intr(struct vtnet_txq *);
static void	vtnet_txq_disable_intr(struct vtnet_txq *);
static void	vtnet_enable_rx_interrupts(struct vtnet_softc *);
static void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
static void	vtnet_enable_interrupts(struct vtnet_softc *);
static void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
static void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
static void	vtnet_disable_interrupts(struct vtnet_softc *);

static int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);

/* Tunables. */
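/*
 * Usage note (descriptive, not part of the original source): TUNABLE_INT()
 * makes each knob below settable as a loader(8) tunable, e.g. adding
 * hw.vtnet.csum_disable=1 to /boot/loader.conf. The global default can be
 * overridden per device through vtnet_tunable_int(), which is called with
 * the per-device knob name (see its callers below).
 */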
static int vtnet_csum_disable = 0;
TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
static int vtnet_tso_disable = 0;
TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
static int vtnet_lro_disable = 0;
TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
static int vtnet_mq_disable = 0;
TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
static int vtnet_mq_max_pairs = 0;
TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
static int vtnet_rx_process_limit = 512;
TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);

static uma_zone_t vtnet_tx_header_zone;

static struct virtio_feature_desc vtnet_feature_desc[] = {
	{ VIRTIO_NET_F_CSUM,		"TxChecksum"	},
	{ VIRTIO_NET_F_GUEST_CSUM,	"RxChecksum"	},
	{ VIRTIO_NET_F_MAC,		"MacAddress"	},
	{ VIRTIO_NET_F_GSO,		"TxAllGSO"	},
	{ VIRTIO_NET_F_GUEST_TSO4,	"RxTSOv4"	},
	{ VIRTIO_NET_F_GUEST_TSO6,	"RxTSOv6"	},
	{ VIRTIO_NET_F_GUEST_ECN,	"RxECN"		},
	{ VIRTIO_NET_F_GUEST_UFO,	"RxUFO"		},
	{ VIRTIO_NET_F_HOST_TSO4,	"TxTSOv4"	},
	{ VIRTIO_NET_F_HOST_TSO6,	"TxTSOv6"	},
	{ VIRTIO_NET_F_HOST_ECN,	"TxTSOECN"	},
	{ VIRTIO_NET_F_HOST_UFO,	"TxUFO"		},
	{ VIRTIO_NET_F_MRG_RXBUF,	"MrgRxBuf"	},
	{ VIRTIO_NET_F_STATUS,		"Status"	},
	{ VIRTIO_NET_F_CTRL_VQ,		"ControlVq"	},
	{ VIRTIO_NET_F_CTRL_RX,		"RxMode"	},
	{ VIRTIO_NET_F_CTRL_VLAN,	"VLanFilter"	},
	{ VIRTIO_NET_F_CTRL_RX_EXTRA,	"RxModeExtra"	},
	{ VIRTIO_NET_F_GUEST_ANNOUNCE,	"GuestAnnounce"	},
	{ VIRTIO_NET_F_MQ,		"Multiqueue"	},
	{ VIRTIO_NET_F_CTRL_MAC_ADDR,	"SetMacAddress"	},

	{ 0, NULL }
};

static device_method_t vtnet_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe,		vtnet_probe),
	DEVMETHOD(device_attach,	vtnet_attach),
	DEVMETHOD(device_detach,	vtnet_detach),
	DEVMETHOD(device_suspend,	vtnet_suspend),
	DEVMETHOD(device_resume,	vtnet_resume),
	DEVMETHOD(device_shutdown,	vtnet_shutdown),

	/* VirtIO methods. */
	DEVMETHOD(virtio_attach_completed, vtnet_attach_completed),
	DEVMETHOD(virtio_config_change,	vtnet_config_change),

	DEVMETHOD_END
};

#ifdef DEV_NETMAP
#include <dev/netmap/if_vtnet_netmap.h>
#endif /* DEV_NETMAP */

static driver_t vtnet_driver = {
	"vtnet",
	vtnet_methods,
	sizeof(struct vtnet_softc)
};
static devclass_t vtnet_devclass;

DRIVER_MODULE(vtnet, virtio_mmio, vtnet_driver, vtnet_devclass,
    vtnet_modevent, 0);
DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
    vtnet_modevent, 0);
MODULE_VERSION(vtnet, 1);
MODULE_DEPEND(vtnet, virtio, 1, 1, 1);
#ifdef DEV_NETMAP
MODULE_DEPEND(vtnet, netmap, 1, 1, 1);
#endif /* DEV_NETMAP */

static int
vtnet_modevent(module_t mod, int type, void *unused)
{
	int error = 0;
	static int loaded = 0;

	switch (type) {
	case MOD_LOAD:
		if (loaded++ == 0)
			vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
			    sizeof(struct vtnet_tx_header),
			    NULL, NULL, NULL, NULL, 0, 0);
		break;
	case MOD_QUIESCE:
		if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
			error = EBUSY;
		break;
	case MOD_UNLOAD:
		if (--loaded == 0) {
			uma_zdestroy(vtnet_tx_header_zone);
			vtnet_tx_header_zone = NULL;
		}
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

static int
vtnet_probe(device_t dev)
{

	if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK)
		return (ENXIO);

	device_set_desc(dev, "VirtIO Networking Adapter");

	return (BUS_PROBE_DEFAULT);
}

static int
vtnet_attach(device_t dev)
{
	struct vtnet_softc *sc;
	int error;

	sc = device_get_softc(dev);
	sc->vtnet_dev = dev;

	/* Register our feature descriptions. */
	virtio_set_feature_desc(dev, vtnet_feature_desc);

	VTNET_CORE_LOCK_INIT(sc);
	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);

	vtnet_setup_sysctl(sc);
	vtnet_setup_features(sc);

	error = vtnet_alloc_rx_filters(sc);
	if (error) {
		device_printf(dev, "cannot allocate Rx filters\n");
		goto fail;
	}

	error = vtnet_alloc_rxtx_queues(sc);
	if (error) {
		device_printf(dev, "cannot allocate queues\n");
		goto fail;
	}

	error = vtnet_alloc_virtqueues(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueues\n");
		goto fail;
	}

	error = vtnet_setup_interface(sc);
	if (error) {
		device_printf(dev, "cannot setup interface\n");
		goto fail;
	}

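	/*
	 * Descriptive note (added): interrupt allocation is handled by the
	 * VirtIO bus layer. With per-VQ MSIX each queue gets its own handler;
	 * otherwise the queues share an interrupt, which is why the VQ
	 * handlers below must tolerate being called for inactive queues.
	 */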
	error = virtio_setup_intr(dev, INTR_TYPE_NET);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupts\n");
		/* BMV: This will crash if during boot! */
		ether_ifdetach(sc->vtnet_ifp);
		goto fail;
	}

#ifdef DEV_NETMAP
	vtnet_netmap_attach(sc);
#endif /* DEV_NETMAP */

	vtnet_start_taskqueues(sc);

fail:
	if (error)
		vtnet_detach(dev);

	return (error);
}

static int
vtnet_detach(device_t dev)
{
	struct vtnet_softc *sc;
	struct ifnet *ifp;

	sc = device_get_softc(dev);
	ifp = sc->vtnet_ifp;

	if (device_is_attached(dev)) {
		VTNET_CORE_LOCK(sc);
		vtnet_stop(sc);
		VTNET_CORE_UNLOCK(sc);

		callout_drain(&sc->vtnet_tick_ch);
		vtnet_drain_taskqueues(sc);

		ether_ifdetach(ifp);
	}

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	vtnet_free_taskqueues(sc);

	if (sc->vtnet_vlan_attach != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
		sc->vtnet_vlan_attach = NULL;
	}
	if (sc->vtnet_vlan_detach != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
		sc->vtnet_vlan_detach = NULL;
	}

	ifmedia_removeall(&sc->vtnet_media);

	if (ifp != NULL) {
		if_free(ifp);
		sc->vtnet_ifp = NULL;
	}

	vtnet_free_rxtx_queues(sc);
	vtnet_free_rx_filters(sc);

	if (sc->vtnet_ctrl_vq != NULL)
		vtnet_free_ctrl_vq(sc);

	VTNET_CORE_LOCK_DESTROY(sc);

	return (0);
}

static int
vtnet_suspend(device_t dev)
{
	struct vtnet_softc *sc;

	sc = device_get_softc(dev);

	VTNET_CORE_LOCK(sc);
	vtnet_stop(sc);
	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
	VTNET_CORE_UNLOCK(sc);

	return (0);
}

static int
vtnet_resume(device_t dev)
{
	struct vtnet_softc *sc;
	struct ifnet *ifp;

	sc = device_get_softc(dev);
	ifp = sc->vtnet_ifp;

	VTNET_CORE_LOCK(sc);
	if (ifp->if_flags & IFF_UP)
		vtnet_init_locked(sc);
	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
	VTNET_CORE_UNLOCK(sc);

	return (0);
}

static int
vtnet_shutdown(device_t dev)
{

	/*
	 * Suspend already does all of what we need to
	 * do here; we just never expect to be resumed.
	 */
	return (vtnet_suspend(dev));
}

static int
vtnet_attach_completed(device_t dev)
{

	vtnet_attach_disable_promisc(device_get_softc(dev));

	return (0);
}

static int
vtnet_config_change(device_t dev)
{
	struct vtnet_softc *sc;

	sc = device_get_softc(dev);

	VTNET_CORE_LOCK(sc);
	vtnet_update_link_status(sc);
	if (sc->vtnet_link_active != 0)
		vtnet_tx_start_all(sc);
	VTNET_CORE_UNLOCK(sc);

	return (0);
}

static void
vtnet_negotiate_features(struct vtnet_softc *sc)
{
	device_t dev;
	uint64_t mask, features;

	dev = sc->vtnet_dev;
	mask = 0;

	/*
	 * TSO and LRO are only available when their corresponding checksum
	 * offload feature is also negotiated.
	 */
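	/*
	 * Descriptive note (added): the mask built below holds the feature
	 * bits we do NOT want to advertise; e.g. with the hw.vtnet.tso_disable
	 * tunable set, the VTNET_TSO_FEATURES bits are cleared from
	 * VTNET_FEATURES before virtio_negotiate_features() is called.
	 * Disabling checksum offload also clears TSO and LRO, per the
	 * dependency described above.
	 */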
	if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) {
		mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
		mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES;
	}
	if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
		mask |= VTNET_TSO_FEATURES;
	if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
		mask |= VTNET_LRO_FEATURES;
#ifndef VTNET_LEGACY_TX
	if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
		mask |= VIRTIO_NET_F_MQ;
#else
	mask |= VIRTIO_NET_F_MQ;
#endif

	features = VTNET_FEATURES & ~mask;
	sc->vtnet_features = virtio_negotiate_features(dev, features);

	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
	    virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
		/*
		 * LRO without mergeable buffers requires special care. This
		 * is not ideal because every receive buffer must be large
		 * enough to hold the maximum TCP packet, the Ethernet header,
		 * and the VirtIO header. This requires up to 34 descriptors
		 * with MCLBYTES clusters. If we do not have indirect
		 * descriptors, LRO is disabled since the virtqueue will not
		 * contain very many receive buffers.
		 */
		if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
			device_printf(dev,
			    "LRO disabled due to both mergeable buffers and "
			    "indirect descriptors not negotiated\n");

			features &= ~VTNET_LRO_FEATURES;
			sc->vtnet_features =
			    virtio_negotiate_features(dev, features);
		} else
			sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
	}
}

static void
vtnet_setup_features(struct vtnet_softc *sc)
{
	device_t dev;
	int max_pairs, max;

	dev = sc->vtnet_dev;

	vtnet_negotiate_features(sc);

	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
	if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
		sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;

	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
		/* This feature should always be negotiated. */
		sc->vtnet_flags |= VTNET_FLAG_MAC;
	}

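	/*
	 * Descriptive note (added): sizes below assume the legacy VirtIO
	 * header layouts, where struct virtio_net_hdr is 10 bytes and the
	 * mergeable-rxbuf variant appends a 16-bit num_buffers field for a
	 * total of 12 bytes.
	 */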
	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	} else
		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);

	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
		sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS;
	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
		sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS;
	else
		sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS;

	if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
		sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS;
	else
		sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS;

	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;

		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
	}

	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
	    sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
		max_pairs = virtio_read_dev_config_2(dev,
		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
		if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
		    max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
			max_pairs = 1;
	} else
		max_pairs = 1;

	if (max_pairs > 1) {
		/*
		 * Limit the maximum number of queue pairs to the number of
		 * CPUs or the configured maximum. The actual number of
		 * queues that get used may be less.
		 */
		max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
		if (max > 0 && max_pairs > max)
			max_pairs = max;
		if (max_pairs > mp_ncpus)
			max_pairs = mp_ncpus;
		if (max_pairs > VTNET_MAX_QUEUE_PAIRS)
			max_pairs = VTNET_MAX_QUEUE_PAIRS;
		if (max_pairs > 1)
			sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
	}

	sc->vtnet_max_vq_pairs = max_pairs;
}

static int
vtnet_init_rxq(struct vtnet_softc *sc, int id)
{
	struct vtnet_rxq *rxq;

	rxq = &sc->vtnet_rxqs[id];

	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
	    device_get_nameunit(sc->vtnet_dev), id);
	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);

	rxq->vtnrx_sc = sc;
	rxq->vtnrx_id = id;

	rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
	if (rxq->vtnrx_sg == NULL)
		return (ENOMEM);

	TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);

	return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
}

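/*
 * Descriptive note (added): unlike the Rx queues, each Tx queue in the
 * non-legacy transmit path also gets a buf_ring so that if_transmit callers
 * have somewhere to stash mbufs when the queue is busy; the deferred task
 * initialized below drains that ring later.
 */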
static int
vtnet_init_txq(struct vtnet_softc *sc, int id)
{
	struct vtnet_txq *txq;

	txq = &sc->vtnet_txqs[id];

	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
	    device_get_nameunit(sc->vtnet_dev), id);
	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);

	txq->vtntx_sc = sc;
	txq->vtntx_id = id;

	txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
	if (txq->vtntx_sg == NULL)
		return (ENOMEM);

#ifndef VTNET_LEGACY_TX
	txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
	    M_NOWAIT, &txq->vtntx_mtx);
	if (txq->vtntx_br == NULL)
		return (ENOMEM);

	TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
#endif
	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
	    taskqueue_thread_enqueue, &txq->vtntx_tq);
	if (txq->vtntx_tq == NULL)
		return (ENOMEM);

	return (0);
}

static int
vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
{
	int i, npairs, error;

	npairs = sc->vtnet_max_vq_pairs;

	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
	    M_NOWAIT | M_ZERO);
	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
	    M_NOWAIT | M_ZERO);
	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
		return (ENOMEM);

	for (i = 0; i < npairs; i++) {
		error = vtnet_init_rxq(sc, i);
		if (error)
			return (error);
		error = vtnet_init_txq(sc, i);
		if (error)
			return (error);
	}

	vtnet_setup_queue_sysctl(sc);

	return (0);
}

static void
vtnet_destroy_rxq(struct vtnet_rxq *rxq)
{

	rxq->vtnrx_sc = NULL;
	rxq->vtnrx_id = -1;

	if (rxq->vtnrx_sg != NULL) {
		sglist_free(rxq->vtnrx_sg);
		rxq->vtnrx_sg = NULL;
	}

	if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
		mtx_destroy(&rxq->vtnrx_mtx);
}

static void
vtnet_destroy_txq(struct vtnet_txq *txq)
{

	txq->vtntx_sc = NULL;
	txq->vtntx_id = -1;

	if (txq->vtntx_sg != NULL) {
		sglist_free(txq->vtntx_sg);
		txq->vtntx_sg = NULL;
	}

#ifndef VTNET_LEGACY_TX
	if (txq->vtntx_br != NULL) {
		buf_ring_free(txq->vtntx_br, M_DEVBUF);
		txq->vtntx_br = NULL;
	}
#endif

	if (mtx_initialized(&txq->vtntx_mtx) != 0)
		mtx_destroy(&txq->vtntx_mtx);
}

static void
vtnet_free_rxtx_queues(struct vtnet_softc *sc)
{
	int i;

	if (sc->vtnet_rxqs != NULL) {
		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
			vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
		free(sc->vtnet_rxqs, M_DEVBUF);
		sc->vtnet_rxqs = NULL;
	}

	if (sc->vtnet_txqs != NULL) {
		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
			vtnet_destroy_txq(&sc->vtnet_txqs[i]);
		free(sc->vtnet_txqs, M_DEVBUF);
		sc->vtnet_txqs = NULL;
	}
}

static int
vtnet_alloc_rx_filters(struct vtnet_softc *sc)
{

	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
		    M_DEVBUF, M_NOWAIT | M_ZERO);
		if (sc->vtnet_mac_filter == NULL)
			return (ENOMEM);
	}

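	/*
	 * Descriptive note (added): the VLAN filter is kept as a bitmap with
	 * one bit per possible 802.1Q VLAN ID, packed into
	 * VTNET_VLAN_FILTER_NWORDS 32-bit words; vtnet_update_vlan_filter()
	 * sets and clears the bits as VLANs are registered and unregistered.
	 */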
	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
		if (sc->vtnet_vlan_filter == NULL)
			return (ENOMEM);
	}

	return (0);
}

static void
vtnet_free_rx_filters(struct vtnet_softc *sc)
{

	if (sc->vtnet_mac_filter != NULL) {
		free(sc->vtnet_mac_filter, M_DEVBUF);
		sc->vtnet_mac_filter = NULL;
	}

	if (sc->vtnet_vlan_filter != NULL) {
		free(sc->vtnet_vlan_filter, M_DEVBUF);
		sc->vtnet_vlan_filter = NULL;
	}
}

static int
vtnet_alloc_virtqueues(struct vtnet_softc *sc)
{
	device_t dev;
	struct vq_alloc_info *info;
	struct vtnet_rxq *rxq;
	struct vtnet_txq *txq;
	int i, idx, flags, nvqs, error;

	dev = sc->vtnet_dev;
	flags = 0;

	nvqs = sc->vtnet_max_vq_pairs * 2;
	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
		nvqs++;

	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
	if (info == NULL)
		return (ENOMEM);

	for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
		rxq = &sc->vtnet_rxqs[i];
		VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
		    vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
		    "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id);

		txq = &sc->vtnet_txqs[i];
		VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
		    vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
		    "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id);
	}

	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
		    &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
	}

	/*
	 * Enable interrupt binding if this is multiqueue. This only matters
	 * when per-vq MSIX is available.
	 */
	if (sc->vtnet_flags & VTNET_FLAG_MULTIQ)
		flags |= 0;

	error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
	free(info, M_TEMP);

	return (error);
}

static int
vtnet_setup_interface(struct vtnet_softc *sc)
{
	device_t dev;
	struct ifnet *ifp;

	dev = sc->vtnet_dev;

	ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "cannot allocate ifnet structure\n");
		return (ENOSPC);
	}

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_baudrate = IF_Gbps(10);	/* Approx. */
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = vtnet_init;
	ifp->if_ioctl = vtnet_ioctl;
	ifp->if_get_counter = vtnet_get_counter;
#ifndef VTNET_LEGACY_TX
	ifp->if_transmit = vtnet_txq_mq_start;
	ifp->if_qflush = vtnet_qflush;
#else
	struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
	ifp->if_start = vtnet_start;
	IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
	ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
	IFQ_SET_READY(&ifp->if_snd);
#endif

	ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
	    vtnet_ifmedia_sts);
	ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
	ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);

	/* Read (or generate) the MAC address for the adapter. */
	vtnet_get_hwaddr(sc);

	ether_ifattach(ifp, sc->vtnet_hwaddr);

	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
		ifp->if_capabilities |= IFCAP_LINKSTATE;

	/* Tell the upper layer(s) we support long frames. */
	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
	ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;

	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
		ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;

		if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) {
			ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
		} else {
			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
				ifp->if_capabilities |= IFCAP_TSO4;
			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
				ifp->if_capabilities |= IFCAP_TSO6;
			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
				sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
		}

		if (ifp->if_capabilities & IFCAP_TSO)
			ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
	}

	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
		ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;

		if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
		    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
			ifp->if_capabilities |= IFCAP_LRO;
	}

	if (ifp->if_capabilities & IFCAP_HWCSUM) {
		/*
		 * VirtIO does not support VLAN tagging, but we can fake
		 * it by inserting and removing the 802.1Q header during
		 * transmit and receive. We are then able to do checksum
		 * offloading of VLAN frames.
		 */
		ifp->if_capabilities |=
		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
	}

	ifp->if_capenable = ifp->if_capabilities;

	/*
	 * Capabilities after here are not enabled by default.
	 */

	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
		ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;

		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
	}

	vtnet_set_rx_process_limit(sc);
	vtnet_set_tx_intr_threshold(sc);

	return (0);
}

static int
vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
{
	struct ifnet *ifp;
	int frame_size, clsize;

	ifp = sc->vtnet_ifp;

	if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
		return (EINVAL);

	frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) +
	    new_mtu;

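	/*
	 * Worked example (added; assumes the 12-byte mergeable header): with
	 * the default 1500 MTU, frame_size = 12 + 18 + 1500 = 1530, which
	 * fits in a standard MCLBYTES (2K) cluster; a 9000-byte MTU gives
	 * 9030 and falls through to the larger cluster sizes below.
	 */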
	/*
	 * Based on the new MTU (and hence frame size) determine which
	 * cluster size is most appropriate for the receive queues.
	 */
	if (frame_size <= MCLBYTES) {
		clsize = MCLBYTES;
	} else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
		/* Avoid going past 9K jumbos. */
		if (frame_size > MJUM9BYTES)
			return (EINVAL);
		clsize = MJUM9BYTES;
	} else
		clsize = MJUMPAGESIZE;

	ifp->if_mtu = new_mtu;
	sc->vtnet_rx_new_clsize = clsize;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		vtnet_init_locked(sc);
	}

	return (0);
}

static int
vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct vtnet_softc *sc;
	struct ifreq *ifr;
	int reinit, mask, error;

	sc = ifp->if_softc;
	ifr = (struct ifreq *) data;
	error = 0;

	switch (cmd) {
	case SIOCSIFMTU:
		if (ifp->if_mtu != ifr->ifr_mtu) {
			VTNET_CORE_LOCK(sc);
			error = vtnet_change_mtu(sc, ifr->ifr_mtu);
			VTNET_CORE_UNLOCK(sc);
		}
		break;

	case SIOCSIFFLAGS:
		VTNET_CORE_LOCK(sc);
		if ((ifp->if_flags & IFF_UP) == 0) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				vtnet_stop(sc);
		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			if ((ifp->if_flags ^ sc->vtnet_if_flags) &
			    (IFF_PROMISC | IFF_ALLMULTI)) {
				if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
					vtnet_rx_filter(sc);
				else {
					ifp->if_flags |= IFF_PROMISC;
					if ((ifp->if_flags ^ sc->vtnet_if_flags)
					    & IFF_ALLMULTI)
						error = ENOTSUP;
				}
			}
		} else
			vtnet_init_locked(sc);

		if (error == 0)
			sc->vtnet_if_flags = ifp->if_flags;
		VTNET_CORE_UNLOCK(sc);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
			break;
		VTNET_CORE_LOCK(sc);
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			vtnet_rx_filter_mac(sc);
		VTNET_CORE_UNLOCK(sc);
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
		break;

	case SIOCSIFCAP:
		VTNET_CORE_LOCK(sc);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;

		if (mask & IFCAP_TXCSUM)
			ifp->if_capenable ^= IFCAP_TXCSUM;
		if (mask & IFCAP_TXCSUM_IPV6)
			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
		if (mask & IFCAP_TSO4)
			ifp->if_capenable ^= IFCAP_TSO4;
		if (mask & IFCAP_TSO6)
			ifp->if_capenable ^= IFCAP_TSO6;

		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
		    IFCAP_VLAN_HWFILTER)) {
			/* These Rx features require us to renegotiate. */
			reinit = 1;

			if (mask & IFCAP_RXCSUM)
				ifp->if_capenable ^= IFCAP_RXCSUM;
			if (mask & IFCAP_RXCSUM_IPV6)
				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
			if (mask & IFCAP_LRO)
				ifp->if_capenable ^= IFCAP_LRO;
			if (mask & IFCAP_VLAN_HWFILTER)
				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
		} else
			reinit = 0;

		if (mask & IFCAP_VLAN_HWTSO)
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;

		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
			vtnet_init_locked(sc);
		}

		VTNET_CORE_UNLOCK(sc);
		VLAN_CAPABILITIES(ifp);

		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}

	VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);

	return (error);
}

static int
vtnet_rxq_populate(struct vtnet_rxq *rxq)
{
	struct virtqueue *vq;
	int nbufs, error;

	vq = rxq->vtnrx_vq;
	error = ENOSPC;

	for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
		error = vtnet_rxq_new_buf(rxq);
		if (error)
			break;
	}

	if (nbufs > 0) {
		virtqueue_notify(vq);
		/*
		 * EMSGSIZE signifies the virtqueue did not have enough
		 * entries available to hold the last mbuf. This is not
		 * an error.
		 */
		if (error == EMSGSIZE)
			error = 0;
	}

	return (error);
}

static void
vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
{
	struct virtqueue *vq;
	struct mbuf *m;
	int last;

	vq = rxq->vtnrx_vq;
	last = 0;

	while ((m = virtqueue_drain(vq, &last)) != NULL)
		m_freem(m);

	KASSERT(virtqueue_empty(vq),
	    ("%s: mbufs remaining in rx queue %p", __func__, rxq));
}

static struct mbuf *
vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
{
	struct mbuf *m_head, *m_tail, *m;
	int i, clsize;

	clsize = sc->vtnet_rx_clsize;

	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
	    ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));

	m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize);
	if (m_head == NULL)
		goto fail;

	m_head->m_len = clsize;
	m_tail = m_head;

	/* Allocate the rest of the chain. */
	for (i = 1; i < nbufs; i++) {
		m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
		if (m == NULL)
			goto fail;

		m->m_len = clsize;
		m_tail->m_next = m;
		m_tail = m;
	}

	if (m_tailp != NULL)
		*m_tailp = m_tail;

	return (m_head);

fail:
	sc->vtnet_stats.mbuf_alloc_failed++;
	m_freem(m_head);

	return (NULL);
}

/*
 * Slow path for when LRO without mergeable buffers is negotiated.
 */
static int
vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
    int len0)
{
	struct vtnet_softc *sc;
	struct mbuf *m, *m_prev;
	struct mbuf *m_new, *m_tail;
	int len, clsize, nreplace, error;

	sc = rxq->vtnrx_sc;
	clsize = sc->vtnet_rx_clsize;

	m_prev = NULL;
	m_tail = NULL;
	nreplace = 0;

	m = m0;
	len = len0;

	/*
	 * Since these mbuf chains are so large, we avoid allocating an
	 * entire replacement chain if possible. When the received frame
	 * did not consume the entire chain, the unused mbufs are moved
	 * to the replacement chain.
	 */
	while (len > 0) {
		/*
		 * Something is seriously wrong if we received a frame
		 * larger than the chain. Drop it.
		 */
		if (m == NULL) {
			sc->vtnet_stats.rx_frame_too_large++;
			return (EMSGSIZE);
		}

		/* We always allocate the same cluster size. */
		KASSERT(m->m_len == clsize,
		    ("%s: mbuf size %d is not the cluster size %d",
		    __func__, m->m_len, clsize));

		m->m_len = MIN(m->m_len, len);
		len -= m->m_len;

		m_prev = m;
		m = m->m_next;
		nreplace++;
	}

	KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
	    ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
	    sc->vtnet_rx_nmbufs));

	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
	if (m_new == NULL) {
		m_prev->m_len = clsize;
		return (ENOBUFS);
	}

	/*
	 * Move any unused mbufs from the received chain onto the end
	 * of the new chain.
	 */
	if (m_prev->m_next != NULL) {
		m_tail->m_next = m_prev->m_next;
		m_prev->m_next = NULL;
	}

	error = vtnet_rxq_enqueue_buf(rxq, m_new);
	if (error) {
		/*
		 * BAD! We could not enqueue the replacement mbuf chain. We
		 * must restore the m0 chain to the original state if it was
		 * modified so we can subsequently discard it.
		 *
		 * NOTE: The replacement is supposed to be an identical copy
		 * of the one just dequeued so this is an unexpected error.
		 */
		sc->vtnet_stats.rx_enq_replacement_failed++;

		if (m_tail->m_next != NULL) {
			m_prev->m_next = m_tail->m_next;
			m_tail->m_next = NULL;
		}

		m_prev->m_len = clsize;
		m_freem(m_new);
	}

	return (error);
}

static int
vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
{
	struct vtnet_softc *sc;
	struct mbuf *m_new;
	int error;

	sc = rxq->vtnrx_sc;

	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
	    ("%s: chained mbuf without LRO_NOMRG", __func__));

	if (m->m_next == NULL) {
		/* Fast-path for the common case of just one mbuf. */
		if (m->m_len < len)
			return (EINVAL);

		m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
		if (m_new == NULL)
			return (ENOBUFS);

		error = vtnet_rxq_enqueue_buf(rxq, m_new);
		if (error) {
			/*
			 * The new mbuf is supposed to be an identical
			 * copy of the one just dequeued so this is an
			 * unexpected error.
			 */
			m_freem(m_new);
			sc->vtnet_stats.rx_enq_replacement_failed++;
		} else
			m->m_len = len;
	} else
		error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);

	return (error);
}

static int
vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
{
	struct vtnet_softc *sc;
	struct sglist *sg;
	struct vtnet_rx_header *rxhdr;
	uint8_t *mdata;
	int offset, error;

	sc = rxq->vtnrx_sc;
	sg = rxq->vtnrx_sg;
	mdata = mtod(m, uint8_t *);

	VTNET_RXQ_LOCK_ASSERT(rxq);
	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
	    ("%s: chained mbuf without LRO_NOMRG", __func__));
	KASSERT(m->m_len == sc->vtnet_rx_clsize,
	    ("%s: unexpected cluster size %d/%d", __func__, m->m_len,
	    sc->vtnet_rx_clsize));

	sglist_reset(sg);
	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
		rxhdr = (struct vtnet_rx_header *) mdata;
		sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
		offset = sizeof(struct vtnet_rx_header);
	} else
		offset = 0;

	sglist_append(sg, mdata + offset, m->m_len - offset);
	if (m->m_next != NULL) {
		error = sglist_append_mbuf(sg, m->m_next);
		MPASS(error == 0);
	}

	error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg);

	return (error);
}

static int
vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
{
	struct vtnet_softc *sc;
	struct mbuf *m;
	int error;

	sc = rxq->vtnrx_sc;

	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
	if (m == NULL)
		return (ENOBUFS);

	error = vtnet_rxq_enqueue_buf(rxq, m);
	if (error)
		m_freem(m);

	return (error);
}

/*
 * Use the checksum offset in the VirtIO header to set the
 * correct CSUM_* flags.
 */
static int
vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
    uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
{
	struct vtnet_softc *sc;
#if defined(INET) || defined(INET6)
	int offset = hdr->csum_start + hdr->csum_offset;
#endif

	sc = rxq->vtnrx_sc;

	/* Only do a basic sanity check on the offset. */
	switch (eth_type) {
#if defined(INET)
	case ETHERTYPE_IP:
		if (__predict_false(offset < ip_start + sizeof(struct ip)))
			return (1);
		break;
#endif
#if defined(INET6)
	case ETHERTYPE_IPV6:
		if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
			return (1);
		break;
#endif
	default:
		sc->vtnet_stats.rx_csum_bad_ethtype++;
		return (1);
	}

	/*
	 * Use the offset to determine the appropriate CSUM_* flags. This is
	 * a bit dirty, but we can get by with it since the checksum offsets
	 * happen to be different. We assume the host does not do IPv4
	 * header checksum offloading.
	 */
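	/*
	 * Illustration (added; assumes the stock header layouts): uh_sum is
	 * at byte 6 of a UDP header, th_sum at byte 16 of a TCP header, and
	 * the SCTP checksum at byte 8, so the cases below cannot collide.
	 */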
	switch (hdr->csum_offset) {
	case offsetof(struct udphdr, uh_sum):
	case offsetof(struct tcphdr, th_sum):
		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		m->m_pkthdr.csum_data = 0xFFFF;
		break;
	case offsetof(struct sctphdr, checksum):
		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
		break;
	default:
		sc->vtnet_stats.rx_csum_bad_offset++;
		return (1);
	}

	return (0);
}

static int
vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
    uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
{
	struct vtnet_softc *sc;
	int offset, proto;

	sc = rxq->vtnrx_sc;

	switch (eth_type) {
#if defined(INET)
	case ETHERTYPE_IP: {
		struct ip *ip;
		if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
			return (1);
		ip = (struct ip *)(m->m_data + ip_start);
		proto = ip->ip_p;
		offset = ip_start + (ip->ip_hl << 2);
		break;
	}
#endif
#if defined(INET6)
	case ETHERTYPE_IPV6:
		if (__predict_false(m->m_len < ip_start +
		    sizeof(struct ip6_hdr)))
			return (1);
		offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
		if (__predict_false(offset < 0))
			return (1);
		break;
#endif
	default:
		sc->vtnet_stats.rx_csum_bad_ethtype++;
		return (1);
	}

	switch (proto) {
	case IPPROTO_TCP:
		if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
			return (1);
		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		m->m_pkthdr.csum_data = 0xFFFF;
		break;
	case IPPROTO_UDP:
		if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
			return (1);
		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		m->m_pkthdr.csum_data = 0xFFFF;
		break;
	case IPPROTO_SCTP:
		if (__predict_false(m->m_len < offset + sizeof(struct sctphdr)))
			return (1);
		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
		break;
	default:
		/*
		 * For the remaining protocols, FreeBSD does not support
		 * checksum offloading, so the checksum will be recomputed.
		 */
#if 0
		if_printf(sc->vtnet_ifp, "cksum offload of unsupported "
		    "protocol eth_type=%#x proto=%d csum_start=%d "
		    "csum_offset=%d\n", __func__, eth_type, proto,
		    hdr->csum_start, hdr->csum_offset);
#endif
		break;
	}

	return (0);
}

/*
 * Set the appropriate CSUM_* flags. Unfortunately, the information
 * provided is not directly useful to us. The VirtIO header gives the
 * offset of the checksum, which is all Linux needs, but this is not
 * how FreeBSD does things. We are forced to peek inside the packet
 * a bit.
 *
 * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
 * could accept the offsets and let the stack figure it out.
 */
static int
vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
    struct virtio_net_hdr *hdr)
{
	struct ether_header *eh;
	struct ether_vlan_header *evh;
	uint16_t eth_type;
	int offset, error;

	eh = mtod(m, struct ether_header *);
	eth_type = ntohs(eh->ether_type);
	if (eth_type == ETHERTYPE_VLAN) {
		/* BMV: We should handle nested VLAN tags too. */
		evh = mtod(m, struct ether_vlan_header *);
		eth_type = ntohs(evh->evl_proto);
		offset = sizeof(struct ether_vlan_header);
	} else
		offset = sizeof(struct ether_header);

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
		error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
	else
		error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);

	return (error);
}

static void
vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
{
	struct mbuf *m;

	while (--nbufs > 0) {
		m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
		if (m == NULL)
			break;
		vtnet_rxq_discard_buf(rxq, m);
	}
}

static void
vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
{
	int error;

	/*
	 * Requeue the discarded mbuf. This should always be successful
	 * since it was just dequeued.
	 */
	error = vtnet_rxq_enqueue_buf(rxq, m);
	KASSERT(error == 0,
	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
}

static int
vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
{
	struct vtnet_softc *sc;
	struct virtqueue *vq;
	struct mbuf *m, *m_tail;
	int len;

	sc = rxq->vtnrx_sc;
	vq = rxq->vtnrx_vq;
	m_tail = m_head;

	while (--nbufs > 0) {
		m = virtqueue_dequeue(vq, &len);
		if (m == NULL) {
			rxq->vtnrx_stats.vrxs_ierrors++;
			goto fail;
		}

		if (vtnet_rxq_new_buf(rxq) != 0) {
			rxq->vtnrx_stats.vrxs_iqdrops++;
			vtnet_rxq_discard_buf(rxq, m);
			if (nbufs > 1)
				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
			goto fail;
		}

		if (m->m_len < len)
			len = m->m_len;

		m->m_len = len;
		m->m_flags &= ~M_PKTHDR;

		m_head->m_pkthdr.len += len;
		m_tail->m_next = m;
		m_tail = m;
	}

	return (0);

fail:
	sc->vtnet_stats.rx_mergeable_failed++;
	m_freem(m_head);

	return (1);
}

static void
vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
    struct virtio_net_hdr *hdr)
{
	struct vtnet_softc *sc;
	struct ifnet *ifp;
	struct ether_header *eh;

	sc = rxq->vtnrx_sc;
	ifp = sc->vtnet_ifp;

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		eh = mtod(m, struct ether_header *);
		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
			vtnet_vlan_tag_remove(m);
			/*
			 * With the 802.1Q header removed, update the
			 * checksum starting location accordingly.
			 */
			if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
				hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
		}
	}

	m->m_pkthdr.flowid = rxq->vtnrx_id;
	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);

	/*
	 * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
	 * distinction that Linux does. Need to reevaluate if performing
	 * offloading for the NEEDS_CSUM case is really appropriate.
	 */
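	/*
	 * Descriptive note (added): per the VirtIO spec, NEEDS_CSUM means the
	 * host left a partial checksum for the guest to complete, while
	 * DATA_VALID means the checksum was already verified; both are
	 * treated as "checksum good" below and the mbuf is flagged
	 * accordingly.
	 */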
	if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
	    VIRTIO_NET_HDR_F_DATA_VALID)) {
		if (vtnet_rxq_csum(rxq, m, hdr) == 0)
			rxq->vtnrx_stats.vrxs_csum++;
		else
			rxq->vtnrx_stats.vrxs_csum_failed++;
	}

	rxq->vtnrx_stats.vrxs_ipackets++;
	rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;

	VTNET_RXQ_UNLOCK(rxq);
	(*ifp->if_input)(ifp, m);
	VTNET_RXQ_LOCK(rxq);
}

static int
vtnet_rxq_eof(struct vtnet_rxq *rxq)
{
	struct virtio_net_hdr lhdr, *hdr;
	struct vtnet_softc *sc;
	struct ifnet *ifp;
	struct virtqueue *vq;
	struct mbuf *m;
	struct virtio_net_hdr_mrg_rxbuf *mhdr;
	int len, deq, nbufs, adjsz, count;

	sc = rxq->vtnrx_sc;
	vq = rxq->vtnrx_vq;
	ifp = sc->vtnet_ifp;
	hdr = &lhdr;
	deq = 0;
	count = sc->vtnet_rx_process_limit;

	VTNET_RXQ_LOCK_ASSERT(rxq);

#ifdef DEV_NETMAP
	if (netmap_rx_irq(ifp, 0, &deq)) {
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	while (count-- > 0) {
		m = virtqueue_dequeue(vq, &len);
		if (m == NULL)
			break;
		deq++;

		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
			rxq->vtnrx_stats.vrxs_ierrors++;
			vtnet_rxq_discard_buf(rxq, m);
			continue;
		}

		if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
			nbufs = 1;
			adjsz = sizeof(struct vtnet_rx_header);
			/*
			 * Account for our pad inserted between the header
			 * and the actual start of the frame.
			 */
			len += VTNET_RX_HEADER_PAD;
		} else {
			mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
			nbufs = mhdr->num_buffers;
			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
		}

		if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
			rxq->vtnrx_stats.vrxs_iqdrops++;
			vtnet_rxq_discard_buf(rxq, m);
			if (nbufs > 1)
				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
			continue;
		}

		m->m_pkthdr.len = len;
		m->m_pkthdr.rcvif = ifp;
		m->m_pkthdr.csum_flags = 0;

		if (nbufs > 1) {
			/* Dequeue the rest of the chain. */
			if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
				continue;
		}

		/*
		 * Save copy of header before we strip it. For both mergeable
		 * and non-mergeable, the header is at the beginning of the
		 * mbuf data. We no longer need num_buffers, so always use a
		 * regular header.
		 *
		 * BMV: Is this memcpy() expensive? We know the mbuf data is
		 * still valid even after the m_adj().
		 */
		memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
		m_adj(m, adjsz);

		vtnet_rxq_input(rxq, m, hdr);

		/* Must recheck after dropping the Rx lock. */
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (deq > 0)
		virtqueue_notify(vq);

	return (count > 0 ? 0 : EAGAIN);
}

static void
vtnet_rx_vq_intr(void *xrxq)
{
	struct vtnet_softc *sc;
	struct vtnet_rxq *rxq;
	struct ifnet *ifp;
	int tries, more;

	rxq = xrxq;
	sc = rxq->vtnrx_sc;
	ifp = sc->vtnet_ifp;
	tries = 0;

	if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
		/*
		 * Ignore this interrupt. Either this is a spurious interrupt
		 * or multiqueue without per-VQ MSIX so every queue needs to
		 * be polled (a brain dead configuration we could try harder
		 * to avoid).
		 */
		vtnet_rxq_disable_intr(rxq);
		return;
	}

	VTNET_RXQ_LOCK(rxq);

again:
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		VTNET_RXQ_UNLOCK(rxq);
		return;
	}

	more = vtnet_rxq_eof(rxq);
	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
		if (!more)
			vtnet_rxq_disable_intr(rxq);
		/*
		 * This is an occasional condition or race (when !more),
		 * so retry a few times before scheduling the taskqueue.
		 */
		if (tries++ < VTNET_INTR_DISABLE_RETRIES)
			goto again;

		VTNET_RXQ_UNLOCK(rxq);
		rxq->vtnrx_stats.vrxs_rescheduled++;
		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
	} else
		VTNET_RXQ_UNLOCK(rxq);
}

static void
vtnet_rxq_tq_intr(void *xrxq, int pending)
{
	struct vtnet_softc *sc;
	struct vtnet_rxq *rxq;
	struct ifnet *ifp;
	int more;

	rxq = xrxq;
	sc = rxq->vtnrx_sc;
	ifp = sc->vtnet_ifp;

	VTNET_RXQ_LOCK(rxq);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		VTNET_RXQ_UNLOCK(rxq);
		return;
	}

	more = vtnet_rxq_eof(rxq);
	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
		if (!more)
			vtnet_rxq_disable_intr(rxq);
		rxq->vtnrx_stats.vrxs_rescheduled++;
		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
	}

	VTNET_RXQ_UNLOCK(rxq);
}

static int
vtnet_txq_below_threshold(struct vtnet_txq *txq)
{
	struct vtnet_softc *sc;
	struct virtqueue *vq;

	sc = txq->vtntx_sc;
	vq = txq->vtntx_vq;

	return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh);
}

static int
vtnet_txq_notify(struct vtnet_txq *txq)
{
	struct virtqueue *vq;

	vq = txq->vtntx_vq;

	txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
	virtqueue_notify(vq);

	if (vtnet_txq_enable_intr(txq) == 0)
		return (0);

	/*
	 * Drain frames that were completed since last checked. If this
	 * causes the queue to go above the threshold, the caller should
	 * continue transmitting.
	 */
	if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
		virtqueue_disable_intr(vq);
		return (1);
	}

	return (0);
}

static void
vtnet_txq_free_mbufs(struct vtnet_txq *txq)
{
	struct virtqueue *vq;
	struct vtnet_tx_header *txhdr;
	int last;

	vq = txq->vtntx_vq;
	last = 0;

	while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
		m_freem(txhdr->vth_mbuf);
		uma_zfree(vtnet_tx_header_zone, txhdr);
	}

	KASSERT(virtqueue_empty(vq),
	    ("%s: mbufs remaining in tx queue %p", __func__, txq));
}

/*
 * BMV: Much of this can go away once we finally have offsets in
 * the mbuf packet header. Bug andre@.
 */
static int
vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
    int *etype, int *proto, int *start)
{
	struct vtnet_softc *sc;
	struct ether_vlan_header *evh;
	int offset;

	sc = txq->vtntx_sc;

	evh = mtod(m, struct ether_vlan_header *);
	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		/* BMV: We should handle nested VLAN tags too. */
*/ 1986 *etype = ntohs(evh->evl_proto); 1987 offset = sizeof(struct ether_vlan_header); 1988 } else { 1989 *etype = ntohs(evh->evl_encap_proto); 1990 offset = sizeof(struct ether_header); 1991 } 1992 1993 switch (*etype) { 1994 #if defined(INET) 1995 case ETHERTYPE_IP: { 1996 struct ip *ip, iphdr; 1997 if (__predict_false(m->m_len < offset + sizeof(struct ip))) { 1998 m_copydata(m, offset, sizeof(struct ip), 1999 (caddr_t) &iphdr); 2000 ip = &iphdr; 2001 } else 2002 ip = (struct ip *)(m->m_data + offset); 2003 *proto = ip->ip_p; 2004 *start = offset + (ip->ip_hl << 2); 2005 break; 2006 } 2007 #endif 2008 #if defined(INET6) 2009 case ETHERTYPE_IPV6: 2010 *proto = -1; 2011 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); 2012 /* Assert the network stack sent us a valid packet. */ 2013 KASSERT(*start > offset, 2014 ("%s: mbuf %p start %d offset %d proto %d", __func__, m, 2015 *start, offset, *proto)); 2016 break; 2017 #endif 2018 default: 2019 sc->vtnet_stats.tx_csum_bad_ethtype++; 2020 return (EINVAL); 2021 } 2022 2023 return (0); 2024 } 2025 2026 static int 2027 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type, 2028 int offset, struct virtio_net_hdr *hdr) 2029 { 2030 static struct timeval lastecn; 2031 static int curecn; 2032 struct vtnet_softc *sc; 2033 struct tcphdr *tcp, tcphdr; 2034 2035 sc = txq->vtntx_sc; 2036 2037 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { 2038 m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); 2039 tcp = &tcphdr; 2040 } else 2041 tcp = (struct tcphdr *)(m->m_data + offset); 2042 2043 hdr->hdr_len = offset + (tcp->th_off << 2); 2044 hdr->gso_size = m->m_pkthdr.tso_segsz; 2045 hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 : 2046 VIRTIO_NET_HDR_GSO_TCPV6; 2047 2048 if (tcp->th_flags & TH_CWR) { 2049 /* 2050 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD, 2051 * ECN support is not on a per-interface basis, but globally via 2052 * the net.inet.tcp.ecn.enable sysctl knob. The default is off. 2053 */ 2054 if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { 2055 if (ppsratecheck(&lastecn, &curecn, 1)) 2056 if_printf(sc->vtnet_ifp, 2057 "TSO with ECN not negotiated with host\n"); 2058 return (ENOTSUP); 2059 } 2060 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; 2061 } 2062 2063 txq->vtntx_stats.vtxs_tso++; 2064 2065 return (0); 2066 } 2067 2068 static struct mbuf * 2069 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m, 2070 struct virtio_net_hdr *hdr) 2071 { 2072 struct vtnet_softc *sc; 2073 int flags, etype, csum_start, proto, error; 2074 2075 sc = txq->vtntx_sc; 2076 flags = m->m_pkthdr.csum_flags; 2077 2078 error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start); 2079 if (error) 2080 goto drop; 2081 2082 if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) || 2083 (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) { 2084 /* 2085 * We could compare the IP protocol vs the CSUM_ flag too, 2086 * but that really should not be necessary. 2087 */ 2088 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; 2089 hdr->csum_start = csum_start; 2090 hdr->csum_offset = m->m_pkthdr.csum_data; 2091 txq->vtntx_stats.vtxs_csum++; 2092 } 2093 2094 if (flags & CSUM_TSO) { 2095 if (__predict_false(proto != IPPROTO_TCP)) { 2096 /* Likely failed to correctly parse the mbuf. 
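		 * CSUM_TSO was requested but the parsed IP protocol is not
		 * TCP, so the TSO header fields cannot be filled in; drop
		 * the packet and account for it in tx_tso_not_tcp.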
*/ 2097 sc->vtnet_stats.tx_tso_not_tcp++; 2098 goto drop; 2099 } 2100 2101 KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM, 2102 ("%s: mbuf %p TSO without checksum offload %#x", 2103 __func__, m, flags)); 2104 2105 error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr); 2106 if (error) 2107 goto drop; 2108 } 2109 2110 return (m); 2111 2112 drop: 2113 m_freem(m); 2114 return (NULL); 2115 } 2116 2117 static int 2118 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, 2119 struct vtnet_tx_header *txhdr) 2120 { 2121 struct vtnet_softc *sc; 2122 struct virtqueue *vq; 2123 struct sglist *sg; 2124 struct mbuf *m; 2125 int error; 2126 2127 sc = txq->vtntx_sc; 2128 vq = txq->vtntx_vq; 2129 sg = txq->vtntx_sg; 2130 m = *m_head; 2131 2132 sglist_reset(sg); 2133 error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); 2134 KASSERT(error == 0 && sg->sg_nseg == 1, 2135 ("%s: error %d adding header to sglist", __func__, error)); 2136 2137 error = sglist_append_mbuf(sg, m); 2138 if (error) { 2139 m = m_defrag(m, M_NOWAIT); 2140 if (m == NULL) 2141 goto fail; 2142 2143 *m_head = m; 2144 sc->vtnet_stats.tx_defragged++; 2145 2146 error = sglist_append_mbuf(sg, m); 2147 if (error) 2148 goto fail; 2149 } 2150 2151 txhdr->vth_mbuf = m; 2152 error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0); 2153 2154 return (error); 2155 2156 fail: 2157 sc->vtnet_stats.tx_defrag_failed++; 2158 m_freem(*m_head); 2159 *m_head = NULL; 2160 2161 return (ENOBUFS); 2162 } 2163 2164 static int 2165 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head) 2166 { 2167 struct vtnet_tx_header *txhdr; 2168 struct virtio_net_hdr *hdr; 2169 struct mbuf *m; 2170 int error; 2171 2172 m = *m_head; 2173 M_ASSERTPKTHDR(m); 2174 2175 txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO); 2176 if (txhdr == NULL) { 2177 m_freem(m); 2178 *m_head = NULL; 2179 return (ENOMEM); 2180 } 2181 2182 /* 2183 * Always use the non-mergeable header, regardless if the feature 2184 * was negotiated. For transmit, num_buffers is always zero. The 2185 * vtnet_hdr_size is used to enqueue the correct header size. 
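	 *
	 * (Rough sketch of the header storage in if_vtnetvar.h, for
	 * reference only:
	 *
	 *	union {
	 *		struct virtio_net_hdr		hdr;
	 *		struct virtio_net_hdr_mrg_rxbuf	mhdr;
	 *	} vth_uhdr;
	 *
	 * so taking &vth_uhdr.hdr below and enqueueing vtnet_hdr_size
	 * bytes of it covers either negotiated header layout.)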
2186 */ 2187 hdr = &txhdr->vth_uhdr.hdr; 2188 2189 if (m->m_flags & M_VLANTAG) { 2190 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); 2191 if ((*m_head = m) == NULL) { 2192 error = ENOBUFS; 2193 goto fail; 2194 } 2195 m->m_flags &= ~M_VLANTAG; 2196 } 2197 2198 if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) { 2199 m = vtnet_txq_offload(txq, m, hdr); 2200 if ((*m_head = m) == NULL) { 2201 error = ENOBUFS; 2202 goto fail; 2203 } 2204 } 2205 2206 error = vtnet_txq_enqueue_buf(txq, m_head, txhdr); 2207 if (error == 0) 2208 return (0); 2209 2210 fail: 2211 uma_zfree(vtnet_tx_header_zone, txhdr); 2212 2213 return (error); 2214 } 2215 2216 #ifdef VTNET_LEGACY_TX 2217 2218 static void 2219 vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp) 2220 { 2221 struct vtnet_softc *sc; 2222 struct virtqueue *vq; 2223 struct mbuf *m0; 2224 int tries, enq; 2225 2226 sc = txq->vtntx_sc; 2227 vq = txq->vtntx_vq; 2228 tries = 0; 2229 2230 VTNET_TXQ_LOCK_ASSERT(txq); 2231 2232 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2233 sc->vtnet_link_active == 0) 2234 return; 2235 2236 vtnet_txq_eof(txq); 2237 2238 again: 2239 enq = 0; 2240 2241 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 2242 if (virtqueue_full(vq)) 2243 break; 2244 2245 IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); 2246 if (m0 == NULL) 2247 break; 2248 2249 if (vtnet_txq_encap(txq, &m0) != 0) { 2250 if (m0 != NULL) 2251 IFQ_DRV_PREPEND(&ifp->if_snd, m0); 2252 break; 2253 } 2254 2255 enq++; 2256 ETHER_BPF_MTAP(ifp, m0); 2257 } 2258 2259 if (enq > 0 && vtnet_txq_notify(txq) != 0) { 2260 if (tries++ < VTNET_NOTIFY_RETRIES) 2261 goto again; 2262 2263 txq->vtntx_stats.vtxs_rescheduled++; 2264 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); 2265 } 2266 } 2267 2268 static void 2269 vtnet_start(struct ifnet *ifp) 2270 { 2271 struct vtnet_softc *sc; 2272 struct vtnet_txq *txq; 2273 2274 sc = ifp->if_softc; 2275 txq = &sc->vtnet_txqs[0]; 2276 2277 VTNET_TXQ_LOCK(txq); 2278 vtnet_start_locked(txq, ifp); 2279 VTNET_TXQ_UNLOCK(txq); 2280 } 2281 2282 #else /* !VTNET_LEGACY_TX */ 2283 2284 static int 2285 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) 2286 { 2287 struct vtnet_softc *sc; 2288 struct virtqueue *vq; 2289 struct buf_ring *br; 2290 struct ifnet *ifp; 2291 int enq, tries, error; 2292 2293 sc = txq->vtntx_sc; 2294 vq = txq->vtntx_vq; 2295 br = txq->vtntx_br; 2296 ifp = sc->vtnet_ifp; 2297 tries = 0; 2298 error = 0; 2299 2300 VTNET_TXQ_LOCK_ASSERT(txq); 2301 2302 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2303 sc->vtnet_link_active == 0) { 2304 if (m != NULL) 2305 error = drbr_enqueue(ifp, br, m); 2306 return (error); 2307 } 2308 2309 if (m != NULL) { 2310 error = drbr_enqueue(ifp, br, m); 2311 if (error) 2312 return (error); 2313 } 2314 2315 vtnet_txq_eof(txq); 2316 2317 again: 2318 enq = 0; 2319 2320 while ((m = drbr_peek(ifp, br)) != NULL) { 2321 if (virtqueue_full(vq)) { 2322 drbr_putback(ifp, br, m); 2323 break; 2324 } 2325 2326 if (vtnet_txq_encap(txq, &m) != 0) { 2327 if (m != NULL) 2328 drbr_putback(ifp, br, m); 2329 else 2330 drbr_advance(ifp, br); 2331 break; 2332 } 2333 drbr_advance(ifp, br); 2334 2335 enq++; 2336 ETHER_BPF_MTAP(ifp, m); 2337 } 2338 2339 if (enq > 0 && vtnet_txq_notify(txq) != 0) { 2340 if (tries++ < VTNET_NOTIFY_RETRIES) 2341 goto again; 2342 2343 txq->vtntx_stats.vtxs_rescheduled++; 2344 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); 2345 } 2346 2347 return (0); 2348 } 2349 2350 static int 2351 vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m) 2352 { 2353 struct vtnet_softc *sc; 
2354 struct vtnet_txq *txq; 2355 int i, npairs, error; 2356 2357 sc = ifp->if_softc; 2358 npairs = sc->vtnet_act_vq_pairs; 2359 2360 /* check if flowid is set */ 2361 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 2362 i = m->m_pkthdr.flowid % npairs; 2363 else 2364 i = curcpu % npairs; 2365 2366 txq = &sc->vtnet_txqs[i]; 2367 2368 if (VTNET_TXQ_TRYLOCK(txq) != 0) { 2369 error = vtnet_txq_mq_start_locked(txq, m); 2370 VTNET_TXQ_UNLOCK(txq); 2371 } else { 2372 error = drbr_enqueue(ifp, txq->vtntx_br, m); 2373 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask); 2374 } 2375 2376 return (error); 2377 } 2378 2379 static void 2380 vtnet_txq_tq_deferred(void *xtxq, int pending) 2381 { 2382 struct vtnet_softc *sc; 2383 struct vtnet_txq *txq; 2384 2385 txq = xtxq; 2386 sc = txq->vtntx_sc; 2387 2388 VTNET_TXQ_LOCK(txq); 2389 if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br)) 2390 vtnet_txq_mq_start_locked(txq, NULL); 2391 VTNET_TXQ_UNLOCK(txq); 2392 } 2393 2394 #endif /* VTNET_LEGACY_TX */ 2395 2396 static void 2397 vtnet_txq_start(struct vtnet_txq *txq) 2398 { 2399 struct vtnet_softc *sc; 2400 struct ifnet *ifp; 2401 2402 sc = txq->vtntx_sc; 2403 ifp = sc->vtnet_ifp; 2404 2405 #ifdef VTNET_LEGACY_TX 2406 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) 2407 vtnet_start_locked(txq, ifp); 2408 #else 2409 if (!drbr_empty(ifp, txq->vtntx_br)) 2410 vtnet_txq_mq_start_locked(txq, NULL); 2411 #endif 2412 } 2413 2414 static void 2415 vtnet_txq_tq_intr(void *xtxq, int pending) 2416 { 2417 struct vtnet_softc *sc; 2418 struct vtnet_txq *txq; 2419 struct ifnet *ifp; 2420 2421 txq = xtxq; 2422 sc = txq->vtntx_sc; 2423 ifp = sc->vtnet_ifp; 2424 2425 VTNET_TXQ_LOCK(txq); 2426 2427 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2428 VTNET_TXQ_UNLOCK(txq); 2429 return; 2430 } 2431 2432 vtnet_txq_eof(txq); 2433 vtnet_txq_start(txq); 2434 2435 VTNET_TXQ_UNLOCK(txq); 2436 } 2437 2438 static int 2439 vtnet_txq_eof(struct vtnet_txq *txq) 2440 { 2441 struct virtqueue *vq; 2442 struct vtnet_tx_header *txhdr; 2443 struct mbuf *m; 2444 int deq; 2445 2446 vq = txq->vtntx_vq; 2447 deq = 0; 2448 VTNET_TXQ_LOCK_ASSERT(txq); 2449 2450 #ifdef DEV_NETMAP 2451 if (netmap_tx_irq(txq->vtntx_sc->vtnet_ifp, txq->vtntx_id)) { 2452 virtqueue_disable_intr(vq); // XXX luigi 2453 return 0; // XXX or 1 ? 2454 } 2455 #endif /* DEV_NETMAP */ 2456 2457 while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { 2458 m = txhdr->vth_mbuf; 2459 deq++; 2460 2461 txq->vtntx_stats.vtxs_opackets++; 2462 txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len; 2463 if (m->m_flags & M_MCAST) 2464 txq->vtntx_stats.vtxs_omcasts++; 2465 2466 m_freem(m); 2467 uma_zfree(vtnet_tx_header_zone, txhdr); 2468 } 2469 2470 if (virtqueue_empty(vq)) 2471 txq->vtntx_watchdog = 0; 2472 2473 return (deq); 2474 } 2475 2476 static void 2477 vtnet_tx_vq_intr(void *xtxq) 2478 { 2479 struct vtnet_softc *sc; 2480 struct vtnet_txq *txq; 2481 struct ifnet *ifp; 2482 2483 txq = xtxq; 2484 sc = txq->vtntx_sc; 2485 ifp = sc->vtnet_ifp; 2486 2487 if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) { 2488 /* 2489 * Ignore this interrupt. Either this is a spurious interrupt 2490 * or multiqueue without per-VQ MSIX so every queue needs to 2491 * be polled (a brain dead configuration we could try harder 2492 * to avoid). 
2493 */ 2494 vtnet_txq_disable_intr(txq); 2495 return; 2496 } 2497 2498 VTNET_TXQ_LOCK(txq); 2499 2500 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2501 VTNET_TXQ_UNLOCK(txq); 2502 return; 2503 } 2504 2505 vtnet_txq_eof(txq); 2506 vtnet_txq_start(txq); 2507 2508 VTNET_TXQ_UNLOCK(txq); 2509 } 2510 2511 static void 2512 vtnet_tx_start_all(struct vtnet_softc *sc) 2513 { 2514 struct vtnet_txq *txq; 2515 int i; 2516 2517 VTNET_CORE_LOCK_ASSERT(sc); 2518 2519 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2520 txq = &sc->vtnet_txqs[i]; 2521 2522 VTNET_TXQ_LOCK(txq); 2523 vtnet_txq_start(txq); 2524 VTNET_TXQ_UNLOCK(txq); 2525 } 2526 } 2527 2528 #ifndef VTNET_LEGACY_TX 2529 static void 2530 vtnet_qflush(struct ifnet *ifp) 2531 { 2532 struct vtnet_softc *sc; 2533 struct vtnet_txq *txq; 2534 struct mbuf *m; 2535 int i; 2536 2537 sc = ifp->if_softc; 2538 2539 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2540 txq = &sc->vtnet_txqs[i]; 2541 2542 VTNET_TXQ_LOCK(txq); 2543 while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL) 2544 m_freem(m); 2545 VTNET_TXQ_UNLOCK(txq); 2546 } 2547 2548 if_qflush(ifp); 2549 } 2550 #endif 2551 2552 static int 2553 vtnet_watchdog(struct vtnet_txq *txq) 2554 { 2555 struct ifnet *ifp; 2556 2557 ifp = txq->vtntx_sc->vtnet_ifp; 2558 2559 VTNET_TXQ_LOCK(txq); 2560 if (txq->vtntx_watchdog == 1) { 2561 /* 2562 * Only drain completed frames if the watchdog is about to 2563 * expire. If any frames were drained, there may be enough 2564 * free descriptors now available to transmit queued frames. 2565 * In that case, the timer will immediately be decremented 2566 * below, but the timeout is generous enough that should not 2567 * be a problem. 2568 */ 2569 if (vtnet_txq_eof(txq) != 0) 2570 vtnet_txq_start(txq); 2571 } 2572 2573 if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) { 2574 VTNET_TXQ_UNLOCK(txq); 2575 return (0); 2576 } 2577 VTNET_TXQ_UNLOCK(txq); 2578 2579 if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id); 2580 return (1); 2581 } 2582 2583 static void 2584 vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc, 2585 struct vtnet_txq_stats *txacc) 2586 { 2587 2588 bzero(rxacc, sizeof(struct vtnet_rxq_stats)); 2589 bzero(txacc, sizeof(struct vtnet_txq_stats)); 2590 2591 for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) { 2592 struct vtnet_rxq_stats *rxst; 2593 struct vtnet_txq_stats *txst; 2594 2595 rxst = &sc->vtnet_rxqs[i].vtnrx_stats; 2596 rxacc->vrxs_ipackets += rxst->vrxs_ipackets; 2597 rxacc->vrxs_ibytes += rxst->vrxs_ibytes; 2598 rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops; 2599 rxacc->vrxs_csum += rxst->vrxs_csum; 2600 rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed; 2601 rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled; 2602 2603 txst = &sc->vtnet_txqs[i].vtntx_stats; 2604 txacc->vtxs_opackets += txst->vtxs_opackets; 2605 txacc->vtxs_obytes += txst->vtxs_obytes; 2606 txacc->vtxs_csum += txst->vtxs_csum; 2607 txacc->vtxs_tso += txst->vtxs_tso; 2608 txacc->vtxs_rescheduled += txst->vtxs_rescheduled; 2609 } 2610 } 2611 2612 static uint64_t 2613 vtnet_get_counter(if_t ifp, ift_counter cnt) 2614 { 2615 struct vtnet_softc *sc; 2616 struct vtnet_rxq_stats rxaccum; 2617 struct vtnet_txq_stats txaccum; 2618 2619 sc = if_getsoftc(ifp); 2620 vtnet_accum_stats(sc, &rxaccum, &txaccum); 2621 2622 switch (cnt) { 2623 case IFCOUNTER_IPACKETS: 2624 return (rxaccum.vrxs_ipackets); 2625 case IFCOUNTER_IQDROPS: 2626 return (rxaccum.vrxs_iqdrops); 2627 case IFCOUNTER_IERRORS: 2628 return (rxaccum.vrxs_ierrors); 2629 case IFCOUNTER_OPACKETS: 
2630 return (txaccum.vtxs_opackets); 2631 #ifndef VTNET_LEGACY_TX 2632 case IFCOUNTER_OBYTES: 2633 return (txaccum.vtxs_obytes); 2634 case IFCOUNTER_OMCASTS: 2635 return (txaccum.vtxs_omcasts); 2636 #endif 2637 default: 2638 return (if_get_counter_default(ifp, cnt)); 2639 } 2640 } 2641 2642 static void 2643 vtnet_tick(void *xsc) 2644 { 2645 struct vtnet_softc *sc; 2646 struct ifnet *ifp; 2647 int i, timedout; 2648 2649 sc = xsc; 2650 ifp = sc->vtnet_ifp; 2651 timedout = 0; 2652 2653 VTNET_CORE_LOCK_ASSERT(sc); 2654 2655 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 2656 timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]); 2657 2658 if (timedout != 0) { 2659 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 2660 vtnet_init_locked(sc); 2661 } else 2662 callout_schedule(&sc->vtnet_tick_ch, hz); 2663 } 2664 2665 static void 2666 vtnet_start_taskqueues(struct vtnet_softc *sc) 2667 { 2668 device_t dev; 2669 struct vtnet_rxq *rxq; 2670 struct vtnet_txq *txq; 2671 int i, error; 2672 2673 dev = sc->vtnet_dev; 2674 2675 /* 2676 * Errors here are very difficult to recover from - we cannot 2677 * easily fail because, if this is during boot, we will hang 2678 * when freeing any successfully started taskqueues because 2679 * the scheduler isn't up yet. 2680 * 2681 * Most drivers just ignore the return value - it only fails 2682 * with ENOMEM so an error is not likely. 2683 */ 2684 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 2685 rxq = &sc->vtnet_rxqs[i]; 2686 error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET, 2687 "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id); 2688 if (error) { 2689 device_printf(dev, "failed to start rx taskq %d\n", 2690 rxq->vtnrx_id); 2691 } 2692 2693 txq = &sc->vtnet_txqs[i]; 2694 error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET, 2695 "%s txq %d", device_get_nameunit(dev), txq->vtntx_id); 2696 if (error) { 2697 device_printf(dev, "failed to start tx taskq %d\n", 2698 txq->vtntx_id); 2699 } 2700 } 2701 } 2702 2703 static void 2704 vtnet_free_taskqueues(struct vtnet_softc *sc) 2705 { 2706 struct vtnet_rxq *rxq; 2707 struct vtnet_txq *txq; 2708 int i; 2709 2710 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 2711 rxq = &sc->vtnet_rxqs[i]; 2712 if (rxq->vtnrx_tq != NULL) { 2713 taskqueue_free(rxq->vtnrx_tq); 2714 rxq->vtnrx_tq = NULL; 2715 } 2716 2717 txq = &sc->vtnet_txqs[i]; 2718 if (txq->vtntx_tq != NULL) { 2719 taskqueue_free(txq->vtntx_tq); 2720 txq->vtntx_tq = NULL; 2721 } 2722 } 2723 } 2724 2725 static void 2726 vtnet_drain_taskqueues(struct vtnet_softc *sc) 2727 { 2728 struct vtnet_rxq *rxq; 2729 struct vtnet_txq *txq; 2730 int i; 2731 2732 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 2733 rxq = &sc->vtnet_rxqs[i]; 2734 if (rxq->vtnrx_tq != NULL) 2735 taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 2736 2737 txq = &sc->vtnet_txqs[i]; 2738 if (txq->vtntx_tq != NULL) { 2739 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask); 2740 #ifndef VTNET_LEGACY_TX 2741 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask); 2742 #endif 2743 } 2744 } 2745 } 2746 2747 static void 2748 vtnet_drain_rxtx_queues(struct vtnet_softc *sc) 2749 { 2750 struct vtnet_rxq *rxq; 2751 struct vtnet_txq *txq; 2752 int i; 2753 2754 #ifdef DEV_NETMAP 2755 if (nm_native_on(NA(sc->vtnet_ifp))) 2756 return; 2757 #endif /* DEV_NETMAP */ 2758 2759 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2760 rxq = &sc->vtnet_rxqs[i]; 2761 vtnet_rxq_free_mbufs(rxq); 2762 2763 txq = &sc->vtnet_txqs[i]; 2764 vtnet_txq_free_mbufs(txq); 2765 } 2766 } 2767 2768 static void 2769 vtnet_stop_rendezvous(struct
vtnet_softc *sc) 2770 { 2771 struct vtnet_rxq *rxq; 2772 struct vtnet_txq *txq; 2773 int i; 2774 2775 /* 2776 * Lock and unlock the per-queue mutex so we know the stop 2777 * state is visible. Doing only the active queues should be 2778 * sufficient, but it does not cost much extra to do all the 2779 * queues. Note we hold the core mutex here too. 2780 */ 2781 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 2782 rxq = &sc->vtnet_rxqs[i]; 2783 VTNET_RXQ_LOCK(rxq); 2784 VTNET_RXQ_UNLOCK(rxq); 2785 2786 txq = &sc->vtnet_txqs[i]; 2787 VTNET_TXQ_LOCK(txq); 2788 VTNET_TXQ_UNLOCK(txq); 2789 } 2790 } 2791 2792 static void 2793 vtnet_stop(struct vtnet_softc *sc) 2794 { 2795 device_t dev; 2796 struct ifnet *ifp; 2797 2798 dev = sc->vtnet_dev; 2799 ifp = sc->vtnet_ifp; 2800 2801 VTNET_CORE_LOCK_ASSERT(sc); 2802 2803 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 2804 sc->vtnet_link_active = 0; 2805 callout_stop(&sc->vtnet_tick_ch); 2806 2807 /* Only advisory. */ 2808 vtnet_disable_interrupts(sc); 2809 2810 /* 2811 * Stop the host adapter. This resets it to the pre-initialized 2812 * state. It will not generate any interrupts until after it is 2813 * reinitialized. 2814 */ 2815 virtio_stop(dev); 2816 vtnet_stop_rendezvous(sc); 2817 2818 /* Free any mbufs left in the virtqueues. */ 2819 vtnet_drain_rxtx_queues(sc); 2820 } 2821 2822 static int 2823 vtnet_virtio_reinit(struct vtnet_softc *sc) 2824 { 2825 device_t dev; 2826 struct ifnet *ifp; 2827 uint64_t features; 2828 int mask, error; 2829 2830 dev = sc->vtnet_dev; 2831 ifp = sc->vtnet_ifp; 2832 features = sc->vtnet_features; 2833 2834 mask = 0; 2835 #if defined(INET) 2836 mask |= IFCAP_RXCSUM; 2837 #endif 2838 #if defined (INET6) 2839 mask |= IFCAP_RXCSUM_IPV6; 2840 #endif 2841 2842 /* 2843 * Re-negotiate with the host, removing any disabled receive 2844 * features. Transmit features are disabled only on our side 2845 * via if_capenable and if_hwassist. 2846 */ 2847 2848 if (ifp->if_capabilities & mask) { 2849 /* 2850 * We require both IPv4 and IPv6 offloading to be enabled 2851 * in order to negotiate it: VirtIO does not distinguish 2852 * between the two. 2853 */ 2854 if ((ifp->if_capenable & mask) != mask) 2855 features &= ~VIRTIO_NET_F_GUEST_CSUM; 2856 } 2857 2858 if (ifp->if_capabilities & IFCAP_LRO) { 2859 if ((ifp->if_capenable & IFCAP_LRO) == 0) 2860 features &= ~VTNET_LRO_FEATURES; 2861 } 2862 2863 if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) { 2864 if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) 2865 features &= ~VIRTIO_NET_F_CTRL_VLAN; 2866 } 2867 2868 error = virtio_reinit(dev, features); 2869 if (error) 2870 device_printf(dev, "virtio reinit error %d\n", error); 2871 2872 return (error); 2873 } 2874 2875 static void 2876 vtnet_init_rx_filters(struct vtnet_softc *sc) 2877 { 2878 struct ifnet *ifp; 2879 2880 ifp = sc->vtnet_ifp; 2881 2882 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { 2883 /* Restore promiscuous and all-multicast modes. */ 2884 vtnet_rx_filter(sc); 2885 /* Restore filtered MAC addresses. */ 2886 vtnet_rx_filter_mac(sc); 2887 } 2888 2889 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) 2890 vtnet_rx_filter_vlan(sc); 2891 } 2892 2893 static int 2894 vtnet_init_rx_queues(struct vtnet_softc *sc) 2895 { 2896 device_t dev; 2897 struct vtnet_rxq *rxq; 2898 int i, clsize, error; 2899 2900 dev = sc->vtnet_dev; 2901 2902 /* 2903 * Use the new cluster size if one has been set (via an MTU 2904 * change). Otherwise, use the standard 2K clusters.
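	 * (The cluster size chosen here also determines vtnet_rx_nmbufs
	 * just below: without mergeable Rx buffers, enough clusters must
	 * be chained up front to hold the largest frame the host may
	 * deliver.)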
2905 * 2906 * BMV: It might make sense to use page sized clusters as 2907 * the default (depending on the features negotiated). 2908 */ 2909 if (sc->vtnet_rx_new_clsize != 0) { 2910 clsize = sc->vtnet_rx_new_clsize; 2911 sc->vtnet_rx_new_clsize = 0; 2912 } else 2913 clsize = MCLBYTES; 2914 2915 sc->vtnet_rx_clsize = clsize; 2916 sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize); 2917 2918 KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS || 2919 sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, 2920 ("%s: too many rx mbufs %d for %d segments", __func__, 2921 sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); 2922 2923 #ifdef DEV_NETMAP 2924 if (vtnet_netmap_init_rx_buffers(sc)) 2925 return 0; 2926 #endif /* DEV_NETMAP */ 2927 2928 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2929 rxq = &sc->vtnet_rxqs[i]; 2930 2931 /* Hold the lock to satisfy asserts. */ 2932 VTNET_RXQ_LOCK(rxq); 2933 error = vtnet_rxq_populate(rxq); 2934 VTNET_RXQ_UNLOCK(rxq); 2935 2936 if (error) { 2937 device_printf(dev, 2938 "cannot allocate mbufs for Rx queue %d\n", i); 2939 return (error); 2940 } 2941 } 2942 2943 return (0); 2944 } 2945 2946 static int 2947 vtnet_init_tx_queues(struct vtnet_softc *sc) 2948 { 2949 struct vtnet_txq *txq; 2950 int i; 2951 2952 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2953 txq = &sc->vtnet_txqs[i]; 2954 txq->vtntx_watchdog = 0; 2955 } 2956 2957 return (0); 2958 } 2959 2960 static int 2961 vtnet_init_rxtx_queues(struct vtnet_softc *sc) 2962 { 2963 int error; 2964 2965 error = vtnet_init_rx_queues(sc); 2966 if (error) 2967 return (error); 2968 2969 error = vtnet_init_tx_queues(sc); 2970 if (error) 2971 return (error); 2972 2973 return (0); 2974 } 2975 2976 static void 2977 vtnet_set_active_vq_pairs(struct vtnet_softc *sc) 2978 { 2979 device_t dev; 2980 int npairs; 2981 2982 dev = sc->vtnet_dev; 2983 2984 if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) { 2985 MPASS(sc->vtnet_max_vq_pairs == 1); 2986 sc->vtnet_act_vq_pairs = 1; 2987 return; 2988 } 2989 2990 /* BMV: Just use the maximum configured for now. */ 2991 npairs = sc->vtnet_max_vq_pairs; 2992 2993 if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { 2994 device_printf(dev, 2995 "cannot set active queue pairs to %d\n", npairs); 2996 npairs = 1; 2997 } 2998 2999 sc->vtnet_act_vq_pairs = npairs; 3000 } 3001 3002 static int 3003 vtnet_reinit(struct vtnet_softc *sc) 3004 { 3005 struct ifnet *ifp; 3006 int error; 3007 3008 ifp = sc->vtnet_ifp; 3009 3010 /* Use the current MAC address. 
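	 * It may have been changed administratively since attach, so
	 * re-read it from the ifnet and push it back out to the host.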
*/ 3011 bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); 3012 vtnet_set_hwaddr(sc); 3013 3014 vtnet_set_active_vq_pairs(sc); 3015 3016 ifp->if_hwassist = 0; 3017 if (ifp->if_capenable & IFCAP_TXCSUM) 3018 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD; 3019 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 3020 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6; 3021 if (ifp->if_capenable & IFCAP_TSO4) 3022 ifp->if_hwassist |= CSUM_IP_TSO; 3023 if (ifp->if_capenable & IFCAP_TSO6) 3024 ifp->if_hwassist |= CSUM_IP6_TSO; 3025 3026 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) 3027 vtnet_init_rx_filters(sc); 3028 3029 error = vtnet_init_rxtx_queues(sc); 3030 if (error) 3031 return (error); 3032 3033 vtnet_enable_interrupts(sc); 3034 ifp->if_drv_flags |= IFF_DRV_RUNNING; 3035 3036 return (0); 3037 } 3038 3039 static void 3040 vtnet_init_locked(struct vtnet_softc *sc) 3041 { 3042 device_t dev; 3043 struct ifnet *ifp; 3044 3045 dev = sc->vtnet_dev; 3046 ifp = sc->vtnet_ifp; 3047 3048 VTNET_CORE_LOCK_ASSERT(sc); 3049 3050 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3051 return; 3052 3053 vtnet_stop(sc); 3054 3055 /* Reinitialize with the host. */ 3056 if (vtnet_virtio_reinit(sc) != 0) 3057 goto fail; 3058 3059 if (vtnet_reinit(sc) != 0) 3060 goto fail; 3061 3062 virtio_reinit_complete(dev); 3063 3064 vtnet_update_link_status(sc); 3065 callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); 3066 3067 return; 3068 3069 fail: 3070 vtnet_stop(sc); 3071 } 3072 3073 static void 3074 vtnet_init(void *xsc) 3075 { 3076 struct vtnet_softc *sc; 3077 3078 sc = xsc; 3079 3080 #ifdef DEV_NETMAP 3081 if (!NA(sc->vtnet_ifp)) { 3082 D("try to attach again"); 3083 vtnet_netmap_attach(sc); 3084 } 3085 #endif /* DEV_NETMAP */ 3086 3087 VTNET_CORE_LOCK(sc); 3088 vtnet_init_locked(sc); 3089 VTNET_CORE_UNLOCK(sc); 3090 } 3091 3092 static void 3093 vtnet_free_ctrl_vq(struct vtnet_softc *sc) 3094 { 3095 struct virtqueue *vq; 3096 3097 vq = sc->vtnet_ctrl_vq; 3098 3099 /* 3100 * The control virtqueue is only polled and therefore it should 3101 * already be empty. 3102 */ 3103 KASSERT(virtqueue_empty(vq), 3104 ("%s: ctrl vq %p not empty", __func__, vq)); 3105 } 3106 3107 static void 3108 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie, 3109 struct sglist *sg, int readable, int writable) 3110 { 3111 struct virtqueue *vq; 3112 3113 vq = sc->vtnet_ctrl_vq; 3114 3115 VTNET_CORE_LOCK_ASSERT(sc); 3116 KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ, 3117 ("%s: CTRL_VQ feature not negotiated", __func__)); 3118 3119 if (!virtqueue_empty(vq)) 3120 return; 3121 if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0) 3122 return; 3123 3124 /* 3125 * Poll for the response, but the command is likely already 3126 * done when we return from the notify. 
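	 * The control virtqueue has no interrupt handler; virtqueue_poll()
	 * busy-waits, with the core lock held, until the host marks the
	 * command buffer as used.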
3127 */ 3128 virtqueue_notify(vq); 3129 virtqueue_poll(vq, NULL); 3130 } 3131 3132 static int 3133 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) 3134 { 3135 struct virtio_net_ctrl_hdr hdr __aligned(2); 3136 struct sglist_seg segs[3]; 3137 struct sglist sg; 3138 uint8_t ack; 3139 int error; 3140 3141 hdr.class = VIRTIO_NET_CTRL_MAC; 3142 hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; 3143 ack = VIRTIO_NET_ERR; 3144 3145 sglist_init(&sg, 3, segs); 3146 error = 0; 3147 error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); 3148 error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN); 3149 error |= sglist_append(&sg, &ack, sizeof(uint8_t)); 3150 KASSERT(error == 0 && sg.sg_nseg == 3, 3151 ("%s: error %d adding set MAC msg to sglist", __func__, error)); 3152 3153 vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); 3154 3155 return (ack == VIRTIO_NET_OK ? 0 : EIO); 3156 } 3157 3158 static int 3159 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs) 3160 { 3161 struct sglist_seg segs[3]; 3162 struct sglist sg; 3163 struct { 3164 struct virtio_net_ctrl_hdr hdr; 3165 uint8_t pad1; 3166 struct virtio_net_ctrl_mq mq; 3167 uint8_t pad2; 3168 uint8_t ack; 3169 } s __aligned(2); 3170 int error; 3171 3172 s.hdr.class = VIRTIO_NET_CTRL_MQ; 3173 s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; 3174 s.mq.virtqueue_pairs = npairs; 3175 s.ack = VIRTIO_NET_ERR; 3176 3177 sglist_init(&sg, 3, segs); 3178 error = 0; 3179 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3180 error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); 3181 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3182 KASSERT(error == 0 && sg.sg_nseg == 3, 3183 ("%s: error %d adding MQ message to sglist", __func__, error)); 3184 3185 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3186 3187 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3188 } 3189 3190 static int 3191 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on) 3192 { 3193 struct sglist_seg segs[3]; 3194 struct sglist sg; 3195 struct { 3196 struct virtio_net_ctrl_hdr hdr; 3197 uint8_t pad1; 3198 uint8_t onoff; 3199 uint8_t pad2; 3200 uint8_t ack; 3201 } s __aligned(2); 3202 int error; 3203 3204 KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, 3205 ("%s: CTRL_RX feature not negotiated", __func__)); 3206 3207 s.hdr.class = VIRTIO_NET_CTRL_RX; 3208 s.hdr.cmd = cmd; 3209 s.onoff = !!on; 3210 s.ack = VIRTIO_NET_ERR; 3211 3212 sglist_init(&sg, 3, segs); 3213 error = 0; 3214 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3215 error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); 3216 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3217 KASSERT(error == 0 && sg.sg_nseg == 3, 3218 ("%s: error %d adding Rx message to sglist", __func__, error)); 3219 3220 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3221 3222 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3223 } 3224 3225 static int 3226 vtnet_set_promisc(struct vtnet_softc *sc, int on) 3227 { 3228 3229 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on)); 3230 } 3231 3232 static int 3233 vtnet_set_allmulti(struct vtnet_softc *sc, int on) 3234 { 3235 3236 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); 3237 } 3238 3239 /* 3240 * The device defaults to promiscuous mode for backwards compatibility. 3241 * Turn it off at attach time if possible. 
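 * Without VIRTIO_NET_F_CTRL_RX there is no way to turn it off, so in that
 * case IFF_PROMISC is simply set to reflect what the device is actually
 * doing.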
3242 */ 3243 static void 3244 vtnet_attach_disable_promisc(struct vtnet_softc *sc) 3245 { 3246 struct ifnet *ifp; 3247 3248 ifp = sc->vtnet_ifp; 3249 3250 VTNET_CORE_LOCK(sc); 3251 if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) { 3252 ifp->if_flags |= IFF_PROMISC; 3253 } else if (vtnet_set_promisc(sc, 0) != 0) { 3254 ifp->if_flags |= IFF_PROMISC; 3255 device_printf(sc->vtnet_dev, 3256 "cannot disable default promiscuous mode\n"); 3257 } 3258 VTNET_CORE_UNLOCK(sc); 3259 } 3260 3261 static void 3262 vtnet_rx_filter(struct vtnet_softc *sc) 3263 { 3264 device_t dev; 3265 struct ifnet *ifp; 3266 3267 dev = sc->vtnet_dev; 3268 ifp = sc->vtnet_ifp; 3269 3270 VTNET_CORE_LOCK_ASSERT(sc); 3271 3272 if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) 3273 device_printf(dev, "cannot %s promiscuous mode\n", 3274 ifp->if_flags & IFF_PROMISC ? "enable" : "disable"); 3275 3276 if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) 3277 device_printf(dev, "cannot %s all-multicast mode\n", 3278 ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable"); 3279 } 3280 3281 static void 3282 vtnet_rx_filter_mac(struct vtnet_softc *sc) 3283 { 3284 struct virtio_net_ctrl_hdr hdr __aligned(2); 3285 struct vtnet_mac_filter *filter; 3286 struct sglist_seg segs[4]; 3287 struct sglist sg; 3288 struct ifnet *ifp; 3289 struct ifaddr *ifa; 3290 struct ifmultiaddr *ifma; 3291 int ucnt, mcnt, promisc, allmulti, error; 3292 uint8_t ack; 3293 3294 ifp = sc->vtnet_ifp; 3295 filter = sc->vtnet_mac_filter; 3296 ucnt = 0; 3297 mcnt = 0; 3298 promisc = 0; 3299 allmulti = 0; 3300 3301 VTNET_CORE_LOCK_ASSERT(sc); 3302 KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, 3303 ("%s: CTRL_RX feature not negotiated", __func__)); 3304 3305 /* Unicast MAC addresses: */ 3306 if_addr_rlock(ifp); 3307 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 3308 if (ifa->ifa_addr->sa_family != AF_LINK) 3309 continue; 3310 else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), 3311 sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0) 3312 continue; 3313 else if (ucnt == VTNET_MAX_MAC_ENTRIES) { 3314 promisc = 1; 3315 break; 3316 } 3317 3318 bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), 3319 &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN); 3320 ucnt++; 3321 } 3322 if_addr_runlock(ifp); 3323 3324 if (promisc != 0) { 3325 filter->vmf_unicast.nentries = 0; 3326 if_printf(ifp, "more than %d MAC addresses assigned, " 3327 "falling back to promiscuous mode\n", 3328 VTNET_MAX_MAC_ENTRIES); 3329 } else 3330 filter->vmf_unicast.nentries = ucnt; 3331 3332 /* Multicast MAC addresses: */ 3333 if_maddr_rlock(ifp); 3334 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 3335 if (ifma->ifma_addr->sa_family != AF_LINK) 3336 continue; 3337 else if (mcnt == VTNET_MAX_MAC_ENTRIES) { 3338 allmulti = 1; 3339 break; 3340 } 3341 3342 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 3343 &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN); 3344 mcnt++; 3345 } 3346 if_maddr_runlock(ifp); 3347 3348 if (allmulti != 0) { 3349 filter->vmf_multicast.nentries = 0; 3350 if_printf(ifp, "more than %d multicast MAC addresses " 3351 "assigned, falling back to all-multicast mode\n", 3352 VTNET_MAX_MAC_ENTRIES); 3353 } else 3354 filter->vmf_multicast.nentries = mcnt; 3355 3356 if (promisc != 0 && allmulti != 0) 3357 goto out; 3358 3359 hdr.class = VIRTIO_NET_CTRL_MAC; 3360 hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; 3361 ack = VIRTIO_NET_ERR; 3362 3363 sglist_init(&sg, 4, segs); 3364 error = 0; 3365 error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); 3366 error 
|= sglist_append(&sg, &filter->vmf_unicast, 3367 sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN); 3368 error |= sglist_append(&sg, &filter->vmf_multicast, 3369 sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN); 3370 error |= sglist_append(&sg, &ack, sizeof(uint8_t)); 3371 KASSERT(error == 0 && sg.sg_nseg == 4, 3372 ("%s: error %d adding MAC filter msg to sglist", __func__, error)); 3373 3374 vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); 3375 3376 if (ack != VIRTIO_NET_OK) 3377 if_printf(ifp, "error setting host MAC filter table\n"); 3378 3379 out: 3380 if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0) 3381 if_printf(ifp, "cannot enable promiscuous mode\n"); 3382 if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0) 3383 if_printf(ifp, "cannot enable all-multicast mode\n"); 3384 } 3385 3386 static int 3387 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) 3388 { 3389 struct sglist_seg segs[3]; 3390 struct sglist sg; 3391 struct { 3392 struct virtio_net_ctrl_hdr hdr; 3393 uint8_t pad1; 3394 uint16_t tag; 3395 uint8_t pad2; 3396 uint8_t ack; 3397 } s __aligned(2); 3398 int error; 3399 3400 s.hdr.class = VIRTIO_NET_CTRL_VLAN; 3401 s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; 3402 s.tag = tag; 3403 s.ack = VIRTIO_NET_ERR; 3404 3405 sglist_init(&sg, 3, segs); 3406 error = 0; 3407 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3408 error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); 3409 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3410 KASSERT(error == 0 && sg.sg_nseg == 3, 3411 ("%s: error %d adding VLAN message to sglist", __func__, error)); 3412 3413 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3414 3415 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3416 } 3417 3418 static void 3419 vtnet_rx_filter_vlan(struct vtnet_softc *sc) 3420 { 3421 uint32_t w; 3422 uint16_t tag; 3423 int i, bit; 3424 3425 VTNET_CORE_LOCK_ASSERT(sc); 3426 KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER, 3427 ("%s: VLAN_FILTER feature not negotiated", __func__)); 3428 3429 /* Enable the filter for each configured VLAN. */ 3430 for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { 3431 w = sc->vtnet_vlan_filter[i]; 3432 3433 while ((bit = ffs(w) - 1) != -1) { 3434 w &= ~(1 << bit); 3435 tag = sizeof(w) * CHAR_BIT * i + bit; 3436 3437 if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) { 3438 device_printf(sc->vtnet_dev, 3439 "cannot enable VLAN %d filter\n", tag); 3440 } 3441 } 3442 } 3443 } 3444 3445 static void 3446 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) 3447 { 3448 struct ifnet *ifp; 3449 int idx, bit; 3450 3451 ifp = sc->vtnet_ifp; 3452 idx = (tag >> 5) & 0x7F; 3453 bit = tag & 0x1F; 3454 3455 if (tag == 0 || tag > 4095) 3456 return; 3457 3458 VTNET_CORE_LOCK(sc); 3459 3460 if (add) 3461 sc->vtnet_vlan_filter[idx] |= (1 << bit); 3462 else 3463 sc->vtnet_vlan_filter[idx] &= ~(1 << bit); 3464 3465 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER && 3466 vtnet_exec_vlan_filter(sc, add, tag) != 0) { 3467 device_printf(sc->vtnet_dev, 3468 "cannot %s VLAN %d %s the host filter table\n", 3469 add ? "add" : "remove", tag, add ? 
"to" : "from"); 3470 } 3471 3472 VTNET_CORE_UNLOCK(sc); 3473 } 3474 3475 static void 3476 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag) 3477 { 3478 3479 if (ifp->if_softc != arg) 3480 return; 3481 3482 vtnet_update_vlan_filter(arg, 1, tag); 3483 } 3484 3485 static void 3486 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag) 3487 { 3488 3489 if (ifp->if_softc != arg) 3490 return; 3491 3492 vtnet_update_vlan_filter(arg, 0, tag); 3493 } 3494 3495 static int 3496 vtnet_is_link_up(struct vtnet_softc *sc) 3497 { 3498 device_t dev; 3499 struct ifnet *ifp; 3500 uint16_t status; 3501 3502 dev = sc->vtnet_dev; 3503 ifp = sc->vtnet_ifp; 3504 3505 if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0) 3506 status = VIRTIO_NET_S_LINK_UP; 3507 else 3508 status = virtio_read_dev_config_2(dev, 3509 offsetof(struct virtio_net_config, status)); 3510 3511 return ((status & VIRTIO_NET_S_LINK_UP) != 0); 3512 } 3513 3514 static void 3515 vtnet_update_link_status(struct vtnet_softc *sc) 3516 { 3517 struct ifnet *ifp; 3518 int link; 3519 3520 ifp = sc->vtnet_ifp; 3521 3522 VTNET_CORE_LOCK_ASSERT(sc); 3523 link = vtnet_is_link_up(sc); 3524 3525 /* Notify if the link status has changed. */ 3526 if (link != 0 && sc->vtnet_link_active == 0) { 3527 sc->vtnet_link_active = 1; 3528 if_link_state_change(ifp, LINK_STATE_UP); 3529 } else if (link == 0 && sc->vtnet_link_active != 0) { 3530 sc->vtnet_link_active = 0; 3531 if_link_state_change(ifp, LINK_STATE_DOWN); 3532 } 3533 } 3534 3535 static int 3536 vtnet_ifmedia_upd(struct ifnet *ifp) 3537 { 3538 struct vtnet_softc *sc; 3539 struct ifmedia *ifm; 3540 3541 sc = ifp->if_softc; 3542 ifm = &sc->vtnet_media; 3543 3544 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) 3545 return (EINVAL); 3546 3547 return (0); 3548 } 3549 3550 static void 3551 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 3552 { 3553 struct vtnet_softc *sc; 3554 3555 sc = ifp->if_softc; 3556 3557 ifmr->ifm_status = IFM_AVALID; 3558 ifmr->ifm_active = IFM_ETHER; 3559 3560 VTNET_CORE_LOCK(sc); 3561 if (vtnet_is_link_up(sc) != 0) { 3562 ifmr->ifm_status |= IFM_ACTIVE; 3563 ifmr->ifm_active |= VTNET_MEDIATYPE; 3564 } else 3565 ifmr->ifm_active |= IFM_NONE; 3566 VTNET_CORE_UNLOCK(sc); 3567 } 3568 3569 static void 3570 vtnet_set_hwaddr(struct vtnet_softc *sc) 3571 { 3572 device_t dev; 3573 int i; 3574 3575 dev = sc->vtnet_dev; 3576 3577 if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { 3578 if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0) 3579 device_printf(dev, "unable to set MAC address\n"); 3580 } else if (sc->vtnet_flags & VTNET_FLAG_MAC) { 3581 for (i = 0; i < ETHER_ADDR_LEN; i++) { 3582 virtio_write_dev_config_1(dev, 3583 offsetof(struct virtio_net_config, mac) + i, 3584 sc->vtnet_hwaddr[i]); 3585 } 3586 } 3587 } 3588 3589 static void 3590 vtnet_get_hwaddr(struct vtnet_softc *sc) 3591 { 3592 device_t dev; 3593 int i; 3594 3595 dev = sc->vtnet_dev; 3596 3597 if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) { 3598 /* 3599 * Generate a random locally administered unicast address. 3600 * 3601 * It would be nice to generate the same MAC address across 3602 * reboots, but it seems all the hosts currently available 3603 * support the MAC feature, so this isn't too important. 
3604 */ 3605 sc->vtnet_hwaddr[0] = 0xB2; 3606 arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); 3607 vtnet_set_hwaddr(sc); 3608 return; 3609 } 3610 3611 for (i = 0; i < ETHER_ADDR_LEN; i++) { 3612 sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev, 3613 offsetof(struct virtio_net_config, mac) + i); 3614 } 3615 } 3616 3617 static void 3618 vtnet_vlan_tag_remove(struct mbuf *m) 3619 { 3620 struct ether_vlan_header *evh; 3621 3622 evh = mtod(m, struct ether_vlan_header *); 3623 m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); 3624 m->m_flags |= M_VLANTAG; 3625 3626 /* Strip the 802.1Q header. */ 3627 bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, 3628 ETHER_HDR_LEN - ETHER_TYPE_LEN); 3629 m_adj(m, ETHER_VLAN_ENCAP_LEN); 3630 } 3631 3632 static void 3633 vtnet_set_rx_process_limit(struct vtnet_softc *sc) 3634 { 3635 int limit; 3636 3637 limit = vtnet_tunable_int(sc, "rx_process_limit", 3638 vtnet_rx_process_limit); 3639 if (limit < 0) 3640 limit = INT_MAX; 3641 sc->vtnet_rx_process_limit = limit; 3642 } 3643 3644 static void 3645 vtnet_set_tx_intr_threshold(struct vtnet_softc *sc) 3646 { 3647 int size, thresh; 3648 3649 size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq); 3650 3651 /* 3652 * The Tx interrupt is disabled until the queue free count falls 3653 * below our threshold. Completed frames are drained from the Tx 3654 * virtqueue before transmitting new frames and in the watchdog 3655 * callout, so the frequency of Tx interrupts is greatly reduced, 3656 * at the cost of not freeing mbufs as quickly as they otherwise 3657 * would be. 3658 * 3659 * N.B. We assume all the Tx queues are the same size. 3660 */ 3661 thresh = size / 4; 3662 3663 /* 3664 * Without indirect descriptors, leave enough room for the most 3665 * segments we handle. 3666 */ 3667 if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 && 3668 thresh < sc->vtnet_tx_nsegs) 3669 thresh = sc->vtnet_tx_nsegs; 3670 3671 sc->vtnet_tx_intr_thresh = thresh; 3672 } 3673 3674 static void 3675 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, 3676 struct sysctl_oid_list *child, struct vtnet_rxq *rxq) 3677 { 3678 struct sysctl_oid *node; 3679 struct sysctl_oid_list *list; 3680 struct vtnet_rxq_stats *stats; 3681 char namebuf[16]; 3682 3683 snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id); 3684 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 3685 CTLFLAG_RD, NULL, "Receive Queue"); 3686 list = SYSCTL_CHILDREN(node); 3687 3688 stats = &rxq->vtnrx_stats; 3689 3690 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, 3691 &stats->vrxs_ipackets, "Receive packets"); 3692 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, 3693 &stats->vrxs_ibytes, "Receive bytes"); 3694 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, 3695 &stats->vrxs_iqdrops, "Receive drops"); 3696 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, 3697 &stats->vrxs_ierrors, "Receive errors"); 3698 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, 3699 &stats->vrxs_csum, "Receive checksum offloaded"); 3700 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, 3701 &stats->vrxs_csum_failed, "Receive checksum offload failed"); 3702 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, 3703 &stats->vrxs_rescheduled, 3704 "Receive interrupt handler rescheduled"); 3705 } 3706 3707 static void 3708 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx, 3709 struct sysctl_oid_list *child, struct vtnet_txq *txq) 3710 { 3711 struct sysctl_oid *node; 3712 struct sysctl_oid_list *list; 3713 
struct vtnet_txq_stats *stats; 3714 char namebuf[16]; 3715 3716 snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id); 3717 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 3718 CTLFLAG_RD, NULL, "Transmit Queue"); 3719 list = SYSCTL_CHILDREN(node); 3720 3721 stats = &txq->vtntx_stats; 3722 3723 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, 3724 &stats->vtxs_opackets, "Transmit packets"); 3725 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, 3726 &stats->vtxs_obytes, "Transmit bytes"); 3727 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, 3728 &stats->vtxs_omcasts, "Transmit multicasts"); 3729 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, 3730 &stats->vtxs_csum, "Transmit checksum offloaded"); 3731 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, 3732 &stats->vtxs_tso, "Transmit segmentation offloaded"); 3733 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, 3734 &stats->vtxs_rescheduled, 3735 "Transmit interrupt handler rescheduled"); 3736 } 3737 3738 static void 3739 vtnet_setup_queue_sysctl(struct vtnet_softc *sc) 3740 { 3741 device_t dev; 3742 struct sysctl_ctx_list *ctx; 3743 struct sysctl_oid *tree; 3744 struct sysctl_oid_list *child; 3745 int i; 3746 3747 dev = sc->vtnet_dev; 3748 ctx = device_get_sysctl_ctx(dev); 3749 tree = device_get_sysctl_tree(dev); 3750 child = SYSCTL_CHILDREN(tree); 3751 3752 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 3753 vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]); 3754 vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); 3755 } 3756 } 3757 3758 static void 3759 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, 3760 struct sysctl_oid_list *child, struct vtnet_softc *sc) 3761 { 3762 struct vtnet_statistics *stats; 3763 struct vtnet_rxq_stats rxaccum; 3764 struct vtnet_txq_stats txaccum; 3765 3766 vtnet_accum_stats(sc, &rxaccum, &txaccum); 3767 3768 stats = &sc->vtnet_stats; 3769 stats->rx_csum_offloaded = rxaccum.vrxs_csum; 3770 stats->rx_csum_failed = rxaccum.vrxs_csum_failed; 3771 stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled; 3772 stats->tx_csum_offloaded = txaccum.vtxs_csum; 3773 stats->tx_tso_offloaded = txaccum.vtxs_tso; 3774 stats->tx_task_rescheduled = txaccum.vtxs_rescheduled; 3775 3776 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed", 3777 CTLFLAG_RD, &stats->mbuf_alloc_failed, 3778 "Mbuf cluster allocation failures"); 3779 3780 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large", 3781 CTLFLAG_RD, &stats->rx_frame_too_large, 3782 "Received frame larger than the mbuf chain"); 3783 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed", 3784 CTLFLAG_RD, &stats->rx_enq_replacement_failed, 3785 "Enqueuing the replacement receive mbuf failed"); 3786 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed", 3787 CTLFLAG_RD, &stats->rx_mergeable_failed, 3788 "Mergeable buffers receive failures"); 3789 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", 3790 CTLFLAG_RD, &stats->rx_csum_bad_ethtype, 3791 "Received checksum offloaded buffer with unsupported " 3792 "Ethernet type"); 3793 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", 3794 CTLFLAG_RD, &stats->rx_csum_bad_ipproto, 3795 "Received checksum offloaded buffer with incorrect IP protocol"); 3796 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset", 3797 CTLFLAG_RD, &stats->rx_csum_bad_offset, 3798 "Received checksum offloaded buffer with incorrect offset"); 3799 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto", 3800 
CTLFLAG_RD, &stats->rx_csum_bad_proto, 3801 "Received checksum offloaded buffer with incorrect protocol"); 3802 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed", 3803 CTLFLAG_RD, &stats->rx_csum_failed, 3804 "Received buffer checksum offload failed"); 3805 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded", 3806 CTLFLAG_RD, &stats->rx_csum_offloaded, 3807 "Received buffer checksum offload succeeded"); 3808 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled", 3809 CTLFLAG_RD, &stats->rx_task_rescheduled, 3810 "Times the receive interrupt task rescheduled itself"); 3811 3812 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype", 3813 CTLFLAG_RD, &stats->tx_csum_bad_ethtype, 3814 "Aborted transmit of checksum offloaded buffer with unknown " 3815 "Ethernet type"); 3816 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype", 3817 CTLFLAG_RD, &stats->tx_tso_bad_ethtype, 3818 "Aborted transmit of TSO buffer with unknown Ethernet type"); 3819 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp", 3820 CTLFLAG_RD, &stats->tx_tso_not_tcp, 3821 "Aborted transmit of TSO buffer with non TCP protocol"); 3822 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged", 3823 CTLFLAG_RD, &stats->tx_defragged, 3824 "Transmit mbufs defragged"); 3825 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed", 3826 CTLFLAG_RD, &stats->tx_defrag_failed, 3827 "Aborted transmit of buffer because defrag failed"); 3828 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded", 3829 CTLFLAG_RD, &stats->tx_csum_offloaded, 3830 "Offloaded checksum of transmitted buffer"); 3831 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded", 3832 CTLFLAG_RD, &stats->tx_tso_offloaded, 3833 "Segmentation offload of transmitted buffer"); 3834 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled", 3835 CTLFLAG_RD, &stats->tx_task_rescheduled, 3836 "Times the transmit interrupt task rescheduled itself"); 3837 } 3838 3839 static void 3840 vtnet_setup_sysctl(struct vtnet_softc *sc) 3841 { 3842 device_t dev; 3843 struct sysctl_ctx_list *ctx; 3844 struct sysctl_oid *tree; 3845 struct sysctl_oid_list *child; 3846 3847 dev = sc->vtnet_dev; 3848 ctx = device_get_sysctl_ctx(dev); 3849 tree = device_get_sysctl_tree(dev); 3850 child = SYSCTL_CHILDREN(tree); 3851 3852 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", 3853 CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, 3854 "Maximum number of supported virtqueue pairs"); 3855 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", 3856 CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0, 3857 "Number of active virtqueue pairs"); 3858 3859 vtnet_setup_stat_sysctl(ctx, child, sc); 3860 } 3861 3862 static int 3863 vtnet_rxq_enable_intr(struct vtnet_rxq *rxq) 3864 { 3865 3866 return (virtqueue_enable_intr(rxq->vtnrx_vq)); 3867 } 3868 3869 static void 3870 vtnet_rxq_disable_intr(struct vtnet_rxq *rxq) 3871 { 3872 3873 virtqueue_disable_intr(rxq->vtnrx_vq); 3874 } 3875 3876 static int 3877 vtnet_txq_enable_intr(struct vtnet_txq *txq) 3878 { 3879 struct virtqueue *vq; 3880 3881 vq = txq->vtntx_vq; 3882 3883 if (vtnet_txq_below_threshold(txq) != 0) 3884 return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG)); 3885 3886 /* 3887 * The free count is above our threshold. Keep the Tx interrupt 3888 * disabled until the queue is fuller. 
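	 * For example, with the default threshold of one quarter of the
	 * ring set in vtnet_set_tx_intr_threshold(), a 256 entry virtqueue
	 * keeps the interrupt off until no more than 64 descriptors remain
	 * free; completed frames are instead reclaimed before the next
	 * transmit or from the watchdog callout.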
3889 */ 3890 return (0); 3891 } 3892 3893 static void 3894 vtnet_txq_disable_intr(struct vtnet_txq *txq) 3895 { 3896 3897 virtqueue_disable_intr(txq->vtntx_vq); 3898 } 3899 3900 static void 3901 vtnet_enable_rx_interrupts(struct vtnet_softc *sc) 3902 { 3903 int i; 3904 3905 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 3906 vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]); 3907 } 3908 3909 static void 3910 vtnet_enable_tx_interrupts(struct vtnet_softc *sc) 3911 { 3912 int i; 3913 3914 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 3915 vtnet_txq_enable_intr(&sc->vtnet_txqs[i]); 3916 } 3917 3918 static void 3919 vtnet_enable_interrupts(struct vtnet_softc *sc) 3920 { 3921 3922 vtnet_enable_rx_interrupts(sc); 3923 vtnet_enable_tx_interrupts(sc); 3924 } 3925 3926 static void 3927 vtnet_disable_rx_interrupts(struct vtnet_softc *sc) 3928 { 3929 int i; 3930 3931 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 3932 vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); 3933 } 3934 3935 static void 3936 vtnet_disable_tx_interrupts(struct vtnet_softc *sc) 3937 { 3938 int i; 3939 3940 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 3941 vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); 3942 } 3943 3944 static void 3945 vtnet_disable_interrupts(struct vtnet_softc *sc) 3946 { 3947 3948 vtnet_disable_rx_interrupts(sc); 3949 vtnet_disable_tx_interrupts(sc); 3950 } 3951 3952 static int 3953 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) 3954 { 3955 char path[64]; 3956 3957 snprintf(path, sizeof(path), 3958 "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob); 3959 TUNABLE_INT_FETCH(path, &def); 3960 3961 return (def); 3962 } 3963
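
/*
 * Example of the per-device tunable lookup above, assuming a hypothetical
 * unit 0: a loader.conf(5) entry such as
 *
 *	hw.vtnet.0.rx_process_limit="256"
 *
 * overrides the global vtnet_rx_process_limit default for that interface
 * only. If no such entry exists in the kernel environment, the
 * caller-supplied default is returned unchanged.
 */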