/*-
 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org>
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice unmodified, this list of conditions, and the following
 *    disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 */

/* Driver for VirtIO network devices. */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/eventhandler.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/sockio.h>
#include <sys/mbuf.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/socket.h>
#include <sys/sysctl.h>
#include <sys/random.h>
#include <sys/sglist.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/taskqueue.h>
#include <sys/smp.h>
#include <machine/smp.h>

#include <vm/uma.h>

#include <net/ethernet.h>
#include <net/if.h>
#include <net/if_var.h>
#include <net/if_arp.h>
#include <net/if_dl.h>
#include <net/if_types.h>
#include <net/if_media.h>
#include <net/if_vlan_var.h>

#include <net/bpf.h>

#include <netinet/in_systm.h>
#include <netinet/in.h>
#include <netinet/ip.h>
#include <netinet/ip6.h>
#include <netinet6/ip6_var.h>
#include <netinet/udp.h>
#include <netinet/tcp.h>
#include <netinet/sctp.h>

#include <machine/bus.h>
#include <machine/resource.h>
#include <sys/bus.h>
#include <sys/rman.h>

#include <dev/virtio/virtio.h>
#include <dev/virtio/virtqueue.h>
#include <dev/virtio/network/virtio_net.h>
#include <dev/virtio/network/if_vtnetvar.h>

#include "virtio_if.h"

#include "opt_inet.h"
#include "opt_inet6.h"

static int	vtnet_modevent(module_t, int, void *);

static int	vtnet_probe(device_t);
static int	vtnet_attach(device_t);
static int	vtnet_detach(device_t);
static int	vtnet_suspend(device_t);
static int	vtnet_resume(device_t);
static int	vtnet_shutdown(device_t);
static int	vtnet_attach_completed(device_t);
static int	vtnet_config_change(device_t);

static void	vtnet_negotiate_features(struct vtnet_softc *);
static void	vtnet_setup_features(struct vtnet_softc *);
static int	vtnet_init_rxq(struct vtnet_softc *, int);
static int	vtnet_init_txq(struct vtnet_softc *, int);
static int	vtnet_alloc_rxtx_queues(struct vtnet_softc *);
static void	vtnet_free_rxtx_queues(struct vtnet_softc *);
static int	vtnet_alloc_rx_filters(struct vtnet_softc *);
static void	vtnet_free_rx_filters(struct vtnet_softc *);
static int	vtnet_alloc_virtqueues(struct vtnet_softc *);
static int	vtnet_setup_interface(struct vtnet_softc *);
static int	vtnet_change_mtu(struct vtnet_softc *, int);
static int	vtnet_ioctl(struct ifnet *, u_long, caddr_t);
static uint64_t	vtnet_get_counter(struct ifnet *, ift_counter);

static int	vtnet_rxq_populate(struct vtnet_rxq *);
static void	vtnet_rxq_free_mbufs(struct vtnet_rxq *);
static struct mbuf *
		vtnet_rx_alloc_buf(struct vtnet_softc *, int, struct mbuf **);
static int	vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *,
		    struct mbuf *, int);
static int	vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int);
static int	vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *);
static int	vtnet_rxq_new_buf(struct vtnet_rxq *);
static int	vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *,
		    struct virtio_net_hdr *);
static void	vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int);
static void	vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *);
static int	vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int);
static void	vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *,
		    struct virtio_net_hdr *);
static int	vtnet_rxq_eof(struct vtnet_rxq *);
static void	vtnet_rx_vq_intr(void *);
static void	vtnet_rxq_tq_intr(void *, int);

static int	vtnet_txq_below_threshold(struct vtnet_txq *);
static int	vtnet_txq_notify(struct vtnet_txq *);
static void	vtnet_txq_free_mbufs(struct vtnet_txq *);
static int	vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *,
		    int *, int *, int *);
static int	vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int,
		    int, struct virtio_net_hdr *);
static struct mbuf *
		vtnet_txq_offload(struct vtnet_txq *, struct mbuf *,
		    struct virtio_net_hdr *);
static int	vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **,
		    struct vtnet_tx_header *);
static int	vtnet_txq_encap(struct vtnet_txq *, struct mbuf **);
#ifdef VTNET_LEGACY_TX
static void	vtnet_start_locked(struct vtnet_txq *, struct ifnet *);
static void	vtnet_start(struct ifnet *);
#else
static int	vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *);
static int	vtnet_txq_mq_start(struct ifnet *, struct mbuf *);
static void	vtnet_txq_tq_deferred(void *, int);
#endif
static void	vtnet_txq_start(struct vtnet_txq *);
static void	vtnet_txq_tq_intr(void *, int);
static int	vtnet_txq_eof(struct vtnet_txq *);
static void	vtnet_tx_vq_intr(void *);
static void	vtnet_tx_start_all(struct vtnet_softc *);

#ifndef VTNET_LEGACY_TX
static void	vtnet_qflush(struct ifnet *);
#endif

static int	vtnet_watchdog(struct vtnet_txq *);
static void	vtnet_accum_stats(struct vtnet_softc *,
		    struct vtnet_rxq_stats *, struct vtnet_txq_stats *);
static void	vtnet_tick(void *);

static void	vtnet_start_taskqueues(struct vtnet_softc *);
static void	vtnet_free_taskqueues(struct vtnet_softc *);
static void	vtnet_drain_taskqueues(struct vtnet_softc *);

static void	vtnet_drain_rxtx_queues(struct vtnet_softc *);
static void	vtnet_stop_rendezvous(struct vtnet_softc *);
static void	vtnet_stop(struct vtnet_softc *);
static int	vtnet_virtio_reinit(struct vtnet_softc *);
static void	vtnet_init_rx_filters(struct vtnet_softc *);
static int	vtnet_init_rx_queues(struct vtnet_softc *);
static int	vtnet_init_tx_queues(struct vtnet_softc *);
static int	vtnet_init_rxtx_queues(struct vtnet_softc *);
static void	vtnet_set_active_vq_pairs(struct vtnet_softc *);
static int	vtnet_reinit(struct vtnet_softc *);
static void	vtnet_init_locked(struct vtnet_softc *);
static void	vtnet_init(void *);

static void	vtnet_free_ctrl_vq(struct vtnet_softc *);
static void	vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *,
		    struct sglist *, int, int);
static int	vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *);
static int	vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t);
static int	vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int);
static int	vtnet_set_promisc(struct vtnet_softc *, int);
static int	vtnet_set_allmulti(struct vtnet_softc *, int);
static void	vtnet_attach_disable_promisc(struct vtnet_softc *);
static void	vtnet_rx_filter(struct vtnet_softc *);
static void	vtnet_rx_filter_mac(struct vtnet_softc *);
static int	vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t);
static void	vtnet_rx_filter_vlan(struct vtnet_softc *);
static void	vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t);
static void	vtnet_register_vlan(void *, struct ifnet *, uint16_t);
static void	vtnet_unregister_vlan(void *, struct ifnet *, uint16_t);

static int	vtnet_is_link_up(struct vtnet_softc *);
static void	vtnet_update_link_status(struct vtnet_softc *);
static int	vtnet_ifmedia_upd(struct ifnet *);
static void	vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *);
static void	vtnet_get_hwaddr(struct vtnet_softc *);
static void	vtnet_set_hwaddr(struct vtnet_softc *);
static void	vtnet_vlan_tag_remove(struct mbuf *);
static void	vtnet_set_rx_process_limit(struct vtnet_softc *);
static void	vtnet_set_tx_intr_threshold(struct vtnet_softc *);

static void	vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *,
		    struct sysctl_oid_list *, struct vtnet_rxq *);
static void	vtnet_setup_txq_sysctl(struct sysctl_ctx_list *,
		    struct sysctl_oid_list *, struct vtnet_txq *);
static void	vtnet_setup_queue_sysctl(struct vtnet_softc *);
static void	vtnet_setup_sysctl(struct vtnet_softc *);

static int	vtnet_rxq_enable_intr(struct vtnet_rxq *);
static void	vtnet_rxq_disable_intr(struct vtnet_rxq *);
static int	vtnet_txq_enable_intr(struct vtnet_txq *);
static void	vtnet_txq_disable_intr(struct vtnet_txq *);
static void	vtnet_enable_rx_interrupts(struct vtnet_softc *);
static void	vtnet_enable_tx_interrupts(struct vtnet_softc *);
static void	vtnet_enable_interrupts(struct vtnet_softc *);
static void	vtnet_disable_rx_interrupts(struct vtnet_softc *);
static void	vtnet_disable_tx_interrupts(struct vtnet_softc *);
static void	vtnet_disable_interrupts(struct vtnet_softc *);

static int	vtnet_tunable_int(struct vtnet_softc *, const char *, int);

/* Tunables. */
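/*
 * The hw.vtnet.* values below are global, boot-time defaults fetched via
 * TUNABLE_INT; the vtnet_tunable_int() helper above takes them as its
 * default values.  For example (illustrative settings), loader.conf(5)
 * entries like:
 *
 *	hw.vtnet.csum_disable=1
 *	hw.vtnet.mq_max_pairs=2
 *
 * disable checksum offload and cap the number of multiqueue pairs.
 */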
static int vtnet_csum_disable = 0;
TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable);
static int vtnet_tso_disable = 0;
TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable);
static int vtnet_lro_disable = 0;
TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable);
static int vtnet_mq_disable = 0;
TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable);
static int vtnet_mq_max_pairs = 0;
TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs);
static int vtnet_rx_process_limit = 512;
TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit);

static uma_zone_t vtnet_tx_header_zone;

static struct virtio_feature_desc vtnet_feature_desc[] = {
	{ VIRTIO_NET_F_CSUM, "TxChecksum" },
	{ VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" },
	{ VIRTIO_NET_F_MAC, "MacAddress" },
	{ VIRTIO_NET_F_GSO, "TxAllGSO" },
	{ VIRTIO_NET_F_GUEST_TSO4, "RxTSOv4" },
	{ VIRTIO_NET_F_GUEST_TSO6, "RxTSOv6" },
	{ VIRTIO_NET_F_GUEST_ECN, "RxECN" },
	{ VIRTIO_NET_F_GUEST_UFO, "RxUFO" },
	{ VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" },
	{ VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" },
	{ VIRTIO_NET_F_HOST_ECN, "TxTSOECN" },
	{ VIRTIO_NET_F_HOST_UFO, "TxUFO" },
	{ VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" },
	{ VIRTIO_NET_F_STATUS, "Status" },
	{ VIRTIO_NET_F_CTRL_VQ, "ControlVq" },
	{ VIRTIO_NET_F_CTRL_RX, "RxMode" },
	{ VIRTIO_NET_F_CTRL_VLAN, "VLanFilter" },
	{ VIRTIO_NET_F_CTRL_RX_EXTRA, "RxModeExtra" },
	{ VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" },
	{ VIRTIO_NET_F_MQ, "Multiqueue" },
	{ VIRTIO_NET_F_CTRL_MAC_ADDR, "SetMacAddress" },

	{ 0, NULL }
};

static device_method_t vtnet_methods[] = {
	/* Device methods. */
	DEVMETHOD(device_probe, vtnet_probe),
	DEVMETHOD(device_attach, vtnet_attach),
	DEVMETHOD(device_detach, vtnet_detach),
	DEVMETHOD(device_suspend, vtnet_suspend),
	DEVMETHOD(device_resume, vtnet_resume),
	DEVMETHOD(device_shutdown, vtnet_shutdown),

	/* VirtIO methods. */
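	/*
	 * These are callbacks from the VirtIO transport rather than newbus:
	 * virtio_attach_completed is invoked once attach has finished
	 * (vtnet_attach_completed() then calls vtnet_attach_disable_promisc()),
	 * and virtio_config_change is invoked when the host updates the device
	 * configuration, e.g. on a link status change.
	 */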
	DEVMETHOD(virtio_attach_completed, vtnet_attach_completed),
	DEVMETHOD(virtio_config_change, vtnet_config_change),

	DEVMETHOD_END
};

#ifdef DEV_NETMAP
#include <dev/netmap/if_vtnet_netmap.h>
#endif /* DEV_NETMAP */

static driver_t vtnet_driver = {
	"vtnet",
	vtnet_methods,
	sizeof(struct vtnet_softc)
};
static devclass_t vtnet_devclass;

DRIVER_MODULE(vtnet, virtio_mmio, vtnet_driver, vtnet_devclass,
    vtnet_modevent, 0);
DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass,
    vtnet_modevent, 0);
MODULE_VERSION(vtnet, 1);
MODULE_DEPEND(vtnet, virtio, 1, 1, 1);

static int
vtnet_modevent(module_t mod, int type, void *unused)
{
	int error;

	error = 0;

	switch (type) {
	case MOD_LOAD:
		vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr",
		    sizeof(struct vtnet_tx_header),
		    NULL, NULL, NULL, NULL, 0, 0);
		break;
	case MOD_QUIESCE:
	case MOD_UNLOAD:
		if (uma_zone_get_cur(vtnet_tx_header_zone) > 0)
			error = EBUSY;
		else if (type == MOD_UNLOAD) {
			uma_zdestroy(vtnet_tx_header_zone);
			vtnet_tx_header_zone = NULL;
		}
		break;
	case MOD_SHUTDOWN:
		break;
	default:
		error = EOPNOTSUPP;
		break;
	}

	return (error);
}

static int
vtnet_probe(device_t dev)
{

	if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK)
		return (ENXIO);

	device_set_desc(dev, "VirtIO Networking Adapter");

	return (BUS_PROBE_DEFAULT);
}

static int
vtnet_attach(device_t dev)
{
	struct vtnet_softc *sc;
	int error;

	sc = device_get_softc(dev);
	sc->vtnet_dev = dev;

	/* Register our feature descriptions. */
	virtio_set_feature_desc(dev, vtnet_feature_desc);

	VTNET_CORE_LOCK_INIT(sc);
	callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0);

	vtnet_setup_sysctl(sc);
	vtnet_setup_features(sc);

	error = vtnet_alloc_rx_filters(sc);
	if (error) {
		device_printf(dev, "cannot allocate Rx filters\n");
		goto fail;
	}

	error = vtnet_alloc_rxtx_queues(sc);
	if (error) {
		device_printf(dev, "cannot allocate queues\n");
		goto fail;
	}

	error = vtnet_alloc_virtqueues(sc);
	if (error) {
		device_printf(dev, "cannot allocate virtqueues\n");
		goto fail;
	}

	error = vtnet_setup_interface(sc);
	if (error) {
		device_printf(dev, "cannot setup interface\n");
		goto fail;
	}

	error = virtio_setup_intr(dev, INTR_TYPE_NET);
	if (error) {
		device_printf(dev, "cannot setup virtqueue interrupts\n");
		/* BMV: This will crash if it happens during boot! */
		ether_ifdetach(sc->vtnet_ifp);
		goto fail;
	}

#ifdef DEV_NETMAP
	vtnet_netmap_attach(sc);
#endif /* DEV_NETMAP */

	vtnet_start_taskqueues(sc);

fail:
	if (error)
		vtnet_detach(dev);

	return (error);
}
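/*
 * vtnet_detach() also serves as the error-unwind path for a failed attach
 * (see the "fail" label above), so it must tolerate partially initialized
 * state: resources are checked against NULL before being released.
 */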
static int
vtnet_detach(device_t dev)
{
	struct vtnet_softc *sc;
	struct ifnet *ifp;

	sc = device_get_softc(dev);
	ifp = sc->vtnet_ifp;

	if (device_is_attached(dev)) {
		VTNET_CORE_LOCK(sc);
		vtnet_stop(sc);
		VTNET_CORE_UNLOCK(sc);

		callout_drain(&sc->vtnet_tick_ch);
		vtnet_drain_taskqueues(sc);

		ether_ifdetach(ifp);
	}

#ifdef DEV_NETMAP
	netmap_detach(ifp);
#endif /* DEV_NETMAP */

	vtnet_free_taskqueues(sc);

	if (sc->vtnet_vlan_attach != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach);
		sc->vtnet_vlan_attach = NULL;
	}
	if (sc->vtnet_vlan_detach != NULL) {
		EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach);
		sc->vtnet_vlan_detach = NULL;
	}

	ifmedia_removeall(&sc->vtnet_media);

	if (ifp != NULL) {
		if_free(ifp);
		sc->vtnet_ifp = NULL;
	}

	vtnet_free_rxtx_queues(sc);
	vtnet_free_rx_filters(sc);

	if (sc->vtnet_ctrl_vq != NULL)
		vtnet_free_ctrl_vq(sc);

	VTNET_CORE_LOCK_DESTROY(sc);

	return (0);
}

static int
vtnet_suspend(device_t dev)
{
	struct vtnet_softc *sc;

	sc = device_get_softc(dev);

	VTNET_CORE_LOCK(sc);
	vtnet_stop(sc);
	sc->vtnet_flags |= VTNET_FLAG_SUSPENDED;
	VTNET_CORE_UNLOCK(sc);

	return (0);
}

static int
vtnet_resume(device_t dev)
{
	struct vtnet_softc *sc;
	struct ifnet *ifp;

	sc = device_get_softc(dev);
	ifp = sc->vtnet_ifp;

	VTNET_CORE_LOCK(sc);
	if (ifp->if_flags & IFF_UP)
		vtnet_init_locked(sc);
	sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED;
	VTNET_CORE_UNLOCK(sc);

	return (0);
}

static int
vtnet_shutdown(device_t dev)
{

	/*
	 * Suspend already does all of what we need to
	 * do here; we just never expect to be resumed.
	 */
	return (vtnet_suspend(dev));
}

static int
vtnet_attach_completed(device_t dev)
{

	vtnet_attach_disable_promisc(device_get_softc(dev));

	return (0);
}

static int
vtnet_config_change(device_t dev)
{
	struct vtnet_softc *sc;

	sc = device_get_softc(dev);

	VTNET_CORE_LOCK(sc);
	vtnet_update_link_status(sc);
	if (sc->vtnet_link_active != 0)
		vtnet_tx_start_all(sc);
	VTNET_CORE_UNLOCK(sc);

	return (0);
}

static void
vtnet_negotiate_features(struct vtnet_softc *sc)
{
	device_t dev;
	uint64_t mask, features;

	dev = sc->vtnet_dev;
	mask = 0;

	/*
	 * TSO and LRO are only available when their corresponding checksum
	 * offload feature is also negotiated.
	 */
	if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) {
		mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM;
		mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES;
	}
	if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable))
		mask |= VTNET_TSO_FEATURES;
	if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable))
		mask |= VTNET_LRO_FEATURES;
#ifndef VTNET_LEGACY_TX
	if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable))
		mask |= VIRTIO_NET_F_MQ;
#else
	mask |= VIRTIO_NET_F_MQ;
#endif

	features = VTNET_FEATURES & ~mask;
	sc->vtnet_features = virtio_negotiate_features(dev, features);

	if (virtio_with_feature(dev, VTNET_LRO_FEATURES) &&
	    virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) {
		/*
		 * LRO without mergeable buffers requires special care. This
		 * is not ideal because every receive buffer must be large
		 * enough to hold the maximum TCP packet, the Ethernet header,
		 * and the VirtIO header. This requires up to 34 descriptors
		 * with MCLBYTES clusters. If we do not have indirect
		 * descriptors, LRO is disabled since the virtqueue will not
		 * contain very many receive buffers.
		 */
		if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) {
			device_printf(dev,
			    "LRO disabled due to both mergeable buffers and "
			    "indirect descriptors not negotiated\n");

			features &= ~VTNET_LRO_FEATURES;
			sc->vtnet_features =
			    virtio_negotiate_features(dev, features);
		} else
			sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG;
	}
}

static void
vtnet_setup_features(struct vtnet_softc *sc)
{
	device_t dev;
	int max_pairs, max;

	dev = sc->vtnet_dev;

	vtnet_negotiate_features(sc);

	if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC))
		sc->vtnet_flags |= VTNET_FLAG_INDIRECT;
	if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX))
		sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX;

	if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) {
		/* This feature should always be negotiated. */
		sc->vtnet_flags |= VTNET_FLAG_MAC;
	}

	if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) {
		sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS;
		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf);
	} else
		sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr);

	if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS)
		sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS;
	else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG)
		sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS;
	else
		sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS;

	if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) ||
	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) ||
	    virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
		sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS;
	else
		sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS;

	if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) {
		sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ;

		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX))
			sc->vtnet_flags |= VTNET_FLAG_CTRL_RX;
		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN))
			sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER;
		if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR))
			sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC;
	}

	if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) &&
	    sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
		max_pairs = virtio_read_dev_config_2(dev,
		    offsetof(struct virtio_net_config, max_virtqueue_pairs));
		if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN ||
		    max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX)
			max_pairs = 1;
	} else
		max_pairs = 1;

	if (max_pairs > 1) {
		/*
		 * Limit the maximum number of queue pairs to the number of
		 * CPUs or the configured maximum. The actual number of
		 * queues that get used may be less.
		 */
		max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs);
		if (max > 0 && max_pairs > max)
			max_pairs = max;
		if (max_pairs > mp_ncpus)
			max_pairs = mp_ncpus;
		if (max_pairs > VTNET_MAX_QUEUE_PAIRS)
			max_pairs = VTNET_MAX_QUEUE_PAIRS;
		if (max_pairs > 1)
			sc->vtnet_flags |= VTNET_FLAG_MULTIQ;
	}

	sc->vtnet_max_vq_pairs = max_pairs;
}

static int
vtnet_init_rxq(struct vtnet_softc *sc, int id)
{
	struct vtnet_rxq *rxq;

	rxq = &sc->vtnet_rxqs[id];

	snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d",
	    device_get_nameunit(sc->vtnet_dev), id);
	mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF);

	rxq->vtnrx_sc = sc;
	rxq->vtnrx_id = id;

	rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT);
	if (rxq->vtnrx_sg == NULL)
		return (ENOMEM);

	TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq);
	rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT,
	    taskqueue_thread_enqueue, &rxq->vtnrx_tq);

	return (rxq->vtnrx_tq == NULL ? ENOMEM : 0);
}
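/*
 * As with vtnet_init_rxq() above, the taskqueue created here is only
 * allocated at this point; it is not serviced until vtnet_start_taskqueues()
 * runs near the end of attach.
 */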
static int
vtnet_init_txq(struct vtnet_softc *sc, int id)
{
	struct vtnet_txq *txq;

	txq = &sc->vtnet_txqs[id];

	snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d",
	    device_get_nameunit(sc->vtnet_dev), id);
	mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF);

	txq->vtntx_sc = sc;
	txq->vtntx_id = id;

	txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT);
	if (txq->vtntx_sg == NULL)
		return (ENOMEM);

#ifndef VTNET_LEGACY_TX
	txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF,
	    M_NOWAIT, &txq->vtntx_mtx);
	if (txq->vtntx_br == NULL)
		return (ENOMEM);

	TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq);
#endif
	TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq);
	txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT,
	    taskqueue_thread_enqueue, &txq->vtntx_tq);
	if (txq->vtntx_tq == NULL)
		return (ENOMEM);

	return (0);
}

static int
vtnet_alloc_rxtx_queues(struct vtnet_softc *sc)
{
	int i, npairs, error;

	npairs = sc->vtnet_max_vq_pairs;

	sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF,
	    M_NOWAIT | M_ZERO);
	sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF,
	    M_NOWAIT | M_ZERO);
	if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL)
		return (ENOMEM);

	for (i = 0; i < npairs; i++) {
		error = vtnet_init_rxq(sc, i);
		if (error)
			return (error);
		error = vtnet_init_txq(sc, i);
		if (error)
			return (error);
	}

	vtnet_setup_queue_sysctl(sc);

	return (0);
}

static void
vtnet_destroy_rxq(struct vtnet_rxq *rxq)
{

	rxq->vtnrx_sc = NULL;
	rxq->vtnrx_id = -1;

	if (rxq->vtnrx_sg != NULL) {
		sglist_free(rxq->vtnrx_sg);
		rxq->vtnrx_sg = NULL;
	}

	if (mtx_initialized(&rxq->vtnrx_mtx) != 0)
		mtx_destroy(&rxq->vtnrx_mtx);
}

static void
vtnet_destroy_txq(struct vtnet_txq *txq)
{

	txq->vtntx_sc = NULL;
	txq->vtntx_id = -1;

	if (txq->vtntx_sg != NULL) {
		sglist_free(txq->vtntx_sg);
		txq->vtntx_sg = NULL;
	}

#ifndef VTNET_LEGACY_TX
	if (txq->vtntx_br != NULL) {
		buf_ring_free(txq->vtntx_br, M_DEVBUF);
		txq->vtntx_br = NULL;
	}
#endif

	if (mtx_initialized(&txq->vtntx_mtx) != 0)
		mtx_destroy(&txq->vtntx_mtx);
}

static void
vtnet_free_rxtx_queues(struct vtnet_softc *sc)
{
	int i;

	if (sc->vtnet_rxqs != NULL) {
		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
			vtnet_destroy_rxq(&sc->vtnet_rxqs[i]);
		free(sc->vtnet_rxqs, M_DEVBUF);
		sc->vtnet_rxqs = NULL;
	}

	if (sc->vtnet_txqs != NULL) {
		for (i = 0; i < sc->vtnet_max_vq_pairs; i++)
			vtnet_destroy_txq(&sc->vtnet_txqs[i]);
		free(sc->vtnet_txqs, M_DEVBUF);
		sc->vtnet_txqs = NULL;
	}
}

static int
vtnet_alloc_rx_filters(struct vtnet_softc *sc)
{

	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
		sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter),
		    M_DEVBUF, M_NOWAIT | M_ZERO);
		if (sc->vtnet_mac_filter == NULL)
			return (ENOMEM);
	}

	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
		sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) *
		    VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO);
		if (sc->vtnet_vlan_filter == NULL)
			return (ENOMEM);
	}

	return (0);
}
static void
vtnet_free_rx_filters(struct vtnet_softc *sc)
{

	if (sc->vtnet_mac_filter != NULL) {
		free(sc->vtnet_mac_filter, M_DEVBUF);
		sc->vtnet_mac_filter = NULL;
	}

	if (sc->vtnet_vlan_filter != NULL) {
		free(sc->vtnet_vlan_filter, M_DEVBUF);
		sc->vtnet_vlan_filter = NULL;
	}
}

static int
vtnet_alloc_virtqueues(struct vtnet_softc *sc)
{
	device_t dev;
	struct vq_alloc_info *info;
	struct vtnet_rxq *rxq;
	struct vtnet_txq *txq;
	int i, idx, flags, nvqs, error;

	dev = sc->vtnet_dev;
	flags = 0;

	nvqs = sc->vtnet_max_vq_pairs * 2;
	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ)
		nvqs++;

	info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT);
	if (info == NULL)
		return (ENOMEM);

	for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx += 2) {
		rxq = &sc->vtnet_rxqs[i];
		VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs,
		    vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq,
		    "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id);

		txq = &sc->vtnet_txqs[i];
		VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs,
		    vtnet_tx_vq_intr, txq, &txq->vtntx_vq,
		    "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id);
	}

	if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) {
		VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL,
		    &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev));
	}

	/*
	 * Enable interrupt binding if this is multiqueue. This only matters
	 * when per-vq MSIX is available.
	 */
	if (sc->vtnet_flags & VTNET_FLAG_MULTIQ)
		flags |= 0;

	error = virtio_alloc_virtqueues(dev, flags, nvqs, info);
	free(info, M_TEMP);

	return (error);
}

static int
vtnet_setup_interface(struct vtnet_softc *sc)
{
	device_t dev;
	struct ifnet *ifp;

	dev = sc->vtnet_dev;

	ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER);
	if (ifp == NULL) {
		device_printf(dev, "cannot allocate ifnet structure\n");
		return (ENOSPC);
	}

	if_initname(ifp, device_get_name(dev), device_get_unit(dev));
	ifp->if_baudrate = IF_Gbps(10);	/* Approx. */
	ifp->if_softc = sc;
	ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST;
	ifp->if_init = vtnet_init;
	ifp->if_ioctl = vtnet_ioctl;
	ifp->if_get_counter = vtnet_get_counter;
#ifndef VTNET_LEGACY_TX
	ifp->if_transmit = vtnet_txq_mq_start;
	ifp->if_qflush = vtnet_qflush;
#else
	struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
	ifp->if_start = vtnet_start;
	IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1);
	ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1;
	IFQ_SET_READY(&ifp->if_snd);
#endif

	ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd,
	    vtnet_ifmedia_sts);
	ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL);
	ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE);

	/* Read (or generate) the MAC address for the adapter. */
	vtnet_get_hwaddr(sc);

	ether_ifattach(ifp, sc->vtnet_hwaddr);

	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
		ifp->if_capabilities |= IFCAP_LINKSTATE;

	/* Tell the upper layer(s) we support long frames. */
	ifp->if_hdrlen = sizeof(struct ether_vlan_header);
	ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU;

	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
		ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6;

		if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) {
			ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6;
			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
		} else {
			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
				ifp->if_capabilities |= IFCAP_TSO4;
			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
				ifp->if_capabilities |= IFCAP_TSO6;
			if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
				sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;
		}

		if (ifp->if_capabilities & IFCAP_TSO)
			ifp->if_capabilities |= IFCAP_VLAN_HWTSO;
	}

	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
		ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6;

		if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) ||
		    virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6))
			ifp->if_capabilities |= IFCAP_LRO;
	}

	if (ifp->if_capabilities & IFCAP_HWCSUM) {
		/*
		 * VirtIO does not support VLAN tagging, but we can fake
		 * it by inserting and removing the 802.1Q header during
		 * transmit and receive. We are then able to do checksum
		 * offloading of VLAN frames.
		 */
		ifp->if_capabilities |=
		    IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM;
	}

	ifp->if_capenable = ifp->if_capabilities;

	/*
	 * Capabilities after here are not enabled by default.
	 */

	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
		ifp->if_capabilities |= IFCAP_VLAN_HWFILTER;

		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
	}

	vtnet_set_rx_process_limit(sc);
	vtnet_set_tx_intr_threshold(sc);

	return (0);
}

static int
vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu)
{
	struct ifnet *ifp;
	int frame_size, clsize;

	ifp = sc->vtnet_ifp;

	if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU)
		return (EINVAL);

	frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) +
	    new_mtu;

	/*
	 * Based on the new MTU (and hence frame size) determine which
	 * cluster size is most appropriate for the receive queues.
	 */
	if (frame_size <= MCLBYTES) {
		clsize = MCLBYTES;
	} else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
		/* Avoid going past 9K jumbos. */
		if (frame_size > MJUM9BYTES)
			return (EINVAL);
		clsize = MJUM9BYTES;
	} else
		clsize = MJUMPAGESIZE;

	ifp->if_mtu = new_mtu;
	sc->vtnet_rx_new_clsize = clsize;

	if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
		ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
		vtnet_init_locked(sc);
	}

	return (0);
}

static int
vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data)
{
	struct vtnet_softc *sc;
	struct ifreq *ifr;
	int reinit, mask, error;

	sc = ifp->if_softc;
	ifr = (struct ifreq *) data;
	error = 0;

	switch (cmd) {
	case SIOCSIFMTU:
		if (ifp->if_mtu != ifr->ifr_mtu) {
			VTNET_CORE_LOCK(sc);
			error = vtnet_change_mtu(sc, ifr->ifr_mtu);
			VTNET_CORE_UNLOCK(sc);
		}
		break;

	case SIOCSIFFLAGS:
		VTNET_CORE_LOCK(sc);
		if ((ifp->if_flags & IFF_UP) == 0) {
			if (ifp->if_drv_flags & IFF_DRV_RUNNING)
				vtnet_stop(sc);
		} else if (ifp->if_drv_flags & IFF_DRV_RUNNING) {
			if ((ifp->if_flags ^ sc->vtnet_if_flags) &
			    (IFF_PROMISC | IFF_ALLMULTI)) {
				if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX)
					vtnet_rx_filter(sc);
				else {
					ifp->if_flags |= IFF_PROMISC;
					if ((ifp->if_flags ^ sc->vtnet_if_flags)
					    & IFF_ALLMULTI)
						error = ENOTSUP;
				}
			}
		} else
			vtnet_init_locked(sc);

		if (error == 0)
			sc->vtnet_if_flags = ifp->if_flags;
		VTNET_CORE_UNLOCK(sc);
		break;

	case SIOCADDMULTI:
	case SIOCDELMULTI:
		if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0)
			break;
		VTNET_CORE_LOCK(sc);
		if (ifp->if_drv_flags & IFF_DRV_RUNNING)
			vtnet_rx_filter_mac(sc);
		VTNET_CORE_UNLOCK(sc);
		break;

	case SIOCSIFMEDIA:
	case SIOCGIFMEDIA:
		error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd);
		break;

	case SIOCSIFCAP:
		VTNET_CORE_LOCK(sc);
		mask = ifr->ifr_reqcap ^ ifp->if_capenable;

		if (mask & IFCAP_TXCSUM)
			ifp->if_capenable ^= IFCAP_TXCSUM;
		if (mask & IFCAP_TXCSUM_IPV6)
			ifp->if_capenable ^= IFCAP_TXCSUM_IPV6;
		if (mask & IFCAP_TSO4)
			ifp->if_capenable ^= IFCAP_TSO4;
		if (mask & IFCAP_TSO6)
			ifp->if_capenable ^= IFCAP_TSO6;

		if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO |
		    IFCAP_VLAN_HWFILTER)) {
			/* These Rx features require us to renegotiate. */
			reinit = 1;

			if (mask & IFCAP_RXCSUM)
				ifp->if_capenable ^= IFCAP_RXCSUM;
			if (mask & IFCAP_RXCSUM_IPV6)
				ifp->if_capenable ^= IFCAP_RXCSUM_IPV6;
			if (mask & IFCAP_LRO)
				ifp->if_capenable ^= IFCAP_LRO;
			if (mask & IFCAP_VLAN_HWFILTER)
				ifp->if_capenable ^= IFCAP_VLAN_HWFILTER;
		} else
			reinit = 0;

		if (mask & IFCAP_VLAN_HWTSO)
			ifp->if_capenable ^= IFCAP_VLAN_HWTSO;
		if (mask & IFCAP_VLAN_HWTAGGING)
			ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING;

		if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) {
			ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
			vtnet_init_locked(sc);
		}

		VTNET_CORE_UNLOCK(sc);
		VLAN_CAPABILITIES(ifp);

		break;

	default:
		error = ether_ioctl(ifp, cmd, data);
		break;
	}

	VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc);

	return (error);
}

static int
vtnet_rxq_populate(struct vtnet_rxq *rxq)
{
	struct virtqueue *vq;
	int nbufs, error;

	vq = rxq->vtnrx_vq;
	error = ENOSPC;

	for (nbufs = 0; !virtqueue_full(vq); nbufs++) {
		error = vtnet_rxq_new_buf(rxq);
		if (error)
			break;
	}

	if (nbufs > 0) {
		virtqueue_notify(vq);
		/*
		 * EMSGSIZE signifies the virtqueue did not have enough
		 * entries available to hold the last mbuf. This is not
		 * an error.
		 */
		if (error == EMSGSIZE)
			error = 0;
	}

	return (error);
}

static void
vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq)
{
	struct virtqueue *vq;
	struct mbuf *m;
	int last;

	vq = rxq->vtnrx_vq;
	last = 0;

	while ((m = virtqueue_drain(vq, &last)) != NULL)
		m_freem(m);

	KASSERT(virtqueue_empty(vq),
	    ("%s: mbufs remaining in rx queue %p", __func__, rxq));
}

static struct mbuf *
vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp)
{
	struct mbuf *m_head, *m_tail, *m;
	int i, clsize;

	clsize = sc->vtnet_rx_clsize;

	KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG,
	    ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs));

	m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize);
	if (m_head == NULL)
		goto fail;

	m_head->m_len = clsize;
	m_tail = m_head;

	/* Allocate the rest of the chain. */
	for (i = 1; i < nbufs; i++) {
		m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize);
		if (m == NULL)
			goto fail;

		m->m_len = clsize;
		m_tail->m_next = m;
		m_tail = m;
	}

	if (m_tailp != NULL)
		*m_tailp = m_tail;

	return (m_head);

fail:
	sc->vtnet_stats.mbuf_alloc_failed++;
	m_freem(m_head);

	return (NULL);
}

/*
 * Slow path for when LRO without mergeable buffers is negotiated.
 */
static int
vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
    int len0)
{
	struct vtnet_softc *sc;
	struct mbuf *m, *m_prev;
	struct mbuf *m_new, *m_tail;
	int len, clsize, nreplace, error;

	sc = rxq->vtnrx_sc;
	clsize = sc->vtnet_rx_clsize;

	m_prev = NULL;
	m_tail = NULL;
	nreplace = 0;

	m = m0;
	len = len0;

	/*
	 * Since these mbuf chains are so large, we avoid allocating an
	 * entire replacement chain if possible. When the received frame
	 * did not consume the entire chain, the unused mbufs are moved
	 * to the replacement chain.
	 */
	while (len > 0) {
		/*
		 * Something is seriously wrong if we received a frame
		 * larger than the chain. Drop it.
		 */
		if (m == NULL) {
			sc->vtnet_stats.rx_frame_too_large++;
			return (EMSGSIZE);
		}

		/* We always allocate the same cluster size. */
		KASSERT(m->m_len == clsize,
		    ("%s: mbuf size %d is not the cluster size %d",
		    __func__, m->m_len, clsize));

		m->m_len = MIN(m->m_len, len);
		len -= m->m_len;

		m_prev = m;
		m = m->m_next;
		nreplace++;
	}

	KASSERT(nreplace <= sc->vtnet_rx_nmbufs,
	    ("%s: too many replacement mbufs %d max %d", __func__, nreplace,
	    sc->vtnet_rx_nmbufs));

	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
	if (m_new == NULL) {
		m_prev->m_len = clsize;
		return (ENOBUFS);
	}

	/*
	 * Move any unused mbufs from the received chain onto the end
	 * of the new chain.
	 */
	if (m_prev->m_next != NULL) {
		m_tail->m_next = m_prev->m_next;
		m_prev->m_next = NULL;
	}

	error = vtnet_rxq_enqueue_buf(rxq, m_new);
	if (error) {
		/*
		 * BAD! We could not enqueue the replacement mbuf chain. We
		 * must restore the m0 chain to the original state if it was
		 * modified so we can subsequently discard it.
		 *
		 * NOTE: The replacement is supposed to be an identical copy
		 * of the one just dequeued so this is an unexpected error.
		 */
		sc->vtnet_stats.rx_enq_replacement_failed++;

		if (m_tail->m_next != NULL) {
			m_prev->m_next = m_tail->m_next;
			m_tail->m_next = NULL;
		}

		m_prev->m_len = clsize;
		m_freem(m_new);
	}

	return (error);
}

static int
vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len)
{
	struct vtnet_softc *sc;
	struct mbuf *m_new;
	int error;

	sc = rxq->vtnrx_sc;

	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
	    ("%s: chained mbuf without LRO_NOMRG", __func__));

	if (m->m_next == NULL) {
		/* Fast-path for the common case of just one mbuf. */
		if (m->m_len < len)
			return (EINVAL);

		m_new = vtnet_rx_alloc_buf(sc, 1, NULL);
		if (m_new == NULL)
			return (ENOBUFS);

		error = vtnet_rxq_enqueue_buf(rxq, m_new);
		if (error) {
			/*
			 * The new mbuf is supposed to be an identical
			 * copy of the one just dequeued so this is an
			 * unexpected error.
			 */
			m_freem(m_new);
			sc->vtnet_stats.rx_enq_replacement_failed++;
		} else
			m->m_len = len;
	} else
		error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len);

	return (error);
}

static int
vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m)
{
	struct vtnet_softc *sc;
	struct sglist *sg;
	struct vtnet_rx_header *rxhdr;
	uint8_t *mdata;
	int offset, error;

	sc = rxq->vtnrx_sc;
	sg = rxq->vtnrx_sg;
	mdata = mtod(m, uint8_t *);

	VTNET_RXQ_LOCK_ASSERT(rxq);
	KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL,
	    ("%s: chained mbuf without LRO_NOMRG", __func__));
	KASSERT(m->m_len == sc->vtnet_rx_clsize,
	    ("%s: unexpected cluster size %d/%d", __func__, m->m_len,
	    sc->vtnet_rx_clsize));

	sglist_reset(sg);
	if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
		MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr));
		rxhdr = (struct vtnet_rx_header *) mdata;
		sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size);
		offset = sizeof(struct vtnet_rx_header);
	} else
		offset = 0;

	sglist_append(sg, mdata + offset, m->m_len - offset);
	if (m->m_next != NULL) {
		error = sglist_append_mbuf(sg, m->m_next);
		MPASS(error == 0);
	}

	error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg);

	return (error);
}

static int
vtnet_rxq_new_buf(struct vtnet_rxq *rxq)
{
	struct vtnet_softc *sc;
	struct mbuf *m;
	int error;

	sc = rxq->vtnrx_sc;

	m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL);
	if (m == NULL)
		return (ENOBUFS);

	error = vtnet_rxq_enqueue_buf(rxq, m);
	if (error)
		m_freem(m);

	return (error);
}

/*
 * Use the checksum offset in the VirtIO header to set the
 * correct CSUM_* flags.
 */
static int
vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m,
    uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
{
	struct vtnet_softc *sc;
#if defined(INET) || defined(INET6)
	int offset = hdr->csum_start + hdr->csum_offset;
#endif

	sc = rxq->vtnrx_sc;

	/* Only do a basic sanity check on the offset. */
	switch (eth_type) {
#if defined(INET)
	case ETHERTYPE_IP:
		if (__predict_false(offset < ip_start + sizeof(struct ip)))
			return (1);
		break;
#endif
#if defined(INET6)
	case ETHERTYPE_IPV6:
		if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr)))
			return (1);
		break;
#endif
	default:
		sc->vtnet_stats.rx_csum_bad_ethtype++;
		return (1);
	}

	/*
	 * Use the offset to determine the appropriate CSUM_* flags. This is
	 * a bit dirty, but we can get by with it since the checksum offsets
	 * happen to be different. We assume the host does not do IPv4
	 * header checksum offloading.
	 */
	switch (hdr->csum_offset) {
	case offsetof(struct udphdr, uh_sum):
	case offsetof(struct tcphdr, th_sum):
		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		m->m_pkthdr.csum_data = 0xFFFF;
		break;
	case offsetof(struct sctphdr, checksum):
		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
		break;
	default:
		sc->vtnet_stats.rx_csum_bad_offset++;
		return (1);
	}

	return (0);
}

static int
vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m,
    uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr)
{
	struct vtnet_softc *sc;
	int offset, proto;

	sc = rxq->vtnrx_sc;

	switch (eth_type) {
#if defined(INET)
	case ETHERTYPE_IP: {
		struct ip *ip;
		if (__predict_false(m->m_len < ip_start + sizeof(struct ip)))
			return (1);
		ip = (struct ip *)(m->m_data + ip_start);
		proto = ip->ip_p;
		offset = ip_start + (ip->ip_hl << 2);
		break;
	}
#endif
#if defined(INET6)
	case ETHERTYPE_IPV6:
		if (__predict_false(m->m_len < ip_start +
		    sizeof(struct ip6_hdr)))
			return (1);
		offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto);
		if (__predict_false(offset < 0))
			return (1);
		break;
#endif
	default:
		sc->vtnet_stats.rx_csum_bad_ethtype++;
		return (1);
	}

	switch (proto) {
	case IPPROTO_TCP:
		if (__predict_false(m->m_len < offset + sizeof(struct tcphdr)))
			return (1);
		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		m->m_pkthdr.csum_data = 0xFFFF;
		break;
	case IPPROTO_UDP:
		if (__predict_false(m->m_len < offset + sizeof(struct udphdr)))
			return (1);
		m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR;
		m->m_pkthdr.csum_data = 0xFFFF;
		break;
	case IPPROTO_SCTP:
		if (__predict_false(m->m_len < offset + sizeof(struct sctphdr)))
			return (1);
		m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID;
		break;
	default:
		/*
		 * For the remaining protocols, FreeBSD does not support
		 * checksum offloading, so the checksum will be recomputed.
		 */
#if 0
		if_printf(sc->vtnet_ifp, "%s: cksum offload of unsupported "
		    "protocol eth_type=%#x proto=%d csum_start=%d "
		    "csum_offset=%d\n", __func__, eth_type, proto,
		    hdr->csum_start, hdr->csum_offset);
#endif
		break;
	}

	return (0);
}

/*
 * Set the appropriate CSUM_* flags. Unfortunately, the information
 * provided is not directly useful to us. The VirtIO header gives the
 * offset of the checksum, which is all Linux needs, but this is not
 * how FreeBSD does things. We are forced to peek inside the packet
 * a bit.
 *
 * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD
 * could accept the offsets and let the stack figure it out.
 */
static int
vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m,
    struct virtio_net_hdr *hdr)
{
	struct ether_header *eh;
	struct ether_vlan_header *evh;
	uint16_t eth_type;
	int offset, error;

	eh = mtod(m, struct ether_header *);
	eth_type = ntohs(eh->ether_type);
	if (eth_type == ETHERTYPE_VLAN) {
		/* BMV: We should handle nested VLAN tags too. */
		evh = mtod(m, struct ether_vlan_header *);
		eth_type = ntohs(evh->evl_proto);
		offset = sizeof(struct ether_vlan_header);
	} else
		offset = sizeof(struct ether_header);

	if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
		error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr);
	else
		error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr);

	return (error);
}

static void
vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs)
{
	struct mbuf *m;

	while (--nbufs > 0) {
		m = virtqueue_dequeue(rxq->vtnrx_vq, NULL);
		if (m == NULL)
			break;
		vtnet_rxq_discard_buf(rxq, m);
	}
}

static void
vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m)
{
	int error;

	/*
	 * Requeue the discarded mbuf. This should always be successful
	 * since it was just dequeued.
	 */
	error = vtnet_rxq_enqueue_buf(rxq, m);
	KASSERT(error == 0,
	    ("%s: cannot requeue discarded mbuf %d", __func__, error));
}

static int
vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs)
{
	struct vtnet_softc *sc;
	struct ifnet *ifp;
	struct virtqueue *vq;
	struct mbuf *m, *m_tail;
	int len;

	sc = rxq->vtnrx_sc;
	vq = rxq->vtnrx_vq;
	ifp = sc->vtnet_ifp;
	m_tail = m_head;

	while (--nbufs > 0) {
		m = virtqueue_dequeue(vq, &len);
		if (m == NULL) {
			rxq->vtnrx_stats.vrxs_ierrors++;
			goto fail;
		}

		if (vtnet_rxq_new_buf(rxq) != 0) {
			rxq->vtnrx_stats.vrxs_iqdrops++;
			vtnet_rxq_discard_buf(rxq, m);
			if (nbufs > 1)
				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
			goto fail;
		}

		if (m->m_len < len)
			len = m->m_len;

		m->m_len = len;
		m->m_flags &= ~M_PKTHDR;

		m_head->m_pkthdr.len += len;
		m_tail->m_next = m;
		m_tail = m;
	}

	return (0);

fail:
	sc->vtnet_stats.rx_mergeable_failed++;
	m_freem(m_head);

	return (1);
}

static void
vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m,
    struct virtio_net_hdr *hdr)
{
	struct vtnet_softc *sc;
	struct ifnet *ifp;
	struct ether_header *eh;

	sc = rxq->vtnrx_sc;
	ifp = sc->vtnet_ifp;

	if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) {
		eh = mtod(m, struct ether_header *);
		if (eh->ether_type == htons(ETHERTYPE_VLAN)) {
			vtnet_vlan_tag_remove(m);
			/*
			 * With the 802.1Q header removed, update the
			 * checksum starting location accordingly.
			 */
			if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM)
				hdr->csum_start -= ETHER_VLAN_ENCAP_LEN;
		}
	}

	m->m_pkthdr.flowid = rxq->vtnrx_id;
	M_HASHTYPE_SET(m, M_HASHTYPE_OPAQUE);

	/*
	 * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum
	 * distinction that Linux does. Need to reevaluate if performing
	 * offloading for the NEEDS_CSUM case is really appropriate.
	 */
	if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM |
	    VIRTIO_NET_HDR_F_DATA_VALID)) {
		if (vtnet_rxq_csum(rxq, m, hdr) == 0)
			rxq->vtnrx_stats.vrxs_csum++;
		else
			rxq->vtnrx_stats.vrxs_csum_failed++;
	}

	rxq->vtnrx_stats.vrxs_ipackets++;
	rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len;

	VTNET_RXQ_UNLOCK(rxq);
	(*ifp->if_input)(ifp, m);
	VTNET_RXQ_LOCK(rxq);
}

static int
vtnet_rxq_eof(struct vtnet_rxq *rxq)
{
	struct virtio_net_hdr lhdr, *hdr;
	struct vtnet_softc *sc;
	struct ifnet *ifp;
	struct virtqueue *vq;
	struct mbuf *m;
	struct virtio_net_hdr_mrg_rxbuf *mhdr;
	int len, deq, nbufs, adjsz, count;

	sc = rxq->vtnrx_sc;
	vq = rxq->vtnrx_vq;
	ifp = sc->vtnet_ifp;
	hdr = &lhdr;
	deq = 0;
	count = sc->vtnet_rx_process_limit;

	VTNET_RXQ_LOCK_ASSERT(rxq);

#ifdef DEV_NETMAP
	if (netmap_rx_irq(ifp, 0, &deq)) {
		return (FALSE);
	}
#endif /* DEV_NETMAP */

	while (count-- > 0) {
		m = virtqueue_dequeue(vq, &len);
		if (m == NULL)
			break;
		deq++;

		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
			rxq->vtnrx_stats.vrxs_ierrors++;
			vtnet_rxq_discard_buf(rxq, m);
			continue;
		}

		if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) {
			nbufs = 1;
			adjsz = sizeof(struct vtnet_rx_header);
			/*
			 * Account for our pad inserted between the header
			 * and the actual start of the frame.
			 */
			len += VTNET_RX_HEADER_PAD;
		} else {
			mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
			nbufs = mhdr->num_buffers;
			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
		}

		if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
			rxq->vtnrx_stats.vrxs_iqdrops++;
			vtnet_rxq_discard_buf(rxq, m);
			if (nbufs > 1)
				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
			continue;
		}

		m->m_pkthdr.len = len;
		m->m_pkthdr.rcvif = ifp;
		m->m_pkthdr.csum_flags = 0;

		if (nbufs > 1) {
			/* Dequeue the rest of chain. */
			if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
				continue;
		}

		/*
		 * Save copy of header before we strip it. For both mergeable
		 * and non-mergeable, the header is at the beginning of the
		 * mbuf data. We no longer need num_buffers, so always use a
		 * regular header.
		 *
		 * BMV: Is this memcpy() expensive? We know the mbuf data is
		 * still valid even after the m_adj().
		 */
		memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr));
		m_adj(m, adjsz);

		vtnet_rxq_input(rxq, m, hdr);

		/* Must recheck after dropping the Rx lock. */
		if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0)
			break;
	}

	if (deq > 0)
		virtqueue_notify(vq);

	return (count > 0 ? 0 : EAGAIN);
}

static void
vtnet_rx_vq_intr(void *xrxq)
{
	struct vtnet_softc *sc;
	struct vtnet_rxq *rxq;
	struct ifnet *ifp;
	int tries, more;

	rxq = xrxq;
	sc = rxq->vtnrx_sc;
	ifp = sc->vtnet_ifp;
	tries = 0;

	if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) {
		/*
		 * Ignore this interrupt. Either this is a spurious interrupt
		 * or multiqueue without per-VQ MSIX so every queue needs to
		 * be polled (a brain dead configuration we could try harder
		 * to avoid).
		 */
		vtnet_rxq_disable_intr(rxq);
		return;
	}

	VTNET_RXQ_LOCK(rxq);

again:
	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		VTNET_RXQ_UNLOCK(rxq);
		return;
	}

	more = vtnet_rxq_eof(rxq);
	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
		if (!more)
			vtnet_rxq_disable_intr(rxq);
		/*
		 * This is an occasional condition or race (when !more),
		 * so retry a few times before scheduling the taskqueue.
		 */
		if (tries++ < VTNET_INTR_DISABLE_RETRIES)
			goto again;

		VTNET_RXQ_UNLOCK(rxq);
		rxq->vtnrx_stats.vrxs_rescheduled++;
		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
	} else
		VTNET_RXQ_UNLOCK(rxq);
}

static void
vtnet_rxq_tq_intr(void *xrxq, int pending)
{
	struct vtnet_softc *sc;
	struct vtnet_rxq *rxq;
	struct ifnet *ifp;
	int more;

	rxq = xrxq;
	sc = rxq->vtnrx_sc;
	ifp = sc->vtnet_ifp;

	VTNET_RXQ_LOCK(rxq);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		VTNET_RXQ_UNLOCK(rxq);
		return;
	}

	more = vtnet_rxq_eof(rxq);
	if (more || vtnet_rxq_enable_intr(rxq) != 0) {
		if (!more)
			vtnet_rxq_disable_intr(rxq);
		rxq->vtnrx_stats.vrxs_rescheduled++;
		taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);
	}

	VTNET_RXQ_UNLOCK(rxq);
}

static int
vtnet_txq_below_threshold(struct vtnet_txq *txq)
{
	struct vtnet_softc *sc;
	struct virtqueue *vq;

	sc = txq->vtntx_sc;
	vq = txq->vtntx_vq;

	return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh);
}

static int
vtnet_txq_notify(struct vtnet_txq *txq)
{
	struct virtqueue *vq;

	vq = txq->vtntx_vq;

	txq->vtntx_watchdog = VTNET_TX_TIMEOUT;
	virtqueue_notify(vq);

	if (vtnet_txq_enable_intr(txq) == 0)
		return (0);

	/*
	 * Drain frames that were completed since last checked. If this
	 * causes the queue to go above the threshold, the caller should
	 * continue transmitting.
	 */
	if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) {
		virtqueue_disable_intr(vq);
		return (1);
	}

	return (0);
}

static void
vtnet_txq_free_mbufs(struct vtnet_txq *txq)
{
	struct virtqueue *vq;
	struct vtnet_tx_header *txhdr;
	int last;

	vq = txq->vtntx_vq;
	last = 0;

	while ((txhdr = virtqueue_drain(vq, &last)) != NULL) {
		m_freem(txhdr->vth_mbuf);
		uma_zfree(vtnet_tx_header_zone, txhdr);
	}

	KASSERT(virtqueue_empty(vq),
	    ("%s: mbufs remaining in tx queue %p", __func__, txq));
}

/*
 * BMV: Much of this can go away once we finally have offsets in
 * the mbuf packet header. Bug andre@.
 */
static int
vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m,
    int *etype, int *proto, int *start)
{
	struct vtnet_softc *sc;
	struct ether_vlan_header *evh;
	int offset;

	sc = txq->vtntx_sc;

	evh = mtod(m, struct ether_vlan_header *);
	if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) {
		/* BMV: We should handle nested VLAN tags too. */
*/ 1984 *etype = ntohs(evh->evl_proto); 1985 offset = sizeof(struct ether_vlan_header); 1986 } else { 1987 *etype = ntohs(evh->evl_encap_proto); 1988 offset = sizeof(struct ether_header); 1989 } 1990 1991 switch (*etype) { 1992 #if defined(INET) 1993 case ETHERTYPE_IP: { 1994 struct ip *ip, iphdr; 1995 if (__predict_false(m->m_len < offset + sizeof(struct ip))) { 1996 m_copydata(m, offset, sizeof(struct ip), 1997 (caddr_t) &iphdr); 1998 ip = &iphdr; 1999 } else 2000 ip = (struct ip *)(m->m_data + offset); 2001 *proto = ip->ip_p; 2002 *start = offset + (ip->ip_hl << 2); 2003 break; 2004 } 2005 #endif 2006 #if defined(INET6) 2007 case ETHERTYPE_IPV6: 2008 *proto = -1; 2009 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); 2010 /* Assert the network stack sent us a valid packet. */ 2011 KASSERT(*start > offset, 2012 ("%s: mbuf %p start %d offset %d proto %d", __func__, m, 2013 *start, offset, *proto)); 2014 break; 2015 #endif 2016 default: 2017 sc->vtnet_stats.tx_csum_bad_ethtype++; 2018 return (EINVAL); 2019 } 2020 2021 return (0); 2022 } 2023 2024 static int 2025 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type, 2026 int offset, struct virtio_net_hdr *hdr) 2027 { 2028 static struct timeval lastecn; 2029 static int curecn; 2030 struct vtnet_softc *sc; 2031 struct tcphdr *tcp, tcphdr; 2032 2033 sc = txq->vtntx_sc; 2034 2035 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { 2036 m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); 2037 tcp = &tcphdr; 2038 } else 2039 tcp = (struct tcphdr *)(m->m_data + offset); 2040 2041 hdr->hdr_len = offset + (tcp->th_off << 2); 2042 hdr->gso_size = m->m_pkthdr.tso_segsz; 2043 hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 : 2044 VIRTIO_NET_HDR_GSO_TCPV6; 2045 2046 if (tcp->th_flags & TH_CWR) { 2047 /* 2048 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD, 2049 * ECN support is not on a per-interface basis, but globally via 2050 * the net.inet.tcp.ecn.enable sysctl knob. The default is off. 2051 */ 2052 if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { 2053 if (ppsratecheck(&lastecn, &curecn, 1)) 2054 if_printf(sc->vtnet_ifp, 2055 "TSO with ECN not negotiated with host\n"); 2056 return (ENOTSUP); 2057 } 2058 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; 2059 } 2060 2061 txq->vtntx_stats.vtxs_tso++; 2062 2063 return (0); 2064 } 2065 2066 static struct mbuf * 2067 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m, 2068 struct virtio_net_hdr *hdr) 2069 { 2070 struct vtnet_softc *sc; 2071 int flags, etype, csum_start, proto, error; 2072 2073 sc = txq->vtntx_sc; 2074 flags = m->m_pkthdr.csum_flags; 2075 2076 error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start); 2077 if (error) 2078 goto drop; 2079 2080 if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) || 2081 (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) { 2082 /* 2083 * We could compare the IP protocol vs the CSUM_ flag too, 2084 * but that really should not be necessary. 2085 */ 2086 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; 2087 hdr->csum_start = csum_start; 2088 hdr->csum_offset = m->m_pkthdr.csum_data; 2089 txq->vtntx_stats.vtxs_csum++; 2090 } 2091 2092 if (flags & CSUM_TSO) { 2093 if (__predict_false(proto != IPPROTO_TCP)) { 2094 /* Likely failed to correctly parse the mbuf. 
*/ 2095 sc->vtnet_stats.tx_tso_not_tcp++; 2096 goto drop; 2097 } 2098 2099 KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM, 2100 ("%s: mbuf %p TSO without checksum offload %#x", 2101 __func__, m, flags)); 2102 2103 error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr); 2104 if (error) 2105 goto drop; 2106 } 2107 2108 return (m); 2109 2110 drop: 2111 m_freem(m); 2112 return (NULL); 2113 } 2114 2115 static int 2116 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, 2117 struct vtnet_tx_header *txhdr) 2118 { 2119 struct vtnet_softc *sc; 2120 struct virtqueue *vq; 2121 struct sglist *sg; 2122 struct mbuf *m; 2123 int error; 2124 2125 sc = txq->vtntx_sc; 2126 vq = txq->vtntx_vq; 2127 sg = txq->vtntx_sg; 2128 m = *m_head; 2129 2130 sglist_reset(sg); 2131 error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); 2132 KASSERT(error == 0 && sg->sg_nseg == 1, 2133 ("%s: error %d adding header to sglist", __func__, error)); 2134 2135 error = sglist_append_mbuf(sg, m); 2136 if (error) { 2137 m = m_defrag(m, M_NOWAIT); 2138 if (m == NULL) 2139 goto fail; 2140 2141 *m_head = m; 2142 sc->vtnet_stats.tx_defragged++; 2143 2144 error = sglist_append_mbuf(sg, m); 2145 if (error) 2146 goto fail; 2147 } 2148 2149 txhdr->vth_mbuf = m; 2150 error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0); 2151 2152 return (error); 2153 2154 fail: 2155 sc->vtnet_stats.tx_defrag_failed++; 2156 m_freem(*m_head); 2157 *m_head = NULL; 2158 2159 return (ENOBUFS); 2160 } 2161 2162 static int 2163 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head) 2164 { 2165 struct vtnet_tx_header *txhdr; 2166 struct virtio_net_hdr *hdr; 2167 struct mbuf *m; 2168 int error; 2169 2170 m = *m_head; 2171 M_ASSERTPKTHDR(m); 2172 2173 txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO); 2174 if (txhdr == NULL) { 2175 m_freem(m); 2176 *m_head = NULL; 2177 return (ENOMEM); 2178 } 2179 2180 /* 2181 * Always use the non-mergeable header, regardless if the feature 2182 * was negotiated. For transmit, num_buffers is always zero. The 2183 * vtnet_hdr_size is used to enqueue the correct header size. 
2184 */ 2185 hdr = &txhdr->vth_uhdr.hdr; 2186 2187 if (m->m_flags & M_VLANTAG) { 2188 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); 2189 if ((*m_head = m) == NULL) { 2190 error = ENOBUFS; 2191 goto fail; 2192 } 2193 m->m_flags &= ~M_VLANTAG; 2194 } 2195 2196 if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) { 2197 m = vtnet_txq_offload(txq, m, hdr); 2198 if ((*m_head = m) == NULL) { 2199 error = ENOBUFS; 2200 goto fail; 2201 } 2202 } 2203 2204 error = vtnet_txq_enqueue_buf(txq, m_head, txhdr); 2205 if (error == 0) 2206 return (0); 2207 2208 fail: 2209 uma_zfree(vtnet_tx_header_zone, txhdr); 2210 2211 return (error); 2212 } 2213 2214 #ifdef VTNET_LEGACY_TX 2215 2216 static void 2217 vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp) 2218 { 2219 struct vtnet_softc *sc; 2220 struct virtqueue *vq; 2221 struct mbuf *m0; 2222 int tries, enq; 2223 2224 sc = txq->vtntx_sc; 2225 vq = txq->vtntx_vq; 2226 tries = 0; 2227 2228 VTNET_TXQ_LOCK_ASSERT(txq); 2229 2230 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2231 sc->vtnet_link_active == 0) 2232 return; 2233 2234 vtnet_txq_eof(txq); 2235 2236 again: 2237 enq = 0; 2238 2239 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 2240 if (virtqueue_full(vq)) 2241 break; 2242 2243 IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); 2244 if (m0 == NULL) 2245 break; 2246 2247 if (vtnet_txq_encap(txq, &m0) != 0) { 2248 if (m0 != NULL) 2249 IFQ_DRV_PREPEND(&ifp->if_snd, m0); 2250 break; 2251 } 2252 2253 enq++; 2254 ETHER_BPF_MTAP(ifp, m0); 2255 } 2256 2257 if (enq > 0 && vtnet_txq_notify(txq) != 0) { 2258 if (tries++ < VTNET_NOTIFY_RETRIES) 2259 goto again; 2260 2261 txq->vtntx_stats.vtxs_rescheduled++; 2262 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); 2263 } 2264 } 2265 2266 static void 2267 vtnet_start(struct ifnet *ifp) 2268 { 2269 struct vtnet_softc *sc; 2270 struct vtnet_txq *txq; 2271 2272 sc = ifp->if_softc; 2273 txq = &sc->vtnet_txqs[0]; 2274 2275 VTNET_TXQ_LOCK(txq); 2276 vtnet_start_locked(txq, ifp); 2277 VTNET_TXQ_UNLOCK(txq); 2278 } 2279 2280 #else /* !VTNET_LEGACY_TX */ 2281 2282 static int 2283 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) 2284 { 2285 struct vtnet_softc *sc; 2286 struct virtqueue *vq; 2287 struct buf_ring *br; 2288 struct ifnet *ifp; 2289 int enq, tries, error; 2290 2291 sc = txq->vtntx_sc; 2292 vq = txq->vtntx_vq; 2293 br = txq->vtntx_br; 2294 ifp = sc->vtnet_ifp; 2295 tries = 0; 2296 error = 0; 2297 2298 VTNET_TXQ_LOCK_ASSERT(txq); 2299 2300 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2301 sc->vtnet_link_active == 0) { 2302 if (m != NULL) 2303 error = drbr_enqueue(ifp, br, m); 2304 return (error); 2305 } 2306 2307 if (m != NULL) { 2308 error = drbr_enqueue(ifp, br, m); 2309 if (error) 2310 return (error); 2311 } 2312 2313 vtnet_txq_eof(txq); 2314 2315 again: 2316 enq = 0; 2317 2318 while ((m = drbr_peek(ifp, br)) != NULL) { 2319 if (virtqueue_full(vq)) { 2320 drbr_putback(ifp, br, m); 2321 break; 2322 } 2323 2324 if (vtnet_txq_encap(txq, &m) != 0) { 2325 if (m != NULL) 2326 drbr_putback(ifp, br, m); 2327 else 2328 drbr_advance(ifp, br); 2329 break; 2330 } 2331 drbr_advance(ifp, br); 2332 2333 enq++; 2334 ETHER_BPF_MTAP(ifp, m); 2335 } 2336 2337 if (enq > 0 && vtnet_txq_notify(txq) != 0) { 2338 if (tries++ < VTNET_NOTIFY_RETRIES) 2339 goto again; 2340 2341 txq->vtntx_stats.vtxs_rescheduled++; 2342 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); 2343 } 2344 2345 return (0); 2346 } 2347 2348 static int 2349 vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m) 2350 { 2351 struct vtnet_softc *sc; 
2352 struct vtnet_txq *txq; 2353 int i, npairs, error; 2354 2355 sc = ifp->if_softc; 2356 npairs = sc->vtnet_act_vq_pairs; 2357 2358 /* check if flowid is set */ 2359 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 2360 i = m->m_pkthdr.flowid % npairs; 2361 else 2362 i = curcpu % npairs; 2363 2364 txq = &sc->vtnet_txqs[i]; 2365 2366 if (VTNET_TXQ_TRYLOCK(txq) != 0) { 2367 error = vtnet_txq_mq_start_locked(txq, m); 2368 VTNET_TXQ_UNLOCK(txq); 2369 } else { 2370 error = drbr_enqueue(ifp, txq->vtntx_br, m); 2371 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask); 2372 } 2373 2374 return (error); 2375 } 2376 2377 static void 2378 vtnet_txq_tq_deferred(void *xtxq, int pending) 2379 { 2380 struct vtnet_softc *sc; 2381 struct vtnet_txq *txq; 2382 2383 txq = xtxq; 2384 sc = txq->vtntx_sc; 2385 2386 VTNET_TXQ_LOCK(txq); 2387 if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br)) 2388 vtnet_txq_mq_start_locked(txq, NULL); 2389 VTNET_TXQ_UNLOCK(txq); 2390 } 2391 2392 #endif /* VTNET_LEGACY_TX */ 2393 2394 static void 2395 vtnet_txq_start(struct vtnet_txq *txq) 2396 { 2397 struct vtnet_softc *sc; 2398 struct ifnet *ifp; 2399 2400 sc = txq->vtntx_sc; 2401 ifp = sc->vtnet_ifp; 2402 2403 #ifdef VTNET_LEGACY_TX 2404 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) 2405 vtnet_start_locked(txq, ifp); 2406 #else 2407 if (!drbr_empty(ifp, txq->vtntx_br)) 2408 vtnet_txq_mq_start_locked(txq, NULL); 2409 #endif 2410 } 2411 2412 static void 2413 vtnet_txq_tq_intr(void *xtxq, int pending) 2414 { 2415 struct vtnet_softc *sc; 2416 struct vtnet_txq *txq; 2417 struct ifnet *ifp; 2418 2419 txq = xtxq; 2420 sc = txq->vtntx_sc; 2421 ifp = sc->vtnet_ifp; 2422 2423 VTNET_TXQ_LOCK(txq); 2424 2425 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2426 VTNET_TXQ_UNLOCK(txq); 2427 return; 2428 } 2429 2430 vtnet_txq_eof(txq); 2431 vtnet_txq_start(txq); 2432 2433 VTNET_TXQ_UNLOCK(txq); 2434 } 2435 2436 static int 2437 vtnet_txq_eof(struct vtnet_txq *txq) 2438 { 2439 struct virtqueue *vq; 2440 struct vtnet_tx_header *txhdr; 2441 struct mbuf *m; 2442 int deq; 2443 2444 vq = txq->vtntx_vq; 2445 deq = 0; 2446 VTNET_TXQ_LOCK_ASSERT(txq); 2447 2448 #ifdef DEV_NETMAP 2449 if (netmap_tx_irq(txq->vtntx_sc->vtnet_ifp, txq->vtntx_id)) { 2450 virtqueue_disable_intr(vq); // XXX luigi 2451 return 0; // XXX or 1 ? 2452 } 2453 #endif /* DEV_NETMAP */ 2454 2455 while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { 2456 m = txhdr->vth_mbuf; 2457 deq++; 2458 2459 txq->vtntx_stats.vtxs_opackets++; 2460 txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len; 2461 if (m->m_flags & M_MCAST) 2462 txq->vtntx_stats.vtxs_omcasts++; 2463 2464 m_freem(m); 2465 uma_zfree(vtnet_tx_header_zone, txhdr); 2466 } 2467 2468 if (virtqueue_empty(vq)) 2469 txq->vtntx_watchdog = 0; 2470 2471 return (deq); 2472 } 2473 2474 static void 2475 vtnet_tx_vq_intr(void *xtxq) 2476 { 2477 struct vtnet_softc *sc; 2478 struct vtnet_txq *txq; 2479 struct ifnet *ifp; 2480 2481 txq = xtxq; 2482 sc = txq->vtntx_sc; 2483 ifp = sc->vtnet_ifp; 2484 2485 if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) { 2486 /* 2487 * Ignore this interrupt. Either this is a spurious interrupt 2488 * or multiqueue without per-VQ MSIX so every queue needs to 2489 * be polled (a brain dead configuration we could try harder 2490 * to avoid). 
2491 */ 2492 vtnet_txq_disable_intr(txq); 2493 return; 2494 } 2495 2496 VTNET_TXQ_LOCK(txq); 2497 2498 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2499 VTNET_TXQ_UNLOCK(txq); 2500 return; 2501 } 2502 2503 vtnet_txq_eof(txq); 2504 vtnet_txq_start(txq); 2505 2506 VTNET_TXQ_UNLOCK(txq); 2507 } 2508 2509 static void 2510 vtnet_tx_start_all(struct vtnet_softc *sc) 2511 { 2512 struct vtnet_txq *txq; 2513 int i; 2514 2515 VTNET_CORE_LOCK_ASSERT(sc); 2516 2517 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2518 txq = &sc->vtnet_txqs[i]; 2519 2520 VTNET_TXQ_LOCK(txq); 2521 vtnet_txq_start(txq); 2522 VTNET_TXQ_UNLOCK(txq); 2523 } 2524 } 2525 2526 #ifndef VTNET_LEGACY_TX 2527 static void 2528 vtnet_qflush(struct ifnet *ifp) 2529 { 2530 struct vtnet_softc *sc; 2531 struct vtnet_txq *txq; 2532 struct mbuf *m; 2533 int i; 2534 2535 sc = ifp->if_softc; 2536 2537 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2538 txq = &sc->vtnet_txqs[i]; 2539 2540 VTNET_TXQ_LOCK(txq); 2541 while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL) 2542 m_freem(m); 2543 VTNET_TXQ_UNLOCK(txq); 2544 } 2545 2546 if_qflush(ifp); 2547 } 2548 #endif 2549 2550 static int 2551 vtnet_watchdog(struct vtnet_txq *txq) 2552 { 2553 struct ifnet *ifp; 2554 2555 ifp = txq->vtntx_sc->vtnet_ifp; 2556 2557 VTNET_TXQ_LOCK(txq); 2558 if (txq->vtntx_watchdog == 1) { 2559 /* 2560 * Only drain completed frames if the watchdog is about to 2561 * expire. If any frames were drained, there may be enough 2562 * free descriptors now available to transmit queued frames. 2563 * In that case, the timer will immediately be decremented 2564 * below, but the timeout is generous enough that should not 2565 * be a problem. 2566 */ 2567 if (vtnet_txq_eof(txq) != 0) 2568 vtnet_txq_start(txq); 2569 } 2570 2571 if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) { 2572 VTNET_TXQ_UNLOCK(txq); 2573 return (0); 2574 } 2575 VTNET_TXQ_UNLOCK(txq); 2576 2577 if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id); 2578 return (1); 2579 } 2580 2581 static void 2582 vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc, 2583 struct vtnet_txq_stats *txacc) 2584 { 2585 2586 bzero(rxacc, sizeof(struct vtnet_rxq_stats)); 2587 bzero(txacc, sizeof(struct vtnet_txq_stats)); 2588 2589 for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) { 2590 struct vtnet_rxq_stats *rxst; 2591 struct vtnet_txq_stats *txst; 2592 2593 rxst = &sc->vtnet_rxqs[i].vtnrx_stats; 2594 rxacc->vrxs_ipackets += rxst->vrxs_ipackets; 2595 rxacc->vrxs_ibytes += rxst->vrxs_ibytes; 2596 rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops; 2597 rxacc->vrxs_csum += rxst->vrxs_csum; 2598 rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed; 2599 rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled; 2600 2601 txst = &sc->vtnet_txqs[i].vtntx_stats; 2602 txacc->vtxs_opackets += txst->vtxs_opackets; 2603 txacc->vtxs_obytes += txst->vtxs_obytes; 2604 txacc->vtxs_csum += txst->vtxs_csum; 2605 txacc->vtxs_tso += txst->vtxs_tso; 2606 txacc->vtxs_rescheduled += txst->vtxs_rescheduled; 2607 } 2608 } 2609 2610 static uint64_t 2611 vtnet_get_counter(if_t ifp, ift_counter cnt) 2612 { 2613 struct vtnet_softc *sc; 2614 struct vtnet_rxq_stats rxaccum; 2615 struct vtnet_txq_stats txaccum; 2616 2617 sc = if_getsoftc(ifp); 2618 vtnet_accum_stats(sc, &rxaccum, &txaccum); 2619 2620 switch (cnt) { 2621 case IFCOUNTER_IPACKETS: 2622 return (rxaccum.vrxs_ipackets); 2623 case IFCOUNTER_IQDROPS: 2624 return (rxaccum.vrxs_iqdrops); 2625 case IFCOUNTER_IERRORS: 2626 return (rxaccum.vrxs_ierrors); 2627 case IFCOUNTER_OPACKETS: 
2628 return (txaccum.vtxs_opackets); 2629 #ifndef VTNET_LEGACY_TX 2630 case IFCOUNTER_OBYTES: 2631 return (txaccum.vtxs_obytes); 2632 case IFCOUNTER_OMCASTS: 2633 return (txaccum.vtxs_omcasts); 2634 #endif 2635 default: 2636 return (if_get_counter_default(ifp, cnt)); 2637 } 2638 } 2639 2640 static void 2641 vtnet_tick(void *xsc) 2642 { 2643 struct vtnet_softc *sc; 2644 struct ifnet *ifp; 2645 int i, timedout; 2646 2647 sc = xsc; 2648 ifp = sc->vtnet_ifp; 2649 timedout = 0; 2650 2651 VTNET_CORE_LOCK_ASSERT(sc); 2652 2653 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 2654 timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]); 2655 2656 if (timedout != 0) { 2657 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 2658 vtnet_init_locked(sc); 2659 } else 2660 callout_schedule(&sc->vtnet_tick_ch, hz); 2661 } 2662 2663 static void 2664 vtnet_start_taskqueues(struct vtnet_softc *sc) 2665 { 2666 device_t dev; 2667 struct vtnet_rxq *rxq; 2668 struct vtnet_txq *txq; 2669 int i, error; 2670 2671 dev = sc->vtnet_dev; 2672 2673 /* 2674 * Errors here are very difficult to recover from - we cannot 2675 * easily fail because, if this is during boot, we will hang 2676 * when freeing any successfully started taskqueues because 2677 * the scheduler isn't up yet. 2678 * 2679 * Most drivers just ignore the return value - it only fails 2680 * with ENOMEM so an error is not likely. 2681 */ 2682 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 2683 rxq = &sc->vtnet_rxqs[i]; 2684 error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET, 2685 "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id); 2686 if (error) { 2687 device_printf(dev, "failed to start rx taskq %d\n", 2688 rxq->vtnrx_id); 2689 } 2690 2691 txq = &sc->vtnet_txqs[i]; 2692 error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET, 2693 "%s txq %d", device_get_nameunit(dev), txq->vtntx_id); 2694 if (error) { 2695 device_printf(dev, "failed to start tx taskq %d\n", 2696 txq->vtntx_id); 2697 } 2698 } 2699 } 2700 2701 static void 2702 vtnet_free_taskqueues(struct vtnet_softc *sc) 2703 { 2704 struct vtnet_rxq *rxq; 2705 struct vtnet_txq *txq; 2706 int i; 2707 2708 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 2709 rxq = &sc->vtnet_rxqs[i]; 2710 if (rxq->vtnrx_tq != NULL) { 2711 taskqueue_free(rxq->vtnrx_tq); 2712 rxq->vtnrx_tq = NULL; 2713 } 2714 2715 txq = &sc->vtnet_txqs[i]; 2716 if (txq->vtntx_tq != NULL) { 2717 taskqueue_free(txq->vtntx_tq); 2718 txq->vtntx_tq = NULL; 2719 } 2720 } 2721 } 2722 2723 static void 2724 vtnet_drain_taskqueues(struct vtnet_softc *sc) 2725 { 2726 struct vtnet_rxq *rxq; 2727 struct vtnet_txq *txq; 2728 int i; 2729 2730 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 2731 rxq = &sc->vtnet_rxqs[i]; 2732 if (rxq->vtnrx_tq != NULL) 2733 taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 2734 2735 txq = &sc->vtnet_txqs[i]; 2736 if (txq->vtntx_tq != NULL) { 2737 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask); 2738 #ifndef VTNET_LEGACY_TX 2739 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask); 2740 #endif 2741 } 2742 } 2743 } 2744 2745 static void 2746 vtnet_drain_rxtx_queues(struct vtnet_softc *sc) 2747 { 2748 struct vtnet_rxq *rxq; 2749 struct vtnet_txq *txq; 2750 int i; 2751 2752 #ifdef DEV_NETMAP 2753 if (nm_native_on(NA(sc->vtnet_ifp))) 2754 return; 2755 #endif /* DEV_NETMAP */ 2756 2757 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2758 rxq = &sc->vtnet_rxqs[i]; 2759 vtnet_rxq_free_mbufs(rxq); 2760 2761 txq = &sc->vtnet_txqs[i]; 2762 vtnet_txq_free_mbufs(txq); 2763 } 2764 } 2765 2766 static void 2767 vtnet_stop_rendezvous(struct
vtnet_softc *sc) 2768 { 2769 struct vtnet_rxq *rxq; 2770 struct vtnet_txq *txq; 2771 int i; 2772 2773 /* 2774 * Lock and unlock the per-queue mutex so we know the stop 2775 * state is visible. Doing only the active queues should be 2776 * sufficient, but it does not cost much extra to do all the 2777 * queues. Note we hold the core mutex here too. 2778 */ 2779 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 2780 rxq = &sc->vtnet_rxqs[i]; 2781 VTNET_RXQ_LOCK(rxq); 2782 VTNET_RXQ_UNLOCK(rxq); 2783 2784 txq = &sc->vtnet_txqs[i]; 2785 VTNET_TXQ_LOCK(txq); 2786 VTNET_TXQ_UNLOCK(txq); 2787 } 2788 } 2789 2790 static void 2791 vtnet_stop(struct vtnet_softc *sc) 2792 { 2793 device_t dev; 2794 struct ifnet *ifp; 2795 2796 dev = sc->vtnet_dev; 2797 ifp = sc->vtnet_ifp; 2798 2799 VTNET_CORE_LOCK_ASSERT(sc); 2800 2801 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 2802 sc->vtnet_link_active = 0; 2803 callout_stop(&sc->vtnet_tick_ch); 2804 2805 /* Only advisory. */ 2806 vtnet_disable_interrupts(sc); 2807 2808 /* 2809 * Stop the host adapter. This resets it to the pre-initialized 2810 * state. It will not generate any interrupts until after it is 2811 * reinitialized. 2812 */ 2813 virtio_stop(dev); 2814 vtnet_stop_rendezvous(sc); 2815 2816 /* Free any mbufs left in the virtqueues. */ 2817 vtnet_drain_rxtx_queues(sc); 2818 } 2819 2820 static int 2821 vtnet_virtio_reinit(struct vtnet_softc *sc) 2822 { 2823 device_t dev; 2824 struct ifnet *ifp; 2825 uint64_t features; 2826 int mask, error; 2827 2828 dev = sc->vtnet_dev; 2829 ifp = sc->vtnet_ifp; 2830 features = sc->vtnet_features; 2831 2832 mask = 0; 2833 #if defined(INET) 2834 mask |= IFCAP_RXCSUM; 2835 #endif 2836 #if defined(INET6) 2837 mask |= IFCAP_RXCSUM_IPV6; 2838 #endif 2839 2840 /* 2841 * Re-negotiate with the host, removing any disabled receive 2842 * features. Transmit features are disabled only on our side 2843 * via if_capenable and if_hwassist. 2844 */ 2845 2846 if (ifp->if_capabilities & mask) { 2847 /* 2848 * We require both IPv4 and IPv6 offloading to be enabled 2849 * in order to negotiate it: VirtIO does not distinguish 2850 * between the two. 2851 */ 2852 if ((ifp->if_capenable & mask) != mask) 2853 features &= ~VIRTIO_NET_F_GUEST_CSUM; 2854 } 2855 2856 if (ifp->if_capabilities & IFCAP_LRO) { 2857 if ((ifp->if_capenable & IFCAP_LRO) == 0) 2858 features &= ~VTNET_LRO_FEATURES; 2859 } 2860 2861 if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) { 2862 if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0) 2863 features &= ~VIRTIO_NET_F_CTRL_VLAN; 2864 } 2865 2866 error = virtio_reinit(dev, features); 2867 if (error) 2868 device_printf(dev, "virtio reinit error %d\n", error); 2869 2870 return (error); 2871 } 2872 2873 static void 2874 vtnet_init_rx_filters(struct vtnet_softc *sc) 2875 { 2876 struct ifnet *ifp; 2877 2878 ifp = sc->vtnet_ifp; 2879 2880 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { 2881 /* Restore promiscuous and all-multicast modes. */ 2882 vtnet_rx_filter(sc); 2883 /* Restore filtered MAC addresses. */ 2884 vtnet_rx_filter_mac(sc); 2885 } 2886 2887 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER) 2888 vtnet_rx_filter_vlan(sc); 2889 } 2890 2891 static int 2892 vtnet_init_rx_queues(struct vtnet_softc *sc) 2893 { 2894 device_t dev; 2895 struct vtnet_rxq *rxq; 2896 int i, clsize, error; 2897 2898 dev = sc->vtnet_dev; 2899 2900 /* 2901 * Use the new cluster size if one has been set (via an MTU 2902 * change). Otherwise, use the standard 2K clusters.
2903 * 2904 * BMV: It might make sense to use page sized clusters as 2905 * the default (depending on the features negotiated). 2906 */ 2907 if (sc->vtnet_rx_new_clsize != 0) { 2908 clsize = sc->vtnet_rx_new_clsize; 2909 sc->vtnet_rx_new_clsize = 0; 2910 } else 2911 clsize = MCLBYTES; 2912 2913 sc->vtnet_rx_clsize = clsize; 2914 sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize); 2915 2916 KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS || 2917 sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, 2918 ("%s: too many rx mbufs %d for %d segments", __func__, 2919 sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); 2920 2921 #ifdef DEV_NETMAP 2922 if (vtnet_netmap_init_rx_buffers(sc)) 2923 return 0; 2924 #endif /* DEV_NETMAP */ 2925 2926 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2927 rxq = &sc->vtnet_rxqs[i]; 2928 2929 /* Hold the lock to satisfy asserts. */ 2930 VTNET_RXQ_LOCK(rxq); 2931 error = vtnet_rxq_populate(rxq); 2932 VTNET_RXQ_UNLOCK(rxq); 2933 2934 if (error) { 2935 device_printf(dev, 2936 "cannot allocate mbufs for Rx queue %d\n", i); 2937 return (error); 2938 } 2939 } 2940 2941 return (0); 2942 } 2943 2944 static int 2945 vtnet_init_tx_queues(struct vtnet_softc *sc) 2946 { 2947 struct vtnet_txq *txq; 2948 int i; 2949 2950 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2951 txq = &sc->vtnet_txqs[i]; 2952 txq->vtntx_watchdog = 0; 2953 } 2954 2955 return (0); 2956 } 2957 2958 static int 2959 vtnet_init_rxtx_queues(struct vtnet_softc *sc) 2960 { 2961 int error; 2962 2963 error = vtnet_init_rx_queues(sc); 2964 if (error) 2965 return (error); 2966 2967 error = vtnet_init_tx_queues(sc); 2968 if (error) 2969 return (error); 2970 2971 return (0); 2972 } 2973 2974 static void 2975 vtnet_set_active_vq_pairs(struct vtnet_softc *sc) 2976 { 2977 device_t dev; 2978 int npairs; 2979 2980 dev = sc->vtnet_dev; 2981 2982 if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) { 2983 MPASS(sc->vtnet_max_vq_pairs == 1); 2984 sc->vtnet_act_vq_pairs = 1; 2985 return; 2986 } 2987 2988 /* BMV: Just use the maximum configured for now. */ 2989 npairs = sc->vtnet_max_vq_pairs; 2990 2991 if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { 2992 device_printf(dev, 2993 "cannot set active queue pairs to %d\n", npairs); 2994 npairs = 1; 2995 } 2996 2997 sc->vtnet_act_vq_pairs = npairs; 2998 } 2999 3000 static int 3001 vtnet_reinit(struct vtnet_softc *sc) 3002 { 3003 struct ifnet *ifp; 3004 int error; 3005 3006 ifp = sc->vtnet_ifp; 3007 3008 /* Use the current MAC address. 
*/ 3009 bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); 3010 vtnet_set_hwaddr(sc); 3011 3012 vtnet_set_active_vq_pairs(sc); 3013 3014 ifp->if_hwassist = 0; 3015 if (ifp->if_capenable & IFCAP_TXCSUM) 3016 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD; 3017 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 3018 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6; 3019 if (ifp->if_capenable & IFCAP_TSO4) 3020 ifp->if_hwassist |= CSUM_IP_TSO; 3021 if (ifp->if_capenable & IFCAP_TSO6) 3022 ifp->if_hwassist |= CSUM_IP6_TSO; 3023 3024 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) 3025 vtnet_init_rx_filters(sc); 3026 3027 error = vtnet_init_rxtx_queues(sc); 3028 if (error) 3029 return (error); 3030 3031 vtnet_enable_interrupts(sc); 3032 ifp->if_drv_flags |= IFF_DRV_RUNNING; 3033 3034 return (0); 3035 } 3036 3037 static void 3038 vtnet_init_locked(struct vtnet_softc *sc) 3039 { 3040 device_t dev; 3041 struct ifnet *ifp; 3042 3043 dev = sc->vtnet_dev; 3044 ifp = sc->vtnet_ifp; 3045 3046 VTNET_CORE_LOCK_ASSERT(sc); 3047 3048 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3049 return; 3050 3051 vtnet_stop(sc); 3052 3053 /* Reinitialize with the host. */ 3054 if (vtnet_virtio_reinit(sc) != 0) 3055 goto fail; 3056 3057 if (vtnet_reinit(sc) != 0) 3058 goto fail; 3059 3060 virtio_reinit_complete(dev); 3061 3062 vtnet_update_link_status(sc); 3063 callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); 3064 3065 return; 3066 3067 fail: 3068 vtnet_stop(sc); 3069 } 3070 3071 static void 3072 vtnet_init(void *xsc) 3073 { 3074 struct vtnet_softc *sc; 3075 3076 sc = xsc; 3077 3078 #ifdef DEV_NETMAP 3079 if (!NA(sc->vtnet_ifp)) { 3080 D("try to attach again"); 3081 vtnet_netmap_attach(sc); 3082 } 3083 #endif /* DEV_NETMAP */ 3084 3085 VTNET_CORE_LOCK(sc); 3086 vtnet_init_locked(sc); 3087 VTNET_CORE_UNLOCK(sc); 3088 } 3089 3090 static void 3091 vtnet_free_ctrl_vq(struct vtnet_softc *sc) 3092 { 3093 struct virtqueue *vq; 3094 3095 vq = sc->vtnet_ctrl_vq; 3096 3097 /* 3098 * The control virtqueue is only polled and therefore it should 3099 * already be empty. 3100 */ 3101 KASSERT(virtqueue_empty(vq), 3102 ("%s: ctrl vq %p not empty", __func__, vq)); 3103 } 3104 3105 static void 3106 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie, 3107 struct sglist *sg, int readable, int writable) 3108 { 3109 struct virtqueue *vq; 3110 3111 vq = sc->vtnet_ctrl_vq; 3112 3113 VTNET_CORE_LOCK_ASSERT(sc); 3114 KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ, 3115 ("%s: CTRL_VQ feature not negotiated", __func__)); 3116 3117 if (!virtqueue_empty(vq)) 3118 return; 3119 if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0) 3120 return; 3121 3122 /* 3123 * Poll for the response, but the command is likely already 3124 * done when we return from the notify. 
3125 */ 3126 virtqueue_notify(vq); 3127 virtqueue_poll(vq, NULL); 3128 } 3129 3130 static int 3131 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) 3132 { 3133 struct virtio_net_ctrl_hdr hdr __aligned(2); 3134 struct sglist_seg segs[3]; 3135 struct sglist sg; 3136 uint8_t ack; 3137 int error; 3138 3139 hdr.class = VIRTIO_NET_CTRL_MAC; 3140 hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; 3141 ack = VIRTIO_NET_ERR; 3142 3143 sglist_init(&sg, 3, segs); 3144 error = 0; 3145 error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); 3146 error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN); 3147 error |= sglist_append(&sg, &ack, sizeof(uint8_t)); 3148 KASSERT(error == 0 && sg.sg_nseg == 3, 3149 ("%s: error %d adding set MAC msg to sglist", __func__, error)); 3150 3151 vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); 3152 3153 return (ack == VIRTIO_NET_OK ? 0 : EIO); 3154 } 3155 3156 static int 3157 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs) 3158 { 3159 struct sglist_seg segs[3]; 3160 struct sglist sg; 3161 struct { 3162 struct virtio_net_ctrl_hdr hdr; 3163 uint8_t pad1; 3164 struct virtio_net_ctrl_mq mq; 3165 uint8_t pad2; 3166 uint8_t ack; 3167 } s __aligned(2); 3168 int error; 3169 3170 s.hdr.class = VIRTIO_NET_CTRL_MQ; 3171 s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; 3172 s.mq.virtqueue_pairs = npairs; 3173 s.ack = VIRTIO_NET_ERR; 3174 3175 sglist_init(&sg, 3, segs); 3176 error = 0; 3177 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3178 error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); 3179 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3180 KASSERT(error == 0 && sg.sg_nseg == 3, 3181 ("%s: error %d adding MQ message to sglist", __func__, error)); 3182 3183 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3184 3185 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3186 } 3187 3188 static int 3189 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on) 3190 { 3191 struct sglist_seg segs[3]; 3192 struct sglist sg; 3193 struct { 3194 struct virtio_net_ctrl_hdr hdr; 3195 uint8_t pad1; 3196 uint8_t onoff; 3197 uint8_t pad2; 3198 uint8_t ack; 3199 } s __aligned(2); 3200 int error; 3201 3202 KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, 3203 ("%s: CTRL_RX feature not negotiated", __func__)); 3204 3205 s.hdr.class = VIRTIO_NET_CTRL_RX; 3206 s.hdr.cmd = cmd; 3207 s.onoff = !!on; 3208 s.ack = VIRTIO_NET_ERR; 3209 3210 sglist_init(&sg, 3, segs); 3211 error = 0; 3212 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3213 error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); 3214 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3215 KASSERT(error == 0 && sg.sg_nseg == 3, 3216 ("%s: error %d adding Rx message to sglist", __func__, error)); 3217 3218 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3219 3220 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3221 } 3222 3223 static int 3224 vtnet_set_promisc(struct vtnet_softc *sc, int on) 3225 { 3226 3227 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on)); 3228 } 3229 3230 static int 3231 vtnet_set_allmulti(struct vtnet_softc *sc, int on) 3232 { 3233 3234 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); 3235 } 3236 3237 /* 3238 * The device defaults to promiscuous mode for backwards compatibility. 3239 * Turn it off at attach time if possible. 
3240 */ 3241 static void 3242 vtnet_attach_disable_promisc(struct vtnet_softc *sc) 3243 { 3244 struct ifnet *ifp; 3245 3246 ifp = sc->vtnet_ifp; 3247 3248 VTNET_CORE_LOCK(sc); 3249 if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) { 3250 ifp->if_flags |= IFF_PROMISC; 3251 } else if (vtnet_set_promisc(sc, 0) != 0) { 3252 ifp->if_flags |= IFF_PROMISC; 3253 device_printf(sc->vtnet_dev, 3254 "cannot disable default promiscuous mode\n"); 3255 } 3256 VTNET_CORE_UNLOCK(sc); 3257 } 3258 3259 static void 3260 vtnet_rx_filter(struct vtnet_softc *sc) 3261 { 3262 device_t dev; 3263 struct ifnet *ifp; 3264 3265 dev = sc->vtnet_dev; 3266 ifp = sc->vtnet_ifp; 3267 3268 VTNET_CORE_LOCK_ASSERT(sc); 3269 3270 if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) 3271 device_printf(dev, "cannot %s promiscuous mode\n", 3272 ifp->if_flags & IFF_PROMISC ? "enable" : "disable"); 3273 3274 if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) 3275 device_printf(dev, "cannot %s all-multicast mode\n", 3276 ifp->if_flags & IFF_ALLMULTI ? "enable" : "disable"); 3277 } 3278 3279 static void 3280 vtnet_rx_filter_mac(struct vtnet_softc *sc) 3281 { 3282 struct virtio_net_ctrl_hdr hdr __aligned(2); 3283 struct vtnet_mac_filter *filter; 3284 struct sglist_seg segs[4]; 3285 struct sglist sg; 3286 struct ifnet *ifp; 3287 struct ifaddr *ifa; 3288 struct ifmultiaddr *ifma; 3289 int ucnt, mcnt, promisc, allmulti, error; 3290 uint8_t ack; 3291 3292 ifp = sc->vtnet_ifp; 3293 filter = sc->vtnet_mac_filter; 3294 ucnt = 0; 3295 mcnt = 0; 3296 promisc = 0; 3297 allmulti = 0; 3298 3299 VTNET_CORE_LOCK_ASSERT(sc); 3300 KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, 3301 ("%s: CTRL_RX feature not negotiated", __func__)); 3302 3303 /* Unicast MAC addresses: */ 3304 if_addr_rlock(ifp); 3305 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 3306 if (ifa->ifa_addr->sa_family != AF_LINK) 3307 continue; 3308 else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), 3309 sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0) 3310 continue; 3311 else if (ucnt == VTNET_MAX_MAC_ENTRIES) { 3312 promisc = 1; 3313 break; 3314 } 3315 3316 bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), 3317 &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN); 3318 ucnt++; 3319 } 3320 if_addr_runlock(ifp); 3321 3322 if (promisc != 0) { 3323 filter->vmf_unicast.nentries = 0; 3324 if_printf(ifp, "more than %d MAC addresses assigned, " 3325 "falling back to promiscuous mode\n", 3326 VTNET_MAX_MAC_ENTRIES); 3327 } else 3328 filter->vmf_unicast.nentries = ucnt; 3329 3330 /* Multicast MAC addresses: */ 3331 if_maddr_rlock(ifp); 3332 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 3333 if (ifma->ifma_addr->sa_family != AF_LINK) 3334 continue; 3335 else if (mcnt == VTNET_MAX_MAC_ENTRIES) { 3336 allmulti = 1; 3337 break; 3338 } 3339 3340 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 3341 &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN); 3342 mcnt++; 3343 } 3344 if_maddr_runlock(ifp); 3345 3346 if (allmulti != 0) { 3347 filter->vmf_multicast.nentries = 0; 3348 if_printf(ifp, "more than %d multicast MAC addresses " 3349 "assigned, falling back to all-multicast mode\n", 3350 VTNET_MAX_MAC_ENTRIES); 3351 } else 3352 filter->vmf_multicast.nentries = mcnt; 3353 3354 if (promisc != 0 && allmulti != 0) 3355 goto out; 3356 3357 hdr.class = VIRTIO_NET_CTRL_MAC; 3358 hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; 3359 ack = VIRTIO_NET_ERR; 3360 3361 sglist_init(&sg, 4, segs); 3362 error = 0; 3363 error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); 3364 error 
|= sglist_append(&sg, &filter->vmf_unicast, 3365 sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN); 3366 error |= sglist_append(&sg, &filter->vmf_multicast, 3367 sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN); 3368 error |= sglist_append(&sg, &ack, sizeof(uint8_t)); 3369 KASSERT(error == 0 && sg.sg_nseg == 4, 3370 ("%s: error %d adding MAC filter msg to sglist", __func__, error)); 3371 3372 vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); 3373 3374 if (ack != VIRTIO_NET_OK) 3375 if_printf(ifp, "error setting host MAC filter table\n"); 3376 3377 out: 3378 if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0) 3379 if_printf(ifp, "cannot enable promiscuous mode\n"); 3380 if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0) 3381 if_printf(ifp, "cannot enable all-multicast mode\n"); 3382 } 3383 3384 static int 3385 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) 3386 { 3387 struct sglist_seg segs[3]; 3388 struct sglist sg; 3389 struct { 3390 struct virtio_net_ctrl_hdr hdr; 3391 uint8_t pad1; 3392 uint16_t tag; 3393 uint8_t pad2; 3394 uint8_t ack; 3395 } s __aligned(2); 3396 int error; 3397 3398 s.hdr.class = VIRTIO_NET_CTRL_VLAN; 3399 s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; 3400 s.tag = tag; 3401 s.ack = VIRTIO_NET_ERR; 3402 3403 sglist_init(&sg, 3, segs); 3404 error = 0; 3405 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3406 error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); 3407 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3408 KASSERT(error == 0 && sg.sg_nseg == 3, 3409 ("%s: error %d adding VLAN message to sglist", __func__, error)); 3410 3411 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3412 3413 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3414 } 3415 3416 static void 3417 vtnet_rx_filter_vlan(struct vtnet_softc *sc) 3418 { 3419 uint32_t w; 3420 uint16_t tag; 3421 int i, bit; 3422 3423 VTNET_CORE_LOCK_ASSERT(sc); 3424 KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER, 3425 ("%s: VLAN_FILTER feature not negotiated", __func__)); 3426 3427 /* Enable the filter for each configured VLAN. */ 3428 for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { 3429 w = sc->vtnet_vlan_filter[i]; 3430 3431 while ((bit = ffs(w) - 1) != -1) { 3432 w &= ~(1 << bit); 3433 tag = sizeof(w) * CHAR_BIT * i + bit; 3434 3435 if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) { 3436 device_printf(sc->vtnet_dev, 3437 "cannot enable VLAN %d filter\n", tag); 3438 } 3439 } 3440 } 3441 } 3442 3443 static void 3444 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) 3445 { 3446 struct ifnet *ifp; 3447 int idx, bit; 3448 3449 ifp = sc->vtnet_ifp; 3450 idx = (tag >> 5) & 0x7F; 3451 bit = tag & 0x1F; 3452 3453 if (tag == 0 || tag > 4095) 3454 return; 3455 3456 VTNET_CORE_LOCK(sc); 3457 3458 if (add) 3459 sc->vtnet_vlan_filter[idx] |= (1 << bit); 3460 else 3461 sc->vtnet_vlan_filter[idx] &= ~(1 << bit); 3462 3463 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER && 3464 vtnet_exec_vlan_filter(sc, add, tag) != 0) { 3465 device_printf(sc->vtnet_dev, 3466 "cannot %s VLAN %d %s the host filter table\n", 3467 add ? "add" : "remove", tag, add ? 
"to" : "from"); 3468 } 3469 3470 VTNET_CORE_UNLOCK(sc); 3471 } 3472 3473 static void 3474 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag) 3475 { 3476 3477 if (ifp->if_softc != arg) 3478 return; 3479 3480 vtnet_update_vlan_filter(arg, 1, tag); 3481 } 3482 3483 static void 3484 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag) 3485 { 3486 3487 if (ifp->if_softc != arg) 3488 return; 3489 3490 vtnet_update_vlan_filter(arg, 0, tag); 3491 } 3492 3493 static int 3494 vtnet_is_link_up(struct vtnet_softc *sc) 3495 { 3496 device_t dev; 3497 struct ifnet *ifp; 3498 uint16_t status; 3499 3500 dev = sc->vtnet_dev; 3501 ifp = sc->vtnet_ifp; 3502 3503 if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0) 3504 status = VIRTIO_NET_S_LINK_UP; 3505 else 3506 status = virtio_read_dev_config_2(dev, 3507 offsetof(struct virtio_net_config, status)); 3508 3509 return ((status & VIRTIO_NET_S_LINK_UP) != 0); 3510 } 3511 3512 static void 3513 vtnet_update_link_status(struct vtnet_softc *sc) 3514 { 3515 struct ifnet *ifp; 3516 int link; 3517 3518 ifp = sc->vtnet_ifp; 3519 3520 VTNET_CORE_LOCK_ASSERT(sc); 3521 link = vtnet_is_link_up(sc); 3522 3523 /* Notify if the link status has changed. */ 3524 if (link != 0 && sc->vtnet_link_active == 0) { 3525 sc->vtnet_link_active = 1; 3526 if_link_state_change(ifp, LINK_STATE_UP); 3527 } else if (link == 0 && sc->vtnet_link_active != 0) { 3528 sc->vtnet_link_active = 0; 3529 if_link_state_change(ifp, LINK_STATE_DOWN); 3530 } 3531 } 3532 3533 static int 3534 vtnet_ifmedia_upd(struct ifnet *ifp) 3535 { 3536 struct vtnet_softc *sc; 3537 struct ifmedia *ifm; 3538 3539 sc = ifp->if_softc; 3540 ifm = &sc->vtnet_media; 3541 3542 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) 3543 return (EINVAL); 3544 3545 return (0); 3546 } 3547 3548 static void 3549 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 3550 { 3551 struct vtnet_softc *sc; 3552 3553 sc = ifp->if_softc; 3554 3555 ifmr->ifm_status = IFM_AVALID; 3556 ifmr->ifm_active = IFM_ETHER; 3557 3558 VTNET_CORE_LOCK(sc); 3559 if (vtnet_is_link_up(sc) != 0) { 3560 ifmr->ifm_status |= IFM_ACTIVE; 3561 ifmr->ifm_active |= VTNET_MEDIATYPE; 3562 } else 3563 ifmr->ifm_active |= IFM_NONE; 3564 VTNET_CORE_UNLOCK(sc); 3565 } 3566 3567 static void 3568 vtnet_set_hwaddr(struct vtnet_softc *sc) 3569 { 3570 device_t dev; 3571 int i; 3572 3573 dev = sc->vtnet_dev; 3574 3575 if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { 3576 if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0) 3577 device_printf(dev, "unable to set MAC address\n"); 3578 } else if (sc->vtnet_flags & VTNET_FLAG_MAC) { 3579 for (i = 0; i < ETHER_ADDR_LEN; i++) { 3580 virtio_write_dev_config_1(dev, 3581 offsetof(struct virtio_net_config, mac) + i, 3582 sc->vtnet_hwaddr[i]); 3583 } 3584 } 3585 } 3586 3587 static void 3588 vtnet_get_hwaddr(struct vtnet_softc *sc) 3589 { 3590 device_t dev; 3591 int i; 3592 3593 dev = sc->vtnet_dev; 3594 3595 if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) { 3596 /* 3597 * Generate a random locally administered unicast address. 3598 * 3599 * It would be nice to generate the same MAC address across 3600 * reboots, but it seems all the hosts currently available 3601 * support the MAC feature, so this isn't too important. 
3602 */ 3603 sc->vtnet_hwaddr[0] = 0xB2; 3604 arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); 3605 vtnet_set_hwaddr(sc); 3606 return; 3607 } 3608 3609 for (i = 0; i < ETHER_ADDR_LEN; i++) { 3610 sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev, 3611 offsetof(struct virtio_net_config, mac) + i); 3612 } 3613 } 3614 3615 static void 3616 vtnet_vlan_tag_remove(struct mbuf *m) 3617 { 3618 struct ether_vlan_header *evh; 3619 3620 evh = mtod(m, struct ether_vlan_header *); 3621 m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); 3622 m->m_flags |= M_VLANTAG; 3623 3624 /* Strip the 802.1Q header. */ 3625 bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, 3626 ETHER_HDR_LEN - ETHER_TYPE_LEN); 3627 m_adj(m, ETHER_VLAN_ENCAP_LEN); 3628 } 3629 3630 static void 3631 vtnet_set_rx_process_limit(struct vtnet_softc *sc) 3632 { 3633 int limit; 3634 3635 limit = vtnet_tunable_int(sc, "rx_process_limit", 3636 vtnet_rx_process_limit); 3637 if (limit < 0) 3638 limit = INT_MAX; 3639 sc->vtnet_rx_process_limit = limit; 3640 } 3641 3642 static void 3643 vtnet_set_tx_intr_threshold(struct vtnet_softc *sc) 3644 { 3645 device_t dev; 3646 int size, thresh; 3647 3648 dev = sc->vtnet_dev; 3649 size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq); 3650 3651 /* 3652 * The Tx interrupt is disabled until the queue free count falls 3653 * below our threshold. Completed frames are drained from the Tx 3654 * virtqueue before transmitting new frames and in the watchdog 3655 * callout, so the frequency of Tx interrupts is greatly reduced, 3656 * at the cost of not freeing mbufs as quickly as they otherwise 3657 * would be. 3658 * 3659 * N.B. We assume all the Tx queues are the same size. 3660 */ 3661 thresh = size / 4; 3662 3663 /* 3664 * Without indirect descriptors, leave enough room for the most 3665 * segments we handle. 
3666 */ 3667 if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 && 3668 thresh < sc->vtnet_tx_nsegs) 3669 thresh = sc->vtnet_tx_nsegs; 3670 3671 sc->vtnet_tx_intr_thresh = thresh; 3672 } 3673 3674 static void 3675 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, 3676 struct sysctl_oid_list *child, struct vtnet_rxq *rxq) 3677 { 3678 struct sysctl_oid *node; 3679 struct sysctl_oid_list *list; 3680 struct vtnet_rxq_stats *stats; 3681 char namebuf[16]; 3682 3683 snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id); 3684 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 3685 CTLFLAG_RD, NULL, "Receive Queue"); 3686 list = SYSCTL_CHILDREN(node); 3687 3688 stats = &rxq->vtnrx_stats; 3689 3690 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, 3691 &stats->vrxs_ipackets, "Receive packets"); 3692 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, 3693 &stats->vrxs_ibytes, "Receive bytes"); 3694 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, 3695 &stats->vrxs_iqdrops, "Receive drops"); 3696 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, 3697 &stats->vrxs_ierrors, "Receive errors"); 3698 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, 3699 &stats->vrxs_csum, "Receive checksum offloaded"); 3700 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, 3701 &stats->vrxs_csum_failed, "Receive checksum offload failed"); 3702 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, 3703 &stats->vrxs_rescheduled, 3704 "Receive interrupt handler rescheduled"); 3705 } 3706 3707 static void 3708 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx, 3709 struct sysctl_oid_list *child, struct vtnet_txq *txq) 3710 { 3711 struct sysctl_oid *node; 3712 struct sysctl_oid_list *list; 3713 struct vtnet_txq_stats *stats; 3714 char namebuf[16]; 3715 3716 snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id); 3717 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 3718 CTLFLAG_RD, NULL, "Transmit Queue"); 3719 list = SYSCTL_CHILDREN(node); 3720 3721 stats = &txq->vtntx_stats; 3722 3723 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, 3724 &stats->vtxs_opackets, "Transmit packets"); 3725 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, 3726 &stats->vtxs_obytes, "Transmit bytes"); 3727 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, 3728 &stats->vtxs_omcasts, "Transmit multicasts"); 3729 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, 3730 &stats->vtxs_csum, "Transmit checksum offloaded"); 3731 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, 3732 &stats->vtxs_tso, "Transmit segmentation offloaded"); 3733 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, 3734 &stats->vtxs_rescheduled, 3735 "Transmit interrupt handler rescheduled"); 3736 } 3737 3738 static void 3739 vtnet_setup_queue_sysctl(struct vtnet_softc *sc) 3740 { 3741 device_t dev; 3742 struct sysctl_ctx_list *ctx; 3743 struct sysctl_oid *tree; 3744 struct sysctl_oid_list *child; 3745 int i; 3746 3747 dev = sc->vtnet_dev; 3748 ctx = device_get_sysctl_ctx(dev); 3749 tree = device_get_sysctl_tree(dev); 3750 child = SYSCTL_CHILDREN(tree); 3751 3752 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 3753 vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]); 3754 vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); 3755 } 3756 } 3757 3758 static void 3759 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, 3760 struct sysctl_oid_list *child, struct vtnet_softc *sc) 3761 { 3762 struct vtnet_statistics *stats; 
3763 struct vtnet_rxq_stats rxaccum; 3764 struct vtnet_txq_stats txaccum; 3765 3766 vtnet_accum_stats(sc, &rxaccum, &txaccum); 3767 3768 stats = &sc->vtnet_stats; 3769 stats->rx_csum_offloaded = rxaccum.vrxs_csum; 3770 stats->rx_csum_failed = rxaccum.vrxs_csum_failed; 3771 stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled; 3772 stats->tx_csum_offloaded = txaccum.vtxs_csum; 3773 stats->tx_tso_offloaded = txaccum.vtxs_tso; 3774 stats->tx_task_rescheduled = txaccum.vtxs_rescheduled; 3775 3776 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed", 3777 CTLFLAG_RD, &stats->mbuf_alloc_failed, 3778 "Mbuf cluster allocation failures"); 3779 3780 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large", 3781 CTLFLAG_RD, &stats->rx_frame_too_large, 3782 "Received frame larger than the mbuf chain"); 3783 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed", 3784 CTLFLAG_RD, &stats->rx_enq_replacement_failed, 3785 "Enqueuing the replacement receive mbuf failed"); 3786 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed", 3787 CTLFLAG_RD, &stats->rx_mergeable_failed, 3788 "Mergeable buffers receive failures"); 3789 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", 3790 CTLFLAG_RD, &stats->rx_csum_bad_ethtype, 3791 "Received checksum offloaded buffer with unsupported " 3792 "Ethernet type"); 3793 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", 3794 CTLFLAG_RD, &stats->rx_csum_bad_ipproto, 3795 "Received checksum offloaded buffer with incorrect IP protocol"); 3796 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset", 3797 CTLFLAG_RD, &stats->rx_csum_bad_offset, 3798 "Received checksum offloaded buffer with incorrect offset"); 3799 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto", 3800 CTLFLAG_RD, &stats->rx_csum_bad_proto, 3801 "Received checksum offloaded buffer with incorrect protocol"); 3802 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed", 3803 CTLFLAG_RD, &stats->rx_csum_failed, 3804 "Received buffer checksum offload failed"); 3805 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded", 3806 CTLFLAG_RD, &stats->rx_csum_offloaded, 3807 "Received buffer checksum offload succeeded"); 3808 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled", 3809 CTLFLAG_RD, &stats->rx_task_rescheduled, 3810 "Times the receive interrupt task rescheduled itself"); 3811 3812 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype", 3813 CTLFLAG_RD, &stats->tx_csum_bad_ethtype, 3814 "Aborted transmit of checksum offloaded buffer with unknown " 3815 "Ethernet type"); 3816 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype", 3817 CTLFLAG_RD, &stats->tx_tso_bad_ethtype, 3818 "Aborted transmit of TSO buffer with unknown Ethernet type"); 3819 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp", 3820 CTLFLAG_RD, &stats->tx_tso_not_tcp, 3821 "Aborted transmit of TSO buffer with non TCP protocol"); 3822 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged", 3823 CTLFLAG_RD, &stats->tx_defragged, 3824 "Transmit mbufs defragged"); 3825 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed", 3826 CTLFLAG_RD, &stats->tx_defrag_failed, 3827 "Aborted transmit of buffer because defrag failed"); 3828 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded", 3829 CTLFLAG_RD, &stats->tx_csum_offloaded, 3830 "Offloaded checksum of transmitted buffer"); 3831 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded", 3832 CTLFLAG_RD, &stats->tx_tso_offloaded, 3833 "Segmentation offload of transmitted buffer"); 3834 
SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled", 3835 CTLFLAG_RD, &stats->tx_task_rescheduled, 3836 "Times the transmit interrupt task rescheduled itself"); 3837 } 3838 3839 static void 3840 vtnet_setup_sysctl(struct vtnet_softc *sc) 3841 { 3842 device_t dev; 3843 struct sysctl_ctx_list *ctx; 3844 struct sysctl_oid *tree; 3845 struct sysctl_oid_list *child; 3846 3847 dev = sc->vtnet_dev; 3848 ctx = device_get_sysctl_ctx(dev); 3849 tree = device_get_sysctl_tree(dev); 3850 child = SYSCTL_CHILDREN(tree); 3851 3852 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", 3853 CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, 3854 "Maximum number of supported virtqueue pairs"); 3855 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", 3856 CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0, 3857 "Number of active virtqueue pairs"); 3858 3859 vtnet_setup_stat_sysctl(ctx, child, sc); 3860 } 3861 3862 static int 3863 vtnet_rxq_enable_intr(struct vtnet_rxq *rxq) 3864 { 3865 3866 return (virtqueue_enable_intr(rxq->vtnrx_vq)); 3867 } 3868 3869 static void 3870 vtnet_rxq_disable_intr(struct vtnet_rxq *rxq) 3871 { 3872 3873 virtqueue_disable_intr(rxq->vtnrx_vq); 3874 } 3875 3876 static int 3877 vtnet_txq_enable_intr(struct vtnet_txq *txq) 3878 { 3879 struct virtqueue *vq; 3880 3881 vq = txq->vtntx_vq; 3882 3883 if (vtnet_txq_below_threshold(txq) != 0) 3884 return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG)); 3885 3886 /* 3887 * The free count is above our threshold. Keep the Tx interrupt 3888 * disabled until the queue is fuller. 3889 */ 3890 return (0); 3891 } 3892 3893 static void 3894 vtnet_txq_disable_intr(struct vtnet_txq *txq) 3895 { 3896 3897 virtqueue_disable_intr(txq->vtntx_vq); 3898 } 3899 3900 static void 3901 vtnet_enable_rx_interrupts(struct vtnet_softc *sc) 3902 { 3903 int i; 3904 3905 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 3906 vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]); 3907 } 3908 3909 static void 3910 vtnet_enable_tx_interrupts(struct vtnet_softc *sc) 3911 { 3912 int i; 3913 3914 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 3915 vtnet_txq_enable_intr(&sc->vtnet_txqs[i]); 3916 } 3917 3918 static void 3919 vtnet_enable_interrupts(struct vtnet_softc *sc) 3920 { 3921 3922 vtnet_enable_rx_interrupts(sc); 3923 vtnet_enable_tx_interrupts(sc); 3924 } 3925 3926 static void 3927 vtnet_disable_rx_interrupts(struct vtnet_softc *sc) 3928 { 3929 int i; 3930 3931 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 3932 vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); 3933 } 3934 3935 static void 3936 vtnet_disable_tx_interrupts(struct vtnet_softc *sc) 3937 { 3938 int i; 3939 3940 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 3941 vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); 3942 } 3943 3944 static void 3945 vtnet_disable_interrupts(struct vtnet_softc *sc) 3946 { 3947 3948 vtnet_disable_rx_interrupts(sc); 3949 vtnet_disable_tx_interrupts(sc); 3950 } 3951 3952 static int 3953 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) 3954 { 3955 char path[64]; 3956 3957 snprintf(path, sizeof(path), 3958 "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob); 3959 TUNABLE_INT_FETCH(path, &def); 3960 3961 return (def); 3962 } 3963
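/*
 * Usage note: the per-device knobs read by vtnet_tunable_int() are fetched
 * from the kernel environment with TUNABLE_INT_FETCH(), so they are normally
 * set as loader tunables before the driver attaches, e.g. in loader.conf
 * (the unit number and value here are only examples):
 *
 *   hw.vtnet.0.rx_process_limit="256"
 *
 * The knob name matches the string passed by the caller, such as
 * "rx_process_limit" in vtnet_set_rx_process_limit(). If the tunable is not
 * set, the compiled-in default passed in "def" is returned unchanged.
 */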