1 /*- 2 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org> 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice unmodified, this list of conditions, and the following 10 * disclaimer. 11 * 2. Redistributions in binary form must reproduce the above copyright 12 * notice, this list of conditions and the following disclaimer in the 13 * documentation and/or other materials provided with the distribution. 14 * 15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 16 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 17 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 18 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 19 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 20 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 21 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 22 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 23 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 24 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 25 */ 26 27 /* Driver for VirtIO network devices. */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/eventhandler.h> 34 #include <sys/systm.h> 35 #include <sys/kernel.h> 36 #include <sys/sockio.h> 37 #include <sys/mbuf.h> 38 #include <sys/malloc.h> 39 #include <sys/module.h> 40 #include <sys/socket.h> 41 #include <sys/sysctl.h> 42 #include <sys/random.h> 43 #include <sys/sglist.h> 44 #include <sys/lock.h> 45 #include <sys/mutex.h> 46 #include <sys/taskqueue.h> 47 #include <sys/smp.h> 48 #include <machine/smp.h> 49 50 #include <vm/uma.h> 51 52 #include <net/ethernet.h> 53 #include <net/if.h> 54 #include <net/if_var.h> 55 #include <net/if_arp.h> 56 #include <net/if_dl.h> 57 #include <net/if_types.h> 58 #include <net/if_media.h> 59 #include <net/if_vlan_var.h> 60 61 #include <net/bpf.h> 62 63 #include <netinet/in_systm.h> 64 #include <netinet/in.h> 65 #include <netinet/ip.h> 66 #include <netinet/ip6.h> 67 #include <netinet6/ip6_var.h> 68 #include <netinet/udp.h> 69 #include <netinet/tcp.h> 70 #include <netinet/sctp.h> 71 72 #include <machine/bus.h> 73 #include <machine/resource.h> 74 #include <sys/bus.h> 75 #include <sys/rman.h> 76 77 #include <dev/virtio/virtio.h> 78 #include <dev/virtio/virtqueue.h> 79 #include <dev/virtio/network/virtio_net.h> 80 #include <dev/virtio/network/if_vtnetvar.h> 81 82 #include "virtio_if.h" 83 84 #include "opt_inet.h" 85 #include "opt_inet6.h" 86 87 static int vtnet_modevent(module_t, int, void *); 88 89 static int vtnet_probe(device_t); 90 static int vtnet_attach(device_t); 91 static int vtnet_detach(device_t); 92 static int vtnet_suspend(device_t); 93 static int vtnet_resume(device_t); 94 static int vtnet_shutdown(device_t); 95 static int vtnet_attach_completed(device_t); 96 static int vtnet_config_change(device_t); 97 98 static void vtnet_negotiate_features(struct vtnet_softc *); 99 static void vtnet_setup_features(struct vtnet_softc *); 100 static int vtnet_init_rxq(struct vtnet_softc *, int); 101 static int vtnet_init_txq(struct vtnet_softc *, int); 102 static int vtnet_alloc_rxtx_queues(struct vtnet_softc *); 103 static void 
vtnet_free_rxtx_queues(struct vtnet_softc *); 104 static int vtnet_alloc_rx_filters(struct vtnet_softc *); 105 static void vtnet_free_rx_filters(struct vtnet_softc *); 106 static int vtnet_alloc_virtqueues(struct vtnet_softc *); 107 static int vtnet_setup_interface(struct vtnet_softc *); 108 static int vtnet_change_mtu(struct vtnet_softc *, int); 109 static int vtnet_ioctl(struct ifnet *, u_long, caddr_t); 110 111 static int vtnet_rxq_populate(struct vtnet_rxq *); 112 static void vtnet_rxq_free_mbufs(struct vtnet_rxq *); 113 static struct mbuf * 114 vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **); 115 static int vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *, 116 struct mbuf *, int); 117 static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int); 118 static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *); 119 static int vtnet_rxq_new_buf(struct vtnet_rxq *); 120 static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *, 121 struct virtio_net_hdr *); 122 static void vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int); 123 static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *); 124 static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int); 125 static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *, 126 struct virtio_net_hdr *); 127 static int vtnet_rxq_eof(struct vtnet_rxq *); 128 static void vtnet_rx_vq_intr(void *); 129 static void vtnet_rxq_tq_intr(void *, int); 130 131 static int vtnet_txq_below_threshold(struct vtnet_txq *); 132 static int vtnet_txq_notify(struct vtnet_txq *); 133 static void vtnet_txq_free_mbufs(struct vtnet_txq *); 134 static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *, 135 int *, int *, int *); 136 static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int, 137 int, struct virtio_net_hdr *); 138 static struct mbuf * 139 vtnet_txq_offload(struct vtnet_txq *, struct mbuf *, 140 struct virtio_net_hdr *); 141 static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **, 142 struct vtnet_tx_header *); 143 static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **); 144 #ifdef VTNET_LEGACY_TX 145 static void vtnet_start_locked(struct vtnet_txq *, struct ifnet *); 146 static void vtnet_start(struct ifnet *); 147 #else 148 static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *); 149 static int vtnet_txq_mq_start(struct ifnet *, struct mbuf *); 150 static void vtnet_txq_tq_deferred(void *, int); 151 #endif 152 static void vtnet_txq_start(struct vtnet_txq *); 153 static void vtnet_txq_tq_intr(void *, int); 154 static int vtnet_txq_eof(struct vtnet_txq *); 155 static void vtnet_tx_vq_intr(void *); 156 static void vtnet_tx_start_all(struct vtnet_softc *); 157 158 #ifndef VTNET_LEGACY_TX 159 static void vtnet_qflush(struct ifnet *); 160 #endif 161 162 static int vtnet_watchdog(struct vtnet_txq *); 163 static void vtnet_rxq_accum_stats(struct vtnet_rxq *, 164 struct vtnet_rxq_stats *); 165 static void vtnet_txq_accum_stats(struct vtnet_txq *, 166 struct vtnet_txq_stats *); 167 static void vtnet_accumulate_stats(struct vtnet_softc *); 168 static void vtnet_tick(void *); 169 170 static void vtnet_start_taskqueues(struct vtnet_softc *); 171 static void vtnet_free_taskqueues(struct vtnet_softc *); 172 static void vtnet_drain_taskqueues(struct vtnet_softc *); 173 174 static void vtnet_drain_rxtx_queues(struct vtnet_softc *); 175 static void vtnet_stop_rendezvous(struct vtnet_softc *); 176 static void vtnet_stop(struct vtnet_softc *); 177 static 
int vtnet_virtio_reinit(struct vtnet_softc *); 178 static void vtnet_init_rx_filters(struct vtnet_softc *); 179 static int vtnet_init_rx_queues(struct vtnet_softc *); 180 static int vtnet_init_tx_queues(struct vtnet_softc *); 181 static int vtnet_init_rxtx_queues(struct vtnet_softc *); 182 static void vtnet_set_active_vq_pairs(struct vtnet_softc *); 183 static int vtnet_reinit(struct vtnet_softc *); 184 static void vtnet_init_locked(struct vtnet_softc *); 185 static void vtnet_init(void *); 186 187 static void vtnet_free_ctrl_vq(struct vtnet_softc *); 188 static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *, 189 struct sglist *, int, int); 190 static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *); 191 static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t); 192 static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, int, int); 193 static int vtnet_set_promisc(struct vtnet_softc *, int); 194 static int vtnet_set_allmulti(struct vtnet_softc *, int); 195 static void vtnet_attach_disable_promisc(struct vtnet_softc *); 196 static void vtnet_rx_filter(struct vtnet_softc *); 197 static void vtnet_rx_filter_mac(struct vtnet_softc *); 198 static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t); 199 static void vtnet_rx_filter_vlan(struct vtnet_softc *); 200 static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t); 201 static void vtnet_register_vlan(void *, struct ifnet *, uint16_t); 202 static void vtnet_unregister_vlan(void *, struct ifnet *, uint16_t); 203 204 static int vtnet_is_link_up(struct vtnet_softc *); 205 static void vtnet_update_link_status(struct vtnet_softc *); 206 static int vtnet_ifmedia_upd(struct ifnet *); 207 static void vtnet_ifmedia_sts(struct ifnet *, struct ifmediareq *); 208 static void vtnet_get_hwaddr(struct vtnet_softc *); 209 static void vtnet_set_hwaddr(struct vtnet_softc *); 210 static void vtnet_vlan_tag_remove(struct mbuf *); 211 static void vtnet_set_rx_process_limit(struct vtnet_softc *); 212 static void vtnet_set_tx_intr_threshold(struct vtnet_softc *); 213 214 static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *, 215 struct sysctl_oid_list *, struct vtnet_rxq *); 216 static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *, 217 struct sysctl_oid_list *, struct vtnet_txq *); 218 static void vtnet_setup_queue_sysctl(struct vtnet_softc *); 219 static void vtnet_setup_sysctl(struct vtnet_softc *); 220 221 static int vtnet_rxq_enable_intr(struct vtnet_rxq *); 222 static void vtnet_rxq_disable_intr(struct vtnet_rxq *); 223 static int vtnet_txq_enable_intr(struct vtnet_txq *); 224 static void vtnet_txq_disable_intr(struct vtnet_txq *); 225 static void vtnet_enable_rx_interrupts(struct vtnet_softc *); 226 static void vtnet_enable_tx_interrupts(struct vtnet_softc *); 227 static void vtnet_enable_interrupts(struct vtnet_softc *); 228 static void vtnet_disable_rx_interrupts(struct vtnet_softc *); 229 static void vtnet_disable_tx_interrupts(struct vtnet_softc *); 230 static void vtnet_disable_interrupts(struct vtnet_softc *); 231 232 static int vtnet_tunable_int(struct vtnet_softc *, const char *, int); 233 234 /* Tunables. 
*/ 235 static int vtnet_csum_disable = 0; 236 TUNABLE_INT("hw.vtnet.csum_disable", &vtnet_csum_disable); 237 static int vtnet_tso_disable = 0; 238 TUNABLE_INT("hw.vtnet.tso_disable", &vtnet_tso_disable); 239 static int vtnet_lro_disable = 0; 240 TUNABLE_INT("hw.vtnet.lro_disable", &vtnet_lro_disable); 241 static int vtnet_mq_disable = 0; 242 TUNABLE_INT("hw.vtnet.mq_disable", &vtnet_mq_disable); 243 static int vtnet_mq_max_pairs = 0; 244 TUNABLE_INT("hw.vtnet.mq_max_pairs", &vtnet_mq_max_pairs); 245 static int vtnet_rx_process_limit = 512; 246 TUNABLE_INT("hw.vtnet.rx_process_limit", &vtnet_rx_process_limit); 247 248 static uma_zone_t vtnet_tx_header_zone; 249 250 static struct virtio_feature_desc vtnet_feature_desc[] = { 251 { VIRTIO_NET_F_CSUM, "TxChecksum" }, 252 { VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" }, 253 { VIRTIO_NET_F_MAC, "MacAddress" }, 254 { VIRTIO_NET_F_GSO, "TxAllGSO" }, 255 { VIRTIO_NET_F_GUEST_TSO4, "RxTSOv4" }, 256 { VIRTIO_NET_F_GUEST_TSO6, "RxTSOv6" }, 257 { VIRTIO_NET_F_GUEST_ECN, "RxECN" }, 258 { VIRTIO_NET_F_GUEST_UFO, "RxUFO" }, 259 { VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" }, 260 { VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" }, 261 { VIRTIO_NET_F_HOST_ECN, "TxTSOECN" }, 262 { VIRTIO_NET_F_HOST_UFO, "TxUFO" }, 263 { VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" }, 264 { VIRTIO_NET_F_STATUS, "Status" }, 265 { VIRTIO_NET_F_CTRL_VQ, "ControlVq" }, 266 { VIRTIO_NET_F_CTRL_RX, "RxMode" }, 267 { VIRTIO_NET_F_CTRL_VLAN, "VLanFilter" }, 268 { VIRTIO_NET_F_CTRL_RX_EXTRA, "RxModeExtra" }, 269 { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, 270 { VIRTIO_NET_F_MQ, "Multiqueue" }, 271 { VIRTIO_NET_F_CTRL_MAC_ADDR, "SetMacAddress" }, 272 273 { 0, NULL } 274 }; 275 276 static device_method_t vtnet_methods[] = { 277 /* Device methods. */ 278 DEVMETHOD(device_probe, vtnet_probe), 279 DEVMETHOD(device_attach, vtnet_attach), 280 DEVMETHOD(device_detach, vtnet_detach), 281 DEVMETHOD(device_suspend, vtnet_suspend), 282 DEVMETHOD(device_resume, vtnet_resume), 283 DEVMETHOD(device_shutdown, vtnet_shutdown), 284 285 /* VirtIO methods. 
*/ 286 DEVMETHOD(virtio_attach_completed, vtnet_attach_completed), 287 DEVMETHOD(virtio_config_change, vtnet_config_change), 288 289 DEVMETHOD_END 290 }; 291 292 static driver_t vtnet_driver = { 293 "vtnet", 294 vtnet_methods, 295 sizeof(struct vtnet_softc) 296 }; 297 static devclass_t vtnet_devclass; 298 299 DRIVER_MODULE(vtnet, virtio_pci, vtnet_driver, vtnet_devclass, 300 vtnet_modevent, 0); 301 MODULE_VERSION(vtnet, 1); 302 MODULE_DEPEND(vtnet, virtio, 1, 1, 1); 303 304 static int 305 vtnet_modevent(module_t mod, int type, void *unused) 306 { 307 int error; 308 309 error = 0; 310 311 switch (type) { 312 case MOD_LOAD: 313 vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr", 314 sizeof(struct vtnet_tx_header), 315 NULL, NULL, NULL, NULL, 0, 0); 316 break; 317 case MOD_QUIESCE: 318 case MOD_UNLOAD: 319 if (uma_zone_get_cur(vtnet_tx_header_zone) > 0) 320 error = EBUSY; 321 else if (type == MOD_UNLOAD) { 322 uma_zdestroy(vtnet_tx_header_zone); 323 vtnet_tx_header_zone = NULL; 324 } 325 break; 326 case MOD_SHUTDOWN: 327 break; 328 default: 329 error = EOPNOTSUPP; 330 break; 331 } 332 333 return (error); 334 } 335 336 static int 337 vtnet_probe(device_t dev) 338 { 339 340 if (virtio_get_device_type(dev) != VIRTIO_ID_NETWORK) 341 return (ENXIO); 342 343 device_set_desc(dev, "VirtIO Networking Adapter"); 344 345 return (BUS_PROBE_DEFAULT); 346 } 347 348 static int 349 vtnet_attach(device_t dev) 350 { 351 struct vtnet_softc *sc; 352 int error; 353 354 sc = device_get_softc(dev); 355 sc->vtnet_dev = dev; 356 357 /* Register our feature descriptions. */ 358 virtio_set_feature_desc(dev, vtnet_feature_desc); 359 360 VTNET_CORE_LOCK_INIT(sc); 361 callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0); 362 363 vtnet_setup_sysctl(sc); 364 vtnet_setup_features(sc); 365 366 error = vtnet_alloc_rx_filters(sc); 367 if (error) { 368 device_printf(dev, "cannot allocate Rx filters\n"); 369 goto fail; 370 } 371 372 error = vtnet_alloc_rxtx_queues(sc); 373 if (error) { 374 device_printf(dev, "cannot allocate queues\n"); 375 goto fail; 376 } 377 378 error = vtnet_alloc_virtqueues(sc); 379 if (error) { 380 device_printf(dev, "cannot allocate virtqueues\n"); 381 goto fail; 382 } 383 384 error = vtnet_setup_interface(sc); 385 if (error) { 386 device_printf(dev, "cannot setup interface\n"); 387 goto fail; 388 } 389 390 error = virtio_setup_intr(dev, INTR_TYPE_NET); 391 if (error) { 392 device_printf(dev, "cannot setup virtqueue interrupts\n"); 393 /* BMV: This will crash if during boot! 
*/ 394 ether_ifdetach(sc->vtnet_ifp); 395 goto fail; 396 } 397 398 vtnet_start_taskqueues(sc); 399 400 fail: 401 if (error) 402 vtnet_detach(dev); 403 404 return (error); 405 } 406 407 static int 408 vtnet_detach(device_t dev) 409 { 410 struct vtnet_softc *sc; 411 struct ifnet *ifp; 412 413 sc = device_get_softc(dev); 414 ifp = sc->vtnet_ifp; 415 416 if (device_is_attached(dev)) { 417 VTNET_CORE_LOCK(sc); 418 vtnet_stop(sc); 419 VTNET_CORE_UNLOCK(sc); 420 421 callout_drain(&sc->vtnet_tick_ch); 422 vtnet_drain_taskqueues(sc); 423 424 ether_ifdetach(ifp); 425 } 426 427 vtnet_free_taskqueues(sc); 428 429 if (sc->vtnet_vlan_attach != NULL) { 430 EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach); 431 sc->vtnet_vlan_attach = NULL; 432 } 433 if (sc->vtnet_vlan_detach != NULL) { 434 EVENTHANDLER_DEREGISTER(vlan_unconfg, sc->vtnet_vlan_detach); 435 sc->vtnet_vlan_detach = NULL; 436 } 437 438 ifmedia_removeall(&sc->vtnet_media); 439 440 if (ifp != NULL) { 441 if_free(ifp); 442 sc->vtnet_ifp = NULL; 443 } 444 445 vtnet_free_rxtx_queues(sc); 446 vtnet_free_rx_filters(sc); 447 448 if (sc->vtnet_ctrl_vq != NULL) 449 vtnet_free_ctrl_vq(sc); 450 451 VTNET_CORE_LOCK_DESTROY(sc); 452 453 return (0); 454 } 455 456 static int 457 vtnet_suspend(device_t dev) 458 { 459 struct vtnet_softc *sc; 460 461 sc = device_get_softc(dev); 462 463 VTNET_CORE_LOCK(sc); 464 vtnet_stop(sc); 465 sc->vtnet_flags |= VTNET_FLAG_SUSPENDED; 466 VTNET_CORE_UNLOCK(sc); 467 468 return (0); 469 } 470 471 static int 472 vtnet_resume(device_t dev) 473 { 474 struct vtnet_softc *sc; 475 struct ifnet *ifp; 476 477 sc = device_get_softc(dev); 478 ifp = sc->vtnet_ifp; 479 480 VTNET_CORE_LOCK(sc); 481 if (ifp->if_flags & IFF_UP) 482 vtnet_init_locked(sc); 483 sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED; 484 VTNET_CORE_UNLOCK(sc); 485 486 return (0); 487 } 488 489 static int 490 vtnet_shutdown(device_t dev) 491 { 492 493 /* 494 * Suspend already does all of what we need to 495 * do here; we just never expect to be resumed. 496 */ 497 return (vtnet_suspend(dev)); 498 } 499 500 static int 501 vtnet_attach_completed(device_t dev) 502 { 503 504 vtnet_attach_disable_promisc(device_get_softc(dev)); 505 506 return (0); 507 } 508 509 static int 510 vtnet_config_change(device_t dev) 511 { 512 struct vtnet_softc *sc; 513 514 sc = device_get_softc(dev); 515 516 VTNET_CORE_LOCK(sc); 517 vtnet_update_link_status(sc); 518 if (sc->vtnet_link_active != 0) 519 vtnet_tx_start_all(sc); 520 VTNET_CORE_UNLOCK(sc); 521 522 return (0); 523 } 524 525 static void 526 vtnet_negotiate_features(struct vtnet_softc *sc) 527 { 528 device_t dev; 529 uint64_t mask, features; 530 531 dev = sc->vtnet_dev; 532 mask = 0; 533 534 /* 535 * TSO and LRO are only available when their corresponding checksum 536 * offload feature is also negotiated. 
537 */ 538 if (vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable)) { 539 mask |= VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM; 540 mask |= VTNET_TSO_FEATURES | VTNET_LRO_FEATURES; 541 } 542 if (vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable)) 543 mask |= VTNET_TSO_FEATURES; 544 if (vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable)) 545 mask |= VTNET_LRO_FEATURES; 546 #ifndef VTNET_LEGACY_TX 547 if (vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable)) 548 mask |= VIRTIO_NET_F_MQ; 549 #else 550 mask |= VIRTIO_NET_F_MQ; 551 #endif 552 553 features = VTNET_FEATURES & ~mask; 554 sc->vtnet_features = virtio_negotiate_features(dev, features); 555 556 if (virtio_with_feature(dev, VTNET_LRO_FEATURES) && 557 virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) { 558 /* 559 * LRO without mergeable buffers requires special care. This 560 * is not ideal because every receive buffer must be large 561 * enough to hold the maximum TCP packet, the Ethernet header, 562 * and the header. This requires up to 34 descriptors with 563 * MCLBYTES clusters. If we do not have indirect descriptors, 564 * LRO is disabled since the virtqueue will not contain very 565 * many receive buffers. 566 */ 567 if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) { 568 device_printf(dev, 569 "LRO disabled due to both mergeable buffers and " 570 "indirect descriptors not negotiated\n"); 571 572 features &= ~VTNET_LRO_FEATURES; 573 sc->vtnet_features = 574 virtio_negotiate_features(dev, features); 575 } else 576 sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG; 577 } 578 } 579 580 static void 581 vtnet_setup_features(struct vtnet_softc *sc) 582 { 583 device_t dev; 584 int max_pairs, max; 585 586 dev = sc->vtnet_dev; 587 588 vtnet_negotiate_features(sc); 589 590 if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX)) 591 sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX; 592 593 if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) { 594 /* This feature should always be negotiated. 
*/ 595 sc->vtnet_flags |= VTNET_FLAG_MAC; 596 } 597 598 if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) { 599 sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS; 600 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); 601 } else 602 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr); 603 604 if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) 605 sc->vtnet_rx_nsegs = VTNET_MRG_RX_SEGS; 606 else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) 607 sc->vtnet_rx_nsegs = VTNET_MAX_RX_SEGS; 608 else 609 sc->vtnet_rx_nsegs = VTNET_MIN_RX_SEGS; 610 611 if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) || 612 virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) || 613 virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) 614 sc->vtnet_tx_nsegs = VTNET_MAX_TX_SEGS; 615 else 616 sc->vtnet_tx_nsegs = VTNET_MIN_TX_SEGS; 617 618 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { 619 sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ; 620 621 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX)) 622 sc->vtnet_flags |= VTNET_FLAG_CTRL_RX; 623 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN)) 624 sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER; 625 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR)) 626 sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC; 627 } 628 629 if (virtio_with_feature(dev, VIRTIO_NET_F_MQ) && 630 sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { 631 max_pairs = virtio_read_dev_config_2(dev, 632 offsetof(struct virtio_net_config, max_virtqueue_pairs)); 633 if (max_pairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 634 max_pairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) 635 max_pairs = 1; 636 } else 637 max_pairs = 1; 638 639 if (max_pairs > 1) { 640 /* 641 * Limit the maximum number of queue pairs to the number of 642 * CPUs or the configured maximum. The actual number of 643 * queues that get used may be less. 644 */ 645 max = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); 646 if (max > 0 && max_pairs > max) 647 max_pairs = max; 648 if (max_pairs > mp_ncpus) 649 max_pairs = mp_ncpus; 650 if (max_pairs > VTNET_MAX_QUEUE_PAIRS) 651 max_pairs = VTNET_MAX_QUEUE_PAIRS; 652 if (max_pairs > 1) 653 sc->vtnet_flags |= VTNET_FLAG_MULTIQ; 654 } 655 656 sc->vtnet_max_vq_pairs = max_pairs; 657 } 658 659 static int 660 vtnet_init_rxq(struct vtnet_softc *sc, int id) 661 { 662 struct vtnet_rxq *rxq; 663 664 rxq = &sc->vtnet_rxqs[id]; 665 666 snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d", 667 device_get_nameunit(sc->vtnet_dev), id); 668 mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF); 669 670 rxq->vtnrx_sc = sc; 671 rxq->vtnrx_id = id; 672 673 rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT); 674 if (rxq->vtnrx_sg == NULL) 675 return (ENOMEM); 676 677 TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq); 678 rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT, 679 taskqueue_thread_enqueue, &rxq->vtnrx_tq); 680 681 return (rxq->vtnrx_tq == NULL ? 
ENOMEM : 0); 682 } 683 684 static int 685 vtnet_init_txq(struct vtnet_softc *sc, int id) 686 { 687 struct vtnet_txq *txq; 688 689 txq = &sc->vtnet_txqs[id]; 690 691 snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d", 692 device_get_nameunit(sc->vtnet_dev), id); 693 mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF); 694 695 txq->vtntx_sc = sc; 696 txq->vtntx_id = id; 697 698 txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT); 699 if (txq->vtntx_sg == NULL) 700 return (ENOMEM); 701 702 #ifndef VTNET_LEGACY_TX 703 txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF, 704 M_NOWAIT, &txq->vtntx_mtx); 705 if (txq->vtntx_br == NULL) 706 return (ENOMEM); 707 708 TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq); 709 #endif 710 TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq); 711 txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT, 712 taskqueue_thread_enqueue, &txq->vtntx_tq); 713 if (txq->vtntx_tq == NULL) 714 return (ENOMEM); 715 716 return (0); 717 } 718 719 static int 720 vtnet_alloc_rxtx_queues(struct vtnet_softc *sc) 721 { 722 int i, npairs, error; 723 724 npairs = sc->vtnet_max_vq_pairs; 725 726 sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF, 727 M_NOWAIT | M_ZERO); 728 sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF, 729 M_NOWAIT | M_ZERO); 730 if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL) 731 return (ENOMEM); 732 733 for (i = 0; i < npairs; i++) { 734 error = vtnet_init_rxq(sc, i); 735 if (error) 736 return (error); 737 error = vtnet_init_txq(sc, i); 738 if (error) 739 return (error); 740 } 741 742 vtnet_setup_queue_sysctl(sc); 743 744 return (0); 745 } 746 747 static void 748 vtnet_destroy_rxq(struct vtnet_rxq *rxq) 749 { 750 751 rxq->vtnrx_sc = NULL; 752 rxq->vtnrx_id = -1; 753 754 if (rxq->vtnrx_sg != NULL) { 755 sglist_free(rxq->vtnrx_sg); 756 rxq->vtnrx_sg = NULL; 757 } 758 759 if (mtx_initialized(&rxq->vtnrx_mtx) != 0) 760 mtx_destroy(&rxq->vtnrx_mtx); 761 } 762 763 static void 764 vtnet_destroy_txq(struct vtnet_txq *txq) 765 { 766 767 txq->vtntx_sc = NULL; 768 txq->vtntx_id = -1; 769 770 if (txq->vtntx_sg != NULL) { 771 sglist_free(txq->vtntx_sg); 772 txq->vtntx_sg = NULL; 773 } 774 775 #ifndef VTNET_LEGACY_TX 776 if (txq->vtntx_br != NULL) { 777 buf_ring_free(txq->vtntx_br, M_DEVBUF); 778 txq->vtntx_br = NULL; 779 } 780 #endif 781 782 if (mtx_initialized(&txq->vtntx_mtx) != 0) 783 mtx_destroy(&txq->vtntx_mtx); 784 } 785 786 static void 787 vtnet_free_rxtx_queues(struct vtnet_softc *sc) 788 { 789 int i; 790 791 if (sc->vtnet_rxqs != NULL) { 792 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) 793 vtnet_destroy_rxq(&sc->vtnet_rxqs[i]); 794 free(sc->vtnet_rxqs, M_DEVBUF); 795 sc->vtnet_rxqs = NULL; 796 } 797 798 if (sc->vtnet_txqs != NULL) { 799 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) 800 vtnet_destroy_txq(&sc->vtnet_txqs[i]); 801 free(sc->vtnet_txqs, M_DEVBUF); 802 sc->vtnet_txqs = NULL; 803 } 804 } 805 806 static int 807 vtnet_alloc_rx_filters(struct vtnet_softc *sc) 808 { 809 810 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { 811 sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter), 812 M_DEVBUF, M_NOWAIT | M_ZERO); 813 if (sc->vtnet_mac_filter == NULL) 814 return (ENOMEM); 815 } 816 817 if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { 818 sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) * 819 VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO); 820 if (sc->vtnet_vlan_filter == NULL) 821 return (ENOMEM); 822 } 823 824 return (0); 825 } 826 827 static 
void 828 vtnet_free_rx_filters(struct vtnet_softc *sc) 829 { 830 831 if (sc->vtnet_mac_filter != NULL) { 832 free(sc->vtnet_mac_filter, M_DEVBUF); 833 sc->vtnet_mac_filter = NULL; 834 } 835 836 if (sc->vtnet_vlan_filter != NULL) { 837 free(sc->vtnet_vlan_filter, M_DEVBUF); 838 sc->vtnet_vlan_filter = NULL; 839 } 840 } 841 842 static int 843 vtnet_alloc_virtqueues(struct vtnet_softc *sc) 844 { 845 device_t dev; 846 struct vq_alloc_info *info; 847 struct vtnet_rxq *rxq; 848 struct vtnet_txq *txq; 849 int i, idx, flags, nvqs, error; 850 851 dev = sc->vtnet_dev; 852 flags = 0; 853 854 nvqs = sc->vtnet_max_vq_pairs * 2; 855 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) 856 nvqs++; 857 858 info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT); 859 if (info == NULL) 860 return (ENOMEM); 861 862 for (i = 0, idx = 0; i < sc->vtnet_max_vq_pairs; i++, idx+=2) { 863 rxq = &sc->vtnet_rxqs[i]; 864 VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs, 865 vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq, 866 "%s-%d rx", device_get_nameunit(dev), rxq->vtnrx_id); 867 868 txq = &sc->vtnet_txqs[i]; 869 VQ_ALLOC_INFO_INIT(&info[idx+1], sc->vtnet_tx_nsegs, 870 vtnet_tx_vq_intr, txq, &txq->vtntx_vq, 871 "%s-%d tx", device_get_nameunit(dev), txq->vtntx_id); 872 } 873 874 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) { 875 VQ_ALLOC_INFO_INIT(&info[idx], 0, NULL, NULL, 876 &sc->vtnet_ctrl_vq, "%s ctrl", device_get_nameunit(dev)); 877 } 878 879 /* 880 * Enable interrupt binding if this is multiqueue. This only matters 881 * when per-vq MSIX is available. 882 */ 883 if (sc->vtnet_flags & VTNET_FLAG_MULTIQ) 884 flags |= 0; 885 886 error = virtio_alloc_virtqueues(dev, flags, nvqs, info); 887 free(info, M_TEMP); 888 889 return (error); 890 } 891 892 static int 893 vtnet_setup_interface(struct vtnet_softc *sc) 894 { 895 device_t dev; 896 struct ifnet *ifp; 897 898 dev = sc->vtnet_dev; 899 900 ifp = sc->vtnet_ifp = if_alloc(IFT_ETHER); 901 if (ifp == NULL) { 902 device_printf(dev, "cannot allocate ifnet structure\n"); 903 return (ENOSPC); 904 } 905 906 if_initname(ifp, device_get_name(dev), device_get_unit(dev)); 907 ifp->if_baudrate = IF_Gbps(10); /* Approx. */ 908 ifp->if_softc = sc; 909 ifp->if_flags = IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST; 910 ifp->if_init = vtnet_init; 911 ifp->if_ioctl = vtnet_ioctl; 912 913 #ifndef VTNET_LEGACY_TX 914 ifp->if_transmit = vtnet_txq_mq_start; 915 ifp->if_qflush = vtnet_qflush; 916 #else 917 struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq; 918 ifp->if_start = vtnet_start; 919 IFQ_SET_MAXLEN(&ifp->if_snd, virtqueue_size(vq) - 1); 920 ifp->if_snd.ifq_drv_maxlen = virtqueue_size(vq) - 1; 921 IFQ_SET_READY(&ifp->if_snd); 922 #endif 923 924 ifmedia_init(&sc->vtnet_media, IFM_IMASK, vtnet_ifmedia_upd, 925 vtnet_ifmedia_sts); 926 ifmedia_add(&sc->vtnet_media, VTNET_MEDIATYPE, 0, NULL); 927 ifmedia_set(&sc->vtnet_media, VTNET_MEDIATYPE); 928 929 /* Read (or generate) the MAC address for the adapter. */ 930 vtnet_get_hwaddr(sc); 931 932 ether_ifattach(ifp, sc->vtnet_hwaddr); 933 934 if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS)) 935 ifp->if_capabilities |= IFCAP_LINKSTATE; 936 937 /* Tell the upper layer(s) we support long frames. 
*/ 938 ifp->if_data.ifi_hdrlen = sizeof(struct ether_vlan_header); 939 ifp->if_capabilities |= IFCAP_JUMBO_MTU | IFCAP_VLAN_MTU; 940 941 if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) { 942 ifp->if_capabilities |= IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6; 943 944 if (virtio_with_feature(dev, VIRTIO_NET_F_GSO)) { 945 ifp->if_capabilities |= IFCAP_TSO4 | IFCAP_TSO6; 946 sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; 947 } else { 948 if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4)) 949 ifp->if_capabilities |= IFCAP_TSO4; 950 if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) 951 ifp->if_capabilities |= IFCAP_TSO6; 952 if (virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN)) 953 sc->vtnet_flags |= VTNET_FLAG_TSO_ECN; 954 } 955 956 if (ifp->if_capabilities & IFCAP_TSO) 957 ifp->if_capabilities |= IFCAP_VLAN_HWTSO; 958 } 959 960 if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) 961 ifp->if_capabilities |= IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6; 962 963 if (ifp->if_capabilities & IFCAP_HWCSUM) { 964 /* 965 * VirtIO does not support VLAN tagging, but we can fake 966 * it by inserting and removing the 802.1Q header during 967 * transmit and receive. We are then able to do checksum 968 * offloading of VLAN frames. 969 */ 970 ifp->if_capabilities |= 971 IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM; 972 } 973 974 ifp->if_capenable = ifp->if_capabilities; 975 976 /* 977 * Capabilities after here are not enabled by default. 978 */ 979 980 if (ifp->if_capabilities & IFCAP_RXCSUM) { 981 if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) || 982 virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6)) 983 ifp->if_capabilities |= IFCAP_LRO; 984 } 985 986 if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { 987 ifp->if_capabilities |= IFCAP_VLAN_HWFILTER; 988 989 sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config, 990 vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST); 991 sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig, 992 vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST); 993 } 994 995 vtnet_set_rx_process_limit(sc); 996 vtnet_set_tx_intr_threshold(sc); 997 998 return (0); 999 } 1000 1001 static int 1002 vtnet_change_mtu(struct vtnet_softc *sc, int new_mtu) 1003 { 1004 struct ifnet *ifp; 1005 int frame_size, clsize; 1006 1007 ifp = sc->vtnet_ifp; 1008 1009 if (new_mtu < ETHERMIN || new_mtu > VTNET_MAX_MTU) 1010 return (EINVAL); 1011 1012 frame_size = sc->vtnet_hdr_size + sizeof(struct ether_vlan_header) + 1013 new_mtu; 1014 1015 /* 1016 * Based on the new MTU (and hence frame size) determine which 1017 * cluster size is most appropriate for the receive queues. 1018 */ 1019 if (frame_size <= MCLBYTES) { 1020 clsize = MCLBYTES; 1021 } else if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { 1022 /* Avoid going past 9K jumbos. 
*/ 1023 if (frame_size > MJUM9BYTES) 1024 return (EINVAL); 1025 clsize = MJUM9BYTES; 1026 } else 1027 clsize = MJUMPAGESIZE; 1028 1029 ifp->if_mtu = new_mtu; 1030 sc->vtnet_rx_new_clsize = clsize; 1031 1032 if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1033 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1034 vtnet_init_locked(sc); 1035 } 1036 1037 return (0); 1038 } 1039 1040 static int 1041 vtnet_ioctl(struct ifnet *ifp, u_long cmd, caddr_t data) 1042 { 1043 struct vtnet_softc *sc; 1044 struct ifreq *ifr; 1045 int reinit, mask, error; 1046 1047 sc = ifp->if_softc; 1048 ifr = (struct ifreq *) data; 1049 error = 0; 1050 1051 switch (cmd) { 1052 case SIOCSIFMTU: 1053 if (ifp->if_mtu != ifr->ifr_mtu) { 1054 VTNET_CORE_LOCK(sc); 1055 error = vtnet_change_mtu(sc, ifr->ifr_mtu); 1056 VTNET_CORE_UNLOCK(sc); 1057 } 1058 break; 1059 1060 case SIOCSIFFLAGS: 1061 VTNET_CORE_LOCK(sc); 1062 if ((ifp->if_flags & IFF_UP) == 0) { 1063 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 1064 vtnet_stop(sc); 1065 } else if (ifp->if_drv_flags & IFF_DRV_RUNNING) { 1066 if ((ifp->if_flags ^ sc->vtnet_if_flags) & 1067 (IFF_PROMISC | IFF_ALLMULTI)) { 1068 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) 1069 vtnet_rx_filter(sc); 1070 else 1071 error = ENOTSUP; 1072 } 1073 } else 1074 vtnet_init_locked(sc); 1075 1076 if (error == 0) 1077 sc->vtnet_if_flags = ifp->if_flags; 1078 VTNET_CORE_UNLOCK(sc); 1079 break; 1080 1081 case SIOCADDMULTI: 1082 case SIOCDELMULTI: 1083 if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) 1084 break; 1085 VTNET_CORE_LOCK(sc); 1086 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 1087 vtnet_rx_filter_mac(sc); 1088 VTNET_CORE_UNLOCK(sc); 1089 break; 1090 1091 case SIOCSIFMEDIA: 1092 case SIOCGIFMEDIA: 1093 error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd); 1094 break; 1095 1096 case SIOCSIFCAP: 1097 VTNET_CORE_LOCK(sc); 1098 mask = ifr->ifr_reqcap ^ ifp->if_capenable; 1099 1100 if (mask & IFCAP_TXCSUM) 1101 ifp->if_capenable ^= IFCAP_TXCSUM; 1102 if (mask & IFCAP_TXCSUM_IPV6) 1103 ifp->if_capenable ^= IFCAP_TXCSUM_IPV6; 1104 if (mask & IFCAP_TSO4) 1105 ifp->if_capenable ^= IFCAP_TSO4; 1106 if (mask & IFCAP_TSO6) 1107 ifp->if_capenable ^= IFCAP_TSO6; 1108 1109 if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO | 1110 IFCAP_VLAN_HWFILTER)) { 1111 /* These Rx features require us to renegotiate. 
*/ 1112 reinit = 1; 1113 1114 if (mask & IFCAP_RXCSUM) 1115 ifp->if_capenable ^= IFCAP_RXCSUM; 1116 if (mask & IFCAP_RXCSUM_IPV6) 1117 ifp->if_capenable ^= IFCAP_RXCSUM_IPV6; 1118 if (mask & IFCAP_LRO) 1119 ifp->if_capenable ^= IFCAP_LRO; 1120 if (mask & IFCAP_VLAN_HWFILTER) 1121 ifp->if_capenable ^= IFCAP_VLAN_HWFILTER; 1122 } else 1123 reinit = 0; 1124 1125 if (mask & IFCAP_VLAN_HWTSO) 1126 ifp->if_capenable ^= IFCAP_VLAN_HWTSO; 1127 if (mask & IFCAP_VLAN_HWTAGGING) 1128 ifp->if_capenable ^= IFCAP_VLAN_HWTAGGING; 1129 1130 if (reinit && (ifp->if_drv_flags & IFF_DRV_RUNNING)) { 1131 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 1132 vtnet_init_locked(sc); 1133 } 1134 1135 VTNET_CORE_UNLOCK(sc); 1136 VLAN_CAPABILITIES(ifp); 1137 1138 break; 1139 1140 default: 1141 error = ether_ioctl(ifp, cmd, data); 1142 break; 1143 } 1144 1145 VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc); 1146 1147 return (error); 1148 } 1149 1150 static int 1151 vtnet_rxq_populate(struct vtnet_rxq *rxq) 1152 { 1153 struct virtqueue *vq; 1154 int nbufs, error; 1155 1156 vq = rxq->vtnrx_vq; 1157 error = ENOSPC; 1158 1159 for (nbufs = 0; !virtqueue_full(vq); nbufs++) { 1160 error = vtnet_rxq_new_buf(rxq); 1161 if (error) 1162 break; 1163 } 1164 1165 if (nbufs > 0) { 1166 virtqueue_notify(vq); 1167 /* 1168 * EMSGSIZE signifies the virtqueue did not have enough 1169 * entries available to hold the last mbuf. This is not 1170 * an error. 1171 */ 1172 if (error == EMSGSIZE) 1173 error = 0; 1174 } 1175 1176 return (error); 1177 } 1178 1179 static void 1180 vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq) 1181 { 1182 struct virtqueue *vq; 1183 struct mbuf *m; 1184 int last; 1185 1186 vq = rxq->vtnrx_vq; 1187 last = 0; 1188 1189 while ((m = virtqueue_drain(vq, &last)) != NULL) 1190 m_freem(m); 1191 1192 KASSERT(virtqueue_empty(vq), 1193 ("%s: mbufs remaining in rx queue %p", __func__, rxq)); 1194 } 1195 1196 static struct mbuf * 1197 vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) 1198 { 1199 struct mbuf *m_head, *m_tail, *m; 1200 int i, clsize; 1201 1202 clsize = sc->vtnet_rx_clsize; 1203 1204 KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, 1205 ("%s: chained mbuf %d request without LRO_NOMRG", __func__, nbufs)); 1206 1207 m_head = m_getjcl(M_NOWAIT, MT_DATA, M_PKTHDR, clsize); 1208 if (m_head == NULL) 1209 goto fail; 1210 1211 m_head->m_len = clsize; 1212 m_tail = m_head; 1213 1214 /* Allocate the rest of the chain. */ 1215 for (i = 1; i < nbufs; i++) { 1216 m = m_getjcl(M_NOWAIT, MT_DATA, 0, clsize); 1217 if (m == NULL) 1218 goto fail; 1219 1220 m->m_len = clsize; 1221 m_tail->m_next = m; 1222 m_tail = m; 1223 } 1224 1225 if (m_tailp != NULL) 1226 *m_tailp = m_tail; 1227 1228 return (m_head); 1229 1230 fail: 1231 sc->vtnet_stats.mbuf_alloc_failed++; 1232 m_freem(m_head); 1233 1234 return (NULL); 1235 } 1236 1237 /* 1238 * Slow path for when LRO without mergeable buffers is negotiated. 1239 */ 1240 static int 1241 vtnet_rxq_replace_lro_nomgr_buf(struct vtnet_rxq *rxq, struct mbuf *m0, 1242 int len0) 1243 { 1244 struct vtnet_softc *sc; 1245 struct mbuf *m, *m_prev; 1246 struct mbuf *m_new, *m_tail; 1247 int len, clsize, nreplace, error; 1248 1249 sc = rxq->vtnrx_sc; 1250 clsize = sc->vtnet_rx_clsize; 1251 1252 m_prev = NULL; 1253 m_tail = NULL; 1254 nreplace = 0; 1255 1256 m = m0; 1257 len = len0; 1258 1259 /* 1260 * Since these mbuf chains are so large, we avoid allocating an 1261 * entire replacement chain if possible. 
When the received frame 1262 * did not consume the entire chain, the unused mbufs are moved 1263 * to the replacement chain. 1264 */ 1265 while (len > 0) { 1266 /* 1267 * Something is seriously wrong if we received a frame 1268 * larger than the chain. Drop it. 1269 */ 1270 if (m == NULL) { 1271 sc->vtnet_stats.rx_frame_too_large++; 1272 return (EMSGSIZE); 1273 } 1274 1275 /* We always allocate the same cluster size. */ 1276 KASSERT(m->m_len == clsize, 1277 ("%s: mbuf size %d is not the cluster size %d", 1278 __func__, m->m_len, clsize)); 1279 1280 m->m_len = MIN(m->m_len, len); 1281 len -= m->m_len; 1282 1283 m_prev = m; 1284 m = m->m_next; 1285 nreplace++; 1286 } 1287 1288 KASSERT(nreplace <= sc->vtnet_rx_nmbufs, 1289 ("%s: too many replacement mbufs %d max %d", __func__, nreplace, 1290 sc->vtnet_rx_nmbufs)); 1291 1292 m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail); 1293 if (m_new == NULL) { 1294 m_prev->m_len = clsize; 1295 return (ENOBUFS); 1296 } 1297 1298 /* 1299 * Move any unused mbufs from the received chain onto the end 1300 * of the new chain. 1301 */ 1302 if (m_prev->m_next != NULL) { 1303 m_tail->m_next = m_prev->m_next; 1304 m_prev->m_next = NULL; 1305 } 1306 1307 error = vtnet_rxq_enqueue_buf(rxq, m_new); 1308 if (error) { 1309 /* 1310 * BAD! We could not enqueue the replacement mbuf chain. We 1311 * must restore the m0 chain to the original state if it was 1312 * modified so we can subsequently discard it. 1313 * 1314 * NOTE: The replacement is suppose to be an identical copy 1315 * to the one just dequeued so this is an unexpected error. 1316 */ 1317 sc->vtnet_stats.rx_enq_replacement_failed++; 1318 1319 if (m_tail->m_next != NULL) { 1320 m_prev->m_next = m_tail->m_next; 1321 m_tail->m_next = NULL; 1322 } 1323 1324 m_prev->m_len = clsize; 1325 m_freem(m_new); 1326 } 1327 1328 return (error); 1329 } 1330 1331 static int 1332 vtnet_rxq_replace_buf(struct vtnet_rxq *rxq, struct mbuf *m, int len) 1333 { 1334 struct vtnet_softc *sc; 1335 struct mbuf *m_new; 1336 int error; 1337 1338 sc = rxq->vtnrx_sc; 1339 1340 KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, 1341 ("%s: chained mbuf without LRO_NOMRG", __func__)); 1342 1343 if (m->m_next == NULL) { 1344 /* Fast-path for the common case of just one mbuf. */ 1345 if (m->m_len < len) 1346 return (EINVAL); 1347 1348 m_new = vtnet_rx_alloc_buf(sc, 1, NULL); 1349 if (m_new == NULL) 1350 return (ENOBUFS); 1351 1352 error = vtnet_rxq_enqueue_buf(rxq, m_new); 1353 if (error) { 1354 /* 1355 * The new mbuf is suppose to be an identical 1356 * copy of the one just dequeued so this is an 1357 * unexpected error. 
1358 */ 1359 m_freem(m_new); 1360 sc->vtnet_stats.rx_enq_replacement_failed++; 1361 } else 1362 m->m_len = len; 1363 } else 1364 error = vtnet_rxq_replace_lro_nomgr_buf(rxq, m, len); 1365 1366 return (error); 1367 } 1368 1369 static int 1370 vtnet_rxq_enqueue_buf(struct vtnet_rxq *rxq, struct mbuf *m) 1371 { 1372 struct vtnet_softc *sc; 1373 struct sglist *sg; 1374 struct vtnet_rx_header *rxhdr; 1375 uint8_t *mdata; 1376 int offset, error; 1377 1378 sc = rxq->vtnrx_sc; 1379 sg = rxq->vtnrx_sg; 1380 mdata = mtod(m, uint8_t *); 1381 1382 VTNET_RXQ_LOCK_ASSERT(rxq); 1383 KASSERT(sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG || m->m_next == NULL, 1384 ("%s: chained mbuf without LRO_NOMRG", __func__)); 1385 KASSERT(m->m_len == sc->vtnet_rx_clsize, 1386 ("%s: unexpected cluster size %d/%d", __func__, m->m_len, 1387 sc->vtnet_rx_clsize)); 1388 1389 sglist_reset(sg); 1390 if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { 1391 MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr)); 1392 rxhdr = (struct vtnet_rx_header *) mdata; 1393 sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size); 1394 offset = sizeof(struct vtnet_rx_header); 1395 } else 1396 offset = 0; 1397 1398 sglist_append(sg, mdata + offset, m->m_len - offset); 1399 if (m->m_next != NULL) { 1400 error = sglist_append_mbuf(sg, m->m_next); 1401 MPASS(error == 0); 1402 } 1403 1404 error = virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg); 1405 1406 return (error); 1407 } 1408 1409 static int 1410 vtnet_rxq_new_buf(struct vtnet_rxq *rxq) 1411 { 1412 struct vtnet_softc *sc; 1413 struct mbuf *m; 1414 int error; 1415 1416 sc = rxq->vtnrx_sc; 1417 1418 m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL); 1419 if (m == NULL) 1420 return (ENOBUFS); 1421 1422 error = vtnet_rxq_enqueue_buf(rxq, m); 1423 if (error) 1424 m_freem(m); 1425 1426 return (error); 1427 } 1428 1429 /* 1430 * Use the checksum offset in the VirtIO header to set the 1431 * correct CSUM_* flags. 1432 */ 1433 static int 1434 vtnet_rxq_csum_by_offset(struct vtnet_rxq *rxq, struct mbuf *m, 1435 uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) 1436 { 1437 struct vtnet_softc *sc; 1438 #if defined(INET) || defined(INET6) 1439 int offset = hdr->csum_start + hdr->csum_offset; 1440 #endif 1441 1442 sc = rxq->vtnrx_sc; 1443 1444 /* Only do a basic sanity check on the offset. */ 1445 switch (eth_type) { 1446 #if defined(INET) 1447 case ETHERTYPE_IP: 1448 if (__predict_false(offset < ip_start + sizeof(struct ip))) 1449 return (1); 1450 break; 1451 #endif 1452 #if defined(INET6) 1453 case ETHERTYPE_IPV6: 1454 if (__predict_false(offset < ip_start + sizeof(struct ip6_hdr))) 1455 return (1); 1456 break; 1457 #endif 1458 default: 1459 sc->vtnet_stats.rx_csum_bad_ethtype++; 1460 return (1); 1461 } 1462 1463 /* 1464 * Use the offset to determine the appropriate CSUM_* flags. This is 1465 * a bit dirty, but we can get by with it since the checksum offsets 1466 * happen to be different. We assume the host host does not do IPv4 1467 * header checksum offloading. 
1468 */ 1469 switch (hdr->csum_offset) { 1470 case offsetof(struct udphdr, uh_sum): 1471 case offsetof(struct tcphdr, th_sum): 1472 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1473 m->m_pkthdr.csum_data = 0xFFFF; 1474 break; 1475 case offsetof(struct sctphdr, checksum): 1476 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 1477 break; 1478 default: 1479 sc->vtnet_stats.rx_csum_bad_offset++; 1480 return (1); 1481 } 1482 1483 return (0); 1484 } 1485 1486 static int 1487 vtnet_rxq_csum_by_parse(struct vtnet_rxq *rxq, struct mbuf *m, 1488 uint16_t eth_type, int ip_start, struct virtio_net_hdr *hdr) 1489 { 1490 struct vtnet_softc *sc; 1491 int offset, proto; 1492 1493 sc = rxq->vtnrx_sc; 1494 1495 switch (eth_type) { 1496 #if defined(INET) 1497 case ETHERTYPE_IP: { 1498 struct ip *ip; 1499 if (__predict_false(m->m_len < ip_start + sizeof(struct ip))) 1500 return (1); 1501 ip = (struct ip *)(m->m_data + ip_start); 1502 proto = ip->ip_p; 1503 offset = ip_start + (ip->ip_hl << 2); 1504 break; 1505 } 1506 #endif 1507 #if defined(INET6) 1508 case ETHERTYPE_IPV6: 1509 if (__predict_false(m->m_len < ip_start + 1510 sizeof(struct ip6_hdr))) 1511 return (1); 1512 offset = ip6_lasthdr(m, ip_start, IPPROTO_IPV6, &proto); 1513 if (__predict_false(offset < 0)) 1514 return (1); 1515 break; 1516 #endif 1517 default: 1518 sc->vtnet_stats.rx_csum_bad_ethtype++; 1519 return (1); 1520 } 1521 1522 switch (proto) { 1523 case IPPROTO_TCP: 1524 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) 1525 return (1); 1526 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1527 m->m_pkthdr.csum_data = 0xFFFF; 1528 break; 1529 case IPPROTO_UDP: 1530 if (__predict_false(m->m_len < offset + sizeof(struct udphdr))) 1531 return (1); 1532 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1533 m->m_pkthdr.csum_data = 0xFFFF; 1534 break; 1535 case IPPROTO_SCTP: 1536 if (__predict_false(m->m_len < offset + sizeof(struct sctphdr))) 1537 return (1); 1538 m->m_pkthdr.csum_flags |= CSUM_SCTP_VALID; 1539 break; 1540 default: 1541 /* 1542 * For the remaining protocols, FreeBSD does not support 1543 * checksum offloading, so the checksum will be recomputed. 1544 */ 1545 #if 0 1546 if_printf(sc->vtnet_ifp, "cksum offload of unsupported " 1547 "protocol eth_type=%#x proto=%d csum_start=%d " 1548 "csum_offset=%d\n", __func__, eth_type, proto, 1549 hdr->csum_start, hdr->csum_offset); 1550 #endif 1551 break; 1552 } 1553 1554 return (0); 1555 } 1556 1557 /* 1558 * Set the appropriate CSUM_* flags. Unfortunately, the information 1559 * provided is not directly useful to us. The VirtIO header gives the 1560 * offset of the checksum, which is all Linux needs, but this is not 1561 * how FreeBSD does things. We are forced to peek inside the packet 1562 * a bit. 1563 * 1564 * It would be nice if VirtIO gave us the L4 protocol or if FreeBSD 1565 * could accept the offsets and let the stack figure it out. 1566 */ 1567 static int 1568 vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m, 1569 struct virtio_net_hdr *hdr) 1570 { 1571 struct ether_header *eh; 1572 struct ether_vlan_header *evh; 1573 uint16_t eth_type; 1574 int offset, error; 1575 1576 eh = mtod(m, struct ether_header *); 1577 eth_type = ntohs(eh->ether_type); 1578 if (eth_type == ETHERTYPE_VLAN) { 1579 /* BMV: We should handle nested VLAN tags too. 
*/ 1580 evh = mtod(m, struct ether_vlan_header *); 1581 eth_type = ntohs(evh->evl_proto); 1582 offset = sizeof(struct ether_vlan_header); 1583 } else 1584 offset = sizeof(struct ether_header); 1585 1586 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) 1587 error = vtnet_rxq_csum_by_offset(rxq, m, eth_type, offset, hdr); 1588 else 1589 error = vtnet_rxq_csum_by_parse(rxq, m, eth_type, offset, hdr); 1590 1591 return (error); 1592 } 1593 1594 static void 1595 vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs) 1596 { 1597 struct mbuf *m; 1598 1599 while (--nbufs > 0) { 1600 m = virtqueue_dequeue(rxq->vtnrx_vq, NULL); 1601 if (m == NULL) 1602 break; 1603 vtnet_rxq_discard_buf(rxq, m); 1604 } 1605 } 1606 1607 static void 1608 vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m) 1609 { 1610 int error; 1611 1612 /* 1613 * Requeue the discarded mbuf. This should always be successful 1614 * since it was just dequeued. 1615 */ 1616 error = vtnet_rxq_enqueue_buf(rxq, m); 1617 KASSERT(error == 0, 1618 ("%s: cannot requeue discarded mbuf %d", __func__, error)); 1619 } 1620 1621 static int 1622 vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs) 1623 { 1624 struct vtnet_softc *sc; 1625 struct ifnet *ifp; 1626 struct virtqueue *vq; 1627 struct mbuf *m, *m_tail; 1628 int len; 1629 1630 sc = rxq->vtnrx_sc; 1631 vq = rxq->vtnrx_vq; 1632 ifp = sc->vtnet_ifp; 1633 m_tail = m_head; 1634 1635 while (--nbufs > 0) { 1636 m = virtqueue_dequeue(vq, &len); 1637 if (m == NULL) { 1638 rxq->vtnrx_stats.vrxs_ierrors++; 1639 goto fail; 1640 } 1641 1642 if (vtnet_rxq_new_buf(rxq) != 0) { 1643 rxq->vtnrx_stats.vrxs_iqdrops++; 1644 vtnet_rxq_discard_buf(rxq, m); 1645 if (nbufs > 1) 1646 vtnet_rxq_discard_merged_bufs(rxq, nbufs); 1647 goto fail; 1648 } 1649 1650 if (m->m_len < len) 1651 len = m->m_len; 1652 1653 m->m_len = len; 1654 m->m_flags &= ~M_PKTHDR; 1655 1656 m_head->m_pkthdr.len += len; 1657 m_tail->m_next = m; 1658 m_tail = m; 1659 } 1660 1661 return (0); 1662 1663 fail: 1664 sc->vtnet_stats.rx_mergeable_failed++; 1665 m_freem(m_head); 1666 1667 return (1); 1668 } 1669 1670 static void 1671 vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m, 1672 struct virtio_net_hdr *hdr) 1673 { 1674 struct vtnet_softc *sc; 1675 struct ifnet *ifp; 1676 struct ether_header *eh; 1677 1678 sc = rxq->vtnrx_sc; 1679 ifp = sc->vtnet_ifp; 1680 1681 if (ifp->if_capenable & IFCAP_VLAN_HWTAGGING) { 1682 eh = mtod(m, struct ether_header *); 1683 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 1684 vtnet_vlan_tag_remove(m); 1685 /* 1686 * With the 802.1Q header removed, update the 1687 * checksum starting location accordingly. 1688 */ 1689 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) 1690 hdr->csum_start -= ETHER_VLAN_ENCAP_LEN; 1691 } 1692 } 1693 1694 m->m_pkthdr.flowid = rxq->vtnrx_id; 1695 m->m_flags |= M_FLOWID; 1696 1697 /* 1698 * BMV: FreeBSD does not have the UNNECESSARY and PARTIAL checksum 1699 * distinction that Linux does. Need to reevaluate if performing 1700 * offloading for the NEEDS_CSUM case is really appropriate. 
1701 */ 1702 if (hdr->flags & (VIRTIO_NET_HDR_F_NEEDS_CSUM | 1703 VIRTIO_NET_HDR_F_DATA_VALID)) { 1704 if (vtnet_rxq_csum(rxq, m, hdr) == 0) 1705 rxq->vtnrx_stats.vrxs_csum++; 1706 else 1707 rxq->vtnrx_stats.vrxs_csum_failed++; 1708 } 1709 1710 rxq->vtnrx_stats.vrxs_ipackets++; 1711 rxq->vtnrx_stats.vrxs_ibytes += m->m_pkthdr.len; 1712 1713 VTNET_RXQ_UNLOCK(rxq); 1714 (*ifp->if_input)(ifp, m); 1715 VTNET_RXQ_LOCK(rxq); 1716 } 1717 1718 static int 1719 vtnet_rxq_eof(struct vtnet_rxq *rxq) 1720 { 1721 struct virtio_net_hdr lhdr, *hdr; 1722 struct vtnet_softc *sc; 1723 struct ifnet *ifp; 1724 struct virtqueue *vq; 1725 struct mbuf *m; 1726 struct virtio_net_hdr_mrg_rxbuf *mhdr; 1727 int len, deq, nbufs, adjsz, count; 1728 1729 sc = rxq->vtnrx_sc; 1730 vq = rxq->vtnrx_vq; 1731 ifp = sc->vtnet_ifp; 1732 hdr = &lhdr; 1733 deq = 0; 1734 count = sc->vtnet_rx_process_limit; 1735 1736 VTNET_RXQ_LOCK_ASSERT(rxq); 1737 1738 while (count-- > 0) { 1739 m = virtqueue_dequeue(vq, &len); 1740 if (m == NULL) 1741 break; 1742 deq++; 1743 1744 if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) { 1745 rxq->vtnrx_stats.vrxs_ierrors++; 1746 vtnet_rxq_discard_buf(rxq, m); 1747 continue; 1748 } 1749 1750 if ((sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) == 0) { 1751 nbufs = 1; 1752 adjsz = sizeof(struct vtnet_rx_header); 1753 /* 1754 * Account for our pad inserted between the header 1755 * and the actual start of the frame. 1756 */ 1757 len += VTNET_RX_HEADER_PAD; 1758 } else { 1759 mhdr = mtod(m, struct virtio_net_hdr_mrg_rxbuf *); 1760 nbufs = mhdr->num_buffers; 1761 adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf); 1762 } 1763 1764 if (vtnet_rxq_replace_buf(rxq, m, len) != 0) { 1765 rxq->vtnrx_stats.vrxs_iqdrops++; 1766 vtnet_rxq_discard_buf(rxq, m); 1767 if (nbufs > 1) 1768 vtnet_rxq_discard_merged_bufs(rxq, nbufs); 1769 continue; 1770 } 1771 1772 m->m_pkthdr.len = len; 1773 m->m_pkthdr.rcvif = ifp; 1774 m->m_pkthdr.csum_flags = 0; 1775 1776 if (nbufs > 1) { 1777 /* Dequeue the rest of chain. */ 1778 if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0) 1779 continue; 1780 } 1781 1782 /* 1783 * Save copy of header before we strip it. For both mergeable 1784 * and non-mergeable, the header is at the beginning of the 1785 * mbuf data. We no longer need num_buffers, so always use a 1786 * regular header. 1787 * 1788 * BMV: Is this memcpy() expensive? We know the mbuf data is 1789 * still valid even after the m_adj(). 1790 */ 1791 memcpy(hdr, mtod(m, void *), sizeof(struct virtio_net_hdr)); 1792 m_adj(m, adjsz); 1793 1794 vtnet_rxq_input(rxq, m, hdr); 1795 1796 /* Must recheck after dropping the Rx lock. */ 1797 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) 1798 break; 1799 } 1800 1801 if (deq > 0) 1802 virtqueue_notify(vq); 1803 1804 return (count > 0 ? 0 : EAGAIN); 1805 } 1806 1807 static void 1808 vtnet_rx_vq_intr(void *xrxq) 1809 { 1810 struct vtnet_softc *sc; 1811 struct vtnet_rxq *rxq; 1812 struct ifnet *ifp; 1813 int tries, more; 1814 1815 rxq = xrxq; 1816 sc = rxq->vtnrx_sc; 1817 ifp = sc->vtnet_ifp; 1818 tries = 0; 1819 1820 if (__predict_false(rxq->vtnrx_id >= sc->vtnet_act_vq_pairs)) { 1821 /* 1822 * Ignore this interrupt. Either this is a spurious interrupt 1823 * or multiqueue without per-VQ MSIX so every queue needs to 1824 * be polled (a brain dead configuration we could try harder 1825 * to avoid). 
1826 */ 1827 vtnet_rxq_disable_intr(rxq); 1828 return; 1829 } 1830 1831 VTNET_RXQ_LOCK(rxq); 1832 1833 again: 1834 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 1835 VTNET_RXQ_UNLOCK(rxq); 1836 return; 1837 } 1838 1839 more = vtnet_rxq_eof(rxq); 1840 if (more || vtnet_rxq_enable_intr(rxq) != 0) { 1841 if (!more) 1842 vtnet_rxq_disable_intr(rxq); 1843 /* 1844 * This is an occasional condition or race (when !more), 1845 * so retry a few times before scheduling the taskqueue. 1846 */ 1847 if (tries++ < VTNET_INTR_DISABLE_RETRIES) 1848 goto again; 1849 1850 VTNET_RXQ_UNLOCK(rxq); 1851 rxq->vtnrx_stats.vrxs_rescheduled++; 1852 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 1853 } else 1854 VTNET_RXQ_UNLOCK(rxq); 1855 } 1856 1857 static void 1858 vtnet_rxq_tq_intr(void *xrxq, int pending) 1859 { 1860 struct vtnet_softc *sc; 1861 struct vtnet_rxq *rxq; 1862 struct ifnet *ifp; 1863 int more; 1864 1865 rxq = xrxq; 1866 sc = rxq->vtnrx_sc; 1867 ifp = sc->vtnet_ifp; 1868 1869 VTNET_RXQ_LOCK(rxq); 1870 1871 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 1872 VTNET_RXQ_UNLOCK(rxq); 1873 return; 1874 } 1875 1876 more = vtnet_rxq_eof(rxq); 1877 if (more || vtnet_rxq_enable_intr(rxq) != 0) { 1878 if (!more) 1879 vtnet_rxq_disable_intr(rxq); 1880 rxq->vtnrx_stats.vrxs_rescheduled++; 1881 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 1882 } 1883 1884 VTNET_RXQ_UNLOCK(rxq); 1885 } 1886 1887 static int 1888 vtnet_txq_below_threshold(struct vtnet_txq *txq) 1889 { 1890 struct vtnet_softc *sc; 1891 struct virtqueue *vq; 1892 1893 sc = txq->vtntx_sc; 1894 vq = txq->vtntx_vq; 1895 1896 return (virtqueue_nfree(vq) <= sc->vtnet_tx_intr_thresh); 1897 } 1898 1899 static int 1900 vtnet_txq_notify(struct vtnet_txq *txq) 1901 { 1902 struct virtqueue *vq; 1903 1904 vq = txq->vtntx_vq; 1905 1906 txq->vtntx_watchdog = VTNET_TX_TIMEOUT; 1907 virtqueue_notify(vq); 1908 1909 if (vtnet_txq_enable_intr(txq) == 0) 1910 return (0); 1911 1912 /* 1913 * Drain frames that were completed since last checked. If this 1914 * causes the queue to go above the threshold, the caller should 1915 * continue transmitting. 1916 */ 1917 if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) { 1918 virtqueue_disable_intr(vq); 1919 return (1); 1920 } 1921 1922 return (0); 1923 } 1924 1925 static void 1926 vtnet_txq_free_mbufs(struct vtnet_txq *txq) 1927 { 1928 struct virtqueue *vq; 1929 struct vtnet_tx_header *txhdr; 1930 int last; 1931 1932 vq = txq->vtntx_vq; 1933 last = 0; 1934 1935 while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { 1936 m_freem(txhdr->vth_mbuf); 1937 uma_zfree(vtnet_tx_header_zone, txhdr); 1938 } 1939 1940 KASSERT(virtqueue_empty(vq), 1941 ("%s: mbufs remaining in tx queue %p", __func__, txq)); 1942 } 1943 1944 /* 1945 * BMV: Much of this can go away once we finally have offsets in 1946 * the mbuf packet header. Bug andre@. 1947 */ 1948 static int 1949 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, 1950 int *etype, int *proto, int *start) 1951 { 1952 struct vtnet_softc *sc; 1953 struct ether_vlan_header *evh; 1954 int offset; 1955 1956 sc = txq->vtntx_sc; 1957 1958 evh = mtod(m, struct ether_vlan_header *); 1959 if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 1960 /* BMV: We should handle nested VLAN tags too. 
*/ 1961 *etype = ntohs(evh->evl_proto); 1962 offset = sizeof(struct ether_vlan_header); 1963 } else { 1964 *etype = ntohs(evh->evl_encap_proto); 1965 offset = sizeof(struct ether_header); 1966 } 1967 1968 switch (*etype) { 1969 #if defined(INET) 1970 case ETHERTYPE_IP: { 1971 struct ip *ip, iphdr; 1972 if (__predict_false(m->m_len < offset + sizeof(struct ip))) { 1973 m_copydata(m, offset, sizeof(struct ip), 1974 (caddr_t) &iphdr); 1975 ip = &iphdr; 1976 } else 1977 ip = (struct ip *)(m->m_data + offset); 1978 *proto = ip->ip_p; 1979 *start = offset + (ip->ip_hl << 2); 1980 break; 1981 } 1982 #endif 1983 #if defined(INET6) 1984 case ETHERTYPE_IPV6: 1985 *proto = -1; 1986 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); 1987 /* Assert the network stack sent us a valid packet. */ 1988 KASSERT(*start > offset, 1989 ("%s: mbuf %p start %d offset %d proto %d", __func__, m, 1990 *start, offset, *proto)); 1991 break; 1992 #endif 1993 default: 1994 sc->vtnet_stats.tx_csum_bad_ethtype++; 1995 return (EINVAL); 1996 } 1997 1998 return (0); 1999 } 2000 2001 static int 2002 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type, 2003 int offset, struct virtio_net_hdr *hdr) 2004 { 2005 static struct timeval lastecn; 2006 static int curecn; 2007 struct vtnet_softc *sc; 2008 struct tcphdr *tcp, tcphdr; 2009 2010 sc = txq->vtntx_sc; 2011 2012 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { 2013 m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); 2014 tcp = &tcphdr; 2015 } else 2016 tcp = (struct tcphdr *)(m->m_data + offset); 2017 2018 hdr->hdr_len = offset + (tcp->th_off << 2); 2019 hdr->gso_size = m->m_pkthdr.tso_segsz; 2020 hdr->gso_type = eth_type == ETHERTYPE_IP ? VIRTIO_NET_HDR_GSO_TCPV4 : 2021 VIRTIO_NET_HDR_GSO_TCPV6; 2022 2023 if (tcp->th_flags & TH_CWR) { 2024 /* 2025 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In FreeBSD, 2026 * ECN support is not on a per-interface basis, but globally via 2027 * the net.inet.tcp.ecn.enable sysctl knob. The default is off. 2028 */ 2029 if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { 2030 if (ppsratecheck(&lastecn, &curecn, 1)) 2031 if_printf(sc->vtnet_ifp, 2032 "TSO with ECN not negotiated with host\n"); 2033 return (ENOTSUP); 2034 } 2035 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; 2036 } 2037 2038 txq->vtntx_stats.vtxs_tso++; 2039 2040 return (0); 2041 } 2042 2043 static struct mbuf * 2044 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m, 2045 struct virtio_net_hdr *hdr) 2046 { 2047 struct vtnet_softc *sc; 2048 int flags, etype, csum_start, proto, error; 2049 2050 sc = txq->vtntx_sc; 2051 flags = m->m_pkthdr.csum_flags; 2052 2053 error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start); 2054 if (error) 2055 goto drop; 2056 2057 if ((etype == ETHERTYPE_IP && flags & VTNET_CSUM_OFFLOAD) || 2058 (etype == ETHERTYPE_IPV6 && flags & VTNET_CSUM_OFFLOAD_IPV6)) { 2059 /* 2060 * We could compare the IP protocol vs the CSUM_ flag too, 2061 * but that really should not be necessary. 2062 */ 2063 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; 2064 hdr->csum_start = csum_start; 2065 hdr->csum_offset = m->m_pkthdr.csum_data; 2066 txq->vtntx_stats.vtxs_csum++; 2067 } 2068 2069 if (flags & CSUM_TSO) { 2070 if (__predict_false(proto != IPPROTO_TCP)) { 2071 /* Likely failed to correctly parse the mbuf. 
*/ 2072 sc->vtnet_stats.tx_tso_not_tcp++; 2073 goto drop; 2074 } 2075 2076 KASSERT(hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM, 2077 ("%s: mbuf %p TSO without checksum offload %#x", 2078 __func__, m, flags)); 2079 2080 error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr); 2081 if (error) 2082 goto drop; 2083 } 2084 2085 return (m); 2086 2087 drop: 2088 m_freem(m); 2089 return (NULL); 2090 } 2091 2092 static int 2093 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, 2094 struct vtnet_tx_header *txhdr) 2095 { 2096 struct vtnet_softc *sc; 2097 struct virtqueue *vq; 2098 struct sglist *sg; 2099 struct mbuf *m; 2100 int error; 2101 2102 sc = txq->vtntx_sc; 2103 vq = txq->vtntx_vq; 2104 sg = txq->vtntx_sg; 2105 m = *m_head; 2106 2107 sglist_reset(sg); 2108 error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); 2109 KASSERT(error == 0 && sg->sg_nseg == 1, 2110 ("%s: error %d adding header to sglist", __func__, error)); 2111 2112 error = sglist_append_mbuf(sg, m); 2113 if (error) { 2114 m = m_defrag(m, M_NOWAIT); 2115 if (m == NULL) 2116 goto fail; 2117 2118 *m_head = m; 2119 sc->vtnet_stats.tx_defragged++; 2120 2121 error = sglist_append_mbuf(sg, m); 2122 if (error) 2123 goto fail; 2124 } 2125 2126 txhdr->vth_mbuf = m; 2127 error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0); 2128 2129 return (error); 2130 2131 fail: 2132 sc->vtnet_stats.tx_defrag_failed++; 2133 m_freem(*m_head); 2134 *m_head = NULL; 2135 2136 return (ENOBUFS); 2137 } 2138 2139 static int 2140 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head) 2141 { 2142 struct vtnet_tx_header *txhdr; 2143 struct virtio_net_hdr *hdr; 2144 struct mbuf *m; 2145 int error; 2146 2147 m = *m_head; 2148 M_ASSERTPKTHDR(m); 2149 2150 txhdr = uma_zalloc(vtnet_tx_header_zone, M_NOWAIT | M_ZERO); 2151 if (txhdr == NULL) { 2152 m_freem(m); 2153 *m_head = NULL; 2154 return (ENOMEM); 2155 } 2156 2157 /* 2158 * Always use the non-mergeable header, regardless if the feature 2159 * was negotiated. For transmit, num_buffers is always zero. The 2160 * vtnet_hdr_size is used to enqueue the correct header size. 
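	 * The mergeable layout only appends a num_buffers field, and the
	 * header was allocated zeroed above, so that field is already 0
	 * when the larger size is enqueued.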
2161 */ 2162 hdr = &txhdr->vth_uhdr.hdr; 2163 2164 if (m->m_flags & M_VLANTAG) { 2165 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); 2166 if ((*m_head = m) == NULL) { 2167 error = ENOBUFS; 2168 goto fail; 2169 } 2170 m->m_flags &= ~M_VLANTAG; 2171 } 2172 2173 if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) { 2174 m = vtnet_txq_offload(txq, m, hdr); 2175 if ((*m_head = m) == NULL) { 2176 error = ENOBUFS; 2177 goto fail; 2178 } 2179 } 2180 2181 error = vtnet_txq_enqueue_buf(txq, m_head, txhdr); 2182 if (error == 0) 2183 return (0); 2184 2185 fail: 2186 uma_zfree(vtnet_tx_header_zone, txhdr); 2187 2188 return (error); 2189 } 2190 2191 #ifdef VTNET_LEGACY_TX 2192 2193 static void 2194 vtnet_start_locked(struct vtnet_txq *txq, struct ifnet *ifp) 2195 { 2196 struct vtnet_softc *sc; 2197 struct virtqueue *vq; 2198 struct mbuf *m0; 2199 int tries, enq; 2200 2201 sc = txq->vtntx_sc; 2202 vq = txq->vtntx_vq; 2203 tries = 0; 2204 2205 VTNET_TXQ_LOCK_ASSERT(txq); 2206 2207 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2208 sc->vtnet_link_active == 0) 2209 return; 2210 2211 vtnet_txq_eof(txq); 2212 2213 again: 2214 enq = 0; 2215 2216 while (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) { 2217 if (virtqueue_full(vq)) 2218 break; 2219 2220 IFQ_DRV_DEQUEUE(&ifp->if_snd, m0); 2221 if (m0 == NULL) 2222 break; 2223 2224 if (vtnet_txq_encap(txq, &m0) != 0) { 2225 if (m0 != NULL) 2226 IFQ_DRV_PREPEND(&ifp->if_snd, m0); 2227 break; 2228 } 2229 2230 enq++; 2231 ETHER_BPF_MTAP(ifp, m0); 2232 } 2233 2234 if (enq > 0 && vtnet_txq_notify(txq) != 0) { 2235 if (tries++ < VTNET_NOTIFY_RETRIES) 2236 goto again; 2237 2238 txq->vtntx_stats.vtxs_rescheduled++; 2239 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); 2240 } 2241 } 2242 2243 static void 2244 vtnet_start(struct ifnet *ifp) 2245 { 2246 struct vtnet_softc *sc; 2247 struct vtnet_txq *txq; 2248 2249 sc = ifp->if_softc; 2250 txq = &sc->vtnet_txqs[0]; 2251 2252 VTNET_TXQ_LOCK(txq); 2253 vtnet_start_locked(txq, ifp); 2254 VTNET_TXQ_UNLOCK(txq); 2255 } 2256 2257 #else /* !VTNET_LEGACY_TX */ 2258 2259 static int 2260 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) 2261 { 2262 struct vtnet_softc *sc; 2263 struct virtqueue *vq; 2264 struct buf_ring *br; 2265 struct ifnet *ifp; 2266 int enq, tries, error; 2267 2268 sc = txq->vtntx_sc; 2269 vq = txq->vtntx_vq; 2270 br = txq->vtntx_br; 2271 ifp = sc->vtnet_ifp; 2272 tries = 0; 2273 error = 0; 2274 2275 VTNET_TXQ_LOCK_ASSERT(txq); 2276 2277 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0 || 2278 sc->vtnet_link_active == 0) { 2279 if (m != NULL) 2280 error = drbr_enqueue(ifp, br, m); 2281 return (error); 2282 } 2283 2284 if (m != NULL) { 2285 error = drbr_enqueue(ifp, br, m); 2286 if (error) 2287 return (error); 2288 } 2289 2290 vtnet_txq_eof(txq); 2291 2292 again: 2293 enq = 0; 2294 2295 while ((m = drbr_peek(ifp, br)) != NULL) { 2296 if (virtqueue_full(vq)) { 2297 drbr_putback(ifp, br, m); 2298 break; 2299 } 2300 2301 if (vtnet_txq_encap(txq, &m) != 0) { 2302 if (m != NULL) 2303 drbr_putback(ifp, br, m); 2304 else 2305 drbr_advance(ifp, br); 2306 break; 2307 } 2308 drbr_advance(ifp, br); 2309 2310 enq++; 2311 ETHER_BPF_MTAP(ifp, m); 2312 } 2313 2314 if (enq > 0 && vtnet_txq_notify(txq) != 0) { 2315 if (tries++ < VTNET_NOTIFY_RETRIES) 2316 goto again; 2317 2318 txq->vtntx_stats.vtxs_rescheduled++; 2319 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); 2320 } 2321 2322 return (0); 2323 } 2324 2325 static int 2326 vtnet_txq_mq_start(struct ifnet *ifp, struct mbuf *m) 2327 { 2328 struct vtnet_softc *sc; 
2329 struct vtnet_txq *txq; 2330 int i, npairs, error; 2331 2332 sc = ifp->if_softc; 2333 npairs = sc->vtnet_act_vq_pairs; 2334 2335 if (m->m_flags & M_FLOWID) 2336 i = m->m_pkthdr.flowid % npairs; 2337 else 2338 i = curcpu % npairs; 2339 2340 txq = &sc->vtnet_txqs[i]; 2341 2342 if (VTNET_TXQ_TRYLOCK(txq) != 0) { 2343 error = vtnet_txq_mq_start_locked(txq, m); 2344 VTNET_TXQ_UNLOCK(txq); 2345 } else { 2346 error = drbr_enqueue(ifp, txq->vtntx_br, m); 2347 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask); 2348 } 2349 2350 return (error); 2351 } 2352 2353 static void 2354 vtnet_txq_tq_deferred(void *xtxq, int pending) 2355 { 2356 struct vtnet_softc *sc; 2357 struct vtnet_txq *txq; 2358 2359 txq = xtxq; 2360 sc = txq->vtntx_sc; 2361 2362 VTNET_TXQ_LOCK(txq); 2363 if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br)) 2364 vtnet_txq_mq_start_locked(txq, NULL); 2365 VTNET_TXQ_UNLOCK(txq); 2366 } 2367 2368 #endif /* VTNET_LEGACY_TX */ 2369 2370 static void 2371 vtnet_txq_start(struct vtnet_txq *txq) 2372 { 2373 struct vtnet_softc *sc; 2374 struct ifnet *ifp; 2375 2376 sc = txq->vtntx_sc; 2377 ifp = sc->vtnet_ifp; 2378 2379 #ifdef VTNET_LEGACY_TX 2380 if (!IFQ_DRV_IS_EMPTY(&ifp->if_snd)) 2381 vtnet_start_locked(txq, ifp); 2382 #else 2383 if (!drbr_empty(ifp, txq->vtntx_br)) 2384 vtnet_txq_mq_start_locked(txq, NULL); 2385 #endif 2386 } 2387 2388 static void 2389 vtnet_txq_tq_intr(void *xtxq, int pending) 2390 { 2391 struct vtnet_softc *sc; 2392 struct vtnet_txq *txq; 2393 struct ifnet *ifp; 2394 2395 txq = xtxq; 2396 sc = txq->vtntx_sc; 2397 ifp = sc->vtnet_ifp; 2398 2399 VTNET_TXQ_LOCK(txq); 2400 2401 if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) { 2402 VTNET_TXQ_UNLOCK(txq); 2403 return; 2404 } 2405 2406 vtnet_txq_eof(txq); 2407 vtnet_txq_start(txq); 2408 2409 VTNET_TXQ_UNLOCK(txq); 2410 } 2411 2412 static int 2413 vtnet_txq_eof(struct vtnet_txq *txq) 2414 { 2415 struct virtqueue *vq; 2416 struct vtnet_tx_header *txhdr; 2417 struct mbuf *m; 2418 int deq; 2419 2420 vq = txq->vtntx_vq; 2421 deq = 0; 2422 VTNET_TXQ_LOCK_ASSERT(txq); 2423 2424 while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { 2425 m = txhdr->vth_mbuf; 2426 deq++; 2427 2428 txq->vtntx_stats.vtxs_opackets++; 2429 txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len; 2430 if (m->m_flags & M_MCAST) 2431 txq->vtntx_stats.vtxs_omcasts++; 2432 2433 m_freem(m); 2434 uma_zfree(vtnet_tx_header_zone, txhdr); 2435 } 2436 2437 if (virtqueue_empty(vq)) 2438 txq->vtntx_watchdog = 0; 2439 2440 return (deq); 2441 } 2442 2443 static void 2444 vtnet_tx_vq_intr(void *xtxq) 2445 { 2446 struct vtnet_softc *sc; 2447 struct vtnet_txq *txq; 2448 struct ifnet *ifp; 2449 2450 txq = xtxq; 2451 sc = txq->vtntx_sc; 2452 ifp = sc->vtnet_ifp; 2453 2454 if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) { 2455 /* 2456 * Ignore this interrupt. Either this is a spurious interrupt 2457 * or multiqueue without per-VQ MSIX so every queue needs to 2458 * be polled (a brain dead configuration we could try harder 2459 * to avoid). 
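		 * Disabling the interrupt below keeps an inactive queue
		 * from interrupting again.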
		 */
		vtnet_txq_disable_intr(txq);
		return;
	}

	VTNET_TXQ_LOCK(txq);

	if ((ifp->if_drv_flags & IFF_DRV_RUNNING) == 0) {
		VTNET_TXQ_UNLOCK(txq);
		return;
	}

	vtnet_txq_eof(txq);
	vtnet_txq_start(txq);

	VTNET_TXQ_UNLOCK(txq);
}

static void
vtnet_tx_start_all(struct vtnet_softc *sc)
{
	struct vtnet_txq *txq;
	int i;

	VTNET_CORE_LOCK_ASSERT(sc);

	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
		txq = &sc->vtnet_txqs[i];

		VTNET_TXQ_LOCK(txq);
		vtnet_txq_start(txq);
		VTNET_TXQ_UNLOCK(txq);
	}
}

#ifndef VTNET_LEGACY_TX
static void
vtnet_qflush(struct ifnet *ifp)
{
	struct vtnet_softc *sc;
	struct vtnet_txq *txq;
	struct mbuf *m;
	int i;

	sc = ifp->if_softc;

	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
		txq = &sc->vtnet_txqs[i];

		VTNET_TXQ_LOCK(txq);
		while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL)
			m_freem(m);
		VTNET_TXQ_UNLOCK(txq);
	}

	if_qflush(ifp);
}
#endif

static int
vtnet_watchdog(struct vtnet_txq *txq)
{
	struct ifnet *ifp;

	ifp = txq->vtntx_sc->vtnet_ifp;

	VTNET_TXQ_LOCK(txq);
	if (txq->vtntx_watchdog == 1) {
		/*
		 * Only drain completed frames if the watchdog is about to
		 * expire. If any frames were drained, there may be enough
		 * free descriptors now available to transmit queued frames.
		 * In that case, the timer will immediately be decremented
		 * below, but the timeout is generous enough that this
		 * should not be a problem.
		 */
		if (vtnet_txq_eof(txq) != 0)
			vtnet_txq_start(txq);
	}

	if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) {
		VTNET_TXQ_UNLOCK(txq);
		return (0);
	}
	VTNET_TXQ_UNLOCK(txq);

	if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id);
	return (1);
}

static void
vtnet_rxq_accum_stats(struct vtnet_rxq *rxq, struct vtnet_rxq_stats *accum)
{
	struct vtnet_rxq_stats *st;

	st = &rxq->vtnrx_stats;

	accum->vrxs_ipackets += st->vrxs_ipackets;
	accum->vrxs_ibytes += st->vrxs_ibytes;
	accum->vrxs_iqdrops += st->vrxs_iqdrops;
	accum->vrxs_ierrors += st->vrxs_ierrors;
	accum->vrxs_csum += st->vrxs_csum;
	accum->vrxs_csum_failed += st->vrxs_csum_failed;
	accum->vrxs_rescheduled += st->vrxs_rescheduled;
}

static void
vtnet_txq_accum_stats(struct vtnet_txq *txq, struct vtnet_txq_stats *accum)
{
	struct vtnet_txq_stats *st;

	st = &txq->vtntx_stats;

	accum->vtxs_opackets += st->vtxs_opackets;
	accum->vtxs_obytes += st->vtxs_obytes;
	accum->vtxs_omcasts += st->vtxs_omcasts;
	accum->vtxs_csum += st->vtxs_csum;
	accum->vtxs_tso += st->vtxs_tso;
	accum->vtxs_rescheduled += st->vtxs_rescheduled;
}

static void
vtnet_accumulate_stats(struct vtnet_softc *sc)
{
	struct ifnet *ifp;
	struct vtnet_statistics *st;
	struct vtnet_rxq_stats rxaccum;
	struct vtnet_txq_stats txaccum;
	int i;

	ifp = sc->vtnet_ifp;
	st = &sc->vtnet_stats;
	bzero(&rxaccum, sizeof(struct vtnet_rxq_stats));
	bzero(&txaccum, sizeof(struct vtnet_txq_stats));

	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
		vtnet_rxq_accum_stats(&sc->vtnet_rxqs[i], &rxaccum);
		vtnet_txq_accum_stats(&sc->vtnet_txqs[i], &txaccum);
	}

	st->rx_csum_offloaded = rxaccum.vrxs_csum;
	st->rx_csum_failed =
rxaccum.vrxs_csum_failed; 2600 st->rx_task_rescheduled = rxaccum.vrxs_rescheduled; 2601 st->tx_csum_offloaded = txaccum.vtxs_csum; 2602 st->tx_tso_offloaded = txaccum.vtxs_tso; 2603 st->tx_task_rescheduled = txaccum.vtxs_rescheduled; 2604 2605 /* 2606 * With the exception of if_ierrors, these ifnet statistics are 2607 * only updated in the driver, so just set them to our accumulated 2608 * values. if_ierrors is updated in ether_input() for malformed 2609 * frames that we should have already discarded. 2610 */ 2611 ifp->if_ipackets = rxaccum.vrxs_ipackets; 2612 ifp->if_iqdrops = rxaccum.vrxs_iqdrops; 2613 ifp->if_ierrors = rxaccum.vrxs_ierrors; 2614 ifp->if_opackets = txaccum.vtxs_opackets; 2615 #ifndef VTNET_LEGACY_TX 2616 ifp->if_obytes = txaccum.vtxs_obytes; 2617 ifp->if_omcasts = txaccum.vtxs_omcasts; 2618 #endif 2619 } 2620 2621 static void 2622 vtnet_tick(void *xsc) 2623 { 2624 struct vtnet_softc *sc; 2625 struct ifnet *ifp; 2626 int i, timedout; 2627 2628 sc = xsc; 2629 ifp = sc->vtnet_ifp; 2630 timedout = 0; 2631 2632 VTNET_CORE_LOCK_ASSERT(sc); 2633 vtnet_accumulate_stats(sc); 2634 2635 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 2636 timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]); 2637 2638 if (timedout != 0) { 2639 ifp->if_drv_flags &= ~IFF_DRV_RUNNING; 2640 vtnet_init_locked(sc); 2641 } else 2642 callout_schedule(&sc->vtnet_tick_ch, hz); 2643 } 2644 2645 static void 2646 vtnet_start_taskqueues(struct vtnet_softc *sc) 2647 { 2648 device_t dev; 2649 struct vtnet_rxq *rxq; 2650 struct vtnet_txq *txq; 2651 int i, error; 2652 2653 dev = sc->vtnet_dev; 2654 2655 /* 2656 * Errors here are very difficult to recover from - we cannot 2657 * easily fail because, if this is during boot, we will hang 2658 * when freeing any successfully started taskqueues because 2659 * the scheduler isn't up yet. 2660 * 2661 * Most drivers just ignore the return value - it only fails 2662 * with ENOMEM so an error is not likely. 
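	 * If it does fail, we only log the error below and keep going.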
	 */
	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
		rxq = &sc->vtnet_rxqs[i];
		error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET,
		    "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id);
		if (error) {
			device_printf(dev, "failed to start rx taskq %d\n",
			    rxq->vtnrx_id);
		}

		txq = &sc->vtnet_txqs[i];
		error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET,
		    "%s txq %d", device_get_nameunit(dev), txq->vtntx_id);
		if (error) {
			device_printf(dev, "failed to start tx taskq %d\n",
			    txq->vtntx_id);
		}
	}
}

static void
vtnet_free_taskqueues(struct vtnet_softc *sc)
{
	struct vtnet_rxq *rxq;
	struct vtnet_txq *txq;
	int i;

	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
		rxq = &sc->vtnet_rxqs[i];
		if (rxq->vtnrx_tq != NULL) {
			taskqueue_free(rxq->vtnrx_tq);
			rxq->vtnrx_tq = NULL;
		}

		txq = &sc->vtnet_txqs[i];
		if (txq->vtntx_tq != NULL) {
			taskqueue_free(txq->vtntx_tq);
			txq->vtntx_tq = NULL;
		}
	}
}

static void
vtnet_drain_taskqueues(struct vtnet_softc *sc)
{
	struct vtnet_rxq *rxq;
	struct vtnet_txq *txq;
	int i;

	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
		rxq = &sc->vtnet_rxqs[i];
		if (rxq->vtnrx_tq != NULL)
			taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask);

		txq = &sc->vtnet_txqs[i];
		if (txq->vtntx_tq != NULL) {
			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask);
#ifndef VTNET_LEGACY_TX
			taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask);
#endif
		}
	}
}

static void
vtnet_drain_rxtx_queues(struct vtnet_softc *sc)
{
	struct vtnet_rxq *rxq;
	struct vtnet_txq *txq;
	int i;

	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
		rxq = &sc->vtnet_rxqs[i];
		vtnet_rxq_free_mbufs(rxq);

		txq = &sc->vtnet_txqs[i];
		vtnet_txq_free_mbufs(txq);
	}
}

static void
vtnet_stop_rendezvous(struct vtnet_softc *sc)
{
	struct vtnet_rxq *rxq;
	struct vtnet_txq *txq;
	int i;

	/*
	 * Lock and unlock the per-queue mutex so we know the stop
	 * state is visible. Doing only the active queues should be
	 * sufficient, but it does not cost much extra to do all the
	 * queues. Note we hold the core mutex here too.
	 */
	for (i = 0; i < sc->vtnet_max_vq_pairs; i++) {
		rxq = &sc->vtnet_rxqs[i];
		VTNET_RXQ_LOCK(rxq);
		VTNET_RXQ_UNLOCK(rxq);

		txq = &sc->vtnet_txqs[i];
		VTNET_TXQ_LOCK(txq);
		VTNET_TXQ_UNLOCK(txq);
	}
}

static void
vtnet_stop(struct vtnet_softc *sc)
{
	device_t dev;
	struct ifnet *ifp;

	dev = sc->vtnet_dev;
	ifp = sc->vtnet_ifp;

	VTNET_CORE_LOCK_ASSERT(sc);

	ifp->if_drv_flags &= ~IFF_DRV_RUNNING;
	sc->vtnet_link_active = 0;
	callout_stop(&sc->vtnet_tick_ch);

	/* Only advisory. */
	vtnet_disable_interrupts(sc);

	/*
	 * Stop the host adapter. This resets it to the pre-initialized
	 * state. It will not generate any interrupts until after it is
	 * reinitialized.
	 */
	virtio_stop(dev);
	vtnet_stop_rendezvous(sc);

	/*
	 * Free any mbufs left in the virtqueues.
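	 * Both the Rx and Tx rings of each active queue pair are emptied.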
	 */
	vtnet_drain_rxtx_queues(sc);
}

static int
vtnet_virtio_reinit(struct vtnet_softc *sc)
{
	device_t dev;
	struct ifnet *ifp;
	uint64_t features;
	int mask, error;

	dev = sc->vtnet_dev;
	ifp = sc->vtnet_ifp;
	features = sc->vtnet_features;

	mask = 0;
#if defined(INET)
	mask |= IFCAP_RXCSUM;
#endif
#if defined(INET6)
	mask |= IFCAP_RXCSUM_IPV6;
#endif

	/*
	 * Re-negotiate with the host, removing any disabled receive
	 * features. Transmit features are disabled only on our side
	 * via if_capenable and if_hwassist.
	 */

	if (ifp->if_capabilities & mask) {
		/*
		 * We require both IPv4 and IPv6 offloading to be enabled
		 * in order to negotiate it: VirtIO does not distinguish
		 * between the two.
		 */
		if ((ifp->if_capenable & mask) != mask)
			features &= ~VIRTIO_NET_F_GUEST_CSUM;
	}

	if (ifp->if_capabilities & IFCAP_LRO) {
		if ((ifp->if_capenable & IFCAP_LRO) == 0)
			features &= ~VTNET_LRO_FEATURES;
	}

	if (ifp->if_capabilities & IFCAP_VLAN_HWFILTER) {
		if ((ifp->if_capenable & IFCAP_VLAN_HWFILTER) == 0)
			features &= ~VIRTIO_NET_F_CTRL_VLAN;
	}

	error = virtio_reinit(dev, features);
	if (error)
		device_printf(dev, "virtio reinit error %d\n", error);

	return (error);
}

static void
vtnet_init_rx_filters(struct vtnet_softc *sc)
{
	struct ifnet *ifp;

	ifp = sc->vtnet_ifp;

	if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) {
		/* Restore promiscuous and all-multicast modes. */
		vtnet_rx_filter(sc);
		/* Restore filtered MAC addresses. */
		vtnet_rx_filter_mac(sc);
	}

	if (ifp->if_capenable & IFCAP_VLAN_HWFILTER)
		vtnet_rx_filter_vlan(sc);
}

static int
vtnet_init_rx_queues(struct vtnet_softc *sc)
{
	device_t dev;
	struct vtnet_rxq *rxq;
	int i, clsize, error;

	dev = sc->vtnet_dev;

	/*
	 * Use the new cluster size if one has been set (via an MTU
	 * change). Otherwise, use the standard 2K clusters.
	 *
	 * BMV: It might make sense to use page sized clusters as
	 * the default (depending on the features negotiated).
	 */
	if (sc->vtnet_rx_new_clsize != 0) {
		clsize = sc->vtnet_rx_new_clsize;
		sc->vtnet_rx_new_clsize = 0;
	} else
		clsize = MCLBYTES;

	sc->vtnet_rx_clsize = clsize;
	sc->vtnet_rx_nmbufs = VTNET_NEEDED_RX_MBUFS(sc, clsize);

	KASSERT(sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS ||
	    sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs,
	    ("%s: too many rx mbufs %d for %d segments", __func__,
	    sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs));

	for (i = 0; i < sc->vtnet_act_vq_pairs; i++) {
		rxq = &sc->vtnet_rxqs[i];

		/*
		 * Hold the lock to satisfy asserts.
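		 * The populate path asserts that the Rx queue lock is owned.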
*/ 2902 VTNET_RXQ_LOCK(rxq); 2903 error = vtnet_rxq_populate(rxq); 2904 VTNET_RXQ_UNLOCK(rxq); 2905 2906 if (error) { 2907 device_printf(dev, 2908 "cannot allocate mbufs for Rx queue %d\n", i); 2909 return (error); 2910 } 2911 } 2912 2913 return (0); 2914 } 2915 2916 static int 2917 vtnet_init_tx_queues(struct vtnet_softc *sc) 2918 { 2919 struct vtnet_txq *txq; 2920 int i; 2921 2922 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2923 txq = &sc->vtnet_txqs[i]; 2924 txq->vtntx_watchdog = 0; 2925 } 2926 2927 return (0); 2928 } 2929 2930 static int 2931 vtnet_init_rxtx_queues(struct vtnet_softc *sc) 2932 { 2933 int error; 2934 2935 error = vtnet_init_rx_queues(sc); 2936 if (error) 2937 return (error); 2938 2939 error = vtnet_init_tx_queues(sc); 2940 if (error) 2941 return (error); 2942 2943 return (0); 2944 } 2945 2946 static void 2947 vtnet_set_active_vq_pairs(struct vtnet_softc *sc) 2948 { 2949 device_t dev; 2950 int npairs; 2951 2952 dev = sc->vtnet_dev; 2953 2954 if ((sc->vtnet_flags & VTNET_FLAG_MULTIQ) == 0) { 2955 MPASS(sc->vtnet_max_vq_pairs == 1); 2956 sc->vtnet_act_vq_pairs = 1; 2957 return; 2958 } 2959 2960 /* BMV: Just use the maximum configured for now. */ 2961 npairs = sc->vtnet_max_vq_pairs; 2962 2963 if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { 2964 device_printf(dev, 2965 "cannot set active queue pairs to %d\n", npairs); 2966 npairs = 1; 2967 } 2968 2969 sc->vtnet_act_vq_pairs = npairs; 2970 } 2971 2972 static int 2973 vtnet_reinit(struct vtnet_softc *sc) 2974 { 2975 struct ifnet *ifp; 2976 int error; 2977 2978 ifp = sc->vtnet_ifp; 2979 2980 /* Use the current MAC address. */ 2981 bcopy(IF_LLADDR(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); 2982 vtnet_set_hwaddr(sc); 2983 2984 vtnet_set_active_vq_pairs(sc); 2985 2986 ifp->if_hwassist = 0; 2987 if (ifp->if_capenable & IFCAP_TXCSUM) 2988 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD; 2989 if (ifp->if_capenable & IFCAP_TXCSUM_IPV6) 2990 ifp->if_hwassist |= VTNET_CSUM_OFFLOAD_IPV6; 2991 if (ifp->if_capenable & IFCAP_TSO4) 2992 ifp->if_hwassist |= CSUM_TSO; 2993 if (ifp->if_capenable & IFCAP_TSO6) 2994 ifp->if_hwassist |= CSUM_TSO; /* No CSUM_TSO_IPV6. */ 2995 2996 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) 2997 vtnet_init_rx_filters(sc); 2998 2999 error = vtnet_init_rxtx_queues(sc); 3000 if (error) 3001 return (error); 3002 3003 vtnet_enable_interrupts(sc); 3004 ifp->if_drv_flags |= IFF_DRV_RUNNING; 3005 3006 return (0); 3007 } 3008 3009 static void 3010 vtnet_init_locked(struct vtnet_softc *sc) 3011 { 3012 device_t dev; 3013 struct ifnet *ifp; 3014 3015 dev = sc->vtnet_dev; 3016 ifp = sc->vtnet_ifp; 3017 3018 VTNET_CORE_LOCK_ASSERT(sc); 3019 3020 if (ifp->if_drv_flags & IFF_DRV_RUNNING) 3021 return; 3022 3023 vtnet_stop(sc); 3024 3025 /* Reinitialize with the host. */ 3026 if (vtnet_virtio_reinit(sc) != 0) 3027 goto fail; 3028 3029 if (vtnet_reinit(sc) != 0) 3030 goto fail; 3031 3032 virtio_reinit_complete(dev); 3033 3034 vtnet_update_link_status(sc); 3035 callout_reset(&sc->vtnet_tick_ch, hz, vtnet_tick, sc); 3036 3037 return; 3038 3039 fail: 3040 vtnet_stop(sc); 3041 } 3042 3043 static void 3044 vtnet_init(void *xsc) 3045 { 3046 struct vtnet_softc *sc; 3047 3048 sc = xsc; 3049 3050 VTNET_CORE_LOCK(sc); 3051 vtnet_init_locked(sc); 3052 VTNET_CORE_UNLOCK(sc); 3053 } 3054 3055 static void 3056 vtnet_free_ctrl_vq(struct vtnet_softc *sc) 3057 { 3058 struct virtqueue *vq; 3059 3060 vq = sc->vtnet_ctrl_vq; 3061 3062 /* 3063 * The control virtqueue is only polled and therefore it should 3064 * already be empty. 
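	 * Nothing needs to be drained or freed here; just assert that no
	 * descriptors are left outstanding.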
3065 */ 3066 KASSERT(virtqueue_empty(vq), 3067 ("%s: ctrl vq %p not empty", __func__, vq)); 3068 } 3069 3070 static void 3071 vtnet_exec_ctrl_cmd(struct vtnet_softc *sc, void *cookie, 3072 struct sglist *sg, int readable, int writable) 3073 { 3074 struct virtqueue *vq; 3075 3076 vq = sc->vtnet_ctrl_vq; 3077 3078 VTNET_CORE_LOCK_ASSERT(sc); 3079 KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_VQ, 3080 ("%s: CTRL_VQ feature not negotiated", __func__)); 3081 3082 if (!virtqueue_empty(vq)) 3083 return; 3084 if (virtqueue_enqueue(vq, cookie, sg, readable, writable) != 0) 3085 return; 3086 3087 /* 3088 * Poll for the response, but the command is likely already 3089 * done when we return from the notify. 3090 */ 3091 virtqueue_notify(vq); 3092 virtqueue_poll(vq, NULL); 3093 } 3094 3095 static int 3096 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) 3097 { 3098 struct virtio_net_ctrl_hdr hdr __aligned(2); 3099 struct sglist_seg segs[3]; 3100 struct sglist sg; 3101 uint8_t ack; 3102 int error; 3103 3104 hdr.class = VIRTIO_NET_CTRL_MAC; 3105 hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; 3106 ack = VIRTIO_NET_ERR; 3107 3108 sglist_init(&sg, 3, segs); 3109 error = 0; 3110 error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); 3111 error |= sglist_append(&sg, hwaddr, ETHER_ADDR_LEN); 3112 error |= sglist_append(&sg, &ack, sizeof(uint8_t)); 3113 KASSERT(error == 0 && sg.sg_nseg == 3, 3114 ("%s: error %d adding set MAC msg to sglist", __func__, error)); 3115 3116 vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); 3117 3118 return (ack == VIRTIO_NET_OK ? 0 : EIO); 3119 } 3120 3121 static int 3122 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs) 3123 { 3124 struct sglist_seg segs[3]; 3125 struct sglist sg; 3126 struct { 3127 struct virtio_net_ctrl_hdr hdr; 3128 uint8_t pad1; 3129 struct virtio_net_ctrl_mq mq; 3130 uint8_t pad2; 3131 uint8_t ack; 3132 } s __aligned(2); 3133 int error; 3134 3135 s.hdr.class = VIRTIO_NET_CTRL_MQ; 3136 s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; 3137 s.mq.virtqueue_pairs = npairs; 3138 s.ack = VIRTIO_NET_ERR; 3139 3140 sglist_init(&sg, 3, segs); 3141 error = 0; 3142 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3143 error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); 3144 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3145 KASSERT(error == 0 && sg.sg_nseg == 3, 3146 ("%s: error %d adding MQ message to sglist", __func__, error)); 3147 3148 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3149 3150 return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); 3151 } 3152 3153 static int 3154 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, int cmd, int on) 3155 { 3156 struct sglist_seg segs[3]; 3157 struct sglist sg; 3158 struct { 3159 struct virtio_net_ctrl_hdr hdr; 3160 uint8_t pad1; 3161 uint8_t onoff; 3162 uint8_t pad2; 3163 uint8_t ack; 3164 } s __aligned(2); 3165 int error; 3166 3167 KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, 3168 ("%s: CTRL_RX feature not negotiated", __func__)); 3169 3170 s.hdr.class = VIRTIO_NET_CTRL_RX; 3171 s.hdr.cmd = cmd; 3172 s.onoff = !!on; 3173 s.ack = VIRTIO_NET_ERR; 3174 3175 sglist_init(&sg, 3, segs); 3176 error = 0; 3177 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3178 error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); 3179 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3180 KASSERT(error == 0 && sg.sg_nseg == 3, 3181 ("%s: error %d adding Rx message to sglist", __func__, error)); 3182 3183 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3184 3185 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3186 } 3187 3188 static int 3189 vtnet_set_promisc(struct vtnet_softc *sc, int on) 3190 { 3191 3192 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on)); 3193 } 3194 3195 static int 3196 vtnet_set_allmulti(struct vtnet_softc *sc, int on) 3197 { 3198 3199 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); 3200 } 3201 3202 /* 3203 * The device defaults to promiscuous mode for backwards compatibility. 3204 * Turn it off at attach time if possible. 3205 */ 3206 static void 3207 vtnet_attach_disable_promisc(struct vtnet_softc *sc) 3208 { 3209 struct ifnet *ifp; 3210 3211 ifp = sc->vtnet_ifp; 3212 3213 VTNET_CORE_LOCK(sc); 3214 if ((sc->vtnet_flags & VTNET_FLAG_CTRL_RX) == 0) { 3215 ifp->if_flags |= IFF_PROMISC; 3216 } else if (vtnet_set_promisc(sc, 0) != 0) { 3217 ifp->if_flags |= IFF_PROMISC; 3218 device_printf(sc->vtnet_dev, 3219 "cannot disable default promiscuous mode\n"); 3220 } 3221 VTNET_CORE_UNLOCK(sc); 3222 } 3223 3224 static void 3225 vtnet_rx_filter(struct vtnet_softc *sc) 3226 { 3227 device_t dev; 3228 struct ifnet *ifp; 3229 3230 dev = sc->vtnet_dev; 3231 ifp = sc->vtnet_ifp; 3232 3233 VTNET_CORE_LOCK_ASSERT(sc); 3234 3235 if (vtnet_set_promisc(sc, ifp->if_flags & IFF_PROMISC) != 0) 3236 device_printf(dev, "cannot %s promiscuous mode\n", 3237 ifp->if_flags & IFF_PROMISC ? "enable" : "disable"); 3238 3239 if (vtnet_set_allmulti(sc, ifp->if_flags & IFF_ALLMULTI) != 0) 3240 device_printf(dev, "cannot %s all-multicast mode\n", 3241 ifp->if_flags & IFF_ALLMULTI ? 
"enable" : "disable"); 3242 } 3243 3244 static void 3245 vtnet_rx_filter_mac(struct vtnet_softc *sc) 3246 { 3247 struct virtio_net_ctrl_hdr hdr __aligned(2); 3248 struct vtnet_mac_filter *filter; 3249 struct sglist_seg segs[4]; 3250 struct sglist sg; 3251 struct ifnet *ifp; 3252 struct ifaddr *ifa; 3253 struct ifmultiaddr *ifma; 3254 int ucnt, mcnt, promisc, allmulti, error; 3255 uint8_t ack; 3256 3257 ifp = sc->vtnet_ifp; 3258 filter = sc->vtnet_mac_filter; 3259 ucnt = 0; 3260 mcnt = 0; 3261 promisc = 0; 3262 allmulti = 0; 3263 3264 VTNET_CORE_LOCK_ASSERT(sc); 3265 KASSERT(sc->vtnet_flags & VTNET_FLAG_CTRL_RX, 3266 ("%s: CTRL_RX feature not negotiated", __func__)); 3267 3268 /* Unicast MAC addresses: */ 3269 if_addr_rlock(ifp); 3270 TAILQ_FOREACH(ifa, &ifp->if_addrhead, ifa_link) { 3271 if (ifa->ifa_addr->sa_family != AF_LINK) 3272 continue; 3273 else if (memcmp(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), 3274 sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0) 3275 continue; 3276 else if (ucnt == VTNET_MAX_MAC_ENTRIES) { 3277 promisc = 1; 3278 break; 3279 } 3280 3281 bcopy(LLADDR((struct sockaddr_dl *)ifa->ifa_addr), 3282 &filter->vmf_unicast.macs[ucnt], ETHER_ADDR_LEN); 3283 ucnt++; 3284 } 3285 if_addr_runlock(ifp); 3286 3287 if (promisc != 0) { 3288 filter->vmf_unicast.nentries = 0; 3289 if_printf(ifp, "more than %d MAC addresses assigned, " 3290 "falling back to promiscuous mode\n", 3291 VTNET_MAX_MAC_ENTRIES); 3292 } else 3293 filter->vmf_unicast.nentries = ucnt; 3294 3295 /* Multicast MAC addresses: */ 3296 if_maddr_rlock(ifp); 3297 TAILQ_FOREACH(ifma, &ifp->if_multiaddrs, ifma_link) { 3298 if (ifma->ifma_addr->sa_family != AF_LINK) 3299 continue; 3300 else if (mcnt == VTNET_MAX_MAC_ENTRIES) { 3301 allmulti = 1; 3302 break; 3303 } 3304 3305 bcopy(LLADDR((struct sockaddr_dl *)ifma->ifma_addr), 3306 &filter->vmf_multicast.macs[mcnt], ETHER_ADDR_LEN); 3307 mcnt++; 3308 } 3309 if_maddr_runlock(ifp); 3310 3311 if (allmulti != 0) { 3312 filter->vmf_multicast.nentries = 0; 3313 if_printf(ifp, "more than %d multicast MAC addresses " 3314 "assigned, falling back to all-multicast mode\n", 3315 VTNET_MAX_MAC_ENTRIES); 3316 } else 3317 filter->vmf_multicast.nentries = mcnt; 3318 3319 if (promisc != 0 && allmulti != 0) 3320 goto out; 3321 3322 hdr.class = VIRTIO_NET_CTRL_MAC; 3323 hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; 3324 ack = VIRTIO_NET_ERR; 3325 3326 sglist_init(&sg, 4, segs); 3327 error = 0; 3328 error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); 3329 error |= sglist_append(&sg, &filter->vmf_unicast, 3330 sizeof(uint32_t) + filter->vmf_unicast.nentries * ETHER_ADDR_LEN); 3331 error |= sglist_append(&sg, &filter->vmf_multicast, 3332 sizeof(uint32_t) + filter->vmf_multicast.nentries * ETHER_ADDR_LEN); 3333 error |= sglist_append(&sg, &ack, sizeof(uint8_t)); 3334 KASSERT(error == 0 && sg.sg_nseg == 4, 3335 ("%s: error %d adding MAC filter msg to sglist", __func__, error)); 3336 3337 vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); 3338 3339 if (ack != VIRTIO_NET_OK) 3340 if_printf(ifp, "error setting host MAC filter table\n"); 3341 3342 out: 3343 if (promisc != 0 && vtnet_set_promisc(sc, 1) != 0) 3344 if_printf(ifp, "cannot enable promiscuous mode\n"); 3345 if (allmulti != 0 && vtnet_set_allmulti(sc, 1) != 0) 3346 if_printf(ifp, "cannot enable all-multicast mode\n"); 3347 } 3348 3349 static int 3350 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) 3351 { 3352 struct sglist_seg segs[3]; 3353 struct sglist sg; 3354 struct { 3355 struct virtio_net_ctrl_hdr 
hdr; 3356 uint8_t pad1; 3357 uint16_t tag; 3358 uint8_t pad2; 3359 uint8_t ack; 3360 } s __aligned(2); 3361 int error; 3362 3363 s.hdr.class = VIRTIO_NET_CTRL_VLAN; 3364 s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; 3365 s.tag = tag; 3366 s.ack = VIRTIO_NET_ERR; 3367 3368 sglist_init(&sg, 3, segs); 3369 error = 0; 3370 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3371 error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); 3372 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3373 KASSERT(error == 0 && sg.sg_nseg == 3, 3374 ("%s: error %d adding VLAN message to sglist", __func__, error)); 3375 3376 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3377 3378 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3379 } 3380 3381 static void 3382 vtnet_rx_filter_vlan(struct vtnet_softc *sc) 3383 { 3384 uint32_t w; 3385 uint16_t tag; 3386 int i, bit; 3387 3388 VTNET_CORE_LOCK_ASSERT(sc); 3389 KASSERT(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER, 3390 ("%s: VLAN_FILTER feature not negotiated", __func__)); 3391 3392 /* Enable the filter for each configured VLAN. */ 3393 for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { 3394 w = sc->vtnet_vlan_filter[i]; 3395 3396 while ((bit = ffs(w) - 1) != -1) { 3397 w &= ~(1 << bit); 3398 tag = sizeof(w) * CHAR_BIT * i + bit; 3399 3400 if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) { 3401 device_printf(sc->vtnet_dev, 3402 "cannot enable VLAN %d filter\n", tag); 3403 } 3404 } 3405 } 3406 } 3407 3408 static void 3409 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) 3410 { 3411 struct ifnet *ifp; 3412 int idx, bit; 3413 3414 ifp = sc->vtnet_ifp; 3415 idx = (tag >> 5) & 0x7F; 3416 bit = tag & 0x1F; 3417 3418 if (tag == 0 || tag > 4095) 3419 return; 3420 3421 VTNET_CORE_LOCK(sc); 3422 3423 if (add) 3424 sc->vtnet_vlan_filter[idx] |= (1 << bit); 3425 else 3426 sc->vtnet_vlan_filter[idx] &= ~(1 << bit); 3427 3428 if (ifp->if_capenable & IFCAP_VLAN_HWFILTER && 3429 vtnet_exec_vlan_filter(sc, add, tag) != 0) { 3430 device_printf(sc->vtnet_dev, 3431 "cannot %s VLAN %d %s the host filter table\n", 3432 add ? "add" : "remove", tag, add ? "to" : "from"); 3433 } 3434 3435 VTNET_CORE_UNLOCK(sc); 3436 } 3437 3438 static void 3439 vtnet_register_vlan(void *arg, struct ifnet *ifp, uint16_t tag) 3440 { 3441 3442 if (ifp->if_softc != arg) 3443 return; 3444 3445 vtnet_update_vlan_filter(arg, 1, tag); 3446 } 3447 3448 static void 3449 vtnet_unregister_vlan(void *arg, struct ifnet *ifp, uint16_t tag) 3450 { 3451 3452 if (ifp->if_softc != arg) 3453 return; 3454 3455 vtnet_update_vlan_filter(arg, 0, tag); 3456 } 3457 3458 static int 3459 vtnet_is_link_up(struct vtnet_softc *sc) 3460 { 3461 device_t dev; 3462 struct ifnet *ifp; 3463 uint16_t status; 3464 3465 dev = sc->vtnet_dev; 3466 ifp = sc->vtnet_ifp; 3467 3468 if ((ifp->if_capabilities & IFCAP_LINKSTATE) == 0) 3469 status = VIRTIO_NET_S_LINK_UP; 3470 else 3471 status = virtio_read_dev_config_2(dev, 3472 offsetof(struct virtio_net_config, status)); 3473 3474 return ((status & VIRTIO_NET_S_LINK_UP) != 0); 3475 } 3476 3477 static void 3478 vtnet_update_link_status(struct vtnet_softc *sc) 3479 { 3480 struct ifnet *ifp; 3481 int link; 3482 3483 ifp = sc->vtnet_ifp; 3484 3485 VTNET_CORE_LOCK_ASSERT(sc); 3486 link = vtnet_is_link_up(sc); 3487 3488 /* Notify if the link status has changed. 
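Only actual transitions are reported; seeing the same state again is a no-op.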
*/ 3489 if (link != 0 && sc->vtnet_link_active == 0) { 3490 sc->vtnet_link_active = 1; 3491 if_link_state_change(ifp, LINK_STATE_UP); 3492 } else if (link == 0 && sc->vtnet_link_active != 0) { 3493 sc->vtnet_link_active = 0; 3494 if_link_state_change(ifp, LINK_STATE_DOWN); 3495 } 3496 } 3497 3498 static int 3499 vtnet_ifmedia_upd(struct ifnet *ifp) 3500 { 3501 struct vtnet_softc *sc; 3502 struct ifmedia *ifm; 3503 3504 sc = ifp->if_softc; 3505 ifm = &sc->vtnet_media; 3506 3507 if (IFM_TYPE(ifm->ifm_media) != IFM_ETHER) 3508 return (EINVAL); 3509 3510 return (0); 3511 } 3512 3513 static void 3514 vtnet_ifmedia_sts(struct ifnet *ifp, struct ifmediareq *ifmr) 3515 { 3516 struct vtnet_softc *sc; 3517 3518 sc = ifp->if_softc; 3519 3520 ifmr->ifm_status = IFM_AVALID; 3521 ifmr->ifm_active = IFM_ETHER; 3522 3523 VTNET_CORE_LOCK(sc); 3524 if (vtnet_is_link_up(sc) != 0) { 3525 ifmr->ifm_status |= IFM_ACTIVE; 3526 ifmr->ifm_active |= VTNET_MEDIATYPE; 3527 } else 3528 ifmr->ifm_active |= IFM_NONE; 3529 VTNET_CORE_UNLOCK(sc); 3530 } 3531 3532 static void 3533 vtnet_set_hwaddr(struct vtnet_softc *sc) 3534 { 3535 device_t dev; 3536 int i; 3537 3538 dev = sc->vtnet_dev; 3539 3540 if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { 3541 if (vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr) != 0) 3542 device_printf(dev, "unable to set MAC address\n"); 3543 } else if (sc->vtnet_flags & VTNET_FLAG_MAC) { 3544 for (i = 0; i < ETHER_ADDR_LEN; i++) { 3545 virtio_write_dev_config_1(dev, 3546 offsetof(struct virtio_net_config, mac) + i, 3547 sc->vtnet_hwaddr[i]); 3548 } 3549 } 3550 } 3551 3552 static void 3553 vtnet_get_hwaddr(struct vtnet_softc *sc) 3554 { 3555 device_t dev; 3556 int i; 3557 3558 dev = sc->vtnet_dev; 3559 3560 if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) { 3561 /* 3562 * Generate a random locally administered unicast address. 3563 * 3564 * It would be nice to generate the same MAC address across 3565 * reboots, but it seems all the hosts currently available 3566 * support the MAC feature, so this isn't too important. 3567 */ 3568 sc->vtnet_hwaddr[0] = 0xB2; 3569 arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); 3570 vtnet_set_hwaddr(sc); 3571 return; 3572 } 3573 3574 for (i = 0; i < ETHER_ADDR_LEN; i++) { 3575 sc->vtnet_hwaddr[i] = virtio_read_dev_config_1(dev, 3576 offsetof(struct virtio_net_config, mac) + i); 3577 } 3578 } 3579 3580 static void 3581 vtnet_vlan_tag_remove(struct mbuf *m) 3582 { 3583 struct ether_vlan_header *evh; 3584 3585 evh = mtod(m, struct ether_vlan_header *); 3586 m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); 3587 m->m_flags |= M_VLANTAG; 3588 3589 /* Strip the 802.1Q header. */ 3590 bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, 3591 ETHER_HDR_LEN - ETHER_TYPE_LEN); 3592 m_adj(m, ETHER_VLAN_ENCAP_LEN); 3593 } 3594 3595 static void 3596 vtnet_set_rx_process_limit(struct vtnet_softc *sc) 3597 { 3598 int limit; 3599 3600 limit = vtnet_tunable_int(sc, "rx_process_limit", 3601 vtnet_rx_process_limit); 3602 if (limit < 0) 3603 limit = INT_MAX; 3604 sc->vtnet_rx_process_limit = limit; 3605 } 3606 3607 static void 3608 vtnet_set_tx_intr_threshold(struct vtnet_softc *sc) 3609 { 3610 device_t dev; 3611 int size, thresh; 3612 3613 dev = sc->vtnet_dev; 3614 size = virtqueue_size(sc->vtnet_txqs[0].vtntx_vq); 3615 3616 /* 3617 * The Tx interrupt is disabled until the queue free count falls 3618 * below our threshold. 
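The comparison is implemented by vtnet_txq_below_threshold() and consulted from vtnet_txq_enable_intr().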
Completed frames are drained from the Tx 3619 * virtqueue before transmitting new frames and in the watchdog 3620 * callout, so the frequency of Tx interrupts is greatly reduced, 3621 * at the cost of not freeing mbufs as quickly as they otherwise 3622 * would be. 3623 * 3624 * N.B. We assume all the Tx queues are the same size. 3625 */ 3626 thresh = size / 4; 3627 3628 /* 3629 * Without indirect descriptors, leave enough room for the most 3630 * segments we handle. 3631 */ 3632 if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC) == 0 && 3633 thresh < sc->vtnet_tx_nsegs) 3634 thresh = sc->vtnet_tx_nsegs; 3635 3636 sc->vtnet_tx_intr_thresh = thresh; 3637 } 3638 3639 static void 3640 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, 3641 struct sysctl_oid_list *child, struct vtnet_rxq *rxq) 3642 { 3643 struct sysctl_oid *node; 3644 struct sysctl_oid_list *list; 3645 struct vtnet_rxq_stats *stats; 3646 char namebuf[16]; 3647 3648 snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id); 3649 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 3650 CTLFLAG_RD, NULL, "Receive Queue"); 3651 list = SYSCTL_CHILDREN(node); 3652 3653 stats = &rxq->vtnrx_stats; 3654 3655 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", CTLFLAG_RD, 3656 &stats->vrxs_ipackets, "Receive packets"); 3657 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", CTLFLAG_RD, 3658 &stats->vrxs_ibytes, "Receive bytes"); 3659 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", CTLFLAG_RD, 3660 &stats->vrxs_iqdrops, "Receive drops"); 3661 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", CTLFLAG_RD, 3662 &stats->vrxs_ierrors, "Receive errors"); 3663 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, 3664 &stats->vrxs_csum, "Receive checksum offloaded"); 3665 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", CTLFLAG_RD, 3666 &stats->vrxs_csum_failed, "Receive checksum offload failed"); 3667 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, 3668 &stats->vrxs_rescheduled, 3669 "Receive interrupt handler rescheduled"); 3670 } 3671 3672 static void 3673 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx, 3674 struct sysctl_oid_list *child, struct vtnet_txq *txq) 3675 { 3676 struct sysctl_oid *node; 3677 struct sysctl_oid_list *list; 3678 struct vtnet_txq_stats *stats; 3679 char namebuf[16]; 3680 3681 snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id); 3682 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 3683 CTLFLAG_RD, NULL, "Transmit Queue"); 3684 list = SYSCTL_CHILDREN(node); 3685 3686 stats = &txq->vtntx_stats; 3687 3688 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", CTLFLAG_RD, 3689 &stats->vtxs_opackets, "Transmit packets"); 3690 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", CTLFLAG_RD, 3691 &stats->vtxs_obytes, "Transmit bytes"); 3692 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", CTLFLAG_RD, 3693 &stats->vtxs_omcasts, "Transmit multicasts"); 3694 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", CTLFLAG_RD, 3695 &stats->vtxs_csum, "Transmit checksum offloaded"); 3696 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", CTLFLAG_RD, 3697 &stats->vtxs_tso, "Transmit segmentation offloaded"); 3698 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", CTLFLAG_RD, 3699 &stats->vtxs_rescheduled, 3700 "Transmit interrupt handler rescheduled"); 3701 } 3702 3703 static void 3704 vtnet_setup_queue_sysctl(struct vtnet_softc *sc) 3705 { 3706 device_t dev; 3707 struct sysctl_ctx_list *ctx; 3708 struct sysctl_oid *tree; 3709 struct sysctl_oid_list *child; 3710 int i; 3711 3712 dev = sc->vtnet_dev; 3713 ctx = 
device_get_sysctl_ctx(dev); 3714 tree = device_get_sysctl_tree(dev); 3715 child = SYSCTL_CHILDREN(tree); 3716 3717 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 3718 vtnet_setup_rxq_sysctl(ctx, child, &sc->vtnet_rxqs[i]); 3719 vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); 3720 } 3721 } 3722 3723 static void 3724 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, 3725 struct sysctl_oid_list *child, struct vtnet_softc *sc) 3726 { 3727 struct vtnet_statistics *stats; 3728 3729 stats = &sc->vtnet_stats; 3730 3731 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed", 3732 CTLFLAG_RD, &stats->mbuf_alloc_failed, 3733 "Mbuf cluster allocation failures"); 3734 3735 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large", 3736 CTLFLAG_RD, &stats->rx_frame_too_large, 3737 "Received frame larger than the mbuf chain"); 3738 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed", 3739 CTLFLAG_RD, &stats->rx_enq_replacement_failed, 3740 "Enqueuing the replacement receive mbuf failed"); 3741 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed", 3742 CTLFLAG_RD, &stats->rx_mergeable_failed, 3743 "Mergeable buffers receive failures"); 3744 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", 3745 CTLFLAG_RD, &stats->rx_csum_bad_ethtype, 3746 "Received checksum offloaded buffer with unsupported " 3747 "Ethernet type"); 3748 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", 3749 CTLFLAG_RD, &stats->rx_csum_bad_ipproto, 3750 "Received checksum offloaded buffer with incorrect IP protocol"); 3751 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_offset", 3752 CTLFLAG_RD, &stats->rx_csum_bad_offset, 3753 "Received checksum offloaded buffer with incorrect offset"); 3754 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_proto", 3755 CTLFLAG_RD, &stats->rx_csum_bad_proto, 3756 "Received checksum offloaded buffer with incorrect protocol"); 3757 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_failed", 3758 CTLFLAG_RD, &stats->rx_csum_failed, 3759 "Received buffer checksum offload failed"); 3760 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_offloaded", 3761 CTLFLAG_RD, &stats->rx_csum_offloaded, 3762 "Received buffer checksum offload succeeded"); 3763 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_task_rescheduled", 3764 CTLFLAG_RD, &stats->rx_task_rescheduled, 3765 "Times the receive interrupt task rescheduled itself"); 3766 3767 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_bad_ethtype", 3768 CTLFLAG_RD, &stats->tx_csum_bad_ethtype, 3769 "Aborted transmit of checksum offloaded buffer with unknown " 3770 "Ethernet type"); 3771 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_bad_ethtype", 3772 CTLFLAG_RD, &stats->tx_tso_bad_ethtype, 3773 "Aborted transmit of TSO buffer with unknown Ethernet type"); 3774 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp", 3775 CTLFLAG_RD, &stats->tx_tso_not_tcp, 3776 "Aborted transmit of TSO buffer with non TCP protocol"); 3777 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged", 3778 CTLFLAG_RD, &stats->tx_defragged, 3779 "Transmit mbufs defragged"); 3780 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed", 3781 CTLFLAG_RD, &stats->tx_defrag_failed, 3782 "Aborted transmit of buffer because defrag failed"); 3783 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_offloaded", 3784 CTLFLAG_RD, &stats->tx_csum_offloaded, 3785 "Offloaded checksum of transmitted buffer"); 3786 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_offloaded", 3787 CTLFLAG_RD, &stats->tx_tso_offloaded, 3788 "Segmentation offload of 
transmitted buffer"); 3789 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_task_rescheduled", 3790 CTLFLAG_RD, &stats->tx_task_rescheduled, 3791 "Times the transmit interrupt task rescheduled itself"); 3792 } 3793 3794 static void 3795 vtnet_setup_sysctl(struct vtnet_softc *sc) 3796 { 3797 device_t dev; 3798 struct sysctl_ctx_list *ctx; 3799 struct sysctl_oid *tree; 3800 struct sysctl_oid_list *child; 3801 3802 dev = sc->vtnet_dev; 3803 ctx = device_get_sysctl_ctx(dev); 3804 tree = device_get_sysctl_tree(dev); 3805 child = SYSCTL_CHILDREN(tree); 3806 3807 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", 3808 CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, 3809 "Maximum number of supported virtqueue pairs"); 3810 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", 3811 CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0, 3812 "Number of active virtqueue pairs"); 3813 3814 vtnet_setup_stat_sysctl(ctx, child, sc); 3815 } 3816 3817 static int 3818 vtnet_rxq_enable_intr(struct vtnet_rxq *rxq) 3819 { 3820 3821 return (virtqueue_enable_intr(rxq->vtnrx_vq)); 3822 } 3823 3824 static void 3825 vtnet_rxq_disable_intr(struct vtnet_rxq *rxq) 3826 { 3827 3828 virtqueue_disable_intr(rxq->vtnrx_vq); 3829 } 3830 3831 static int 3832 vtnet_txq_enable_intr(struct vtnet_txq *txq) 3833 { 3834 struct virtqueue *vq; 3835 3836 vq = txq->vtntx_vq; 3837 3838 if (vtnet_txq_below_threshold(txq) != 0) 3839 return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG)); 3840 3841 /* 3842 * The free count is above our threshold. Keep the Tx interrupt 3843 * disabled until the queue is fuller. 3844 */ 3845 return (0); 3846 } 3847 3848 static void 3849 vtnet_txq_disable_intr(struct vtnet_txq *txq) 3850 { 3851 3852 virtqueue_disable_intr(txq->vtntx_vq); 3853 } 3854 3855 static void 3856 vtnet_enable_rx_interrupts(struct vtnet_softc *sc) 3857 { 3858 int i; 3859 3860 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 3861 vtnet_rxq_enable_intr(&sc->vtnet_rxqs[i]); 3862 } 3863 3864 static void 3865 vtnet_enable_tx_interrupts(struct vtnet_softc *sc) 3866 { 3867 int i; 3868 3869 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 3870 vtnet_txq_enable_intr(&sc->vtnet_txqs[i]); 3871 } 3872 3873 static void 3874 vtnet_enable_interrupts(struct vtnet_softc *sc) 3875 { 3876 3877 vtnet_enable_rx_interrupts(sc); 3878 vtnet_enable_tx_interrupts(sc); 3879 } 3880 3881 static void 3882 vtnet_disable_rx_interrupts(struct vtnet_softc *sc) 3883 { 3884 int i; 3885 3886 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 3887 vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); 3888 } 3889 3890 static void 3891 vtnet_disable_tx_interrupts(struct vtnet_softc *sc) 3892 { 3893 int i; 3894 3895 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 3896 vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); 3897 } 3898 3899 static void 3900 vtnet_disable_interrupts(struct vtnet_softc *sc) 3901 { 3902 3903 vtnet_disable_rx_interrupts(sc); 3904 vtnet_disable_tx_interrupts(sc); 3905 } 3906 3907 static int 3908 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) 3909 { 3910 char path[64]; 3911 3912 snprintf(path, sizeof(path), 3913 "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob); 3914 TUNABLE_INT_FETCH(path, &def); 3915 3916 return (def); 3917 } 3918