1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause 3 * 4 * Copyright (c) 2011, Bryan Venteicher <bryanv@FreeBSD.org> 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice unmodified, this list of conditions, and the following 12 * disclaimer. 13 * 2. Redistributions in binary form must reproduce the above copyright 14 * notice, this list of conditions and the following disclaimer in the 15 * documentation and/or other materials provided with the distribution. 16 * 17 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR 18 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES 19 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 20 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, 21 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT 22 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 23 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 24 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 25 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF 26 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 27 */ 28 29 /* Driver for VirtIO network devices. 
*/ 30 31 #include "opt_inet.h" 32 #include "opt_inet6.h" 33 34 #include <sys/param.h> 35 #include <sys/eventhandler.h> 36 #include <sys/systm.h> 37 #include <sys/kernel.h> 38 #include <sys/sockio.h> 39 #include <sys/malloc.h> 40 #include <sys/mbuf.h> 41 #include <sys/module.h> 42 #include <sys/msan.h> 43 #include <sys/sbuf.h> 44 #include <sys/socket.h> 45 #include <sys/sysctl.h> 46 #include <sys/random.h> 47 #include <sys/sglist.h> 48 #include <sys/lock.h> 49 #include <sys/mutex.h> 50 #include <sys/taskqueue.h> 51 #include <sys/smp.h> 52 #include <machine/smp.h> 53 54 #include <vm/uma.h> 55 56 #include <net/debugnet.h> 57 #include <net/ethernet.h> 58 #include <net/pfil.h> 59 #include <net/if.h> 60 #include <net/if_var.h> 61 #include <net/if_arp.h> 62 #include <net/if_dl.h> 63 #include <net/if_types.h> 64 #include <net/if_media.h> 65 #include <net/if_vlan_var.h> 66 67 #include <net/bpf.h> 68 69 #include <netinet/in_systm.h> 70 #include <netinet/in.h> 71 #include <netinet/ip.h> 72 #include <netinet/ip6.h> 73 #include <netinet6/ip6_var.h> 74 #include <netinet/udp.h> 75 #include <netinet/tcp.h> 76 #include <netinet/tcp_lro.h> 77 78 #include <machine/bus.h> 79 #include <machine/resource.h> 80 #include <sys/bus.h> 81 #include <sys/rman.h> 82 83 #include <dev/virtio/virtio.h> 84 #include <dev/virtio/virtqueue.h> 85 #include <dev/virtio/network/virtio_net.h> 86 #include <dev/virtio/network/if_vtnetvar.h> 87 #include "virtio_if.h" 88 89 #if defined(INET) || defined(INET6) 90 #include <machine/in_cksum.h> 91 #endif 92 93 #ifdef __NO_STRICT_ALIGNMENT 94 #define VTNET_ETHER_ALIGN 0 95 #else /* Strict alignment */ 96 #define VTNET_ETHER_ALIGN ETHER_ALIGN 97 #endif 98 99 static int vtnet_modevent(module_t, int, void *); 100 101 static int vtnet_probe(device_t); 102 static int vtnet_attach(device_t); 103 static int vtnet_detach(device_t); 104 static int vtnet_suspend(device_t); 105 static int vtnet_resume(device_t); 106 static int vtnet_shutdown(device_t); 107 static int 
vtnet_attach_completed(device_t); 108 static int vtnet_config_change(device_t); 109 110 static int vtnet_negotiate_features(struct vtnet_softc *); 111 static int vtnet_setup_features(struct vtnet_softc *); 112 static int vtnet_init_rxq(struct vtnet_softc *, int); 113 static int vtnet_init_txq(struct vtnet_softc *, int); 114 static int vtnet_alloc_rxtx_queues(struct vtnet_softc *); 115 static void vtnet_free_rxtx_queues(struct vtnet_softc *); 116 static int vtnet_alloc_rx_filters(struct vtnet_softc *); 117 static void vtnet_free_rx_filters(struct vtnet_softc *); 118 static int vtnet_alloc_virtqueues(struct vtnet_softc *); 119 static void vtnet_alloc_interface(struct vtnet_softc *); 120 static int vtnet_setup_interface(struct vtnet_softc *); 121 static int vtnet_ioctl_mtu(struct vtnet_softc *, u_int); 122 static int vtnet_ioctl_ifflags(struct vtnet_softc *); 123 static int vtnet_ioctl_multi(struct vtnet_softc *); 124 static int vtnet_ioctl_ifcap(struct vtnet_softc *, struct ifreq *); 125 static int vtnet_ioctl(if_t, u_long, caddr_t); 126 static uint64_t vtnet_get_counter(if_t, ift_counter); 127 128 static int vtnet_rxq_populate(struct vtnet_rxq *); 129 static void vtnet_rxq_free_mbufs(struct vtnet_rxq *); 130 static struct mbuf * 131 vtnet_rx_alloc_buf(struct vtnet_softc *, int , struct mbuf **); 132 static int vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *, 133 struct mbuf *, int); 134 static int vtnet_rxq_replace_buf(struct vtnet_rxq *, struct mbuf *, int); 135 static int vtnet_rxq_enqueue_buf(struct vtnet_rxq *, struct mbuf *); 136 static int vtnet_rxq_new_buf(struct vtnet_rxq *); 137 #if defined(INET) || defined(INET6) 138 static void vtnet_rxq_csum_needs_csum(struct vtnet_rxq *, struct mbuf *, 139 bool, int, struct virtio_net_hdr *); 140 static void vtnet_rxq_csum_data_valid(struct vtnet_rxq *, struct mbuf *, 141 int); 142 static int vtnet_rxq_csum(struct vtnet_rxq *, struct mbuf *, 143 struct virtio_net_hdr *); 144 #endif 145 static void 
vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *, int); 146 static void vtnet_rxq_discard_buf(struct vtnet_rxq *, struct mbuf *); 147 static int vtnet_rxq_merged_eof(struct vtnet_rxq *, struct mbuf *, int); 148 static void vtnet_rxq_input(struct vtnet_rxq *, struct mbuf *, 149 struct virtio_net_hdr *); 150 static int vtnet_rxq_eof(struct vtnet_rxq *); 151 static void vtnet_rx_vq_process(struct vtnet_rxq *rxq, int tries); 152 static void vtnet_rx_vq_intr(void *); 153 static void vtnet_rxq_tq_intr(void *, int); 154 155 static int vtnet_txq_intr_threshold(struct vtnet_txq *); 156 static int vtnet_txq_below_threshold(struct vtnet_txq *); 157 static int vtnet_txq_notify(struct vtnet_txq *); 158 static void vtnet_txq_free_mbufs(struct vtnet_txq *); 159 static int vtnet_txq_offload_ctx(struct vtnet_txq *, struct mbuf *, 160 int *, int *, int *); 161 static int vtnet_txq_offload_tso(struct vtnet_txq *, struct mbuf *, int, 162 int, struct virtio_net_hdr *); 163 static struct mbuf * 164 vtnet_txq_offload(struct vtnet_txq *, struct mbuf *, 165 struct virtio_net_hdr *); 166 static int vtnet_txq_enqueue_buf(struct vtnet_txq *, struct mbuf **, 167 struct vtnet_tx_header *); 168 static int vtnet_txq_encap(struct vtnet_txq *, struct mbuf **, int); 169 170 /* Required for ALTQ */ 171 static void vtnet_start_locked(struct vtnet_txq *, if_t); 172 static void vtnet_start(if_t); 173 174 /* Required for MQ */ 175 static int vtnet_txq_mq_start_locked(struct vtnet_txq *, struct mbuf *); 176 static int vtnet_txq_mq_start(if_t, struct mbuf *); 177 static void vtnet_txq_tq_deferred(void *, int); 178 static void vtnet_qflush(if_t); 179 180 181 static void vtnet_txq_start(struct vtnet_txq *); 182 static void vtnet_txq_tq_intr(void *, int); 183 static int vtnet_txq_eof(struct vtnet_txq *); 184 static void vtnet_tx_vq_intr(void *); 185 static void vtnet_tx_start_all(struct vtnet_softc *); 186 187 static int vtnet_watchdog(struct vtnet_txq *); 188 static void vtnet_accum_stats(struct vtnet_softc 
*, 189 struct vtnet_rxq_stats *, struct vtnet_txq_stats *); 190 static void vtnet_tick(void *); 191 192 static void vtnet_start_taskqueues(struct vtnet_softc *); 193 static void vtnet_free_taskqueues(struct vtnet_softc *); 194 static void vtnet_drain_taskqueues(struct vtnet_softc *); 195 196 static void vtnet_drain_rxtx_queues(struct vtnet_softc *); 197 static void vtnet_stop_rendezvous(struct vtnet_softc *); 198 static void vtnet_stop(struct vtnet_softc *); 199 static int vtnet_virtio_reinit(struct vtnet_softc *); 200 static void vtnet_init_rx_filters(struct vtnet_softc *); 201 static int vtnet_init_rx_queues(struct vtnet_softc *); 202 static int vtnet_init_tx_queues(struct vtnet_softc *); 203 static int vtnet_init_rxtx_queues(struct vtnet_softc *); 204 static void vtnet_set_active_vq_pairs(struct vtnet_softc *); 205 static void vtnet_update_rx_offloads(struct vtnet_softc *); 206 static int vtnet_reinit(struct vtnet_softc *); 207 static void vtnet_init_locked(struct vtnet_softc *, int); 208 static void vtnet_init(void *); 209 210 static void vtnet_free_ctrl_vq(struct vtnet_softc *); 211 static void vtnet_exec_ctrl_cmd(struct vtnet_softc *, void *, 212 struct sglist *, int, int); 213 static int vtnet_ctrl_mac_cmd(struct vtnet_softc *, uint8_t *); 214 static int vtnet_ctrl_guest_offloads(struct vtnet_softc *, uint64_t); 215 static int vtnet_ctrl_mq_cmd(struct vtnet_softc *, uint16_t); 216 static int vtnet_ctrl_rx_cmd(struct vtnet_softc *, uint8_t, bool); 217 static int vtnet_set_promisc(struct vtnet_softc *, bool); 218 static int vtnet_set_allmulti(struct vtnet_softc *, bool); 219 static void vtnet_rx_filter(struct vtnet_softc *); 220 static void vtnet_rx_filter_mac(struct vtnet_softc *); 221 static int vtnet_exec_vlan_filter(struct vtnet_softc *, int, uint16_t); 222 static void vtnet_rx_filter_vlan(struct vtnet_softc *); 223 static void vtnet_update_vlan_filter(struct vtnet_softc *, int, uint16_t); 224 static void vtnet_register_vlan(void *, if_t, uint16_t); 225 
static void vtnet_unregister_vlan(void *, if_t, uint16_t); 226 227 static void vtnet_update_speed_duplex(struct vtnet_softc *); 228 static int vtnet_is_link_up(struct vtnet_softc *); 229 static void vtnet_update_link_status(struct vtnet_softc *); 230 static int vtnet_ifmedia_upd(if_t); 231 static void vtnet_ifmedia_sts(if_t, struct ifmediareq *); 232 static void vtnet_get_macaddr(struct vtnet_softc *); 233 static void vtnet_set_macaddr(struct vtnet_softc *); 234 static void vtnet_attached_set_macaddr(struct vtnet_softc *); 235 static void vtnet_vlan_tag_remove(struct mbuf *); 236 static void vtnet_set_rx_process_limit(struct vtnet_softc *); 237 238 static void vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *, 239 struct sysctl_oid_list *, struct vtnet_rxq *); 240 static void vtnet_setup_txq_sysctl(struct sysctl_ctx_list *, 241 struct sysctl_oid_list *, struct vtnet_txq *); 242 static void vtnet_setup_queue_sysctl(struct vtnet_softc *); 243 static void vtnet_load_tunables(struct vtnet_softc *); 244 static void vtnet_setup_sysctl(struct vtnet_softc *); 245 246 static int vtnet_rxq_enable_intr(struct vtnet_rxq *); 247 static void vtnet_rxq_disable_intr(struct vtnet_rxq *); 248 static int vtnet_txq_enable_intr(struct vtnet_txq *); 249 static void vtnet_txq_disable_intr(struct vtnet_txq *); 250 static void vtnet_enable_rx_interrupts(struct vtnet_softc *); 251 static void vtnet_enable_tx_interrupts(struct vtnet_softc *); 252 static void vtnet_enable_interrupts(struct vtnet_softc *); 253 static void vtnet_disable_rx_interrupts(struct vtnet_softc *); 254 static void vtnet_disable_tx_interrupts(struct vtnet_softc *); 255 static void vtnet_disable_interrupts(struct vtnet_softc *); 256 257 static int vtnet_tunable_int(struct vtnet_softc *, const char *, int); 258 259 DEBUGNET_DEFINE(vtnet); 260 261 #define vtnet_htog16(_sc, _val) virtio_htog16(vtnet_modern(_sc), _val) 262 #define vtnet_htog32(_sc, _val) virtio_htog32(vtnet_modern(_sc), _val) 263 #define vtnet_htog64(_sc, _val) 
virtio_htog64(vtnet_modern(_sc), _val) 264 #define vtnet_gtoh16(_sc, _val) virtio_gtoh16(vtnet_modern(_sc), _val) 265 #define vtnet_gtoh32(_sc, _val) virtio_gtoh32(vtnet_modern(_sc), _val) 266 #define vtnet_gtoh64(_sc, _val) virtio_gtoh64(vtnet_modern(_sc), _val) 267 268 /* Tunables. */ 269 static SYSCTL_NODE(_hw, OID_AUTO, vtnet, CTLFLAG_RD | CTLFLAG_MPSAFE, 0, 270 "VirtIO Net driver parameters"); 271 272 static int vtnet_csum_disable = 0; 273 SYSCTL_INT(_hw_vtnet, OID_AUTO, csum_disable, CTLFLAG_RDTUN, 274 &vtnet_csum_disable, 0, "Disables receive and send checksum offload"); 275 276 static int vtnet_tso_disable = 0; 277 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_disable, CTLFLAG_RDTUN, 278 &vtnet_tso_disable, 0, "Disables TSO"); 279 280 static int vtnet_lro_disable = 1; 281 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_disable, CTLFLAG_RDTUN, 282 &vtnet_lro_disable, 0, "Disables hardware LRO"); 283 284 static int vtnet_mq_disable = 0; 285 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_disable, CTLFLAG_RDTUN, 286 &vtnet_mq_disable, 0, "Disables multiqueue support"); 287 288 static int vtnet_mq_max_pairs = VTNET_MAX_QUEUE_PAIRS; 289 SYSCTL_INT(_hw_vtnet, OID_AUTO, mq_max_pairs, CTLFLAG_RDTUN, 290 &vtnet_mq_max_pairs, 0, "Maximum number of multiqueue pairs"); 291 292 static int vtnet_tso_maxlen = IP_MAXPACKET; 293 SYSCTL_INT(_hw_vtnet, OID_AUTO, tso_maxlen, CTLFLAG_RDTUN, 294 &vtnet_tso_maxlen, 0, "TSO burst limit"); 295 296 static int vtnet_rx_process_limit = 1024; 297 SYSCTL_INT(_hw_vtnet, OID_AUTO, rx_process_limit, CTLFLAG_RDTUN, 298 &vtnet_rx_process_limit, 0, 299 "Number of RX segments processed in one pass"); 300 301 static int vtnet_lro_entry_count = 128; 302 SYSCTL_INT(_hw_vtnet, OID_AUTO, lro_entry_count, CTLFLAG_RDTUN, 303 &vtnet_lro_entry_count, 0, "Software LRO entry count"); 304 305 /* Enable sorted LRO, and the depth of the mbuf queue. 
*/ 306 static int vtnet_lro_mbufq_depth = 0; 307 SYSCTL_UINT(_hw_vtnet, OID_AUTO, lro_mbufq_depth, CTLFLAG_RDTUN, 308 &vtnet_lro_mbufq_depth, 0, "Depth of software LRO mbuf queue"); 309 310 /* Deactivate ALTQ Support */ 311 static int vtnet_altq_disable = 0; 312 SYSCTL_INT(_hw_vtnet, OID_AUTO, altq_disable, CTLFLAG_RDTUN, 313 &vtnet_altq_disable, 0, "Disables ALTQ Support"); 314 315 /* 316 * For the driver to be considered as having altq enabled, 317 * it must be compiled with an ALTQ capable kernel, 318 * and the tunable hw.vtnet.altq_disable must be zero 319 */ 320 #define VTNET_ALTQ_ENABLED (VTNET_ALTQ_CAPABLE && (!vtnet_altq_disable)) 321 322 323 static uma_zone_t vtnet_tx_header_zone; 324 325 static struct virtio_feature_desc vtnet_feature_desc[] = { 326 { VIRTIO_NET_F_CSUM, "TxChecksum" }, 327 { VIRTIO_NET_F_GUEST_CSUM, "RxChecksum" }, 328 { VIRTIO_NET_F_CTRL_GUEST_OFFLOADS, "CtrlRxOffloads" }, 329 { VIRTIO_NET_F_MAC, "MAC" }, 330 { VIRTIO_NET_F_GSO, "TxGSO" }, 331 { VIRTIO_NET_F_GUEST_TSO4, "RxLROv4" }, 332 { VIRTIO_NET_F_GUEST_TSO6, "RxLROv6" }, 333 { VIRTIO_NET_F_GUEST_ECN, "RxLROECN" }, 334 { VIRTIO_NET_F_GUEST_UFO, "RxUFO" }, 335 { VIRTIO_NET_F_HOST_TSO4, "TxTSOv4" }, 336 { VIRTIO_NET_F_HOST_TSO6, "TxTSOv6" }, 337 { VIRTIO_NET_F_HOST_ECN, "TxTSOECN" }, 338 { VIRTIO_NET_F_HOST_UFO, "TxUFO" }, 339 { VIRTIO_NET_F_MRG_RXBUF, "MrgRxBuf" }, 340 { VIRTIO_NET_F_STATUS, "Status" }, 341 { VIRTIO_NET_F_CTRL_VQ, "CtrlVq" }, 342 { VIRTIO_NET_F_CTRL_RX, "CtrlRxMode" }, 343 { VIRTIO_NET_F_CTRL_VLAN, "CtrlVLANFilter" }, 344 { VIRTIO_NET_F_CTRL_RX_EXTRA, "CtrlRxModeExtra" }, 345 { VIRTIO_NET_F_GUEST_ANNOUNCE, "GuestAnnounce" }, 346 { VIRTIO_NET_F_MQ, "Multiqueue" }, 347 { VIRTIO_NET_F_CTRL_MAC_ADDR, "CtrlMacAddr" }, 348 { VIRTIO_NET_F_SPEED_DUPLEX, "SpeedDuplex" }, 349 350 { 0, NULL } 351 }; 352 353 static device_method_t vtnet_methods[] = { 354 /* Device methods. 
*/ 355 DEVMETHOD(device_probe, vtnet_probe), 356 DEVMETHOD(device_attach, vtnet_attach), 357 DEVMETHOD(device_detach, vtnet_detach), 358 DEVMETHOD(device_suspend, vtnet_suspend), 359 DEVMETHOD(device_resume, vtnet_resume), 360 DEVMETHOD(device_shutdown, vtnet_shutdown), 361 362 /* VirtIO methods. */ 363 DEVMETHOD(virtio_attach_completed, vtnet_attach_completed), 364 DEVMETHOD(virtio_config_change, vtnet_config_change), 365 366 DEVMETHOD_END 367 }; 368 369 #ifdef DEV_NETMAP 370 #include <dev/netmap/if_vtnet_netmap.h> 371 #endif 372 373 static driver_t vtnet_driver = { 374 .name = "vtnet", 375 .methods = vtnet_methods, 376 .size = sizeof(struct vtnet_softc) 377 }; 378 VIRTIO_DRIVER_MODULE(vtnet, vtnet_driver, vtnet_modevent, NULL); 379 MODULE_VERSION(vtnet, 1); 380 MODULE_DEPEND(vtnet, virtio, 1, 1, 1); 381 #ifdef DEV_NETMAP 382 MODULE_DEPEND(vtnet, netmap, 1, 1, 1); 383 #endif 384 385 VIRTIO_SIMPLE_PNPINFO(vtnet, VIRTIO_ID_NETWORK, "VirtIO Networking Adapter"); 386 387 static int 388 vtnet_modevent(module_t mod __unused, int type, void *unused __unused) 389 { 390 int error = 0; 391 static int loaded = 0; 392 393 switch (type) { 394 case MOD_LOAD: 395 if (loaded++ == 0) { 396 vtnet_tx_header_zone = uma_zcreate("vtnet_tx_hdr", 397 sizeof(struct vtnet_tx_header), 398 NULL, NULL, NULL, NULL, 0, 0); 399 #ifdef DEBUGNET 400 /* 401 * We need to allocate from this zone in the transmit path, so ensure 402 * that we have at least one item per header available. 403 * XXX add a separate zone like we do for mbufs? 
otherwise we may alloc 404 * buckets 405 */ 406 uma_zone_reserve(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2); 407 uma_prealloc(vtnet_tx_header_zone, DEBUGNET_MAX_IN_FLIGHT * 2); 408 #endif 409 } 410 break; 411 case MOD_QUIESCE: 412 if (uma_zone_get_cur(vtnet_tx_header_zone) > 0) 413 error = EBUSY; 414 break; 415 case MOD_UNLOAD: 416 if (--loaded == 0) { 417 uma_zdestroy(vtnet_tx_header_zone); 418 vtnet_tx_header_zone = NULL; 419 } 420 break; 421 case MOD_SHUTDOWN: 422 break; 423 default: 424 error = EOPNOTSUPP; 425 break; 426 } 427 428 return (error); 429 } 430 431 static int 432 vtnet_probe(device_t dev) 433 { 434 return (VIRTIO_SIMPLE_PROBE(dev, vtnet)); 435 } 436 437 static int 438 vtnet_attach(device_t dev) 439 { 440 struct vtnet_softc *sc; 441 int error; 442 443 sc = device_get_softc(dev); 444 sc->vtnet_dev = dev; 445 virtio_set_feature_desc(dev, vtnet_feature_desc); 446 447 VTNET_CORE_LOCK_INIT(sc); 448 callout_init_mtx(&sc->vtnet_tick_ch, VTNET_CORE_MTX(sc), 0); 449 vtnet_load_tunables(sc); 450 451 vtnet_alloc_interface(sc); 452 vtnet_setup_sysctl(sc); 453 454 error = vtnet_setup_features(sc); 455 if (error) { 456 device_printf(dev, "cannot setup features\n"); 457 goto fail; 458 } 459 460 error = vtnet_alloc_rx_filters(sc); 461 if (error) { 462 device_printf(dev, "cannot allocate Rx filters\n"); 463 goto fail; 464 } 465 466 error = vtnet_alloc_rxtx_queues(sc); 467 if (error) { 468 device_printf(dev, "cannot allocate queues\n"); 469 goto fail; 470 } 471 472 error = vtnet_alloc_virtqueues(sc); 473 if (error) { 474 device_printf(dev, "cannot allocate virtqueues\n"); 475 goto fail; 476 } 477 478 error = vtnet_setup_interface(sc); 479 if (error) { 480 device_printf(dev, "cannot setup interface\n"); 481 goto fail; 482 } 483 484 error = virtio_setup_intr(dev, INTR_TYPE_NET); 485 if (error) { 486 device_printf(dev, "cannot setup interrupts\n"); 487 ether_ifdetach(sc->vtnet_ifp); 488 goto fail; 489 } 490 491 #ifdef DEV_NETMAP 492 vtnet_netmap_attach(sc); 493 
#endif 494 vtnet_start_taskqueues(sc); 495 496 fail: 497 if (error) 498 vtnet_detach(dev); 499 500 return (error); 501 } 502 503 static int 504 vtnet_detach(device_t dev) 505 { 506 struct vtnet_softc *sc; 507 if_t ifp; 508 509 sc = device_get_softc(dev); 510 ifp = sc->vtnet_ifp; 511 512 if (device_is_attached(dev)) { 513 VTNET_CORE_LOCK(sc); 514 vtnet_stop(sc); 515 VTNET_CORE_UNLOCK(sc); 516 517 callout_drain(&sc->vtnet_tick_ch); 518 vtnet_drain_taskqueues(sc); 519 520 ether_ifdetach(ifp); 521 } 522 523 #ifdef DEV_NETMAP 524 netmap_detach(ifp); 525 #endif 526 527 if (sc->vtnet_pfil != NULL) { 528 pfil_head_unregister(sc->vtnet_pfil); 529 sc->vtnet_pfil = NULL; 530 } 531 532 vtnet_free_taskqueues(sc); 533 534 if (sc->vtnet_vlan_attach != NULL) { 535 EVENTHANDLER_DEREGISTER(vlan_config, sc->vtnet_vlan_attach); 536 sc->vtnet_vlan_attach = NULL; 537 } 538 if (sc->vtnet_vlan_detach != NULL) { 539 EVENTHANDLER_DEREGISTER(vlan_unconfig, sc->vtnet_vlan_detach); 540 sc->vtnet_vlan_detach = NULL; 541 } 542 543 ifmedia_removeall(&sc->vtnet_media); 544 545 if (ifp != NULL) { 546 if_free(ifp); 547 sc->vtnet_ifp = NULL; 548 } 549 550 vtnet_free_rxtx_queues(sc); 551 vtnet_free_rx_filters(sc); 552 553 if (sc->vtnet_ctrl_vq != NULL) 554 vtnet_free_ctrl_vq(sc); 555 556 VTNET_CORE_LOCK_DESTROY(sc); 557 558 return (0); 559 } 560 561 static int 562 vtnet_suspend(device_t dev) 563 { 564 struct vtnet_softc *sc; 565 566 sc = device_get_softc(dev); 567 568 VTNET_CORE_LOCK(sc); 569 vtnet_stop(sc); 570 sc->vtnet_flags |= VTNET_FLAG_SUSPENDED; 571 VTNET_CORE_UNLOCK(sc); 572 573 return (0); 574 } 575 576 static int 577 vtnet_resume(device_t dev) 578 { 579 struct vtnet_softc *sc; 580 if_t ifp; 581 582 sc = device_get_softc(dev); 583 ifp = sc->vtnet_ifp; 584 585 VTNET_CORE_LOCK(sc); 586 if (if_getflags(ifp) & IFF_UP) 587 vtnet_init_locked(sc, 0); 588 sc->vtnet_flags &= ~VTNET_FLAG_SUSPENDED; 589 VTNET_CORE_UNLOCK(sc); 590 591 return (0); 592 } 593 594 static int 595 vtnet_shutdown(device_t dev) 
596 { 597 /* 598 * Suspend already does all of what we need to 599 * do here; we just never expect to be resumed. 600 */ 601 return (vtnet_suspend(dev)); 602 } 603 604 static int 605 vtnet_attach_completed(device_t dev) 606 { 607 struct vtnet_softc *sc; 608 609 sc = device_get_softc(dev); 610 611 VTNET_CORE_LOCK(sc); 612 vtnet_attached_set_macaddr(sc); 613 VTNET_CORE_UNLOCK(sc); 614 615 return (0); 616 } 617 618 static int 619 vtnet_config_change(device_t dev) 620 { 621 struct vtnet_softc *sc; 622 623 sc = device_get_softc(dev); 624 625 VTNET_CORE_LOCK(sc); 626 vtnet_update_link_status(sc); 627 if (sc->vtnet_link_active != 0) 628 vtnet_tx_start_all(sc); 629 VTNET_CORE_UNLOCK(sc); 630 631 return (0); 632 } 633 634 static int 635 vtnet_negotiate_features(struct vtnet_softc *sc) 636 { 637 device_t dev; 638 uint64_t features, negotiated_features; 639 int no_csum; 640 641 dev = sc->vtnet_dev; 642 features = virtio_bus_is_modern(dev) ? VTNET_MODERN_FEATURES : 643 VTNET_LEGACY_FEATURES; 644 645 /* 646 * TSO and LRO are only available when their corresponding checksum 647 * offload feature is also negotiated. 
648 */ 649 no_csum = vtnet_tunable_int(sc, "csum_disable", vtnet_csum_disable); 650 if (no_csum) 651 features &= ~(VIRTIO_NET_F_CSUM | VIRTIO_NET_F_GUEST_CSUM); 652 if (no_csum || vtnet_tunable_int(sc, "tso_disable", vtnet_tso_disable)) 653 features &= ~VTNET_TSO_FEATURES; 654 if (no_csum || vtnet_tunable_int(sc, "lro_disable", vtnet_lro_disable)) 655 features &= ~VTNET_LRO_FEATURES; 656 657 /* Deactivate MQ Feature flag, if driver has ALTQ enabled, or MQ is explicitly disabled */ 658 if (VTNET_ALTQ_ENABLED || vtnet_tunable_int(sc, "mq_disable", vtnet_mq_disable)) 659 features &= ~VIRTIO_NET_F_MQ; 660 661 negotiated_features = virtio_negotiate_features(dev, features); 662 663 if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) { 664 uint16_t mtu; 665 666 mtu = virtio_read_dev_config_2(dev, 667 offsetof(struct virtio_net_config, mtu)); 668 if (mtu < VTNET_MIN_MTU) { 669 device_printf(dev, "Invalid MTU value: %d. " 670 "MTU feature disabled.\n", mtu); 671 features &= ~VIRTIO_NET_F_MTU; 672 negotiated_features = 673 virtio_negotiate_features(dev, features); 674 } 675 } 676 677 if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) { 678 uint16_t npairs; 679 680 npairs = virtio_read_dev_config_2(dev, 681 offsetof(struct virtio_net_config, max_virtqueue_pairs)); 682 if (npairs < VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MIN || 683 npairs > VIRTIO_NET_CTRL_MQ_VQ_PAIRS_MAX) { 684 device_printf(dev, "Invalid max_virtqueue_pairs value: " 685 "%d. Multiqueue feature disabled.\n", npairs); 686 features &= ~VIRTIO_NET_F_MQ; 687 negotiated_features = 688 virtio_negotiate_features(dev, features); 689 } 690 } 691 692 if (virtio_with_feature(dev, VTNET_LRO_FEATURES) && 693 virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF) == 0) { 694 /* 695 * LRO without mergeable buffers requires special care. This 696 * is not ideal because every receive buffer must be large 697 * enough to hold the maximum TCP packet, the Ethernet header, 698 * and the header. 
This requires up to 34 descriptors with 699 * MCLBYTES clusters. If we do not have indirect descriptors, 700 * LRO is disabled since the virtqueue will not contain very 701 * many receive buffers. 702 */ 703 if (!virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) { 704 device_printf(dev, 705 "Host LRO disabled since both mergeable buffers " 706 "and indirect descriptors were not negotiated\n"); 707 features &= ~VTNET_LRO_FEATURES; 708 negotiated_features = 709 virtio_negotiate_features(dev, features); 710 } else 711 sc->vtnet_flags |= VTNET_FLAG_LRO_NOMRG; 712 } 713 714 sc->vtnet_features = negotiated_features; 715 sc->vtnet_negotiated_features = negotiated_features; 716 717 return (virtio_finalize_features(dev)); 718 } 719 720 static int 721 vtnet_setup_features(struct vtnet_softc *sc) 722 { 723 device_t dev; 724 int error; 725 726 dev = sc->vtnet_dev; 727 728 error = vtnet_negotiate_features(sc); 729 if (error) 730 return (error); 731 732 if (virtio_with_feature(dev, VIRTIO_F_VERSION_1)) 733 sc->vtnet_flags |= VTNET_FLAG_MODERN; 734 if (virtio_with_feature(dev, VIRTIO_RING_F_INDIRECT_DESC)) 735 sc->vtnet_flags |= VTNET_FLAG_INDIRECT; 736 if (virtio_with_feature(dev, VIRTIO_RING_F_EVENT_IDX)) 737 sc->vtnet_flags |= VTNET_FLAG_EVENT_IDX; 738 739 if (virtio_with_feature(dev, VIRTIO_NET_F_MAC)) { 740 /* This feature should always be negotiated. */ 741 sc->vtnet_flags |= VTNET_FLAG_MAC; 742 } 743 744 if (virtio_with_feature(dev, VIRTIO_NET_F_MTU)) { 745 sc->vtnet_max_mtu = virtio_read_dev_config_2(dev, 746 offsetof(struct virtio_net_config, mtu)); 747 } else 748 sc->vtnet_max_mtu = VTNET_MAX_MTU; 749 750 if (virtio_with_feature(dev, VIRTIO_NET_F_MRG_RXBUF)) { 751 sc->vtnet_flags |= VTNET_FLAG_MRG_RXBUFS; 752 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_mrg_rxbuf); 753 } else if (vtnet_modern(sc)) { 754 /* This is identical to the mergeable header. 
*/ 755 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr_v1); 756 } else 757 sc->vtnet_hdr_size = sizeof(struct virtio_net_hdr); 758 759 if (vtnet_modern(sc) || sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) 760 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_INLINE; 761 else if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) 762 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_LRO_NOMRG; 763 else 764 sc->vtnet_rx_nsegs = VTNET_RX_SEGS_HDR_SEPARATE; 765 766 /* 767 * Favor "hardware" LRO if negotiated, but support software LRO as 768 * a fallback; there is usually little benefit (or worse) with both. 769 */ 770 if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO4) == 0 && 771 virtio_with_feature(dev, VIRTIO_NET_F_GUEST_TSO6) == 0) 772 sc->vtnet_flags |= VTNET_FLAG_SW_LRO; 773 774 if (virtio_with_feature(dev, VIRTIO_NET_F_GSO) || 775 virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4) || 776 virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6)) 777 sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MAX; 778 else 779 sc->vtnet_tx_nsegs = VTNET_TX_SEGS_MIN; 780 781 sc->vtnet_req_vq_pairs = 1; 782 sc->vtnet_max_vq_pairs = 1; 783 784 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VQ)) { 785 sc->vtnet_flags |= VTNET_FLAG_CTRL_VQ; 786 787 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_RX)) 788 sc->vtnet_flags |= VTNET_FLAG_CTRL_RX; 789 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_VLAN)) 790 sc->vtnet_flags |= VTNET_FLAG_VLAN_FILTER; 791 if (virtio_with_feature(dev, VIRTIO_NET_F_CTRL_MAC_ADDR)) 792 sc->vtnet_flags |= VTNET_FLAG_CTRL_MAC; 793 794 if (virtio_with_feature(dev, VIRTIO_NET_F_MQ)) { 795 sc->vtnet_max_vq_pairs = virtio_read_dev_config_2(dev, 796 offsetof(struct virtio_net_config, 797 max_virtqueue_pairs)); 798 } 799 } 800 801 if (sc->vtnet_max_vq_pairs > 1) { 802 int req; 803 804 /* 805 * Limit the maximum number of requested queue pairs to the 806 * number of CPUs and the configured maximum. 
807 */ 808 req = vtnet_tunable_int(sc, "mq_max_pairs", vtnet_mq_max_pairs); 809 if (req < 0) 810 req = 1; 811 if (req == 0) 812 req = mp_ncpus; 813 if (req > sc->vtnet_max_vq_pairs) 814 req = sc->vtnet_max_vq_pairs; 815 if (req > mp_ncpus) 816 req = mp_ncpus; 817 if (req > 1) { 818 sc->vtnet_req_vq_pairs = req; 819 sc->vtnet_flags |= VTNET_FLAG_MQ; 820 } 821 } 822 823 return (0); 824 } 825 826 static int 827 vtnet_init_rxq(struct vtnet_softc *sc, int id) 828 { 829 struct vtnet_rxq *rxq; 830 831 rxq = &sc->vtnet_rxqs[id]; 832 833 snprintf(rxq->vtnrx_name, sizeof(rxq->vtnrx_name), "%s-rx%d", 834 device_get_nameunit(sc->vtnet_dev), id); 835 mtx_init(&rxq->vtnrx_mtx, rxq->vtnrx_name, NULL, MTX_DEF); 836 837 rxq->vtnrx_sc = sc; 838 rxq->vtnrx_id = id; 839 840 rxq->vtnrx_sg = sglist_alloc(sc->vtnet_rx_nsegs, M_NOWAIT); 841 if (rxq->vtnrx_sg == NULL) 842 return (ENOMEM); 843 844 #if defined(INET) || defined(INET6) 845 if (vtnet_software_lro(sc)) { 846 if (tcp_lro_init_args(&rxq->vtnrx_lro, sc->vtnet_ifp, 847 sc->vtnet_lro_entry_count, sc->vtnet_lro_mbufq_depth) != 0) 848 return (ENOMEM); 849 } 850 #endif 851 852 NET_TASK_INIT(&rxq->vtnrx_intrtask, 0, vtnet_rxq_tq_intr, rxq); 853 rxq->vtnrx_tq = taskqueue_create(rxq->vtnrx_name, M_NOWAIT, 854 taskqueue_thread_enqueue, &rxq->vtnrx_tq); 855 856 return (rxq->vtnrx_tq == NULL ? 
ENOMEM : 0); 857 } 858 859 static int 860 vtnet_init_txq(struct vtnet_softc *sc, int id) 861 { 862 struct vtnet_txq *txq; 863 864 txq = &sc->vtnet_txqs[id]; 865 866 snprintf(txq->vtntx_name, sizeof(txq->vtntx_name), "%s-tx%d", 867 device_get_nameunit(sc->vtnet_dev), id); 868 mtx_init(&txq->vtntx_mtx, txq->vtntx_name, NULL, MTX_DEF); 869 870 txq->vtntx_sc = sc; 871 txq->vtntx_id = id; 872 873 txq->vtntx_sg = sglist_alloc(sc->vtnet_tx_nsegs, M_NOWAIT); 874 if (txq->vtntx_sg == NULL) 875 return (ENOMEM); 876 877 if (!VTNET_ALTQ_ENABLED) { 878 txq->vtntx_br = buf_ring_alloc(VTNET_DEFAULT_BUFRING_SIZE, M_DEVBUF, 879 M_NOWAIT, &txq->vtntx_mtx); 880 if (txq->vtntx_br == NULL) 881 return (ENOMEM); 882 883 TASK_INIT(&txq->vtntx_defrtask, 0, vtnet_txq_tq_deferred, txq); 884 } 885 TASK_INIT(&txq->vtntx_intrtask, 0, vtnet_txq_tq_intr, txq); 886 txq->vtntx_tq = taskqueue_create(txq->vtntx_name, M_NOWAIT, 887 taskqueue_thread_enqueue, &txq->vtntx_tq); 888 if (txq->vtntx_tq == NULL) 889 return (ENOMEM); 890 891 return (0); 892 } 893 894 static int 895 vtnet_alloc_rxtx_queues(struct vtnet_softc *sc) 896 { 897 int i, npairs, error; 898 899 npairs = sc->vtnet_max_vq_pairs; 900 901 sc->vtnet_rxqs = malloc(sizeof(struct vtnet_rxq) * npairs, M_DEVBUF, 902 M_NOWAIT | M_ZERO); 903 sc->vtnet_txqs = malloc(sizeof(struct vtnet_txq) * npairs, M_DEVBUF, 904 M_NOWAIT | M_ZERO); 905 if (sc->vtnet_rxqs == NULL || sc->vtnet_txqs == NULL) 906 return (ENOMEM); 907 908 for (i = 0; i < npairs; i++) { 909 error = vtnet_init_rxq(sc, i); 910 if (error) 911 return (error); 912 error = vtnet_init_txq(sc, i); 913 if (error) 914 return (error); 915 } 916 917 vtnet_set_rx_process_limit(sc); 918 vtnet_setup_queue_sysctl(sc); 919 920 return (0); 921 } 922 923 static void 924 vtnet_destroy_rxq(struct vtnet_rxq *rxq) 925 { 926 927 rxq->vtnrx_sc = NULL; 928 rxq->vtnrx_id = -1; 929 930 #if defined(INET) || defined(INET6) 931 tcp_lro_free(&rxq->vtnrx_lro); 932 #endif 933 934 if (rxq->vtnrx_sg != NULL) { 935 
sglist_free(rxq->vtnrx_sg); 936 rxq->vtnrx_sg = NULL; 937 } 938 939 if (mtx_initialized(&rxq->vtnrx_mtx) != 0) 940 mtx_destroy(&rxq->vtnrx_mtx); 941 } 942 943 static void 944 vtnet_destroy_txq(struct vtnet_txq *txq) 945 { 946 947 txq->vtntx_sc = NULL; 948 txq->vtntx_id = -1; 949 950 if (txq->vtntx_sg != NULL) { 951 sglist_free(txq->vtntx_sg); 952 txq->vtntx_sg = NULL; 953 } 954 955 if (!VTNET_ALTQ_ENABLED) { 956 if (txq->vtntx_br != NULL) { 957 buf_ring_free(txq->vtntx_br, M_DEVBUF); 958 txq->vtntx_br = NULL; 959 } 960 } 961 962 if (mtx_initialized(&txq->vtntx_mtx) != 0) 963 mtx_destroy(&txq->vtntx_mtx); 964 } 965 966 static void 967 vtnet_free_rxtx_queues(struct vtnet_softc *sc) 968 { 969 int i; 970 971 if (sc->vtnet_rxqs != NULL) { 972 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) 973 vtnet_destroy_rxq(&sc->vtnet_rxqs[i]); 974 free(sc->vtnet_rxqs, M_DEVBUF); 975 sc->vtnet_rxqs = NULL; 976 } 977 978 if (sc->vtnet_txqs != NULL) { 979 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) 980 vtnet_destroy_txq(&sc->vtnet_txqs[i]); 981 free(sc->vtnet_txqs, M_DEVBUF); 982 sc->vtnet_txqs = NULL; 983 } 984 } 985 986 static int 987 vtnet_alloc_rx_filters(struct vtnet_softc *sc) 988 { 989 990 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { 991 sc->vtnet_mac_filter = malloc(sizeof(struct vtnet_mac_filter), 992 M_DEVBUF, M_NOWAIT | M_ZERO); 993 if (sc->vtnet_mac_filter == NULL) 994 return (ENOMEM); 995 } 996 997 if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) { 998 sc->vtnet_vlan_filter = malloc(sizeof(uint32_t) * 999 VTNET_VLAN_FILTER_NWORDS, M_DEVBUF, M_NOWAIT | M_ZERO); 1000 if (sc->vtnet_vlan_filter == NULL) 1001 return (ENOMEM); 1002 } 1003 1004 return (0); 1005 } 1006 1007 static void 1008 vtnet_free_rx_filters(struct vtnet_softc *sc) 1009 { 1010 1011 if (sc->vtnet_mac_filter != NULL) { 1012 free(sc->vtnet_mac_filter, M_DEVBUF); 1013 sc->vtnet_mac_filter = NULL; 1014 } 1015 1016 if (sc->vtnet_vlan_filter != NULL) { 1017 free(sc->vtnet_vlan_filter, M_DEVBUF); 1018 sc->vtnet_vlan_filter 
= NULL; 1019 } 1020 } 1021 1022 static int 1023 vtnet_alloc_virtqueues(struct vtnet_softc *sc) 1024 { 1025 device_t dev; 1026 struct vq_alloc_info *info; 1027 struct vtnet_rxq *rxq; 1028 struct vtnet_txq *txq; 1029 int i, idx, nvqs, error; 1030 1031 dev = sc->vtnet_dev; 1032 1033 nvqs = sc->vtnet_max_vq_pairs * 2; 1034 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) 1035 nvqs++; 1036 1037 info = malloc(sizeof(struct vq_alloc_info) * nvqs, M_TEMP, M_NOWAIT); 1038 if (info == NULL) 1039 return (ENOMEM); 1040 1041 for (i = 0, idx = 0; i < sc->vtnet_req_vq_pairs; i++, idx += 2) { 1042 rxq = &sc->vtnet_rxqs[i]; 1043 VQ_ALLOC_INFO_INIT(&info[idx], sc->vtnet_rx_nsegs, 1044 vtnet_rx_vq_intr, rxq, &rxq->vtnrx_vq, 1045 "%s-rx%d", device_get_nameunit(dev), rxq->vtnrx_id); 1046 1047 txq = &sc->vtnet_txqs[i]; 1048 VQ_ALLOC_INFO_INIT(&info[idx + 1], sc->vtnet_tx_nsegs, 1049 vtnet_tx_vq_intr, txq, &txq->vtntx_vq, 1050 "%s-tx%d", device_get_nameunit(dev), txq->vtntx_id); 1051 } 1052 1053 /* These queues will not be used so allocate the minimum resources. 
/*
 * Configure the ifnet stored in sc->vtnet_ifp and attach it as an Ethernet
 * interface.
 *
 * In order: set the interface flags and handler functions, read the MAC
 * address, set up ifmedia, advertise capabilities according to the VirtIO
 * features reported by virtio_with_feature(), enable the capabilities
 * advertised so far, attach to the Ethernet layer and register an inbound
 * pfil head.  Always returns 0.
 */
static int
vtnet_setup_interface(struct vtnet_softc *sc)
{
	device_t dev;
	struct pfil_head_args pa;
	if_t ifp;

	dev = sc->vtnet_dev;
	ifp = sc->vtnet_ifp;

	if_setflags(ifp, IFF_BROADCAST | IFF_SIMPLEX | IFF_MULTICAST);
	if_setbaudrate(ifp, IF_Gbps(10));
	if_setinitfn(ifp, vtnet_init);
	if_setioctlfn(ifp, vtnet_ioctl);
	if_setgetcounterfn(ifp, vtnet_get_counter);

	/*
	 * Without ALTQ the transmit/qflush entry points are used.  With ALTQ
	 * the start entry point is used instead, with a send queue one entry
	 * shorter than the first Tx virtqueue.
	 */
	if (!VTNET_ALTQ_ENABLED) {
		if_settransmitfn(ifp, vtnet_txq_mq_start);
		if_setqflushfn(ifp, vtnet_qflush);
	} else {
		struct virtqueue *vq = sc->vtnet_txqs[0].vtntx_vq;
		if_setstartfn(ifp, vtnet_start);
		if_setsendqlen(ifp, virtqueue_size(vq) - 1);
		if_setsendqready(ifp);
	}

	vtnet_get_macaddr(sc);

	if (virtio_with_feature(dev, VIRTIO_NET_F_STATUS))
		if_setcapabilitiesbit(ifp, IFCAP_LINKSTATE, 0);

	/* A single autoselect Ethernet media entry is registered. */
	ifmedia_init(&sc->vtnet_media, 0, vtnet_ifmedia_upd, vtnet_ifmedia_sts);
	ifmedia_add(&sc->vtnet_media, IFM_ETHER | IFM_AUTO, 0, NULL);
	ifmedia_set(&sc->vtnet_media, IFM_ETHER | IFM_AUTO);

	if (virtio_with_feature(dev, VIRTIO_NET_F_CSUM)) {
		int gso;

		if_setcapabilitiesbit(ifp, IFCAP_TXCSUM | IFCAP_TXCSUM_IPV6, 0);

		/*
		 * VIRTIO_NET_F_GSO is treated as implying each of the
		 * per-protocol TSO/ECN features tested below.
		 */
		gso = virtio_with_feature(dev, VIRTIO_NET_F_GSO);
		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO4))
			if_setcapabilitiesbit(ifp, IFCAP_TSO4, 0);
		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_TSO6))
			if_setcapabilitiesbit(ifp, IFCAP_TSO6, 0);
		if (gso || virtio_with_feature(dev, VIRTIO_NET_F_HOST_ECN))
			sc->vtnet_flags |= VTNET_FLAG_TSO_ECN;

		if (if_getcapabilities(ifp) & (IFCAP_TSO4 | IFCAP_TSO6)) {
			int tso_maxlen;

			if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTSO, 0);

			/*
			 * The TSO limit comes from the "tso_maxlen" tunable,
			 * less room for an Ethernet header with a VLAN tag.
			 */
			tso_maxlen = vtnet_tunable_int(sc, "tso_maxlen",
			    vtnet_tso_maxlen);
			if_sethwtsomax(ifp, tso_maxlen -
			    (ETHER_HDR_LEN + ETHER_VLAN_ENCAP_LEN));
			if_sethwtsomaxsegcount(ifp, sc->vtnet_tx_nsegs - 1);
			if_sethwtsomaxsegsize(ifp, PAGE_SIZE);
		}
	}

	if (virtio_with_feature(dev, VIRTIO_NET_F_GUEST_CSUM)) {
		/* BMV: Rx checksums not distinguished between IPv4 and IPv6. */
		if_setcapabilitiesbit(ifp, IFCAP_RXCSUM, 0);
		if_setcapabilitiesbit(ifp, IFCAP_RXCSUM_IPV6, 0);

		/* Support either "hardware" or software LRO. */
		if_setcapabilitiesbit(ifp, IFCAP_LRO, 0);
	}

	if (if_getcapabilities(ifp) & (IFCAP_HWCSUM | IFCAP_HWCSUM_IPV6)) {
		/*
		 * VirtIO does not support VLAN tagging, but we can fake
		 * it by inserting and removing the 802.1Q header during
		 * transmit and receive. We are then able to do checksum
		 * offloading of VLAN frames.
		 */
		if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWTAGGING | IFCAP_VLAN_HWCSUM, 0);
	}

	if (sc->vtnet_max_mtu >= ETHERMTU_JUMBO)
		if_setcapabilitiesbit(ifp, IFCAP_JUMBO_MTU, 0);
	if_setcapabilitiesbit(ifp, IFCAP_VLAN_MTU, 0);
	if_setcapabilitiesbit(ifp, IFCAP_HWSTATS, 0);

	/*
	 * Enable every capability advertised so far.  Capabilities added
	 * after this point are advertised but not enabled by default.
	 */
	if_setcapenable(ifp, if_getcapabilities(ifp));

	if (sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER) {
		if_setcapabilitiesbit(ifp, IFCAP_VLAN_HWFILTER, 0);

		/* Register handlers for the vlan_config/vlan_unconfig events. */
		sc->vtnet_vlan_attach = EVENTHANDLER_REGISTER(vlan_config,
		    vtnet_register_vlan, sc, EVENTHANDLER_PRI_FIRST);
		sc->vtnet_vlan_detach = EVENTHANDLER_REGISTER(vlan_unconfig,
		    vtnet_unregister_vlan, sc, EVENTHANDLER_PRI_FIRST);
	}

	ether_ifattach(ifp, sc->vtnet_hwaddr);

	/* Tell the upper layer(s) we support long frames. */
	if_setifheaderlen(ifp, sizeof(struct ether_vlan_header));

	DEBUGNET_SET(ifp, vtnet);

	/* Register an inbound Ethernet pfil head named after the interface. */
	pa.pa_version = PFIL_VERSION;
	pa.pa_flags = PFIL_IN;
	pa.pa_type = PFIL_TYPE_ETHERNET;
	pa.pa_headname = if_name(ifp);
	sc->vtnet_pfil = pfil_head_register(&pa);

	return (0);
}
1219 */ 1220 if (vtnet_modern(sc)) { 1221 MPASS(sc->vtnet_hdr_size == sizeof(struct virtio_net_hdr_v1)); 1222 framesz = sizeof(struct virtio_net_hdr_v1); 1223 } else 1224 framesz = sizeof(struct vtnet_rx_header); 1225 framesz += sizeof(struct ether_vlan_header) + mtu; 1226 /* 1227 * Account for the offsetting we'll do elsewhere so we allocate the 1228 * right size for the mtu. 1229 */ 1230 if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0) { 1231 framesz += VTNET_ETHER_ALIGN; 1232 } 1233 1234 if (framesz <= MCLBYTES) 1235 return (MCLBYTES); 1236 else if (framesz <= MJUMPAGESIZE) 1237 return (MJUMPAGESIZE); 1238 else if (framesz <= MJUM9BYTES) 1239 return (MJUM9BYTES); 1240 1241 /* Sane default; avoid 16KB clusters. */ 1242 return (MCLBYTES); 1243 } 1244 1245 static int 1246 vtnet_ioctl_mtu(struct vtnet_softc *sc, u_int mtu) 1247 { 1248 if_t ifp; 1249 int clustersz; 1250 1251 ifp = sc->vtnet_ifp; 1252 VTNET_CORE_LOCK_ASSERT(sc); 1253 1254 if (if_getmtu(ifp) == mtu) 1255 return (0); 1256 else if (mtu < ETHERMIN || mtu > sc->vtnet_max_mtu) 1257 return (EINVAL); 1258 1259 if_setmtu(ifp, mtu); 1260 clustersz = vtnet_rx_cluster_size(sc, mtu); 1261 1262 if (clustersz != sc->vtnet_rx_clustersz && 1263 if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 1264 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); 1265 vtnet_init_locked(sc, 0); 1266 } 1267 1268 return (0); 1269 } 1270 1271 static int 1272 vtnet_ioctl_ifflags(struct vtnet_softc *sc) 1273 { 1274 if_t ifp; 1275 int drv_running; 1276 1277 ifp = sc->vtnet_ifp; 1278 drv_running = (if_getdrvflags(ifp) & IFF_DRV_RUNNING) != 0; 1279 1280 VTNET_CORE_LOCK_ASSERT(sc); 1281 1282 if ((if_getflags(ifp) & IFF_UP) == 0) { 1283 if (drv_running) 1284 vtnet_stop(sc); 1285 goto out; 1286 } 1287 1288 if (!drv_running) { 1289 vtnet_init_locked(sc, 0); 1290 goto out; 1291 } 1292 1293 if ((if_getflags(ifp) ^ sc->vtnet_if_flags) & 1294 (IFF_PROMISC | IFF_ALLMULTI)) { 1295 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) 1296 vtnet_rx_filter(sc); 1297 else 
{ 1298 /* 1299 * We don't support filtering out multicast, so 1300 * ALLMULTI is always set. 1301 */ 1302 if_setflagbits(ifp, IFF_ALLMULTI, 0); 1303 if_setflagbits(ifp, IFF_PROMISC, 0); 1304 } 1305 } 1306 1307 out: 1308 sc->vtnet_if_flags = if_getflags(ifp); 1309 return (0); 1310 } 1311 1312 static int 1313 vtnet_ioctl_multi(struct vtnet_softc *sc) 1314 { 1315 if_t ifp; 1316 1317 ifp = sc->vtnet_ifp; 1318 1319 VTNET_CORE_LOCK_ASSERT(sc); 1320 1321 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX && 1322 if_getdrvflags(ifp) & IFF_DRV_RUNNING) 1323 vtnet_rx_filter_mac(sc); 1324 1325 return (0); 1326 } 1327 1328 static int 1329 vtnet_ioctl_ifcap(struct vtnet_softc *sc, struct ifreq *ifr) 1330 { 1331 if_t ifp; 1332 int mask, reinit, update; 1333 1334 ifp = sc->vtnet_ifp; 1335 mask = (ifr->ifr_reqcap & if_getcapabilities(ifp)) ^ if_getcapenable(ifp); 1336 reinit = update = 0; 1337 1338 VTNET_CORE_LOCK_ASSERT(sc); 1339 1340 if (mask & IFCAP_TXCSUM) { 1341 if (if_getcapenable(ifp) & IFCAP_TXCSUM && 1342 if_getcapenable(ifp) & IFCAP_TSO4) { 1343 /* Disable tso4, because txcsum will be disabled. */ 1344 if_setcapenablebit(ifp, 0, IFCAP_TSO4); 1345 if_sethwassistbits(ifp, 0, CSUM_IP_TSO); 1346 mask &= ~IFCAP_TSO4; 1347 } 1348 if_togglecapenable(ifp, IFCAP_TXCSUM); 1349 if_togglehwassist(ifp, VTNET_CSUM_OFFLOAD); 1350 } 1351 if (mask & IFCAP_TXCSUM_IPV6) { 1352 if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6 && 1353 if_getcapenable(ifp) & IFCAP_TSO6) { 1354 /* Disable tso6, because txcsum6 will be disabled. */ 1355 if_setcapenablebit(ifp, 0, IFCAP_TSO6); 1356 if_sethwassistbits(ifp, 0, CSUM_IP6_TSO); 1357 mask &= ~IFCAP_TSO6; 1358 } 1359 if_togglecapenable(ifp, IFCAP_TXCSUM_IPV6); 1360 if_togglehwassist(ifp, VTNET_CSUM_OFFLOAD_IPV6); 1361 } 1362 if (mask & IFCAP_TSO4) { 1363 if (if_getcapenable(ifp) & (IFCAP_TXCSUM | IFCAP_TSO4)) { 1364 /* tso4 can only be enabled, if txcsum is enabled. 
*/ 1365 if_togglecapenable(ifp, IFCAP_TSO4); 1366 if_togglehwassist(ifp, CSUM_IP_TSO); 1367 } 1368 } 1369 if (mask & IFCAP_TSO6) { 1370 if (if_getcapenable(ifp) & (IFCAP_TXCSUM_IPV6 | IFCAP_TSO6)) { 1371 /* tso6 can only be enabled, if txcsum6 is enabled. */ 1372 if_togglecapenable(ifp, IFCAP_TSO6); 1373 if_togglehwassist(ifp, CSUM_IP6_TSO); 1374 } 1375 } 1376 1377 if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) { 1378 /* 1379 * These Rx features require the negotiated features to 1380 * be updated. Avoid a full reinit if possible. 1381 */ 1382 if (sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS) 1383 update = 1; 1384 else 1385 reinit = 1; 1386 1387 /* BMV: Avoid needless renegotiation for just software LRO. */ 1388 if ((mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6 | IFCAP_LRO)) == 1389 IFCAP_LRO && vtnet_software_lro(sc)) 1390 reinit = update = 0; 1391 /* 1392 * VirtIO does not distinguish between receive checksum offload 1393 * for IPv4 and IPv6 packets, so treat them as a pair. 1394 */ 1395 if (mask & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { 1396 if_togglecapenable(ifp, IFCAP_RXCSUM); 1397 if_togglecapenable(ifp, IFCAP_RXCSUM_IPV6); 1398 } 1399 if (mask & IFCAP_LRO) 1400 if_togglecapenable(ifp, IFCAP_LRO); 1401 /* Both SW and HW TCP LRO require receive checksum offload. */ 1402 if ((if_getcapenable(ifp) & 1403 (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0) 1404 if_setcapenablebit(ifp, 0, IFCAP_LRO); 1405 } 1406 1407 if (mask & IFCAP_VLAN_HWFILTER) { 1408 /* These Rx features require renegotiation. 
*/ 1409 reinit = 1; 1410 1411 if (mask & IFCAP_VLAN_HWFILTER) 1412 if_togglecapenable(ifp, IFCAP_VLAN_HWFILTER); 1413 } 1414 1415 if (mask & IFCAP_VLAN_HWTSO) 1416 if_togglecapenable(ifp, IFCAP_VLAN_HWTSO); 1417 if (mask & IFCAP_VLAN_HWTAGGING) 1418 if_togglecapenable(ifp, IFCAP_VLAN_HWTAGGING); 1419 1420 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 1421 if (reinit) { 1422 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); 1423 vtnet_init_locked(sc, 0); 1424 } else if (update) 1425 vtnet_update_rx_offloads(sc); 1426 } 1427 1428 return (0); 1429 } 1430 1431 static int 1432 vtnet_ioctl(if_t ifp, u_long cmd, caddr_t data) 1433 { 1434 struct vtnet_softc *sc; 1435 struct ifreq *ifr; 1436 int error; 1437 1438 sc = if_getsoftc(ifp); 1439 ifr = (struct ifreq *) data; 1440 error = 0; 1441 1442 switch (cmd) { 1443 case SIOCSIFMTU: 1444 VTNET_CORE_LOCK(sc); 1445 error = vtnet_ioctl_mtu(sc, ifr->ifr_mtu); 1446 VTNET_CORE_UNLOCK(sc); 1447 break; 1448 1449 case SIOCSIFFLAGS: 1450 VTNET_CORE_LOCK(sc); 1451 error = vtnet_ioctl_ifflags(sc); 1452 VTNET_CORE_UNLOCK(sc); 1453 break; 1454 1455 case SIOCADDMULTI: 1456 case SIOCDELMULTI: 1457 VTNET_CORE_LOCK(sc); 1458 error = vtnet_ioctl_multi(sc); 1459 VTNET_CORE_UNLOCK(sc); 1460 break; 1461 1462 case SIOCSIFMEDIA: 1463 case SIOCGIFMEDIA: 1464 error = ifmedia_ioctl(ifp, ifr, &sc->vtnet_media, cmd); 1465 break; 1466 1467 case SIOCSIFCAP: 1468 VTNET_CORE_LOCK(sc); 1469 error = vtnet_ioctl_ifcap(sc, ifr); 1470 VTNET_CORE_UNLOCK(sc); 1471 VLAN_CAPABILITIES(ifp); 1472 break; 1473 1474 default: 1475 error = ether_ioctl(ifp, cmd, data); 1476 break; 1477 } 1478 1479 VTNET_CORE_LOCK_ASSERT_NOTOWNED(sc); 1480 1481 return (error); 1482 } 1483 1484 static int 1485 vtnet_rxq_populate(struct vtnet_rxq *rxq) 1486 { 1487 struct virtqueue *vq; 1488 int nbufs, error; 1489 1490 #ifdef DEV_NETMAP 1491 error = vtnet_netmap_rxq_populate(rxq); 1492 if (error >= 0) 1493 return (error); 1494 #endif /* DEV_NETMAP */ 1495 1496 vq = rxq->vtnrx_vq; 1497 error = ENOSPC; 
1498 1499 for (nbufs = 0; !virtqueue_full(vq); nbufs++) { 1500 error = vtnet_rxq_new_buf(rxq); 1501 if (error) 1502 break; 1503 } 1504 1505 if (nbufs > 0) { 1506 virtqueue_notify(vq); 1507 /* 1508 * EMSGSIZE signifies the virtqueue did not have enough 1509 * entries available to hold the last mbuf. This is not 1510 * an error. 1511 */ 1512 if (error == EMSGSIZE) 1513 error = 0; 1514 } 1515 1516 return (error); 1517 } 1518 1519 static void 1520 vtnet_rxq_free_mbufs(struct vtnet_rxq *rxq) 1521 { 1522 struct virtqueue *vq; 1523 struct mbuf *m; 1524 int last; 1525 #ifdef DEV_NETMAP 1526 struct netmap_kring *kring = netmap_kring_on(NA(rxq->vtnrx_sc->vtnet_ifp), 1527 rxq->vtnrx_id, NR_RX); 1528 #else /* !DEV_NETMAP */ 1529 void *kring = NULL; 1530 #endif /* !DEV_NETMAP */ 1531 1532 vq = rxq->vtnrx_vq; 1533 last = 0; 1534 1535 while ((m = virtqueue_drain(vq, &last)) != NULL) { 1536 if (kring == NULL) 1537 m_freem(m); 1538 } 1539 1540 KASSERT(virtqueue_empty(vq), 1541 ("%s: mbufs remaining in rx queue %p", __func__, rxq)); 1542 } 1543 1544 static struct mbuf * 1545 vtnet_rx_alloc_buf(struct vtnet_softc *sc, int nbufs, struct mbuf **m_tailp) 1546 { 1547 struct mbuf *m_head, *m_tail, *m; 1548 int i, size; 1549 1550 m_head = NULL; 1551 size = sc->vtnet_rx_clustersz; 1552 1553 KASSERT(nbufs == 1 || sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG, 1554 ("%s: mbuf %d chain requested without LRO_NOMRG", __func__, nbufs)); 1555 1556 for (i = 0; i < nbufs; i++) { 1557 m = m_getjcl(M_NOWAIT, MT_DATA, i == 0 ? M_PKTHDR : 0, size); 1558 if (m == NULL) { 1559 sc->vtnet_stats.mbuf_alloc_failed++; 1560 m_freem(m_head); 1561 return (NULL); 1562 } 1563 1564 m->m_len = size; 1565 /* 1566 * Need to offset the mbuf if the header we're going to add 1567 * will misalign. 
/*
 * Slow path for when LRO without mergeable buffers is negotiated.
 *
 * Replace the portion of the dequeued chain m0 that holds the len0 received
 * bytes with freshly allocated mbufs, and post the replacement chain to the
 * receive virtqueue.  On success the used mbufs of m0 are trimmed to the
 * received length and detached from the unused remainder, which is moved
 * onto the replacement chain.
 *
 * Returns 0 on success, EMSGSIZE if len0 exceeds the capacity of the chain,
 * ENOBUFS if the replacement mbufs cannot be allocated, or the error from
 * vtnet_rxq_enqueue_buf().  On the ENOBUFS and enqueue-failure paths the m0
 * chain is put back into its original shape.
 */
static int
vtnet_rxq_replace_lro_nomrg_buf(struct vtnet_rxq *rxq, struct mbuf *m0,
    int len0)
{
	struct vtnet_softc *sc;
	struct mbuf *m, *m_prev, *m_new, *m_tail;
	int len, clustersz, nreplace, error;

	sc = rxq->vtnrx_sc;
	clustersz = sc->vtnet_rx_clustersz;
	/*
	 * The mbufs are offset when the header we are going to add would
	 * misalign the payload; account for that in the expected length.
	 */
	if (VTNET_ETHER_ALIGN != 0 && sc->vtnet_hdr_size % 4 == 0)
		clustersz -= VTNET_ETHER_ALIGN;

	m_prev = NULL;
	m_tail = NULL;
	nreplace = 0;

	m = m0;
	len = len0;

	/*
	 * Since these mbuf chains are so large, avoid allocating a complete
	 * replacement when the received frame did not consume the entire
	 * chain. Unused mbufs are moved to the tail of the replacement mbuf.
	 */
	while (len > 0) {
		if (m == NULL) {
			sc->vtnet_stats.rx_frame_too_large++;
			return (EMSGSIZE);
		}

		/*
		 * Every mbuf should have the expected cluster size since that
		 * is also used to allocate the replacements.
		 */
		KASSERT(m->m_len == clustersz,
		    ("%s: mbuf size %d not expected cluster size %d", __func__,
		    m->m_len, clustersz));

		/* Only the last used mbuf can end up shorter than a cluster. */
		m->m_len = MIN(m->m_len, len);
		len -= m->m_len;

		m_prev = m;
		m = m->m_next;
		nreplace++;
	}

	KASSERT(nreplace > 0 && nreplace <= sc->vtnet_rx_nmbufs,
	    ("%s: invalid replacement mbuf count %d max %d", __func__,
	    nreplace, sc->vtnet_rx_nmbufs));

	m_new = vtnet_rx_alloc_buf(sc, nreplace, &m_tail);
	if (m_new == NULL) {
		/* Undo the trim of the last used mbuf before giving up. */
		m_prev->m_len = clustersz;
		return (ENOBUFS);
	}

	/*
	 * Move any unused mbufs from the received mbuf chain onto the
	 * end of the replacement chain.
	 */
	if (m_prev->m_next != NULL) {
		m_tail->m_next = m_prev->m_next;
		m_prev->m_next = NULL;
	}

	error = vtnet_rxq_enqueue_buf(rxq, m_new);
	if (error) {
		/*
		 * The replacement is supposed to be a copy of the one
		 * dequeued, so this is a very unexpected error.
		 *
		 * Restore the m0 chain to the original state if it was
		 * modified so we can then discard it.
		 */
		if (m_tail->m_next != NULL) {
			m_prev->m_next = m_tail->m_next;
			m_tail->m_next = NULL;
		}
		m_prev->m_len = clustersz;
		sc->vtnet_stats.rx_enq_replacement_failed++;
		m_freem(m_new);
	}

	return (error);
}
*/ 1739 error = sglist_append(sg, &rxhdr->vrh_hdr, sc->vtnet_hdr_size); 1740 if (error) 1741 return (error); 1742 error = sglist_append(sg, &rxhdr[1], 1743 m->m_len - sizeof(struct vtnet_rx_header)); 1744 if (error) 1745 return (error); 1746 1747 if (m->m_next != NULL) 1748 error = sglist_append_mbuf(sg, m->m_next); 1749 } 1750 1751 if (error) 1752 return (error); 1753 1754 return (virtqueue_enqueue(rxq->vtnrx_vq, m, sg, 0, sg->sg_nseg)); 1755 } 1756 1757 static int 1758 vtnet_rxq_new_buf(struct vtnet_rxq *rxq) 1759 { 1760 struct vtnet_softc *sc; 1761 struct mbuf *m; 1762 int error; 1763 1764 sc = rxq->vtnrx_sc; 1765 1766 m = vtnet_rx_alloc_buf(sc, sc->vtnet_rx_nmbufs, NULL); 1767 if (m == NULL) 1768 return (ENOBUFS); 1769 1770 error = vtnet_rxq_enqueue_buf(rxq, m); 1771 if (error) 1772 m_freem(m); 1773 1774 return (error); 1775 } 1776 1777 #if defined(INET) || defined(INET6) 1778 static void 1779 vtnet_rxq_csum_needs_csum(struct vtnet_rxq *rxq, struct mbuf *m, bool isipv6, 1780 int protocol, struct virtio_net_hdr *hdr) 1781 { 1782 /* 1783 * The packet is likely from another VM on the same host or from the 1784 * host that itself performed checksum offloading so Tx/Rx is basically 1785 * a memcpy and the checksum has little value so far. 1786 */ 1787 1788 KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP, 1789 ("%s: unsupported IP protocol %d", __func__, protocol)); 1790 1791 /* 1792 * Just forward the order to compute the checksum by setting 1793 * the corresponding mbuf flag (e.g., CSUM_TCP). 1794 */ 1795 switch (protocol) { 1796 case IPPROTO_TCP: 1797 m->m_pkthdr.csum_flags |= (isipv6 ? CSUM_TCP_IPV6 : CSUM_TCP); 1798 break; 1799 case IPPROTO_UDP: 1800 m->m_pkthdr.csum_flags |= (isipv6 ? 
CSUM_UDP_IPV6 : CSUM_UDP); 1801 break; 1802 } 1803 m->m_pkthdr.csum_data = hdr->csum_offset; 1804 } 1805 1806 static void 1807 vtnet_rxq_csum_data_valid(struct vtnet_rxq *rxq, struct mbuf *m, int protocol) 1808 { 1809 KASSERT(protocol == IPPROTO_TCP || protocol == IPPROTO_UDP, 1810 ("%s: unsupported IP protocol %d", __func__, protocol)); 1811 1812 m->m_pkthdr.csum_flags |= CSUM_DATA_VALID | CSUM_PSEUDO_HDR; 1813 m->m_pkthdr.csum_data = 0xFFFF; 1814 } 1815 1816 static int 1817 vtnet_rxq_csum(struct vtnet_rxq *rxq, struct mbuf *m, 1818 struct virtio_net_hdr *hdr) 1819 { 1820 const struct ether_header *eh; 1821 struct vtnet_softc *sc; 1822 int hoff, protocol; 1823 uint16_t etype; 1824 bool isipv6; 1825 1826 KASSERT(hdr->flags & 1827 (VIRTIO_NET_HDR_F_NEEDS_CSUM | VIRTIO_NET_HDR_F_DATA_VALID), 1828 ("%s: missing checksum offloading flag %x", __func__, hdr->flags)); 1829 1830 eh = mtod(m, const struct ether_header *); 1831 etype = ntohs(eh->ether_type); 1832 if (etype == ETHERTYPE_VLAN) { 1833 /* TODO BMV: Handle QinQ. */ 1834 const struct ether_vlan_header *evh = 1835 mtod(m, const struct ether_vlan_header *); 1836 etype = ntohs(evh->evl_proto); 1837 hoff = sizeof(struct ether_vlan_header); 1838 } else 1839 hoff = sizeof(struct ether_header); 1840 1841 sc = rxq->vtnrx_sc; 1842 1843 /* Check whether ethernet type is IP or IPv6, and get protocol. 
*/ 1844 switch (etype) { 1845 #if defined(INET) 1846 case ETHERTYPE_IP: 1847 if (__predict_false(m->m_len < hoff + sizeof(struct ip))) { 1848 sc->vtnet_stats.rx_csum_inaccessible_ipproto++; 1849 return (1); 1850 } else { 1851 struct ip *ip = (struct ip *)(m->m_data + hoff); 1852 protocol = ip->ip_p; 1853 } 1854 isipv6 = false; 1855 break; 1856 #endif 1857 #if defined(INET6) 1858 case ETHERTYPE_IPV6: 1859 if (__predict_false(m->m_len < hoff + sizeof(struct ip6_hdr)) 1860 || ip6_lasthdr(m, hoff, IPPROTO_IPV6, &protocol) < 0) { 1861 sc->vtnet_stats.rx_csum_inaccessible_ipproto++; 1862 return (1); 1863 } 1864 isipv6 = true; 1865 break; 1866 #endif 1867 default: 1868 sc->vtnet_stats.rx_csum_bad_ethtype++; 1869 return (1); 1870 } 1871 1872 /* Check whether protocol is TCP or UDP. */ 1873 switch (protocol) { 1874 case IPPROTO_TCP: 1875 case IPPROTO_UDP: 1876 break; 1877 default: 1878 /* 1879 * FreeBSD does not support checksum offloading of this 1880 * protocol here. 1881 */ 1882 sc->vtnet_stats.rx_csum_bad_ipproto++; 1883 return (1); 1884 } 1885 1886 if (hdr->flags & VIRTIO_NET_HDR_F_NEEDS_CSUM) 1887 vtnet_rxq_csum_needs_csum(rxq, m, isipv6, protocol, hdr); 1888 else /* VIRTIO_NET_HDR_F_DATA_VALID */ 1889 vtnet_rxq_csum_data_valid(rxq, m, protocol); 1890 1891 return (0); 1892 } 1893 #endif 1894 1895 static void 1896 vtnet_rxq_discard_merged_bufs(struct vtnet_rxq *rxq, int nbufs) 1897 { 1898 struct mbuf *m; 1899 1900 while (--nbufs > 0) { 1901 m = virtqueue_dequeue(rxq->vtnrx_vq, NULL); 1902 if (m == NULL) 1903 break; 1904 vtnet_rxq_discard_buf(rxq, m); 1905 } 1906 } 1907 1908 static void 1909 vtnet_rxq_discard_buf(struct vtnet_rxq *rxq, struct mbuf *m) 1910 { 1911 int error __diagused; 1912 1913 /* 1914 * Requeue the discarded mbuf. This should always be successful 1915 * since it was just dequeued. 
1916 */ 1917 error = vtnet_rxq_enqueue_buf(rxq, m); 1918 KASSERT(error == 0, 1919 ("%s: cannot requeue discarded mbuf %d", __func__, error)); 1920 } 1921 1922 static int 1923 vtnet_rxq_merged_eof(struct vtnet_rxq *rxq, struct mbuf *m_head, int nbufs) 1924 { 1925 struct vtnet_softc *sc; 1926 struct virtqueue *vq; 1927 struct mbuf *m_tail; 1928 1929 sc = rxq->vtnrx_sc; 1930 vq = rxq->vtnrx_vq; 1931 m_tail = m_head; 1932 1933 while (--nbufs > 0) { 1934 struct mbuf *m; 1935 uint32_t len; 1936 1937 m = virtqueue_dequeue(vq, &len); 1938 if (m == NULL) { 1939 rxq->vtnrx_stats.vrxs_ierrors++; 1940 goto fail; 1941 } 1942 1943 if (vtnet_rxq_new_buf(rxq) != 0) { 1944 rxq->vtnrx_stats.vrxs_iqdrops++; 1945 vtnet_rxq_discard_buf(rxq, m); 1946 if (nbufs > 1) 1947 vtnet_rxq_discard_merged_bufs(rxq, nbufs); 1948 goto fail; 1949 } 1950 1951 if (m->m_len < len) 1952 len = m->m_len; 1953 1954 m->m_len = len; 1955 m->m_flags &= ~M_PKTHDR; 1956 1957 m_head->m_pkthdr.len += len; 1958 m_tail->m_next = m; 1959 m_tail = m; 1960 } 1961 1962 return (0); 1963 1964 fail: 1965 sc->vtnet_stats.rx_mergeable_failed++; 1966 m_freem(m_head); 1967 1968 return (1); 1969 } 1970 1971 #if defined(INET) || defined(INET6) 1972 static int 1973 vtnet_lro_rx(struct vtnet_rxq *rxq, struct mbuf *m) 1974 { 1975 struct lro_ctrl *lro; 1976 1977 lro = &rxq->vtnrx_lro; 1978 1979 if (lro->lro_mbuf_max != 0) { 1980 tcp_lro_queue_mbuf(lro, m); 1981 return (0); 1982 } 1983 1984 return (tcp_lro_rx(lro, m, 0)); 1985 } 1986 #endif 1987 1988 static void 1989 vtnet_rxq_input(struct vtnet_rxq *rxq, struct mbuf *m, 1990 struct virtio_net_hdr *hdr) 1991 { 1992 struct vtnet_softc *sc; 1993 if_t ifp; 1994 1995 sc = rxq->vtnrx_sc; 1996 ifp = sc->vtnet_ifp; 1997 1998 if (if_getcapenable(ifp) & IFCAP_VLAN_HWTAGGING) { 1999 struct ether_header *eh = mtod(m, struct ether_header *); 2000 if (eh->ether_type == htons(ETHERTYPE_VLAN)) { 2001 vtnet_vlan_tag_remove(m); 2002 /* 2003 * With the 802.1Q header removed, update the 2004 * checksum 
/*
 * Process received frames from the receive virtqueue, handling at most
 * sc->vtnet_rx_process_limit dequeues per call.  The receive queue lock
 * must be held.
 *
 * Each dequeued buffer is validated, replaced in the virtqueue, assembled
 * with any additional merged buffers, stripped of its VirtIO header, run
 * through the inbound pfil hooks and passed to vtnet_rxq_input().
 *
 * Returns 0 when the loop stopped with budget to spare (count > 0) and
 * EAGAIN otherwise.  Because count is post-decremented in the loop test,
 * finding the queue empty on the last budgeted iteration also yields
 * EAGAIN.
 */
static int
vtnet_rxq_eof(struct vtnet_rxq *rxq)
{
	struct virtio_net_hdr lhdr, *hdr;
	struct vtnet_softc *sc;
	if_t ifp;
	struct virtqueue *vq;
	int deq, count;

	sc = rxq->vtnrx_sc;
	vq = rxq->vtnrx_vq;
	ifp = sc->vtnet_ifp;
	deq = 0;
	count = sc->vtnet_rx_process_limit;

	VTNET_RXQ_LOCK_ASSERT(rxq);

	CURVNET_SET(if_getvnet(ifp));
	while (count-- > 0) {
		struct mbuf *m;
		uint32_t len, nbufs, adjsz;

		m = virtqueue_dequeue(vq, &len);
		if (m == NULL)
			break;
		deq++;

		/* Too short to hold a VirtIO header plus an Ethernet header. */
		if (len < sc->vtnet_hdr_size + ETHER_HDR_LEN) {
			rxq->vtnrx_stats.vrxs_ierrors++;
			vtnet_rxq_discard_buf(rxq, m);
			continue;
		}

		/*
		 * Work out how many buffers make up this frame and how many
		 * header bytes must be stripped from the front of the mbuf.
		 */
		if (sc->vtnet_flags & VTNET_FLAG_MRG_RXBUFS) {
			struct virtio_net_hdr_mrg_rxbuf *mhdr =
			    mtod(m, struct virtio_net_hdr_mrg_rxbuf *);
			kmsan_mark(mhdr, sizeof(*mhdr), KMSAN_STATE_INITED);
			nbufs = vtnet_htog16(sc, mhdr->num_buffers);
			adjsz = sizeof(struct virtio_net_hdr_mrg_rxbuf);
		} else if (vtnet_modern(sc)) {
			nbufs = 1; /* num_buffers is always 1 */
			adjsz = sizeof(struct virtio_net_hdr_v1);
		} else {
			nbufs = 1;
			adjsz = sizeof(struct vtnet_rx_header);
			/*
			 * Account for our gap between the header and start of
			 * data to keep the segments separated.
			 */
			len += VTNET_RX_HEADER_PAD;
		}

		/*
		 * Post a replacement before consuming this buffer.  If that
		 * fails, requeue this buffer and the rest of the frame.
		 */
		if (vtnet_rxq_replace_buf(rxq, m, len) != 0) {
			rxq->vtnrx_stats.vrxs_iqdrops++;
			vtnet_rxq_discard_buf(rxq, m);
			if (nbufs > 1)
				vtnet_rxq_discard_merged_bufs(rxq, nbufs);
			continue;
		}

		m->m_pkthdr.len = len;
		m->m_pkthdr.rcvif = ifp;
		m->m_pkthdr.csum_flags = 0;

		if (nbufs > 1) {
			/* Dequeue the rest of chain. */
			if (vtnet_rxq_merged_eof(rxq, m, nbufs) != 0)
				continue;
		}

		kmsan_mark_mbuf(m, KMSAN_STATE_INITED);

		/*
		 * Save an endian swapped version of the header prior to it
		 * being stripped. The header is always at the start of the
		 * mbuf data. num_buffers was already saved (and not needed)
		 * so use the standard header.
		 */
		hdr = mtod(m, struct virtio_net_hdr *);
		lhdr.flags = hdr->flags;
		lhdr.gso_type = hdr->gso_type;
		lhdr.hdr_len = vtnet_htog16(sc, hdr->hdr_len);
		lhdr.gso_size = vtnet_htog16(sc, hdr->gso_size);
		lhdr.csum_start = vtnet_htog16(sc, hdr->csum_start);
		lhdr.csum_offset = vtnet_htog16(sc, hdr->csum_offset);
		m_adj(m, adjsz);

		/* Let inbound packet filters see (and possibly take) the frame. */
		if (PFIL_HOOKED_IN(sc->vtnet_pfil)) {
			pfil_return_t pfil;

			pfil = pfil_mbuf_in(sc->vtnet_pfil, &m, ifp, NULL);
			switch (pfil) {
			case PFIL_DROPPED:
			case PFIL_CONSUMED:
				continue;
			default:
				KASSERT(pfil == PFIL_PASS,
				    ("Filter returned %d!", pfil));
			}
		}

		vtnet_rxq_input(rxq, m, &lhdr);
	}

	/* Flush software LRO and notify the host if anything was dequeued. */
	if (deq > 0) {
#if defined(INET) || defined(INET6)
		if (vtnet_software_lro(sc))
			tcp_lro_flush_all(&rxq->vtnrx_lro);
#endif
		virtqueue_notify(vq);
	}
	CURVNET_RESTORE();

	return (count > 0 ? 0 : EAGAIN);
}
2204 * This is not necessary to serialize the access to the RX vq, but 2205 * rather to avoid races that may happen if this interface is 2206 * attached to a VALE switch, which would cause received packets 2207 * to stall in the RX queue (nm_kr_tryget() could find the kring 2208 * busy when called from netmap_bwrap_intr_notify()). 2209 */ 2210 nmirq = netmap_rx_irq(ifp, rxq->vtnrx_id, &more); 2211 if (nmirq != NM_IRQ_PASS) { 2212 VTNET_RXQ_UNLOCK(rxq); 2213 if (nmirq == NM_IRQ_RESCHED) { 2214 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 2215 } 2216 return; 2217 } 2218 #endif /* DEV_NETMAP */ 2219 2220 again: 2221 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { 2222 VTNET_RXQ_UNLOCK(rxq); 2223 return; 2224 } 2225 2226 more = vtnet_rxq_eof(rxq); 2227 if (more || vtnet_rxq_enable_intr(rxq) != 0) { 2228 if (!more) 2229 vtnet_rxq_disable_intr(rxq); 2230 /* 2231 * This is an occasional condition or race (when !more), 2232 * so retry a few times before scheduling the taskqueue. 2233 */ 2234 if (tries-- > 0) 2235 goto again; 2236 2237 rxq->vtnrx_stats.vrxs_rescheduled++; 2238 VTNET_RXQ_UNLOCK(rxq); 2239 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 2240 } else 2241 VTNET_RXQ_UNLOCK(rxq); 2242 } 2243 2244 static void 2245 vtnet_rx_vq_intr(void *xrxq) 2246 { 2247 struct vtnet_rxq *rxq; 2248 2249 rxq = xrxq; 2250 vtnet_rx_vq_process(rxq, VTNET_INTR_DISABLE_RETRIES); 2251 } 2252 2253 static void 2254 vtnet_rxq_tq_intr(void *xrxq, int pending __unused) 2255 { 2256 struct vtnet_rxq *rxq; 2257 2258 rxq = xrxq; 2259 vtnet_rx_vq_process(rxq, 0); 2260 } 2261 2262 static int 2263 vtnet_txq_intr_threshold(struct vtnet_txq *txq) 2264 { 2265 struct vtnet_softc *sc; 2266 int threshold; 2267 2268 sc = txq->vtntx_sc; 2269 2270 /* 2271 * The Tx interrupt is disabled until the queue free count falls 2272 * below our threshold. 
Completed frames are drained from the Tx 2273 * virtqueue before transmitting new frames and in the watchdog 2274 * callout, so the frequency of Tx interrupts is greatly reduced, 2275 * at the cost of not freeing mbufs as quickly as they otherwise 2276 * would be. 2277 */ 2278 threshold = virtqueue_size(txq->vtntx_vq) / 4; 2279 2280 /* 2281 * Without indirect descriptors, leave enough room for the most 2282 * segments we handle. 2283 */ 2284 if ((sc->vtnet_flags & VTNET_FLAG_INDIRECT) == 0 && 2285 threshold < sc->vtnet_tx_nsegs) 2286 threshold = sc->vtnet_tx_nsegs; 2287 2288 return (threshold); 2289 } 2290 2291 static int 2292 vtnet_txq_below_threshold(struct vtnet_txq *txq) 2293 { 2294 struct virtqueue *vq; 2295 2296 vq = txq->vtntx_vq; 2297 2298 return (virtqueue_nfree(vq) <= txq->vtntx_intr_threshold); 2299 } 2300 2301 static int 2302 vtnet_txq_notify(struct vtnet_txq *txq) 2303 { 2304 struct virtqueue *vq; 2305 2306 vq = txq->vtntx_vq; 2307 2308 txq->vtntx_watchdog = VTNET_TX_TIMEOUT; 2309 virtqueue_notify(vq); 2310 2311 if (vtnet_txq_enable_intr(txq) == 0) 2312 return (0); 2313 2314 /* 2315 * Drain frames that were completed since last checked. If this 2316 * causes the queue to go above the threshold, the caller should 2317 * continue transmitting. 
2318 */ 2319 if (vtnet_txq_eof(txq) != 0 && vtnet_txq_below_threshold(txq) == 0) { 2320 virtqueue_disable_intr(vq); 2321 return (1); 2322 } 2323 2324 return (0); 2325 } 2326 2327 static void 2328 vtnet_txq_free_mbufs(struct vtnet_txq *txq) 2329 { 2330 struct virtqueue *vq; 2331 struct vtnet_tx_header *txhdr; 2332 int last; 2333 #ifdef DEV_NETMAP 2334 struct netmap_kring *kring = netmap_kring_on(NA(txq->vtntx_sc->vtnet_ifp), 2335 txq->vtntx_id, NR_TX); 2336 #else /* !DEV_NETMAP */ 2337 void *kring = NULL; 2338 #endif /* !DEV_NETMAP */ 2339 2340 vq = txq->vtntx_vq; 2341 last = 0; 2342 2343 while ((txhdr = virtqueue_drain(vq, &last)) != NULL) { 2344 if (kring == NULL) { 2345 m_freem(txhdr->vth_mbuf); 2346 uma_zfree(vtnet_tx_header_zone, txhdr); 2347 } 2348 } 2349 2350 KASSERT(virtqueue_empty(vq), 2351 ("%s: mbufs remaining in tx queue %p", __func__, txq)); 2352 } 2353 2354 /* 2355 * BMV: This can go away once we finally have offsets in the mbuf header. 2356 */ 2357 static int 2358 vtnet_txq_offload_ctx(struct vtnet_txq *txq, struct mbuf *m, int *etype, 2359 int *proto, int *start) 2360 { 2361 struct vtnet_softc *sc; 2362 struct ether_vlan_header *evh; 2363 #if defined(INET) || defined(INET6) 2364 int offset; 2365 #endif 2366 2367 sc = txq->vtntx_sc; 2368 2369 evh = mtod(m, struct ether_vlan_header *); 2370 if (evh->evl_encap_proto == htons(ETHERTYPE_VLAN)) { 2371 /* BMV: We should handle nested VLAN tags too. 
*/ 2372 *etype = ntohs(evh->evl_proto); 2373 #if defined(INET) || defined(INET6) 2374 offset = sizeof(struct ether_vlan_header); 2375 #endif 2376 } else { 2377 *etype = ntohs(evh->evl_encap_proto); 2378 #if defined(INET) || defined(INET6) 2379 offset = sizeof(struct ether_header); 2380 #endif 2381 } 2382 2383 switch (*etype) { 2384 #if defined(INET) 2385 case ETHERTYPE_IP: { 2386 struct ip *ip, iphdr; 2387 if (__predict_false(m->m_len < offset + sizeof(struct ip))) { 2388 m_copydata(m, offset, sizeof(struct ip), 2389 (caddr_t) &iphdr); 2390 ip = &iphdr; 2391 } else 2392 ip = (struct ip *)(m->m_data + offset); 2393 *proto = ip->ip_p; 2394 *start = offset + (ip->ip_hl << 2); 2395 break; 2396 } 2397 #endif 2398 #if defined(INET6) 2399 case ETHERTYPE_IPV6: 2400 *proto = -1; 2401 *start = ip6_lasthdr(m, offset, IPPROTO_IPV6, proto); 2402 /* Assert the network stack sent us a valid packet. */ 2403 KASSERT(*start > offset, 2404 ("%s: mbuf %p start %d offset %d proto %d", __func__, m, 2405 *start, offset, *proto)); 2406 break; 2407 #endif 2408 default: 2409 sc->vtnet_stats.tx_csum_unknown_ethtype++; 2410 return (EINVAL); 2411 } 2412 2413 return (0); 2414 } 2415 2416 static int 2417 vtnet_txq_offload_tso(struct vtnet_txq *txq, struct mbuf *m, int eth_type, 2418 int offset, struct virtio_net_hdr *hdr) 2419 { 2420 static struct timeval lastecn; 2421 static int curecn; 2422 struct vtnet_softc *sc; 2423 struct tcphdr *tcp, tcphdr; 2424 2425 sc = txq->vtntx_sc; 2426 2427 if (__predict_false(m->m_len < offset + sizeof(struct tcphdr))) { 2428 m_copydata(m, offset, sizeof(struct tcphdr), (caddr_t) &tcphdr); 2429 tcp = &tcphdr; 2430 } else 2431 tcp = (struct tcphdr *)(m->m_data + offset); 2432 2433 hdr->hdr_len = vtnet_gtoh16(sc, offset + (tcp->th_off << 2)); 2434 hdr->gso_size = vtnet_gtoh16(sc, m->m_pkthdr.tso_segsz); 2435 hdr->gso_type = eth_type == ETHERTYPE_IP ? 
VIRTIO_NET_HDR_GSO_TCPV4 : 2436 VIRTIO_NET_HDR_GSO_TCPV6; 2437 2438 if (__predict_false(tcp_get_flags(tcp) & TH_CWR)) { 2439 /* 2440 * Drop if VIRTIO_NET_F_HOST_ECN was not negotiated. In 2441 * FreeBSD, ECN support is not on a per-interface basis, 2442 * but globally via the net.inet.tcp.ecn.enable sysctl 2443 * knob. The default is off. 2444 */ 2445 if ((sc->vtnet_flags & VTNET_FLAG_TSO_ECN) == 0) { 2446 if (ppsratecheck(&lastecn, &curecn, 1)) 2447 if_printf(sc->vtnet_ifp, 2448 "TSO with ECN not negotiated with host\n"); 2449 return (ENOTSUP); 2450 } 2451 hdr->gso_type |= VIRTIO_NET_HDR_GSO_ECN; 2452 } 2453 2454 txq->vtntx_stats.vtxs_tso++; 2455 2456 return (0); 2457 } 2458 2459 static struct mbuf * 2460 vtnet_txq_offload(struct vtnet_txq *txq, struct mbuf *m, 2461 struct virtio_net_hdr *hdr) 2462 { 2463 struct vtnet_softc *sc; 2464 int flags, etype, csum_start, proto, error; 2465 2466 sc = txq->vtntx_sc; 2467 flags = m->m_pkthdr.csum_flags; 2468 2469 error = vtnet_txq_offload_ctx(txq, m, &etype, &proto, &csum_start); 2470 if (error) 2471 goto drop; 2472 2473 if (flags & (VTNET_CSUM_OFFLOAD | VTNET_CSUM_OFFLOAD_IPV6)) { 2474 /* Sanity check the parsed mbuf matches the offload flags. */ 2475 if (__predict_false((flags & VTNET_CSUM_OFFLOAD && 2476 etype != ETHERTYPE_IP) || (flags & VTNET_CSUM_OFFLOAD_IPV6 2477 && etype != ETHERTYPE_IPV6))) { 2478 sc->vtnet_stats.tx_csum_proto_mismatch++; 2479 goto drop; 2480 } 2481 2482 hdr->flags |= VIRTIO_NET_HDR_F_NEEDS_CSUM; 2483 hdr->csum_start = vtnet_gtoh16(sc, csum_start); 2484 hdr->csum_offset = vtnet_gtoh16(sc, m->m_pkthdr.csum_data); 2485 txq->vtntx_stats.vtxs_csum++; 2486 } 2487 2488 if (flags & (CSUM_IP_TSO | CSUM_IP6_TSO)) { 2489 /* 2490 * Sanity check the parsed mbuf IP protocol is TCP, and 2491 * VirtIO TSO reqires the checksum offloading above. 
2492 */ 2493 if (__predict_false(proto != IPPROTO_TCP)) { 2494 sc->vtnet_stats.tx_tso_not_tcp++; 2495 goto drop; 2496 } else if (__predict_false((hdr->flags & 2497 VIRTIO_NET_HDR_F_NEEDS_CSUM) == 0)) { 2498 sc->vtnet_stats.tx_tso_without_csum++; 2499 goto drop; 2500 } 2501 2502 error = vtnet_txq_offload_tso(txq, m, etype, csum_start, hdr); 2503 if (error) 2504 goto drop; 2505 } 2506 2507 return (m); 2508 2509 drop: 2510 m_freem(m); 2511 return (NULL); 2512 } 2513 2514 static int 2515 vtnet_txq_enqueue_buf(struct vtnet_txq *txq, struct mbuf **m_head, 2516 struct vtnet_tx_header *txhdr) 2517 { 2518 struct vtnet_softc *sc; 2519 struct virtqueue *vq; 2520 struct sglist *sg; 2521 struct mbuf *m; 2522 int error; 2523 2524 sc = txq->vtntx_sc; 2525 vq = txq->vtntx_vq; 2526 sg = txq->vtntx_sg; 2527 m = *m_head; 2528 2529 sglist_reset(sg); 2530 error = sglist_append(sg, &txhdr->vth_uhdr, sc->vtnet_hdr_size); 2531 if (error != 0 || sg->sg_nseg != 1) { 2532 KASSERT(0, ("%s: cannot add header to sglist error %d nseg %d", 2533 __func__, error, sg->sg_nseg)); 2534 goto fail; 2535 } 2536 2537 error = sglist_append_mbuf(sg, m); 2538 if (error) { 2539 m = m_defrag(m, M_NOWAIT); 2540 if (m == NULL) { 2541 sc->vtnet_stats.tx_defrag_failed++; 2542 goto fail; 2543 } 2544 2545 *m_head = m; 2546 sc->vtnet_stats.tx_defragged++; 2547 2548 error = sglist_append_mbuf(sg, m); 2549 if (error) 2550 goto fail; 2551 } 2552 2553 txhdr->vth_mbuf = m; 2554 error = virtqueue_enqueue(vq, txhdr, sg, sg->sg_nseg, 0); 2555 2556 return (error); 2557 2558 fail: 2559 m_freem(*m_head); 2560 *m_head = NULL; 2561 2562 return (ENOBUFS); 2563 } 2564 2565 static int 2566 vtnet_txq_encap(struct vtnet_txq *txq, struct mbuf **m_head, int flags) 2567 { 2568 struct vtnet_tx_header *txhdr; 2569 struct virtio_net_hdr *hdr; 2570 struct mbuf *m; 2571 int error; 2572 2573 m = *m_head; 2574 M_ASSERTPKTHDR(m); 2575 2576 txhdr = uma_zalloc(vtnet_tx_header_zone, flags | M_ZERO); 2577 if (txhdr == NULL) { 2578 m_freem(m); 2579 
*m_head = NULL; 2580 return (ENOMEM); 2581 } 2582 2583 /* 2584 * Always use the non-mergeable header, regardless if mergable headers 2585 * were negotiated, because for transmit num_buffers is always zero. 2586 * The vtnet_hdr_size is used to enqueue the right header size segment. 2587 */ 2588 hdr = &txhdr->vth_uhdr.hdr; 2589 2590 if (m->m_flags & M_VLANTAG) { 2591 m = ether_vlanencap(m, m->m_pkthdr.ether_vtag); 2592 if ((*m_head = m) == NULL) { 2593 error = ENOBUFS; 2594 goto fail; 2595 } 2596 m->m_flags &= ~M_VLANTAG; 2597 } 2598 2599 if (m->m_pkthdr.csum_flags & VTNET_CSUM_ALL_OFFLOAD) { 2600 m = vtnet_txq_offload(txq, m, hdr); 2601 if ((*m_head = m) == NULL) { 2602 error = ENOBUFS; 2603 goto fail; 2604 } 2605 } 2606 2607 error = vtnet_txq_enqueue_buf(txq, m_head, txhdr); 2608 fail: 2609 if (error) 2610 uma_zfree(vtnet_tx_header_zone, txhdr); 2611 2612 return (error); 2613 } 2614 2615 2616 static void 2617 vtnet_start_locked(struct vtnet_txq *txq, if_t ifp) 2618 { 2619 struct vtnet_softc *sc; 2620 struct virtqueue *vq; 2621 struct mbuf *m0; 2622 int tries, enq; 2623 2624 sc = txq->vtntx_sc; 2625 vq = txq->vtntx_vq; 2626 tries = 0; 2627 2628 VTNET_TXQ_LOCK_ASSERT(txq); 2629 2630 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || 2631 sc->vtnet_link_active == 0) 2632 return; 2633 2634 vtnet_txq_eof(txq); 2635 2636 again: 2637 enq = 0; 2638 2639 while (!if_sendq_empty(ifp)) { 2640 if (virtqueue_full(vq)) 2641 break; 2642 2643 m0 = if_dequeue(ifp); 2644 if (m0 == NULL) 2645 break; 2646 2647 if (vtnet_txq_encap(txq, &m0, M_NOWAIT) != 0) { 2648 if (m0 != NULL) 2649 if_sendq_prepend(ifp, m0); 2650 break; 2651 } 2652 2653 enq++; 2654 ETHER_BPF_MTAP(ifp, m0); 2655 } 2656 2657 if (enq > 0 && vtnet_txq_notify(txq) != 0) { 2658 if (tries++ < VTNET_NOTIFY_RETRIES) 2659 goto again; 2660 2661 txq->vtntx_stats.vtxs_rescheduled++; 2662 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); 2663 } 2664 } 2665 2666 static void 2667 vtnet_start(if_t ifp) 2668 { 2669 struct 
vtnet_softc *sc; 2670 struct vtnet_txq *txq; 2671 2672 sc = if_getsoftc(ifp); 2673 txq = &sc->vtnet_txqs[0]; 2674 2675 VTNET_TXQ_LOCK(txq); 2676 vtnet_start_locked(txq, ifp); 2677 VTNET_TXQ_UNLOCK(txq); 2678 } 2679 2680 2681 static int 2682 vtnet_txq_mq_start_locked(struct vtnet_txq *txq, struct mbuf *m) 2683 { 2684 struct vtnet_softc *sc; 2685 struct virtqueue *vq; 2686 struct buf_ring *br; 2687 if_t ifp; 2688 int enq, tries, error; 2689 2690 sc = txq->vtntx_sc; 2691 vq = txq->vtntx_vq; 2692 br = txq->vtntx_br; 2693 ifp = sc->vtnet_ifp; 2694 tries = 0; 2695 error = 0; 2696 2697 VTNET_TXQ_LOCK_ASSERT(txq); 2698 2699 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0 || 2700 sc->vtnet_link_active == 0) { 2701 if (m != NULL) 2702 error = drbr_enqueue(ifp, br, m); 2703 return (error); 2704 } 2705 2706 if (m != NULL) { 2707 error = drbr_enqueue(ifp, br, m); 2708 if (error) 2709 return (error); 2710 } 2711 2712 vtnet_txq_eof(txq); 2713 2714 again: 2715 enq = 0; 2716 2717 while ((m = drbr_peek(ifp, br)) != NULL) { 2718 if (virtqueue_full(vq)) { 2719 drbr_putback(ifp, br, m); 2720 break; 2721 } 2722 2723 if (vtnet_txq_encap(txq, &m, M_NOWAIT) != 0) { 2724 if (m != NULL) 2725 drbr_putback(ifp, br, m); 2726 else 2727 drbr_advance(ifp, br); 2728 break; 2729 } 2730 drbr_advance(ifp, br); 2731 2732 enq++; 2733 ETHER_BPF_MTAP(ifp, m); 2734 } 2735 2736 if (enq > 0 && vtnet_txq_notify(txq) != 0) { 2737 if (tries++ < VTNET_NOTIFY_RETRIES) 2738 goto again; 2739 2740 txq->vtntx_stats.vtxs_rescheduled++; 2741 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_intrtask); 2742 } 2743 2744 return (0); 2745 } 2746 2747 static int 2748 vtnet_txq_mq_start(if_t ifp, struct mbuf *m) 2749 { 2750 struct vtnet_softc *sc; 2751 struct vtnet_txq *txq; 2752 int i, npairs, error; 2753 2754 sc = if_getsoftc(ifp); 2755 npairs = sc->vtnet_act_vq_pairs; 2756 2757 if (M_HASHTYPE_GET(m) != M_HASHTYPE_NONE) 2758 i = m->m_pkthdr.flowid % npairs; 2759 else 2760 i = curcpu % npairs; 2761 2762 txq = 
&sc->vtnet_txqs[i]; 2763 2764 if (VTNET_TXQ_TRYLOCK(txq) != 0) { 2765 error = vtnet_txq_mq_start_locked(txq, m); 2766 VTNET_TXQ_UNLOCK(txq); 2767 } else { 2768 error = drbr_enqueue(ifp, txq->vtntx_br, m); 2769 taskqueue_enqueue(txq->vtntx_tq, &txq->vtntx_defrtask); 2770 } 2771 2772 return (error); 2773 } 2774 2775 static void 2776 vtnet_txq_tq_deferred(void *xtxq, int pending __unused) 2777 { 2778 struct vtnet_softc *sc; 2779 struct vtnet_txq *txq; 2780 2781 txq = xtxq; 2782 sc = txq->vtntx_sc; 2783 2784 VTNET_TXQ_LOCK(txq); 2785 if (!drbr_empty(sc->vtnet_ifp, txq->vtntx_br)) 2786 vtnet_txq_mq_start_locked(txq, NULL); 2787 VTNET_TXQ_UNLOCK(txq); 2788 } 2789 2790 2791 static void 2792 vtnet_txq_start(struct vtnet_txq *txq) 2793 { 2794 struct vtnet_softc *sc; 2795 if_t ifp; 2796 2797 sc = txq->vtntx_sc; 2798 ifp = sc->vtnet_ifp; 2799 2800 if (!VTNET_ALTQ_ENABLED) { 2801 if (!drbr_empty(ifp, txq->vtntx_br)) 2802 vtnet_txq_mq_start_locked(txq, NULL); 2803 } else { 2804 if (!if_sendq_empty(ifp)) 2805 vtnet_start_locked(txq, ifp); 2806 2807 } 2808 } 2809 2810 static void 2811 vtnet_txq_tq_intr(void *xtxq, int pending __unused) 2812 { 2813 struct vtnet_softc *sc; 2814 struct vtnet_txq *txq; 2815 if_t ifp; 2816 2817 txq = xtxq; 2818 sc = txq->vtntx_sc; 2819 ifp = sc->vtnet_ifp; 2820 2821 VTNET_TXQ_LOCK(txq); 2822 2823 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { 2824 VTNET_TXQ_UNLOCK(txq); 2825 return; 2826 } 2827 2828 vtnet_txq_eof(txq); 2829 vtnet_txq_start(txq); 2830 2831 VTNET_TXQ_UNLOCK(txq); 2832 } 2833 2834 static int 2835 vtnet_txq_eof(struct vtnet_txq *txq) 2836 { 2837 struct virtqueue *vq; 2838 struct vtnet_tx_header *txhdr; 2839 struct mbuf *m; 2840 int deq; 2841 2842 vq = txq->vtntx_vq; 2843 deq = 0; 2844 VTNET_TXQ_LOCK_ASSERT(txq); 2845 2846 while ((txhdr = virtqueue_dequeue(vq, NULL)) != NULL) { 2847 m = txhdr->vth_mbuf; 2848 deq++; 2849 2850 txq->vtntx_stats.vtxs_opackets++; 2851 txq->vtntx_stats.vtxs_obytes += m->m_pkthdr.len; 2852 if (m->m_flags & 
M_MCAST) 2853 txq->vtntx_stats.vtxs_omcasts++; 2854 2855 m_freem(m); 2856 uma_zfree(vtnet_tx_header_zone, txhdr); 2857 } 2858 2859 if (virtqueue_empty(vq)) 2860 txq->vtntx_watchdog = 0; 2861 2862 return (deq); 2863 } 2864 2865 static void 2866 vtnet_tx_vq_intr(void *xtxq) 2867 { 2868 struct vtnet_softc *sc; 2869 struct vtnet_txq *txq; 2870 if_t ifp; 2871 2872 txq = xtxq; 2873 sc = txq->vtntx_sc; 2874 ifp = sc->vtnet_ifp; 2875 2876 if (__predict_false(txq->vtntx_id >= sc->vtnet_act_vq_pairs)) { 2877 /* 2878 * Ignore this interrupt. Either this is a spurious interrupt 2879 * or multiqueue without per-VQ MSIX so every queue needs to 2880 * be polled (a brain dead configuration we could try harder 2881 * to avoid). 2882 */ 2883 vtnet_txq_disable_intr(txq); 2884 return; 2885 } 2886 2887 #ifdef DEV_NETMAP 2888 if (netmap_tx_irq(ifp, txq->vtntx_id) != NM_IRQ_PASS) 2889 return; 2890 #endif /* DEV_NETMAP */ 2891 2892 VTNET_TXQ_LOCK(txq); 2893 2894 if ((if_getdrvflags(ifp) & IFF_DRV_RUNNING) == 0) { 2895 VTNET_TXQ_UNLOCK(txq); 2896 return; 2897 } 2898 2899 vtnet_txq_eof(txq); 2900 vtnet_txq_start(txq); 2901 2902 VTNET_TXQ_UNLOCK(txq); 2903 } 2904 2905 static void 2906 vtnet_tx_start_all(struct vtnet_softc *sc) 2907 { 2908 struct vtnet_txq *txq; 2909 int i; 2910 2911 VTNET_CORE_LOCK_ASSERT(sc); 2912 2913 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2914 txq = &sc->vtnet_txqs[i]; 2915 2916 VTNET_TXQ_LOCK(txq); 2917 vtnet_txq_start(txq); 2918 VTNET_TXQ_UNLOCK(txq); 2919 } 2920 } 2921 2922 static void 2923 vtnet_qflush(if_t ifp) 2924 { 2925 struct vtnet_softc *sc; 2926 struct vtnet_txq *txq; 2927 struct mbuf *m; 2928 int i; 2929 2930 sc = if_getsoftc(ifp); 2931 2932 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 2933 txq = &sc->vtnet_txqs[i]; 2934 2935 VTNET_TXQ_LOCK(txq); 2936 while ((m = buf_ring_dequeue_sc(txq->vtntx_br)) != NULL) 2937 m_freem(m); 2938 VTNET_TXQ_UNLOCK(txq); 2939 } 2940 2941 if_qflush(ifp); 2942 } 2943 2944 static int 2945 vtnet_watchdog(struct vtnet_txq 
*txq) 2946 { 2947 if_t ifp; 2948 2949 ifp = txq->vtntx_sc->vtnet_ifp; 2950 2951 VTNET_TXQ_LOCK(txq); 2952 if (txq->vtntx_watchdog == 1) { 2953 /* 2954 * Only drain completed frames if the watchdog is about to 2955 * expire. If any frames were drained, there may be enough 2956 * free descriptors now available to transmit queued frames. 2957 * In that case, the timer will immediately be decremented 2958 * below, but the timeout is generous enough that should not 2959 * be a problem. 2960 */ 2961 if (vtnet_txq_eof(txq) != 0) 2962 vtnet_txq_start(txq); 2963 } 2964 2965 if (txq->vtntx_watchdog == 0 || --txq->vtntx_watchdog) { 2966 VTNET_TXQ_UNLOCK(txq); 2967 return (0); 2968 } 2969 VTNET_TXQ_UNLOCK(txq); 2970 2971 if_printf(ifp, "watchdog timeout on queue %d\n", txq->vtntx_id); 2972 return (1); 2973 } 2974 2975 static void 2976 vtnet_accum_stats(struct vtnet_softc *sc, struct vtnet_rxq_stats *rxacc, 2977 struct vtnet_txq_stats *txacc) 2978 { 2979 2980 bzero(rxacc, sizeof(struct vtnet_rxq_stats)); 2981 bzero(txacc, sizeof(struct vtnet_txq_stats)); 2982 2983 for (int i = 0; i < sc->vtnet_max_vq_pairs; i++) { 2984 struct vtnet_rxq_stats *rxst; 2985 struct vtnet_txq_stats *txst; 2986 2987 rxst = &sc->vtnet_rxqs[i].vtnrx_stats; 2988 rxacc->vrxs_ipackets += rxst->vrxs_ipackets; 2989 rxacc->vrxs_ibytes += rxst->vrxs_ibytes; 2990 rxacc->vrxs_iqdrops += rxst->vrxs_iqdrops; 2991 rxacc->vrxs_csum += rxst->vrxs_csum; 2992 rxacc->vrxs_csum_failed += rxst->vrxs_csum_failed; 2993 rxacc->vrxs_rescheduled += rxst->vrxs_rescheduled; 2994 2995 txst = &sc->vtnet_txqs[i].vtntx_stats; 2996 txacc->vtxs_opackets += txst->vtxs_opackets; 2997 txacc->vtxs_obytes += txst->vtxs_obytes; 2998 txacc->vtxs_csum += txst->vtxs_csum; 2999 txacc->vtxs_tso += txst->vtxs_tso; 3000 txacc->vtxs_rescheduled += txst->vtxs_rescheduled; 3001 } 3002 } 3003 3004 static uint64_t 3005 vtnet_get_counter(if_t ifp, ift_counter cnt) 3006 { 3007 struct vtnet_softc *sc; 3008 struct vtnet_rxq_stats rxaccum; 3009 struct 
vtnet_txq_stats txaccum; 3010 3011 sc = if_getsoftc(ifp); 3012 vtnet_accum_stats(sc, &rxaccum, &txaccum); 3013 3014 switch (cnt) { 3015 case IFCOUNTER_IPACKETS: 3016 return (rxaccum.vrxs_ipackets); 3017 case IFCOUNTER_IQDROPS: 3018 return (rxaccum.vrxs_iqdrops); 3019 case IFCOUNTER_IERRORS: 3020 return (rxaccum.vrxs_ierrors); 3021 case IFCOUNTER_IBYTES: 3022 return (rxaccum.vrxs_ibytes); 3023 case IFCOUNTER_OPACKETS: 3024 return (txaccum.vtxs_opackets); 3025 case IFCOUNTER_OBYTES: 3026 return (txaccum.vtxs_obytes); 3027 case IFCOUNTER_OMCASTS: 3028 return (txaccum.vtxs_omcasts); 3029 default: 3030 return (if_get_counter_default(ifp, cnt)); 3031 } 3032 } 3033 3034 static void 3035 vtnet_tick(void *xsc) 3036 { 3037 struct vtnet_softc *sc; 3038 if_t ifp; 3039 int i, timedout; 3040 3041 sc = xsc; 3042 ifp = sc->vtnet_ifp; 3043 timedout = 0; 3044 3045 VTNET_CORE_LOCK_ASSERT(sc); 3046 3047 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 3048 timedout |= vtnet_watchdog(&sc->vtnet_txqs[i]); 3049 3050 if (timedout != 0) { 3051 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); 3052 vtnet_init_locked(sc, 0); 3053 } else 3054 callout_schedule(&sc->vtnet_tick_ch, hz); 3055 } 3056 3057 static void 3058 vtnet_start_taskqueues(struct vtnet_softc *sc) 3059 { 3060 device_t dev; 3061 struct vtnet_rxq *rxq; 3062 struct vtnet_txq *txq; 3063 int i, error; 3064 3065 dev = sc->vtnet_dev; 3066 3067 /* 3068 * Errors here are very difficult to recover from - we cannot 3069 * easily fail because, if this is during boot, we will hang 3070 * when freeing any successfully started taskqueues because 3071 * the scheduler isn't up yet. 3072 * 3073 * Most drivers just ignore the return value - it only fails 3074 * with ENOMEM so an error is not likely. 
3075 */ 3076 for (i = 0; i < sc->vtnet_req_vq_pairs; i++) { 3077 rxq = &sc->vtnet_rxqs[i]; 3078 error = taskqueue_start_threads(&rxq->vtnrx_tq, 1, PI_NET, 3079 "%s rxq %d", device_get_nameunit(dev), rxq->vtnrx_id); 3080 if (error) { 3081 device_printf(dev, "failed to start rx taskq %d\n", 3082 rxq->vtnrx_id); 3083 } 3084 3085 txq = &sc->vtnet_txqs[i]; 3086 error = taskqueue_start_threads(&txq->vtntx_tq, 1, PI_NET, 3087 "%s txq %d", device_get_nameunit(dev), txq->vtntx_id); 3088 if (error) { 3089 device_printf(dev, "failed to start tx taskq %d\n", 3090 txq->vtntx_id); 3091 } 3092 } 3093 } 3094 3095 static void 3096 vtnet_free_taskqueues(struct vtnet_softc *sc) 3097 { 3098 struct vtnet_rxq *rxq; 3099 struct vtnet_txq *txq; 3100 int i; 3101 3102 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 3103 rxq = &sc->vtnet_rxqs[i]; 3104 if (rxq->vtnrx_tq != NULL) { 3105 taskqueue_free(rxq->vtnrx_tq); 3106 rxq->vtnrx_tq = NULL; 3107 } 3108 3109 txq = &sc->vtnet_txqs[i]; 3110 if (txq->vtntx_tq != NULL) { 3111 taskqueue_free(txq->vtntx_tq); 3112 txq->vtntx_tq = NULL; 3113 } 3114 } 3115 } 3116 3117 static void 3118 vtnet_drain_taskqueues(struct vtnet_softc *sc) 3119 { 3120 struct vtnet_rxq *rxq; 3121 struct vtnet_txq *txq; 3122 int i; 3123 3124 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 3125 rxq = &sc->vtnet_rxqs[i]; 3126 if (rxq->vtnrx_tq != NULL) 3127 taskqueue_drain(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 3128 3129 txq = &sc->vtnet_txqs[i]; 3130 if (txq->vtntx_tq != NULL) { 3131 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_intrtask); 3132 if (!VTNET_ALTQ_ENABLED) 3133 taskqueue_drain(txq->vtntx_tq, &txq->vtntx_defrtask); 3134 } 3135 } 3136 } 3137 3138 static void 3139 vtnet_drain_rxtx_queues(struct vtnet_softc *sc) 3140 { 3141 struct vtnet_rxq *rxq; 3142 struct vtnet_txq *txq; 3143 int i; 3144 3145 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 3146 rxq = &sc->vtnet_rxqs[i]; 3147 vtnet_rxq_free_mbufs(rxq); 3148 3149 txq = &sc->vtnet_txqs[i]; 3150 vtnet_txq_free_mbufs(txq); 3151 
} 3152 } 3153 3154 static void 3155 vtnet_stop_rendezvous(struct vtnet_softc *sc) 3156 { 3157 struct vtnet_rxq *rxq; 3158 struct vtnet_txq *txq; 3159 int i; 3160 3161 VTNET_CORE_LOCK_ASSERT(sc); 3162 3163 /* 3164 * Lock and unlock the per-queue mutex so we known the stop 3165 * state is visible. Doing only the active queues should be 3166 * sufficient, but it does not cost much extra to do all the 3167 * queues. 3168 */ 3169 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 3170 rxq = &sc->vtnet_rxqs[i]; 3171 VTNET_RXQ_LOCK(rxq); 3172 VTNET_RXQ_UNLOCK(rxq); 3173 3174 txq = &sc->vtnet_txqs[i]; 3175 VTNET_TXQ_LOCK(txq); 3176 VTNET_TXQ_UNLOCK(txq); 3177 } 3178 } 3179 3180 static void 3181 vtnet_stop(struct vtnet_softc *sc) 3182 { 3183 device_t dev; 3184 if_t ifp; 3185 3186 dev = sc->vtnet_dev; 3187 ifp = sc->vtnet_ifp; 3188 3189 VTNET_CORE_LOCK_ASSERT(sc); 3190 3191 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); 3192 sc->vtnet_link_active = 0; 3193 callout_stop(&sc->vtnet_tick_ch); 3194 3195 /* Only advisory. */ 3196 vtnet_disable_interrupts(sc); 3197 3198 #ifdef DEV_NETMAP 3199 /* Stop any pending txsync/rxsync and disable them. */ 3200 netmap_disable_all_rings(ifp); 3201 #endif /* DEV_NETMAP */ 3202 3203 /* 3204 * Stop the host adapter. This resets it to the pre-initialized 3205 * state. It will not generate any interrupts until after it is 3206 * reinitialized. 3207 */ 3208 virtio_stop(dev); 3209 vtnet_stop_rendezvous(sc); 3210 3211 vtnet_drain_rxtx_queues(sc); 3212 sc->vtnet_act_vq_pairs = 1; 3213 } 3214 3215 static int 3216 vtnet_virtio_reinit(struct vtnet_softc *sc) 3217 { 3218 device_t dev; 3219 if_t ifp; 3220 uint64_t features; 3221 int error; 3222 3223 dev = sc->vtnet_dev; 3224 ifp = sc->vtnet_ifp; 3225 features = sc->vtnet_negotiated_features; 3226 3227 /* 3228 * Re-negotiate with the host, removing any disabled receive 3229 * features. Transmit features are disabled only on our side 3230 * via if_capenable and if_hwassist. 
3231 */ 3232 3233 if ((if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) == 0) 3234 features &= ~(VIRTIO_NET_F_GUEST_CSUM | VTNET_LRO_FEATURES); 3235 3236 if ((if_getcapenable(ifp) & IFCAP_LRO) == 0) 3237 features &= ~VTNET_LRO_FEATURES; 3238 3239 if ((if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) == 0) 3240 features &= ~VIRTIO_NET_F_CTRL_VLAN; 3241 3242 error = virtio_reinit(dev, features); 3243 if (error) { 3244 device_printf(dev, "virtio reinit error %d\n", error); 3245 return (error); 3246 } 3247 3248 sc->vtnet_features = features; 3249 virtio_reinit_complete(dev); 3250 3251 return (0); 3252 } 3253 3254 static void 3255 vtnet_init_rx_filters(struct vtnet_softc *sc) 3256 { 3257 if_t ifp; 3258 3259 ifp = sc->vtnet_ifp; 3260 3261 if (sc->vtnet_flags & VTNET_FLAG_CTRL_RX) { 3262 vtnet_rx_filter(sc); 3263 vtnet_rx_filter_mac(sc); 3264 } 3265 3266 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER) 3267 vtnet_rx_filter_vlan(sc); 3268 } 3269 3270 static int 3271 vtnet_init_rx_queues(struct vtnet_softc *sc) 3272 { 3273 device_t dev; 3274 if_t ifp; 3275 struct vtnet_rxq *rxq; 3276 int i, clustersz, error; 3277 3278 dev = sc->vtnet_dev; 3279 ifp = sc->vtnet_ifp; 3280 3281 clustersz = vtnet_rx_cluster_size(sc, if_getmtu(ifp)); 3282 sc->vtnet_rx_clustersz = clustersz; 3283 3284 if (sc->vtnet_flags & VTNET_FLAG_LRO_NOMRG) { 3285 sc->vtnet_rx_nmbufs = howmany(sizeof(struct vtnet_rx_header) + 3286 VTNET_MAX_RX_SIZE, clustersz); 3287 KASSERT(sc->vtnet_rx_nmbufs < sc->vtnet_rx_nsegs, 3288 ("%s: too many rx mbufs %d for %d segments", __func__, 3289 sc->vtnet_rx_nmbufs, sc->vtnet_rx_nsegs)); 3290 } else 3291 sc->vtnet_rx_nmbufs = 1; 3292 3293 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 3294 rxq = &sc->vtnet_rxqs[i]; 3295 3296 /* Hold the lock to satisfy asserts. 
*/ 3297 VTNET_RXQ_LOCK(rxq); 3298 error = vtnet_rxq_populate(rxq); 3299 VTNET_RXQ_UNLOCK(rxq); 3300 3301 if (error) { 3302 device_printf(dev, "cannot populate Rx queue %d\n", i); 3303 return (error); 3304 } 3305 } 3306 3307 return (0); 3308 } 3309 3310 static int 3311 vtnet_init_tx_queues(struct vtnet_softc *sc) 3312 { 3313 struct vtnet_txq *txq; 3314 int i; 3315 3316 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 3317 txq = &sc->vtnet_txqs[i]; 3318 txq->vtntx_watchdog = 0; 3319 txq->vtntx_intr_threshold = vtnet_txq_intr_threshold(txq); 3320 #ifdef DEV_NETMAP 3321 netmap_reset(NA(sc->vtnet_ifp), NR_TX, i, 0); 3322 #endif /* DEV_NETMAP */ 3323 } 3324 3325 return (0); 3326 } 3327 3328 static int 3329 vtnet_init_rxtx_queues(struct vtnet_softc *sc) 3330 { 3331 int error; 3332 3333 error = vtnet_init_rx_queues(sc); 3334 if (error) 3335 return (error); 3336 3337 error = vtnet_init_tx_queues(sc); 3338 if (error) 3339 return (error); 3340 3341 return (0); 3342 } 3343 3344 static void 3345 vtnet_set_active_vq_pairs(struct vtnet_softc *sc) 3346 { 3347 device_t dev; 3348 int npairs; 3349 3350 dev = sc->vtnet_dev; 3351 3352 if ((sc->vtnet_flags & VTNET_FLAG_MQ) == 0) { 3353 sc->vtnet_act_vq_pairs = 1; 3354 return; 3355 } 3356 3357 npairs = sc->vtnet_req_vq_pairs; 3358 3359 if (vtnet_ctrl_mq_cmd(sc, npairs) != 0) { 3360 device_printf(dev, "cannot set active queue pairs to %d, " 3361 "falling back to 1 queue pair\n", npairs); 3362 npairs = 1; 3363 } 3364 3365 sc->vtnet_act_vq_pairs = npairs; 3366 } 3367 3368 static void 3369 vtnet_update_rx_offloads(struct vtnet_softc *sc) 3370 { 3371 if_t ifp; 3372 uint64_t features; 3373 int error; 3374 3375 ifp = sc->vtnet_ifp; 3376 features = sc->vtnet_features; 3377 3378 VTNET_CORE_LOCK_ASSERT(sc); 3379 3380 if (if_getcapabilities(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) { 3381 if (if_getcapenable(ifp) & (IFCAP_RXCSUM | IFCAP_RXCSUM_IPV6)) 3382 features |= VIRTIO_NET_F_GUEST_CSUM; 3383 else 3384 features &= ~VIRTIO_NET_F_GUEST_CSUM; 3385 
} 3386 3387 if (if_getcapabilities(ifp) & IFCAP_LRO && !vtnet_software_lro(sc)) { 3388 if (if_getcapenable(ifp) & IFCAP_LRO) 3389 features |= VTNET_LRO_FEATURES; 3390 else 3391 features &= ~VTNET_LRO_FEATURES; 3392 } 3393 3394 error = vtnet_ctrl_guest_offloads(sc, 3395 features & (VIRTIO_NET_F_GUEST_CSUM | VIRTIO_NET_F_GUEST_TSO4 | 3396 VIRTIO_NET_F_GUEST_TSO6 | VIRTIO_NET_F_GUEST_ECN | 3397 VIRTIO_NET_F_GUEST_UFO)); 3398 if (error) { 3399 device_printf(sc->vtnet_dev, 3400 "%s: cannot update Rx features\n", __func__); 3401 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) { 3402 if_setdrvflagbits(ifp, 0, IFF_DRV_RUNNING); 3403 vtnet_init_locked(sc, 0); 3404 } 3405 } else 3406 sc->vtnet_features = features; 3407 } 3408 3409 static int 3410 vtnet_reinit(struct vtnet_softc *sc) 3411 { 3412 if_t ifp; 3413 int error; 3414 3415 ifp = sc->vtnet_ifp; 3416 3417 bcopy(if_getlladdr(ifp), sc->vtnet_hwaddr, ETHER_ADDR_LEN); 3418 3419 error = vtnet_virtio_reinit(sc); 3420 if (error) 3421 return (error); 3422 3423 vtnet_set_macaddr(sc); 3424 vtnet_set_active_vq_pairs(sc); 3425 3426 if (sc->vtnet_flags & VTNET_FLAG_CTRL_VQ) 3427 vtnet_init_rx_filters(sc); 3428 3429 if_sethwassist(ifp, 0); 3430 if (if_getcapenable(ifp) & IFCAP_TXCSUM) 3431 if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD, 0); 3432 if (if_getcapenable(ifp) & IFCAP_TXCSUM_IPV6) 3433 if_sethwassistbits(ifp, VTNET_CSUM_OFFLOAD_IPV6, 0); 3434 if (if_getcapenable(ifp) & IFCAP_TSO4) 3435 if_sethwassistbits(ifp, CSUM_IP_TSO, 0); 3436 if (if_getcapenable(ifp) & IFCAP_TSO6) 3437 if_sethwassistbits(ifp, CSUM_IP6_TSO, 0); 3438 3439 error = vtnet_init_rxtx_queues(sc); 3440 if (error) 3441 return (error); 3442 3443 return (0); 3444 } 3445 3446 static void 3447 vtnet_init_locked(struct vtnet_softc *sc, int init_mode) 3448 { 3449 if_t ifp; 3450 3451 ifp = sc->vtnet_ifp; 3452 3453 VTNET_CORE_LOCK_ASSERT(sc); 3454 3455 if (if_getdrvflags(ifp) & IFF_DRV_RUNNING) 3456 return; 3457 3458 vtnet_stop(sc); 3459 3460 #ifdef DEV_NETMAP 3461 /* Once 
/*
 * Initialization entry point taking the softc as an opaque pointer:
 * acquires the core lock and runs vtnet_init_locked() with init mode 0.
 */
static void
vtnet_init(void *xsc)
{
	struct vtnet_softc *sc = xsc;

	VTNET_CORE_LOCK(sc);
	vtnet_init_locked(sc, 0);
	VTNET_CORE_UNLOCK(sc);
}
3529 */ 3530 if (virtqueue_enqueue(vq, cookie, sg, readable, writable) == 0) { 3531 virtqueue_notify(vq); 3532 virtqueue_poll(vq, NULL); 3533 } 3534 } 3535 3536 static int 3537 vtnet_ctrl_mac_cmd(struct vtnet_softc *sc, uint8_t *hwaddr) 3538 { 3539 struct sglist_seg segs[3]; 3540 struct sglist sg; 3541 struct { 3542 struct virtio_net_ctrl_hdr hdr __aligned(2); 3543 uint8_t pad1; 3544 uint8_t addr[ETHER_ADDR_LEN] __aligned(8); 3545 uint8_t pad2; 3546 uint8_t ack; 3547 } s; 3548 int error; 3549 3550 error = 0; 3551 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_MAC); 3552 3553 s.hdr.class = VIRTIO_NET_CTRL_MAC; 3554 s.hdr.cmd = VIRTIO_NET_CTRL_MAC_ADDR_SET; 3555 bcopy(hwaddr, &s.addr[0], ETHER_ADDR_LEN); 3556 s.ack = VIRTIO_NET_ERR; 3557 3558 sglist_init(&sg, nitems(segs), segs); 3559 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3560 error |= sglist_append(&sg, &s.addr[0], ETHER_ADDR_LEN); 3561 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3562 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3563 3564 if (error == 0) 3565 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3566 3567 return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); 3568 } 3569 3570 static int 3571 vtnet_ctrl_guest_offloads(struct vtnet_softc *sc, uint64_t offloads) 3572 { 3573 struct sglist_seg segs[3]; 3574 struct sglist sg; 3575 struct { 3576 struct virtio_net_ctrl_hdr hdr __aligned(2); 3577 uint8_t pad1; 3578 uint64_t offloads __aligned(8); 3579 uint8_t pad2; 3580 uint8_t ack; 3581 } s; 3582 int error; 3583 3584 error = 0; 3585 MPASS(sc->vtnet_features & VIRTIO_NET_F_CTRL_GUEST_OFFLOADS); 3586 3587 s.hdr.class = VIRTIO_NET_CTRL_GUEST_OFFLOADS; 3588 s.hdr.cmd = VIRTIO_NET_CTRL_GUEST_OFFLOADS_SET; 3589 s.offloads = vtnet_gtoh64(sc, offloads); 3590 s.ack = VIRTIO_NET_ERR; 3591 3592 sglist_init(&sg, nitems(segs), segs); 3593 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3594 error |= sglist_append(&sg, &s.offloads, sizeof(uint64_t)); 3595 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3596 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3597 3598 if (error == 0) 3599 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3600 3601 return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); 3602 } 3603 3604 static int 3605 vtnet_ctrl_mq_cmd(struct vtnet_softc *sc, uint16_t npairs) 3606 { 3607 struct sglist_seg segs[3]; 3608 struct sglist sg; 3609 struct { 3610 struct virtio_net_ctrl_hdr hdr __aligned(2); 3611 uint8_t pad1; 3612 struct virtio_net_ctrl_mq mq __aligned(2); 3613 uint8_t pad2; 3614 uint8_t ack; 3615 } s; 3616 int error; 3617 3618 error = 0; 3619 MPASS(sc->vtnet_flags & VTNET_FLAG_MQ); 3620 3621 s.hdr.class = VIRTIO_NET_CTRL_MQ; 3622 s.hdr.cmd = VIRTIO_NET_CTRL_MQ_VQ_PAIRS_SET; 3623 s.mq.virtqueue_pairs = vtnet_gtoh16(sc, npairs); 3624 s.ack = VIRTIO_NET_ERR; 3625 3626 sglist_init(&sg, nitems(segs), segs); 3627 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3628 error |= sglist_append(&sg, &s.mq, sizeof(struct virtio_net_ctrl_mq)); 3629 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3630 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3631 3632 if (error == 0) 3633 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3634 3635 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3636 } 3637 3638 static int 3639 vtnet_ctrl_rx_cmd(struct vtnet_softc *sc, uint8_t cmd, bool on) 3640 { 3641 struct sglist_seg segs[3]; 3642 struct sglist sg; 3643 struct { 3644 struct virtio_net_ctrl_hdr hdr __aligned(2); 3645 uint8_t pad1; 3646 uint8_t onoff; 3647 uint8_t pad2; 3648 uint8_t ack; 3649 } s; 3650 int error; 3651 3652 error = 0; 3653 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX); 3654 3655 s.hdr.class = VIRTIO_NET_CTRL_RX; 3656 s.hdr.cmd = cmd; 3657 s.onoff = on; 3658 s.ack = VIRTIO_NET_ERR; 3659 3660 sglist_init(&sg, nitems(segs), segs); 3661 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3662 error |= sglist_append(&sg, &s.onoff, sizeof(uint8_t)); 3663 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3664 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3665 3666 if (error == 0) 3667 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3668 3669 return (s.ack == VIRTIO_NET_OK ? 
0 : EIO); 3670 } 3671 3672 static int 3673 vtnet_set_promisc(struct vtnet_softc *sc, bool on) 3674 { 3675 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_PROMISC, on)); 3676 } 3677 3678 static int 3679 vtnet_set_allmulti(struct vtnet_softc *sc, bool on) 3680 { 3681 return (vtnet_ctrl_rx_cmd(sc, VIRTIO_NET_CTRL_RX_ALLMULTI, on)); 3682 } 3683 3684 static void 3685 vtnet_rx_filter(struct vtnet_softc *sc) 3686 { 3687 device_t dev; 3688 if_t ifp; 3689 3690 dev = sc->vtnet_dev; 3691 ifp = sc->vtnet_ifp; 3692 3693 VTNET_CORE_LOCK_ASSERT(sc); 3694 3695 if (vtnet_set_promisc(sc, if_getflags(ifp) & IFF_PROMISC) != 0) { 3696 device_printf(dev, "cannot %s promiscuous mode\n", 3697 if_getflags(ifp) & IFF_PROMISC ? "enable" : "disable"); 3698 } 3699 3700 if (vtnet_set_allmulti(sc, if_getflags(ifp) & IFF_ALLMULTI) != 0) { 3701 device_printf(dev, "cannot %s all-multicast mode\n", 3702 if_getflags(ifp) & IFF_ALLMULTI ? "enable" : "disable"); 3703 } 3704 } 3705 3706 static u_int 3707 vtnet_copy_ifaddr(void *arg, struct sockaddr_dl *sdl, u_int ucnt) 3708 { 3709 struct vtnet_softc *sc = arg; 3710 3711 if (memcmp(LLADDR(sdl), sc->vtnet_hwaddr, ETHER_ADDR_LEN) == 0) 3712 return (0); 3713 3714 if (ucnt < VTNET_MAX_MAC_ENTRIES) 3715 bcopy(LLADDR(sdl), 3716 &sc->vtnet_mac_filter->vmf_unicast.macs[ucnt], 3717 ETHER_ADDR_LEN); 3718 3719 return (1); 3720 } 3721 3722 static u_int 3723 vtnet_copy_maddr(void *arg, struct sockaddr_dl *sdl, u_int mcnt) 3724 { 3725 struct vtnet_mac_filter *filter = arg; 3726 3727 if (mcnt < VTNET_MAX_MAC_ENTRIES) 3728 bcopy(LLADDR(sdl), &filter->vmf_multicast.macs[mcnt], 3729 ETHER_ADDR_LEN); 3730 3731 return (1); 3732 } 3733 3734 static void 3735 vtnet_rx_filter_mac(struct vtnet_softc *sc) 3736 { 3737 struct virtio_net_ctrl_hdr hdr __aligned(2); 3738 struct vtnet_mac_filter *filter; 3739 struct sglist_seg segs[4]; 3740 struct sglist sg; 3741 if_t ifp; 3742 bool promisc, allmulti; 3743 u_int ucnt, mcnt; 3744 int error; 3745 uint8_t ack; 3746 3747 ifp = 
sc->vtnet_ifp; 3748 filter = sc->vtnet_mac_filter; 3749 error = 0; 3750 3751 MPASS(sc->vtnet_flags & VTNET_FLAG_CTRL_RX); 3752 VTNET_CORE_LOCK_ASSERT(sc); 3753 3754 /* Unicast MAC addresses: */ 3755 ucnt = if_foreach_lladdr(ifp, vtnet_copy_ifaddr, sc); 3756 promisc = (ucnt > VTNET_MAX_MAC_ENTRIES); 3757 3758 if (promisc) { 3759 ucnt = 0; 3760 if_printf(ifp, "more than %d MAC addresses assigned, " 3761 "falling back to promiscuous mode\n", 3762 VTNET_MAX_MAC_ENTRIES); 3763 } 3764 3765 /* Multicast MAC addresses: */ 3766 mcnt = if_foreach_llmaddr(ifp, vtnet_copy_maddr, filter); 3767 allmulti = (mcnt > VTNET_MAX_MAC_ENTRIES); 3768 3769 if (allmulti) { 3770 mcnt = 0; 3771 if_printf(ifp, "more than %d multicast MAC addresses " 3772 "assigned, falling back to all-multicast mode\n", 3773 VTNET_MAX_MAC_ENTRIES); 3774 } 3775 3776 if (promisc && allmulti) 3777 goto out; 3778 3779 filter->vmf_unicast.nentries = vtnet_gtoh32(sc, ucnt); 3780 filter->vmf_multicast.nentries = vtnet_gtoh32(sc, mcnt); 3781 3782 hdr.class = VIRTIO_NET_CTRL_MAC; 3783 hdr.cmd = VIRTIO_NET_CTRL_MAC_TABLE_SET; 3784 ack = VIRTIO_NET_ERR; 3785 3786 sglist_init(&sg, nitems(segs), segs); 3787 error |= sglist_append(&sg, &hdr, sizeof(struct virtio_net_ctrl_hdr)); 3788 error |= sglist_append(&sg, &filter->vmf_unicast, 3789 sizeof(uint32_t) + ucnt * ETHER_ADDR_LEN); 3790 error |= sglist_append(&sg, &filter->vmf_multicast, 3791 sizeof(uint32_t) + mcnt * ETHER_ADDR_LEN); 3792 error |= sglist_append(&sg, &ack, sizeof(uint8_t)); 3793 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3794 3795 if (error == 0) 3796 vtnet_exec_ctrl_cmd(sc, &ack, &sg, sg.sg_nseg - 1, 1); 3797 if (ack != VIRTIO_NET_OK) 3798 if_printf(ifp, "error setting host MAC filter table\n"); 3799 3800 out: 3801 if (promisc && vtnet_set_promisc(sc, true) != 0) 3802 if_printf(ifp, "cannot enable promiscuous mode\n"); 3803 if (allmulti && vtnet_set_allmulti(sc, true) != 0) 3804 if_printf(ifp, "cannot enable all-multicast mode\n"); 3805 } 3806 3807 
static int 3808 vtnet_exec_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) 3809 { 3810 struct sglist_seg segs[3]; 3811 struct sglist sg; 3812 struct { 3813 struct virtio_net_ctrl_hdr hdr __aligned(2); 3814 uint8_t pad1; 3815 uint16_t tag __aligned(2); 3816 uint8_t pad2; 3817 uint8_t ack; 3818 } s; 3819 int error; 3820 3821 error = 0; 3822 MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER); 3823 3824 s.hdr.class = VIRTIO_NET_CTRL_VLAN; 3825 s.hdr.cmd = add ? VIRTIO_NET_CTRL_VLAN_ADD : VIRTIO_NET_CTRL_VLAN_DEL; 3826 s.tag = vtnet_gtoh16(sc, tag); 3827 s.ack = VIRTIO_NET_ERR; 3828 3829 sglist_init(&sg, nitems(segs), segs); 3830 error |= sglist_append(&sg, &s.hdr, sizeof(struct virtio_net_ctrl_hdr)); 3831 error |= sglist_append(&sg, &s.tag, sizeof(uint16_t)); 3832 error |= sglist_append(&sg, &s.ack, sizeof(uint8_t)); 3833 MPASS(error == 0 && sg.sg_nseg == nitems(segs)); 3834 3835 if (error == 0) 3836 vtnet_exec_ctrl_cmd(sc, &s.ack, &sg, sg.sg_nseg - 1, 1); 3837 3838 return (s.ack == VIRTIO_NET_OK ? 0 : EIO); 3839 } 3840 3841 static void 3842 vtnet_rx_filter_vlan(struct vtnet_softc *sc) 3843 { 3844 int i, bit; 3845 uint32_t w; 3846 uint16_t tag; 3847 3848 MPASS(sc->vtnet_flags & VTNET_FLAG_VLAN_FILTER); 3849 VTNET_CORE_LOCK_ASSERT(sc); 3850 3851 /* Enable the filter for each configured VLAN. 
*/ 3852 for (i = 0; i < VTNET_VLAN_FILTER_NWORDS; i++) { 3853 w = sc->vtnet_vlan_filter[i]; 3854 3855 while ((bit = ffs(w) - 1) != -1) { 3856 w &= ~(1 << bit); 3857 tag = sizeof(w) * CHAR_BIT * i + bit; 3858 3859 if (vtnet_exec_vlan_filter(sc, 1, tag) != 0) { 3860 device_printf(sc->vtnet_dev, 3861 "cannot enable VLAN %d filter\n", tag); 3862 } 3863 } 3864 } 3865 } 3866 3867 static void 3868 vtnet_update_vlan_filter(struct vtnet_softc *sc, int add, uint16_t tag) 3869 { 3870 if_t ifp; 3871 int idx, bit; 3872 3873 ifp = sc->vtnet_ifp; 3874 idx = (tag >> 5) & 0x7F; 3875 bit = tag & 0x1F; 3876 3877 if (tag == 0 || tag > 4095) 3878 return; 3879 3880 VTNET_CORE_LOCK(sc); 3881 3882 if (add) 3883 sc->vtnet_vlan_filter[idx] |= (1 << bit); 3884 else 3885 sc->vtnet_vlan_filter[idx] &= ~(1 << bit); 3886 3887 if (if_getcapenable(ifp) & IFCAP_VLAN_HWFILTER && 3888 if_getdrvflags(ifp) & IFF_DRV_RUNNING && 3889 vtnet_exec_vlan_filter(sc, add, tag) != 0) { 3890 device_printf(sc->vtnet_dev, 3891 "cannot %s VLAN %d %s the host filter table\n", 3892 add ? "add" : "remove", tag, add ? "to" : "from"); 3893 } 3894 3895 VTNET_CORE_UNLOCK(sc); 3896 } 3897 3898 static void 3899 vtnet_register_vlan(void *arg, if_t ifp, uint16_t tag) 3900 { 3901 3902 if (if_getsoftc(ifp) != arg) 3903 return; 3904 3905 vtnet_update_vlan_filter(arg, 1, tag); 3906 } 3907 3908 static void 3909 vtnet_unregister_vlan(void *arg, if_t ifp, uint16_t tag) 3910 { 3911 3912 if (if_getsoftc(ifp) != arg) 3913 return; 3914 3915 vtnet_update_vlan_filter(arg, 0, tag); 3916 } 3917 3918 static void 3919 vtnet_update_speed_duplex(struct vtnet_softc *sc) 3920 { 3921 if_t ifp; 3922 uint32_t speed; 3923 3924 ifp = sc->vtnet_ifp; 3925 3926 if ((sc->vtnet_features & VIRTIO_NET_F_SPEED_DUPLEX) == 0) 3927 return; 3928 3929 /* BMV: Ignore duplex. 
*/ 3930 speed = virtio_read_dev_config_4(sc->vtnet_dev, 3931 offsetof(struct virtio_net_config, speed)); 3932 if (speed != UINT32_MAX) 3933 if_setbaudrate(ifp, IF_Mbps(speed)); 3934 } 3935 3936 static int 3937 vtnet_is_link_up(struct vtnet_softc *sc) 3938 { 3939 uint16_t status; 3940 3941 if ((sc->vtnet_features & VIRTIO_NET_F_STATUS) == 0) 3942 return (1); 3943 3944 status = virtio_read_dev_config_2(sc->vtnet_dev, 3945 offsetof(struct virtio_net_config, status)); 3946 3947 return ((status & VIRTIO_NET_S_LINK_UP) != 0); 3948 } 3949 3950 static void 3951 vtnet_update_link_status(struct vtnet_softc *sc) 3952 { 3953 if_t ifp; 3954 int link; 3955 3956 ifp = sc->vtnet_ifp; 3957 VTNET_CORE_LOCK_ASSERT(sc); 3958 link = vtnet_is_link_up(sc); 3959 3960 /* Notify if the link status has changed. */ 3961 if (link != 0 && sc->vtnet_link_active == 0) { 3962 vtnet_update_speed_duplex(sc); 3963 sc->vtnet_link_active = 1; 3964 if_link_state_change(ifp, LINK_STATE_UP); 3965 } else if (link == 0 && sc->vtnet_link_active != 0) { 3966 sc->vtnet_link_active = 0; 3967 if_link_state_change(ifp, LINK_STATE_DOWN); 3968 } 3969 } 3970 3971 static int 3972 vtnet_ifmedia_upd(if_t ifp __unused) 3973 { 3974 return (EOPNOTSUPP); 3975 } 3976 3977 static void 3978 vtnet_ifmedia_sts(if_t ifp, struct ifmediareq *ifmr) 3979 { 3980 struct vtnet_softc *sc; 3981 3982 sc = if_getsoftc(ifp); 3983 3984 ifmr->ifm_status = IFM_AVALID; 3985 ifmr->ifm_active = IFM_ETHER; 3986 3987 VTNET_CORE_LOCK(sc); 3988 if (vtnet_is_link_up(sc) != 0) { 3989 ifmr->ifm_status |= IFM_ACTIVE; 3990 ifmr->ifm_active |= IFM_10G_T | IFM_FDX; 3991 } else 3992 ifmr->ifm_active |= IFM_NONE; 3993 VTNET_CORE_UNLOCK(sc); 3994 } 3995 3996 static void 3997 vtnet_get_macaddr(struct vtnet_softc *sc) 3998 { 3999 4000 if (sc->vtnet_flags & VTNET_FLAG_MAC) { 4001 virtio_read_device_config_array(sc->vtnet_dev, 4002 offsetof(struct virtio_net_config, mac), 4003 &sc->vtnet_hwaddr[0], sizeof(uint8_t), ETHER_ADDR_LEN); 4004 } else { 4005 /* Generate a 
random locally administered unicast address. */ 4006 sc->vtnet_hwaddr[0] = 0xB2; 4007 arc4rand(&sc->vtnet_hwaddr[1], ETHER_ADDR_LEN - 1, 0); 4008 } 4009 } 4010 4011 static void 4012 vtnet_set_macaddr(struct vtnet_softc *sc) 4013 { 4014 device_t dev; 4015 int error; 4016 4017 dev = sc->vtnet_dev; 4018 4019 if (sc->vtnet_flags & VTNET_FLAG_CTRL_MAC) { 4020 error = vtnet_ctrl_mac_cmd(sc, sc->vtnet_hwaddr); 4021 if (error) 4022 device_printf(dev, "unable to set MAC address\n"); 4023 return; 4024 } 4025 4026 /* MAC in config is read-only in modern VirtIO. */ 4027 if (!vtnet_modern(sc) && sc->vtnet_flags & VTNET_FLAG_MAC) { 4028 for (int i = 0; i < ETHER_ADDR_LEN; i++) { 4029 virtio_write_dev_config_1(dev, 4030 offsetof(struct virtio_net_config, mac) + i, 4031 sc->vtnet_hwaddr[i]); 4032 } 4033 } 4034 } 4035 4036 static void 4037 vtnet_attached_set_macaddr(struct vtnet_softc *sc) 4038 { 4039 4040 /* Assign MAC address if it was generated. */ 4041 if ((sc->vtnet_flags & VTNET_FLAG_MAC) == 0) 4042 vtnet_set_macaddr(sc); 4043 } 4044 4045 static void 4046 vtnet_vlan_tag_remove(struct mbuf *m) 4047 { 4048 struct ether_vlan_header *evh; 4049 4050 evh = mtod(m, struct ether_vlan_header *); 4051 m->m_pkthdr.ether_vtag = ntohs(evh->evl_tag); 4052 m->m_flags |= M_VLANTAG; 4053 4054 /* Strip the 802.1Q header. 
*/ 4055 bcopy((char *) evh, (char *) evh + ETHER_VLAN_ENCAP_LEN, 4056 ETHER_HDR_LEN - ETHER_TYPE_LEN); 4057 m_adj(m, ETHER_VLAN_ENCAP_LEN); 4058 } 4059 4060 static void 4061 vtnet_set_rx_process_limit(struct vtnet_softc *sc) 4062 { 4063 int limit; 4064 4065 limit = vtnet_tunable_int(sc, "rx_process_limit", 4066 vtnet_rx_process_limit); 4067 if (limit < 0) 4068 limit = INT_MAX; 4069 sc->vtnet_rx_process_limit = limit; 4070 } 4071 4072 static void 4073 vtnet_setup_rxq_sysctl(struct sysctl_ctx_list *ctx, 4074 struct sysctl_oid_list *child, struct vtnet_rxq *rxq) 4075 { 4076 struct sysctl_oid *node; 4077 struct sysctl_oid_list *list; 4078 struct vtnet_rxq_stats *stats; 4079 char namebuf[16]; 4080 4081 snprintf(namebuf, sizeof(namebuf), "rxq%d", rxq->vtnrx_id); 4082 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 4083 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Receive Queue"); 4084 list = SYSCTL_CHILDREN(node); 4085 4086 stats = &rxq->vtnrx_stats; 4087 4088 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ipackets", 4089 CTLFLAG_RD | CTLFLAG_STATS, 4090 &stats->vrxs_ipackets, "Receive packets"); 4091 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ibytes", 4092 CTLFLAG_RD | CTLFLAG_STATS, 4093 &stats->vrxs_ibytes, "Receive bytes"); 4094 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "iqdrops", 4095 CTLFLAG_RD | CTLFLAG_STATS, 4096 &stats->vrxs_iqdrops, "Receive drops"); 4097 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "ierrors", 4098 CTLFLAG_RD | CTLFLAG_STATS, 4099 &stats->vrxs_ierrors, "Receive errors"); 4100 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", 4101 CTLFLAG_RD | CTLFLAG_STATS, 4102 &stats->vrxs_csum, "Receive checksum offloaded"); 4103 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum_failed", 4104 CTLFLAG_RD | CTLFLAG_STATS, 4105 &stats->vrxs_csum_failed, "Receive checksum offload failed"); 4106 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "host_lro", 4107 CTLFLAG_RD | CTLFLAG_STATS, 4108 &stats->vrxs_host_lro, "Receive host segmentation offloaded"); 4109 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, 
"rescheduled", 4110 CTLFLAG_RD | CTLFLAG_STATS, 4111 &stats->vrxs_rescheduled, 4112 "Receive interrupt handler rescheduled"); 4113 } 4114 4115 static void 4116 vtnet_setup_txq_sysctl(struct sysctl_ctx_list *ctx, 4117 struct sysctl_oid_list *child, struct vtnet_txq *txq) 4118 { 4119 struct sysctl_oid *node; 4120 struct sysctl_oid_list *list; 4121 struct vtnet_txq_stats *stats; 4122 char namebuf[16]; 4123 4124 snprintf(namebuf, sizeof(namebuf), "txq%d", txq->vtntx_id); 4125 node = SYSCTL_ADD_NODE(ctx, child, OID_AUTO, namebuf, 4126 CTLFLAG_RD | CTLFLAG_MPSAFE, NULL, "Transmit Queue"); 4127 list = SYSCTL_CHILDREN(node); 4128 4129 stats = &txq->vtntx_stats; 4130 4131 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "opackets", 4132 CTLFLAG_RD | CTLFLAG_STATS, 4133 &stats->vtxs_opackets, "Transmit packets"); 4134 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "obytes", 4135 CTLFLAG_RD | CTLFLAG_STATS, 4136 &stats->vtxs_obytes, "Transmit bytes"); 4137 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "omcasts", 4138 CTLFLAG_RD | CTLFLAG_STATS, 4139 &stats->vtxs_omcasts, "Transmit multicasts"); 4140 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "csum", 4141 CTLFLAG_RD | CTLFLAG_STATS, 4142 &stats->vtxs_csum, "Transmit checksum offloaded"); 4143 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "tso", 4144 CTLFLAG_RD | CTLFLAG_STATS, 4145 &stats->vtxs_tso, "Transmit TCP segmentation offloaded"); 4146 SYSCTL_ADD_UQUAD(ctx, list, OID_AUTO, "rescheduled", 4147 CTLFLAG_RD | CTLFLAG_STATS, 4148 &stats->vtxs_rescheduled, 4149 "Transmit interrupt handler rescheduled"); 4150 } 4151 4152 static void 4153 vtnet_setup_queue_sysctl(struct vtnet_softc *sc) 4154 { 4155 device_t dev; 4156 struct sysctl_ctx_list *ctx; 4157 struct sysctl_oid *tree; 4158 struct sysctl_oid_list *child; 4159 int i; 4160 4161 dev = sc->vtnet_dev; 4162 ctx = device_get_sysctl_ctx(dev); 4163 tree = device_get_sysctl_tree(dev); 4164 child = SYSCTL_CHILDREN(tree); 4165 4166 for (i = 0; i < sc->vtnet_req_vq_pairs; i++) { 4167 vtnet_setup_rxq_sysctl(ctx, child, 
&sc->vtnet_rxqs[i]); 4168 vtnet_setup_txq_sysctl(ctx, child, &sc->vtnet_txqs[i]); 4169 } 4170 } 4171 4172 static int 4173 vtnet_sysctl_rx_csum_failed(SYSCTL_HANDLER_ARGS) 4174 { 4175 struct vtnet_softc *sc = (struct vtnet_softc *)arg1; 4176 struct vtnet_statistics *stats = &sc->vtnet_stats; 4177 struct vtnet_rxq_stats *rxst; 4178 int i; 4179 4180 stats->rx_csum_failed = 0; 4181 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 4182 rxst = &sc->vtnet_rxqs[i].vtnrx_stats; 4183 stats->rx_csum_failed += rxst->vrxs_csum_failed; 4184 } 4185 return (sysctl_handle_64(oidp, NULL, stats->rx_csum_failed, req)); 4186 } 4187 4188 static int 4189 vtnet_sysctl_rx_csum_offloaded(SYSCTL_HANDLER_ARGS) 4190 { 4191 struct vtnet_softc *sc = (struct vtnet_softc *)arg1; 4192 struct vtnet_statistics *stats = &sc->vtnet_stats; 4193 struct vtnet_rxq_stats *rxst; 4194 int i; 4195 4196 stats->rx_csum_offloaded = 0; 4197 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 4198 rxst = &sc->vtnet_rxqs[i].vtnrx_stats; 4199 stats->rx_csum_offloaded += rxst->vrxs_csum; 4200 } 4201 return (sysctl_handle_64(oidp, NULL, stats->rx_csum_offloaded, req)); 4202 } 4203 4204 static int 4205 vtnet_sysctl_rx_task_rescheduled(SYSCTL_HANDLER_ARGS) 4206 { 4207 struct vtnet_softc *sc = (struct vtnet_softc *)arg1; 4208 struct vtnet_statistics *stats = &sc->vtnet_stats; 4209 struct vtnet_rxq_stats *rxst; 4210 int i; 4211 4212 stats->rx_task_rescheduled = 0; 4213 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 4214 rxst = &sc->vtnet_rxqs[i].vtnrx_stats; 4215 stats->rx_task_rescheduled += rxst->vrxs_rescheduled; 4216 } 4217 return (sysctl_handle_64(oidp, NULL, stats->rx_task_rescheduled, req)); 4218 } 4219 4220 static int 4221 vtnet_sysctl_tx_csum_offloaded(SYSCTL_HANDLER_ARGS) 4222 { 4223 struct vtnet_softc *sc = (struct vtnet_softc *)arg1; 4224 struct vtnet_statistics *stats = &sc->vtnet_stats; 4225 struct vtnet_txq_stats *txst; 4226 int i; 4227 4228 stats->tx_csum_offloaded = 0; 4229 for (i = 0; i < sc->vtnet_max_vq_pairs; 
i++) { 4230 txst = &sc->vtnet_txqs[i].vtntx_stats; 4231 stats->tx_csum_offloaded += txst->vtxs_csum; 4232 } 4233 return (sysctl_handle_64(oidp, NULL, stats->tx_csum_offloaded, req)); 4234 } 4235 4236 static int 4237 vtnet_sysctl_tx_tso_offloaded(SYSCTL_HANDLER_ARGS) 4238 { 4239 struct vtnet_softc *sc = (struct vtnet_softc *)arg1; 4240 struct vtnet_statistics *stats = &sc->vtnet_stats; 4241 struct vtnet_txq_stats *txst; 4242 int i; 4243 4244 stats->tx_tso_offloaded = 0; 4245 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 4246 txst = &sc->vtnet_txqs[i].vtntx_stats; 4247 stats->tx_tso_offloaded += txst->vtxs_tso; 4248 } 4249 return (sysctl_handle_64(oidp, NULL, stats->tx_tso_offloaded, req)); 4250 } 4251 4252 static int 4253 vtnet_sysctl_tx_task_rescheduled(SYSCTL_HANDLER_ARGS) 4254 { 4255 struct vtnet_softc *sc = (struct vtnet_softc *)arg1; 4256 struct vtnet_statistics *stats = &sc->vtnet_stats; 4257 struct vtnet_txq_stats *txst; 4258 int i; 4259 4260 stats->tx_task_rescheduled = 0; 4261 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) { 4262 txst = &sc->vtnet_txqs[i].vtntx_stats; 4263 stats->tx_task_rescheduled += txst->vtxs_rescheduled; 4264 } 4265 return (sysctl_handle_64(oidp, NULL, stats->tx_task_rescheduled, req)); 4266 } 4267 4268 static void 4269 vtnet_setup_stat_sysctl(struct sysctl_ctx_list *ctx, 4270 struct sysctl_oid_list *child, struct vtnet_softc *sc) 4271 { 4272 struct vtnet_statistics *stats; 4273 struct vtnet_rxq_stats rxaccum; 4274 struct vtnet_txq_stats txaccum; 4275 4276 vtnet_accum_stats(sc, &rxaccum, &txaccum); 4277 4278 stats = &sc->vtnet_stats; 4279 stats->rx_csum_offloaded = rxaccum.vrxs_csum; 4280 stats->rx_csum_failed = rxaccum.vrxs_csum_failed; 4281 stats->rx_task_rescheduled = rxaccum.vrxs_rescheduled; 4282 stats->tx_csum_offloaded = txaccum.vtxs_csum; 4283 stats->tx_tso_offloaded = txaccum.vtxs_tso; 4284 stats->tx_task_rescheduled = txaccum.vtxs_rescheduled; 4285 4286 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "mbuf_alloc_failed", 4287 
CTLFLAG_RD | CTLFLAG_STATS, &stats->mbuf_alloc_failed, 4288 "Mbuf cluster allocation failures"); 4289 4290 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_frame_too_large", 4291 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_frame_too_large, 4292 "Received frame larger than the mbuf chain"); 4293 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_enq_replacement_failed", 4294 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_enq_replacement_failed, 4295 "Enqueuing the replacement receive mbuf failed"); 4296 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_mergeable_failed", 4297 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_mergeable_failed, 4298 "Mergeable buffers receive failures"); 4299 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ethtype", 4300 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_ethtype, 4301 "Received checksum offloaded buffer with unsupported " 4302 "Ethernet type"); 4303 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_bad_ipproto", 4304 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_bad_ipproto, 4305 "Received checksum offloaded buffer with incorrect IP protocol"); 4306 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "rx_csum_inaccessible_ipproto", 4307 CTLFLAG_RD | CTLFLAG_STATS, &stats->rx_csum_inaccessible_ipproto, 4308 "Received checksum offloaded buffer with inaccessible IP protocol"); 4309 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_csum_failed", 4310 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, 4311 sc, 0, vtnet_sysctl_rx_csum_failed, "QU", 4312 "Received buffer checksum offload failed"); 4313 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_csum_offloaded", 4314 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, 4315 sc, 0, vtnet_sysctl_rx_csum_offloaded, "QU", 4316 "Received buffer checksum offload succeeded"); 4317 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "rx_task_rescheduled", 4318 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, 4319 sc, 0, vtnet_sysctl_rx_task_rescheduled, "QU", 4320 "Times the receive interrupt task rescheduled itself"); 4321 4322 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, 
"tx_csum_unknown_ethtype", 4323 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_csum_unknown_ethtype, 4324 "Aborted transmit of checksum offloaded buffer with unknown " 4325 "Ethernet type"); 4326 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_csum_proto_mismatch", 4327 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_csum_proto_mismatch, 4328 "Aborted transmit of checksum offloaded buffer because mismatched " 4329 "protocols"); 4330 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_not_tcp", 4331 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_tso_not_tcp, 4332 "Aborted transmit of TSO buffer with non TCP protocol"); 4333 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_tso_without_csum", 4334 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_tso_without_csum, 4335 "Aborted transmit of TSO buffer without TCP checksum offload"); 4336 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defragged", 4337 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_defragged, 4338 "Transmit mbufs defragged"); 4339 SYSCTL_ADD_UQUAD(ctx, child, OID_AUTO, "tx_defrag_failed", 4340 CTLFLAG_RD | CTLFLAG_STATS, &stats->tx_defrag_failed, 4341 "Aborted transmit of buffer because defrag failed"); 4342 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_csum_offloaded", 4343 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, 4344 sc, 0, vtnet_sysctl_tx_csum_offloaded, "QU", 4345 "Offloaded checksum of transmitted buffer"); 4346 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_tso_offloaded", 4347 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, 4348 sc, 0, vtnet_sysctl_tx_tso_offloaded, "QU", 4349 "Segmentation offload of transmitted buffer"); 4350 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "tx_task_rescheduled", 4351 CTLTYPE_U64 | CTLFLAG_RD | CTLFLAG_STATS, 4352 sc, 0, vtnet_sysctl_tx_task_rescheduled, "QU", 4353 "Times the transmit interrupt task rescheduled itself"); 4354 } 4355 4356 static int 4357 vtnet_sysctl_features(SYSCTL_HANDLER_ARGS) 4358 { 4359 struct sbuf sb; 4360 struct vtnet_softc *sc = (struct vtnet_softc *)arg1; 4361 int error; 4362 4363 sbuf_new_for_sysctl(&sb, NULL, 0, req); 
4364 sbuf_printf(&sb, "%b", (uint32_t)sc->vtnet_features, 4365 VIRTIO_NET_FEATURE_BITS); 4366 error = sbuf_finish(&sb); 4367 sbuf_delete(&sb); 4368 return (error); 4369 } 4370 4371 static int 4372 vtnet_sysctl_flags(SYSCTL_HANDLER_ARGS) 4373 { 4374 struct sbuf sb; 4375 struct vtnet_softc *sc = (struct vtnet_softc *)arg1; 4376 int error; 4377 4378 sbuf_new_for_sysctl(&sb, NULL, 0, req); 4379 sbuf_printf(&sb, "%b", sc->vtnet_flags, VTNET_FLAGS_BITS); 4380 error = sbuf_finish(&sb); 4381 sbuf_delete(&sb); 4382 return (error); 4383 } 4384 4385 static void 4386 vtnet_setup_sysctl(struct vtnet_softc *sc) 4387 { 4388 device_t dev; 4389 struct sysctl_ctx_list *ctx; 4390 struct sysctl_oid *tree; 4391 struct sysctl_oid_list *child; 4392 4393 dev = sc->vtnet_dev; 4394 ctx = device_get_sysctl_ctx(dev); 4395 tree = device_get_sysctl_tree(dev); 4396 child = SYSCTL_CHILDREN(tree); 4397 4398 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "max_vq_pairs", 4399 CTLFLAG_RD, &sc->vtnet_max_vq_pairs, 0, 4400 "Number of maximum supported virtqueue pairs"); 4401 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "req_vq_pairs", 4402 CTLFLAG_RD, &sc->vtnet_req_vq_pairs, 0, 4403 "Number of requested virtqueue pairs"); 4404 SYSCTL_ADD_INT(ctx, child, OID_AUTO, "act_vq_pairs", 4405 CTLFLAG_RD, &sc->vtnet_act_vq_pairs, 0, 4406 "Number of active virtqueue pairs"); 4407 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "features", 4408 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 4409 vtnet_sysctl_features, "A", "Features"); 4410 SYSCTL_ADD_PROC(ctx, child, OID_AUTO, "flags", 4411 CTLTYPE_STRING | CTLFLAG_RD | CTLFLAG_MPSAFE, sc, 0, 4412 vtnet_sysctl_flags, "A", "Flags"); 4413 4414 vtnet_setup_stat_sysctl(ctx, child, sc); 4415 } 4416 4417 static void 4418 vtnet_load_tunables(struct vtnet_softc *sc) 4419 { 4420 4421 sc->vtnet_lro_entry_count = vtnet_tunable_int(sc, 4422 "lro_entry_count", vtnet_lro_entry_count); 4423 if (sc->vtnet_lro_entry_count < TCP_LRO_ENTRIES) 4424 sc->vtnet_lro_entry_count = TCP_LRO_ENTRIES; 4425 4426 
sc->vtnet_lro_mbufq_depth = vtnet_tunable_int(sc, 4427 "lro_mbufq_depth", vtnet_lro_mbufq_depth); 4428 } 4429 4430 static int 4431 vtnet_rxq_enable_intr(struct vtnet_rxq *rxq) 4432 { 4433 4434 return (virtqueue_enable_intr(rxq->vtnrx_vq)); 4435 } 4436 4437 static void 4438 vtnet_rxq_disable_intr(struct vtnet_rxq *rxq) 4439 { 4440 4441 virtqueue_disable_intr(rxq->vtnrx_vq); 4442 } 4443 4444 static int 4445 vtnet_txq_enable_intr(struct vtnet_txq *txq) 4446 { 4447 struct virtqueue *vq; 4448 4449 vq = txq->vtntx_vq; 4450 4451 if (vtnet_txq_below_threshold(txq) != 0) 4452 return (virtqueue_postpone_intr(vq, VQ_POSTPONE_LONG)); 4453 4454 /* 4455 * The free count is above our threshold. Keep the Tx interrupt 4456 * disabled until the queue is fuller. 4457 */ 4458 return (0); 4459 } 4460 4461 static void 4462 vtnet_txq_disable_intr(struct vtnet_txq *txq) 4463 { 4464 4465 virtqueue_disable_intr(txq->vtntx_vq); 4466 } 4467 4468 static void 4469 vtnet_enable_rx_interrupts(struct vtnet_softc *sc) 4470 { 4471 struct vtnet_rxq *rxq; 4472 int i; 4473 4474 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) { 4475 rxq = &sc->vtnet_rxqs[i]; 4476 if (vtnet_rxq_enable_intr(rxq) != 0) 4477 taskqueue_enqueue(rxq->vtnrx_tq, &rxq->vtnrx_intrtask); 4478 } 4479 } 4480 4481 static void 4482 vtnet_enable_tx_interrupts(struct vtnet_softc *sc) 4483 { 4484 int i; 4485 4486 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 4487 vtnet_txq_enable_intr(&sc->vtnet_txqs[i]); 4488 } 4489 4490 static void 4491 vtnet_enable_interrupts(struct vtnet_softc *sc) 4492 { 4493 4494 vtnet_enable_rx_interrupts(sc); 4495 vtnet_enable_tx_interrupts(sc); 4496 } 4497 4498 static void 4499 vtnet_disable_rx_interrupts(struct vtnet_softc *sc) 4500 { 4501 int i; 4502 4503 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) 4504 vtnet_rxq_disable_intr(&sc->vtnet_rxqs[i]); 4505 } 4506 4507 static void 4508 vtnet_disable_tx_interrupts(struct vtnet_softc *sc) 4509 { 4510 int i; 4511 4512 for (i = 0; i < sc->vtnet_max_vq_pairs; i++) 4513 
vtnet_txq_disable_intr(&sc->vtnet_txqs[i]); 4514 } 4515 4516 static void 4517 vtnet_disable_interrupts(struct vtnet_softc *sc) 4518 { 4519 4520 vtnet_disable_rx_interrupts(sc); 4521 vtnet_disable_tx_interrupts(sc); 4522 } 4523 4524 static int 4525 vtnet_tunable_int(struct vtnet_softc *sc, const char *knob, int def) 4526 { 4527 char path[64]; 4528 4529 snprintf(path, sizeof(path), 4530 "hw.vtnet.%d.%s", device_get_unit(sc->vtnet_dev), knob); 4531 TUNABLE_INT_FETCH(path, &def); 4532 4533 return (def); 4534 } 4535 4536 #ifdef DEBUGNET 4537 static void 4538 vtnet_debugnet_init(if_t ifp, int *nrxr, int *ncl, int *clsize) 4539 { 4540 struct vtnet_softc *sc; 4541 4542 sc = if_getsoftc(ifp); 4543 4544 VTNET_CORE_LOCK(sc); 4545 *nrxr = sc->vtnet_req_vq_pairs; 4546 *ncl = DEBUGNET_MAX_IN_FLIGHT; 4547 *clsize = sc->vtnet_rx_clustersz; 4548 VTNET_CORE_UNLOCK(sc); 4549 } 4550 4551 static void 4552 vtnet_debugnet_event(if_t ifp __unused, enum debugnet_ev event) 4553 { 4554 struct vtnet_softc *sc; 4555 static bool sw_lro_enabled = false; 4556 4557 /* 4558 * Disable software LRO, since it would require entering the network 4559 * epoch when calling vtnet_txq_eof() in vtnet_debugnet_poll(). 
4560 */ 4561 sc = if_getsoftc(ifp); 4562 switch (event) { 4563 case DEBUGNET_START: 4564 sw_lro_enabled = (sc->vtnet_flags & VTNET_FLAG_SW_LRO) != 0; 4565 if (sw_lro_enabled) 4566 sc->vtnet_flags &= ~VTNET_FLAG_SW_LRO; 4567 break; 4568 case DEBUGNET_END: 4569 if (sw_lro_enabled) 4570 sc->vtnet_flags |= VTNET_FLAG_SW_LRO; 4571 break; 4572 } 4573 } 4574 4575 static int 4576 vtnet_debugnet_transmit(if_t ifp, struct mbuf *m) 4577 { 4578 struct vtnet_softc *sc; 4579 struct vtnet_txq *txq; 4580 int error; 4581 4582 sc = if_getsoftc(ifp); 4583 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 4584 IFF_DRV_RUNNING) 4585 return (EBUSY); 4586 4587 txq = &sc->vtnet_txqs[0]; 4588 error = vtnet_txq_encap(txq, &m, M_NOWAIT | M_USE_RESERVE); 4589 if (error == 0) 4590 (void)vtnet_txq_notify(txq); 4591 return (error); 4592 } 4593 4594 static int 4595 vtnet_debugnet_poll(if_t ifp, int count) 4596 { 4597 struct vtnet_softc *sc; 4598 int i; 4599 4600 sc = if_getsoftc(ifp); 4601 if ((if_getdrvflags(ifp) & (IFF_DRV_RUNNING | IFF_DRV_OACTIVE)) != 4602 IFF_DRV_RUNNING) 4603 return (EBUSY); 4604 4605 (void)vtnet_txq_eof(&sc->vtnet_txqs[0]); 4606 for (i = 0; i < sc->vtnet_act_vq_pairs; i++) 4607 (void)vtnet_rxq_eof(&sc->vtnet_rxqs[i]); 4608 return (0); 4609 } 4610 #endif /* DEBUGNET */ 4611