/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/select.h>
#include <sys/uio.h>
#include <sys/ioctl.h>
#include <machine/vmm_snapshot.h>
#include <net/ethernet.h>
#include <net/if.h>	/* IFNAMSIZ */

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <pthread_np.h>

#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#include "pci_emul.h"
#include "mevent.h"
#include "virtio.h"
#include "net_utils.h"
#include "net_backends.h"
#include "iov.h"

#define VTNET_RINGSZ	1024

#define VTNET_MAXSEGS	256

#define VTNET_MAX_PKT_LEN	(65536 + 64)

#define VTNET_MIN_MTU	ETHERMIN
#define VTNET_MAX_MTU	65535

#define VTNET_S_HOSTCAPS      \
  ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \
    VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)

/*
 * PCI config-space "registers"
 */
struct virtio_net_config {
	uint8_t  mac[6];
	uint16_t status;
	uint16_t max_virtqueue_pairs;
	uint16_t mtu;
} __packed;

/*
 * Queue definitions.
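 * The virtqueue indexes below follow the virtio-net layout: receive
 * queue first, then transmit, then the (unimplemented) control queue.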
 */
#define VTNET_RXQ	0
#define VTNET_TXQ	1
#define VTNET_CTLQ	2	/* NB: not yet supported */

#define VTNET_MAXQ	3

/*
 * Debug printf
 */
static int pci_vtnet_debug;
#define DPRINTF(params) if (pci_vtnet_debug) PRINTLN params
#define WPRINTF(params) PRINTLN params

/*
 * Per-device softc
 */
struct pci_vtnet_softc {
	struct virtio_softc vsc_vs;
	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
	pthread_mutex_t vsc_mtx;

	net_backend_t	*vsc_be;

	bool		features_negotiated;	/* protected by rx_mtx */

	int		resetting;	/* protected by tx_mtx */

	uint64_t	vsc_features;	/* negotiated features */

	pthread_mutex_t	rx_mtx;
	int		rx_merge;	/* merged rx bufs in use */

	pthread_t	tx_tid;
	pthread_mutex_t	tx_mtx;
	pthread_cond_t	tx_cond;
	int		tx_in_progress;

	size_t		vhdrlen;
	size_t		be_vhdrlen;

	struct virtio_net_config vsc_config;
	struct virtio_consts vsc_consts;
};

static void pci_vtnet_reset(void *);
/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
static void pci_vtnet_neg_features(void *, uint64_t);
#ifdef BHYVE_SNAPSHOT
static void pci_vtnet_pause(void *);
static void pci_vtnet_resume(void *);
static int pci_vtnet_snapshot(void *, struct vm_snapshot_meta *);
#endif

static struct virtio_consts vtnet_vi_consts = {
	.vc_name =	"vtnet",
	.vc_nvq =	VTNET_MAXQ - 1,
	.vc_cfgsize =	sizeof(struct virtio_net_config),
	.vc_reset =	pci_vtnet_reset,
	.vc_cfgread =	pci_vtnet_cfgread,
	.vc_cfgwrite =	pci_vtnet_cfgwrite,
	.vc_apply_features = pci_vtnet_neg_features,
	.vc_hv_caps =	VTNET_S_HOSTCAPS,
#ifdef BHYVE_SNAPSHOT
	.vc_pause =	pci_vtnet_pause,
	.vc_resume =	pci_vtnet_resume,
	.vc_snapshot =	pci_vtnet_snapshot,
#endif
};

static void
pci_vtnet_reset(void *vsc)
{
	struct pci_vtnet_softc *sc = vsc;

	DPRINTF(("vtnet: device reset requested !"));

	/* Acquire the RX lock to block RX processing. */
	pthread_mutex_lock(&sc->rx_mtx);

	/*
	 * Make sure receive operation is disabled at least until we
	 * re-negotiate the features, since receive operation depends
	 * on the value of sc->rx_merge and the header length, which
	 * are both set in pci_vtnet_neg_features().
	 * Receive operation will be enabled again once the guest adds
	 * the first receive buffers and kicks us.
	 */
	sc->features_negotiated = false;
	netbe_rx_disable(sc->vsc_be);

	/* Set sc->resetting and give a chance to the TX thread to stop. */
	pthread_mutex_lock(&sc->tx_mtx);
	sc->resetting = 1;
	while (sc->tx_in_progress) {
		pthread_mutex_unlock(&sc->tx_mtx);
		usleep(10000);
		pthread_mutex_lock(&sc->tx_mtx);
	}

	/*
	 * Now reset rings, MSI-X vectors, and negotiated capabilities.
	 * Do that with the TX lock held, since we need to reset
	 * sc->resetting.
	 */
	vi_reset_dev(&sc->vsc_vs);

	sc->resetting = 0;
	pthread_mutex_unlock(&sc->tx_mtx);
	pthread_mutex_unlock(&sc->rx_mtx);
}

static __inline struct iovec *
iov_trim_hdr(struct iovec *iov, int *iovcnt, unsigned int hlen)
{
	struct iovec *riov;

	if (iov[0].iov_len < hlen) {
		/*
		 * Not enough header space in the first fragment.
		 * That's not ok for us.
		 */
		return (NULL);
	}

	iov[0].iov_len -= hlen;
	if (iov[0].iov_len == 0) {
		*iovcnt -= 1;
		if (*iovcnt == 0) {
			/*
			 * Only space for the header. That's not
			 * enough for us.
			 */
			return (NULL);
		}
		riov = &iov[1];
	} else {
		iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + hlen);
		riov = &iov[0];
	}

	return (riov);
}

struct virtio_mrg_rxbuf_info {
	uint16_t idx;
	uint16_t pad;
	uint32_t len;
};

static void
pci_vtnet_rx(struct pci_vtnet_softc *sc)
{
	int prepend_hdr_len = sc->vhdrlen - sc->be_vhdrlen;
	struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS];
	struct iovec iov[VTNET_MAXSEGS + 1];
	struct vqueue_info *vq;
	struct vi_req req;

	vq = &sc->vsc_queues[VTNET_RXQ];

	/* Features must be negotiated */
	if (!sc->features_negotiated) {
		return;
	}

	for (;;) {
		struct virtio_net_rxhdr *hdr;
		uint32_t riov_bytes;
		struct iovec *riov;
		uint32_t ulen;
		int riov_len;
		int n_chains;
		ssize_t rlen;
		ssize_t plen;

		plen = netbe_peek_recvlen(sc->vsc_be);
		if (plen <= 0) {
			/*
			 * No more packets (plen == 0), or backend errored
			 * (plen < 0). Interrupt if needed and stop.
			 */
			vq_endchains(vq, /*used_all_avail=*/0);
			return;
		}
		plen += prepend_hdr_len;

		/*
		 * Get a descriptor chain to store the next ingress
		 * packet. In case of mergeable rx buffers, get as
		 * many chains as necessary in order to make room
		 * for plen bytes.
		 */
		riov_bytes = 0;
		riov_len = 0;
		riov = iov;
		n_chains = 0;
		do {
			int n = vq_getchain(vq, riov, VTNET_MAXSEGS - riov_len,
			    &req);
			info[n_chains].idx = req.idx;

			if (n == 0) {
				/*
				 * No rx buffers. Enable RX kicks and double
				 * check.
				 */
				vq_kick_enable(vq);
				if (!vq_has_descs(vq)) {
					/*
					 * Still no buffers. Return the unused
					 * chains (if any), interrupt if needed
					 * (including for NOTIFY_ON_EMPTY), and
					 * disable the backend until the next
					 * kick.
					 */
					vq_retchains(vq, n_chains);
					vq_endchains(vq, /*used_all_avail=*/1);
					netbe_rx_disable(sc->vsc_be);
					return;
				}

				/* More rx buffers found, so keep going. */
				vq_kick_disable(vq);
				continue;
			}
			assert(n >= 1 && riov_len + n <= VTNET_MAXSEGS);
			riov_len += n;
			if (!sc->rx_merge) {
				n_chains = 1;
				break;
			}
			info[n_chains].len = (uint32_t)count_iov(riov, n);
			riov_bytes += info[n_chains].len;
			riov += n;
			n_chains++;
		} while (riov_bytes < plen && riov_len < VTNET_MAXSEGS);

		riov = iov;
		hdr = riov[0].iov_base;
		if (prepend_hdr_len > 0) {
			/*
			 * The frontend uses a virtio-net header, but the
			 * backend does not. We need to prepend a zeroed
			 * header.
			 */
			riov = iov_trim_hdr(riov, &riov_len, prepend_hdr_len);
			if (riov == NULL) {
				/*
				 * The first collected chain is nonsensical,
				 * as it is not even enough to store the
				 * virtio-net header. Just drop it.
				 */
				vq_relchain(vq, info[0].idx, 0);
				vq_retchains(vq, n_chains - 1);
				continue;
			}
			memset(hdr, 0, prepend_hdr_len);
		}

		rlen = netbe_recv(sc->vsc_be, riov, riov_len);
		if (rlen != plen - prepend_hdr_len) {
			/*
			 * If this happens it means there is something
			 * wrong with the backend (e.g., some other
			 * process is stealing our packets).
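			 * Return the collected chains unused and move
			 * on to the next packet.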
			 */
			WPRINTF(("netbe_recv: expected %zd bytes, "
			    "got %zd", plen - prepend_hdr_len, rlen));
			vq_retchains(vq, n_chains);
			continue;
		}

		ulen = (uint32_t)plen;

		/*
		 * Publish the used buffers to the guest, reporting the
		 * number of bytes that we wrote.
		 */
		if (!sc->rx_merge) {
			vq_relchain(vq, info[0].idx, ulen);
		} else {
			uint32_t iolen;
			int i = 0;

			do {
				iolen = info[i].len;
				if (iolen > ulen) {
					iolen = ulen;
				}
				vq_relchain_prepare(vq, info[i].idx, iolen);
				ulen -= iolen;
				i++;
			} while (ulen > 0);

			hdr->vrh_bufs = i;
			vq_relchain_publish(vq);
			assert(i == n_chains);
		}
	}

}

/*
 * Called when there is read activity on the backend file descriptor.
 * Each buffer posted by the guest is assumed to be able to contain
 * an entire ethernet frame + rx header.
 */
static void
pci_vtnet_rx_callback(int fd __unused, enum ev_type type __unused, void *param)
{
	struct pci_vtnet_softc *sc = param;

	pthread_mutex_lock(&sc->rx_mtx);
	pci_vtnet_rx(sc);
	pthread_mutex_unlock(&sc->rx_mtx);

}

/* Called on RX kick. */
static void
pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *sc = vsc;

	/*
	 * A qnotify means that the rx process can now begin.
	 * Enable RX only if features are negotiated.
	 */
	pthread_mutex_lock(&sc->rx_mtx);
	if (!sc->features_negotiated) {
		pthread_mutex_unlock(&sc->rx_mtx);
		return;
	}

	vq_kick_disable(vq);
	netbe_rx_enable(sc->vsc_be);
	pthread_mutex_unlock(&sc->rx_mtx);
}

/* TX virtqueue processing, called by the TX thread. */
static void
pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
{
	struct iovec iov[VTNET_MAXSEGS + 1];
	struct iovec *siov = iov;
	struct vi_req req;
	ssize_t len;
	int n;

	/*
	 * Obtain chain of descriptors. The first descriptor also
	 * contains the virtio-net header.
	 */
	n = vq_getchain(vq, iov, VTNET_MAXSEGS, &req);
	assert(n >= 1 && n <= VTNET_MAXSEGS);

	if (sc->vhdrlen != sc->be_vhdrlen) {
		/*
		 * The frontend uses a virtio-net header, but the backend
		 * does not. We simply strip the header and ignore it, as
		 * it should be zero-filled.
		 */
		siov = iov_trim_hdr(siov, &n, sc->vhdrlen);
	}

	if (siov == NULL) {
		/* The chain is nonsensical. Just drop it. */
		len = 0;
	} else {
		len = netbe_send(sc->vsc_be, siov, n);
		if (len < 0) {
			/*
			 * If send failed, report that 0 bytes
			 * were read.
			 */
			len = 0;
		}
	}

	/*
	 * Return the processed chain to the guest, reporting
	 * the number of bytes that we read.
	 */
	vq_relchain(vq, req.idx, len);
}

/* Called on TX kick. */
static void
pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *sc = vsc;

	/*
	 * Any ring entries to process?
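	 * If not, there is nothing to hand off to the TX thread.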
	 */
	if (!vq_has_descs(vq))
		return;

	/* Signal the tx thread for processing */
	pthread_mutex_lock(&sc->tx_mtx);
	vq_kick_disable(vq);
	if (sc->tx_in_progress == 0)
		pthread_cond_signal(&sc->tx_cond);
	pthread_mutex_unlock(&sc->tx_mtx);
}

/*
 * Thread which will handle processing of TX desc
 */
static void *
pci_vtnet_tx_thread(void *param)
{
	struct pci_vtnet_softc *sc = param;
	struct vqueue_info *vq;
	int error;

	vq = &sc->vsc_queues[VTNET_TXQ];

	/*
	 * Let us wait till the tx queue pointers get initialised &
	 * first tx signaled
	 */
	pthread_mutex_lock(&sc->tx_mtx);
	error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
	assert(error == 0);

	for (;;) {
		/* note - tx mutex is locked here */
		while (sc->resetting || !vq_has_descs(vq)) {
			vq_kick_enable(vq);
			if (!sc->resetting && vq_has_descs(vq))
				break;

			sc->tx_in_progress = 0;
			error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
			assert(error == 0);
		}
		vq_kick_disable(vq);
		sc->tx_in_progress = 1;
		pthread_mutex_unlock(&sc->tx_mtx);

		do {
			/*
			 * Run through entries, placing them into
			 * iovecs and sending when an end-of-packet
			 * is found
			 */
			pci_vtnet_proctx(sc, vq);
		} while (vq_has_descs(vq));

		/*
		 * Generate an interrupt if needed.
		 */
		vq_endchains(vq, /*used_all_avail=*/1);

		pthread_mutex_lock(&sc->tx_mtx);
	}
}

#ifdef notyet
static void
pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
{

	DPRINTF(("vtnet: control qnotify!"));
}
#endif

static int
pci_vtnet_init(struct pci_devinst *pi, nvlist_t *nvl)
{
	struct pci_vtnet_softc *sc;
	const char *value;
	char tname[MAXCOMLEN + 1];
	unsigned long mtu = ETHERMTU;
	int err;

	/*
	 * Allocate data structures for further virtio initializations.
	 * sc also contains a copy of vtnet_vi_consts, since capabilities
	 * change depending on the backend.
	 */
	sc = calloc(1, sizeof(struct pci_vtnet_softc));

	sc->vsc_consts = vtnet_vi_consts;
	pthread_mutex_init(&sc->vsc_mtx, NULL);

	sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
	sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
#ifdef notyet
	sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
#endif

	value = get_config_value_node(nvl, "mac");
	if (value != NULL) {
		err = net_parsemac(value, sc->vsc_config.mac);
		if (err) {
			free(sc);
			return (err);
		}
	} else
		net_genmac(pi, sc->vsc_config.mac);

	value = get_config_value_node(nvl, "mtu");
	if (value != NULL) {
		err = net_parsemtu(value, &mtu);
		if (err) {
			free(sc);
			return (err);
		}

		if (mtu < VTNET_MIN_MTU || mtu > VTNET_MAX_MTU) {
			err = EINVAL;
			errno = EINVAL;
			free(sc);
			return (err);
		}
		sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MTU;
	}
	sc->vsc_config.mtu = mtu;

	/*
	 * Permit interfaces without a configured backend.
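	 * In that case sc->vsc_be remains NULL.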
	 */
	if (get_config_value_node(nvl, "backend") != NULL) {
		err = netbe_init(&sc->vsc_be, nvl, pci_vtnet_rx_callback, sc);
		if (err) {
			free(sc);
			return (err);
		}
	}

	sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MRG_RXBUF |
	    netbe_get_cap(sc->vsc_be);

	/*
	 * Since we do not actually support multiqueue,
	 * set the maximum virtqueue pairs to 1.
	 */
	sc->vsc_config.max_virtqueue_pairs = 1;

	/* initialize config space */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_NETWORK);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);

	/* Link is always up. */
	sc->vsc_config.status = 1;

	vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues);
	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;

	/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
	if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) {
		free(sc);
		return (1);
	}

	/* use BAR 0 to map config regs in IO space */
	vi_set_io_bar(&sc->vsc_vs, 0);

	sc->resetting = 0;

	sc->rx_merge = 0;
	sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
	pthread_mutex_init(&sc->rx_mtx, NULL);

	/*
	 * Initialize tx semaphore & spawn TX processing thread.
	 * As of now, only one thread for TX desc processing is
	 * spawned.
	 */
	sc->tx_in_progress = 0;
	pthread_mutex_init(&sc->tx_mtx, NULL);
	pthread_cond_init(&sc->tx_cond, NULL);
	pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
	snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
	    pi->pi_func);
	pthread_set_name_np(sc->tx_tid, tname);

	return (0);
}

static int
pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
{
	struct pci_vtnet_softc *sc = vsc;
	void *ptr;

	if (offset < (int)sizeof(sc->vsc_config.mac)) {
		assert(offset + size <= (int)sizeof(sc->vsc_config.mac));
		/*
		 * The driver is allowed to change the MAC address
		 */
		ptr = &sc->vsc_config.mac[offset];
		memcpy(ptr, &value, size);
	} else {
		/* silently ignore other writes */
		DPRINTF(("vtnet: write to readonly reg %d", offset));
	}

	return (0);
}

static int
pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
{
	struct pci_vtnet_softc *sc = vsc;
	void *ptr;

	ptr = (uint8_t *)&sc->vsc_config + offset;
	memcpy(retval, ptr, size);
	return (0);
}

static void
pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
{
	struct pci_vtnet_softc *sc = vsc;

	sc->vsc_features = negotiated_features;

	if (negotiated_features & VIRTIO_NET_F_MRG_RXBUF) {
		sc->vhdrlen = sizeof(struct virtio_net_rxhdr);
		sc->rx_merge = 1;
	} else {
		/*
		 * Without mergeable rx buffers, virtio-net header is 2
		 * bytes shorter than sizeof(struct virtio_net_rxhdr).
		 */
		sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
		sc->rx_merge = 0;
	}

	/*
	 * Tell the backend to enable some capabilities it has advertised.
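	 * The virtio-net header length selected above is passed down as well.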
	 */
	netbe_set_cap(sc->vsc_be, negotiated_features, sc->vhdrlen);
	sc->be_vhdrlen = netbe_get_vnet_hdr_len(sc->vsc_be);
	assert(sc->be_vhdrlen == 0 || sc->be_vhdrlen == sc->vhdrlen);

	pthread_mutex_lock(&sc->rx_mtx);
	sc->features_negotiated = true;
	pthread_mutex_unlock(&sc->rx_mtx);
}

#ifdef BHYVE_SNAPSHOT
static void
pci_vtnet_pause(void *vsc)
{
	struct pci_vtnet_softc *sc = vsc;

	DPRINTF(("vtnet: device pause requested !\n"));

	/* Acquire the RX lock to block RX processing. */
	pthread_mutex_lock(&sc->rx_mtx);

	/* Wait for the transmit thread to finish its processing. */
	pthread_mutex_lock(&sc->tx_mtx);
	while (sc->tx_in_progress) {
		pthread_mutex_unlock(&sc->tx_mtx);
		usleep(10000);
		pthread_mutex_lock(&sc->tx_mtx);
	}
}

static void
pci_vtnet_resume(void *vsc)
{
	struct pci_vtnet_softc *sc = vsc;

	DPRINTF(("vtnet: device resume requested !\n"));

	pthread_mutex_unlock(&sc->tx_mtx);
	/* The RX lock should have been acquired in vtnet_pause. */
	pthread_mutex_unlock(&sc->rx_mtx);
}

static int
pci_vtnet_snapshot(void *vsc, struct vm_snapshot_meta *meta)
{
	int ret;
	struct pci_vtnet_softc *sc = vsc;

	DPRINTF(("vtnet: device snapshot requested !\n"));

	/*
	 * Queues and consts should have been saved by the more generic
	 * vi_pci_snapshot function. We need to save only our features and
	 * config.
	 */

	SNAPSHOT_VAR_OR_LEAVE(sc->vsc_features, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->features_negotiated, meta, ret, done);

	/* Force reapply negotiated features at restore time */
	if (meta->op == VM_SNAPSHOT_RESTORE &&
	    sc->features_negotiated) {
		pci_vtnet_neg_features(sc, sc->vsc_features);
		netbe_rx_enable(sc->vsc_be);
	}

	SNAPSHOT_VAR_OR_LEAVE(sc->vsc_config, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->rx_merge, meta, ret, done);

	SNAPSHOT_VAR_OR_LEAVE(sc->vhdrlen, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->be_vhdrlen, meta, ret, done);

done:
	return (ret);
}
#endif

static const struct pci_devemu pci_de_vnet = {
	.pe_emu =	"virtio-net",
	.pe_init =	pci_vtnet_init,
	.pe_legacy_config = netbe_legacy_config,
	.pe_barwrite =	vi_pci_write,
	.pe_barread =	vi_pci_read,
#ifdef BHYVE_SNAPSHOT
	.pe_snapshot =	vi_pci_snapshot,
	.pe_pause =	vi_pci_pause,
	.pe_resume =	vi_pci_resume,
#endif
};
PCI_EMUL_SET(pci_de_vnet);