/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/select.h>
#include <sys/uio.h>
#include <sys/ioctl.h>
#include <machine/vmm_snapshot.h>
#include <net/ethernet.h>
#include <net/if.h>	/* IFNAMSIZ */

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <pthread_np.h>

#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#include "pci_emul.h"
#include "mevent.h"
#include "virtio.h"
#include "net_utils.h"
#include "net_backends.h"
#include "iov.h"

#define VTNET_RINGSZ	1024

#define VTNET_MAXSEGS	256

#define VTNET_MAX_PKT_LEN	(65536 + 64)

#define VTNET_MIN_MTU	ETHERMIN
#define VTNET_MAX_MTU	65535

#define VTNET_S_HOSTCAPS      \
  ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \
    VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)

/*
 * PCI config-space "registers"
 */
struct virtio_net_config {
	uint8_t  mac[6];
	uint16_t status;
	uint16_t max_virtqueue_pairs;
	uint16_t mtu;
} __packed;

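/*
 * Illustrative note (added, not from the original source): because the
 * struct above is __packed, guest reads of the device-specific config
 * space map to fixed offsets: mac[] at bytes 0-5, status at 6-7,
 * max_virtqueue_pairs at 8-9 and mtu at 10-11. This is the layout that
 * pci_vtnet_cfgread()/pci_vtnet_cfgwrite() below index into byte by byte.
 */
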
/*
 * Queue definitions.
 */
#define VTNET_RXQ	0
#define VTNET_TXQ	1
#define VTNET_CTLQ	2	/* NB: not yet supported */

#define VTNET_MAXQ	3

/*
 * Debug printf
 */
static int pci_vtnet_debug;
#define DPRINTF(params) if (pci_vtnet_debug) PRINTLN params
#define WPRINTF(params) PRINTLN params

/*
 * Per-device softc
 */
struct pci_vtnet_softc {
	struct virtio_softc vsc_vs;
	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
	pthread_mutex_t vsc_mtx;

	net_backend_t	*vsc_be;

	bool		features_negotiated;	/* protected by rx_mtx */

	int		resetting;		/* protected by tx_mtx */

	uint64_t	vsc_features;		/* negotiated features */

	pthread_mutex_t	rx_mtx;
	int		rx_merge;		/* merged rx bufs in use */

	pthread_t	tx_tid;
	pthread_mutex_t	tx_mtx;
	pthread_cond_t	tx_cond;
	int		tx_in_progress;

	size_t		vhdrlen;
	size_t		be_vhdrlen;

	struct virtio_net_config vsc_config;
	struct virtio_consts vsc_consts;
};

static void pci_vtnet_reset(void *);
/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
static void pci_vtnet_neg_features(void *, uint64_t);
#ifdef BHYVE_SNAPSHOT
static void pci_vtnet_pause(void *);
static void pci_vtnet_resume(void *);
static int pci_vtnet_snapshot(void *, struct vm_snapshot_meta *);
#endif

static struct virtio_consts vtnet_vi_consts = {
	.vc_name =		"vtnet",
	.vc_nvq =		VTNET_MAXQ - 1,
	.vc_cfgsize =		sizeof(struct virtio_net_config),
	.vc_reset =		pci_vtnet_reset,
	.vc_cfgread =		pci_vtnet_cfgread,
	.vc_cfgwrite =		pci_vtnet_cfgwrite,
	.vc_apply_features =	pci_vtnet_neg_features,
	.vc_hv_caps =		VTNET_S_HOSTCAPS,
#ifdef BHYVE_SNAPSHOT
	.vc_pause =		pci_vtnet_pause,
	.vc_resume =		pci_vtnet_resume,
	.vc_snapshot =		pci_vtnet_snapshot,
#endif
};

static void
pci_vtnet_reset(void *vsc)
{
	struct pci_vtnet_softc *sc = vsc;

	DPRINTF(("vtnet: device reset requested !"));

	/* Acquire the RX lock to block RX processing. */
	pthread_mutex_lock(&sc->rx_mtx);

	/*
	 * Make sure receive operation is disabled at least until we
	 * re-negotiate the features, since receive operation depends
	 * on the value of sc->rx_merge and the header length, which
	 * are both set in pci_vtnet_neg_features().
	 * Receive operation will be enabled again once the guest adds
	 * the first receive buffers and kicks us.
	 */
	sc->features_negotiated = false;
	netbe_rx_disable(sc->vsc_be);

	/* Set sc->resetting and give a chance to the TX thread to stop. */
	pthread_mutex_lock(&sc->tx_mtx);
	sc->resetting = 1;
	while (sc->tx_in_progress) {
		pthread_mutex_unlock(&sc->tx_mtx);
		usleep(10000);
		pthread_mutex_lock(&sc->tx_mtx);
	}

	/*
	 * Now reset rings, MSI-X vectors, and negotiated capabilities.
	 * Do that with the TX lock held, since we need to reset
	 * sc->resetting.
	 */
	vi_reset_dev(&sc->vsc_vs);

	sc->resetting = 0;
	pthread_mutex_unlock(&sc->tx_mtx);
	pthread_mutex_unlock(&sc->rx_mtx);
}

static __inline struct iovec *
iov_trim_hdr(struct iovec *iov, int *iovcnt, unsigned int hlen)
{
	struct iovec *riov;

	if (iov[0].iov_len < hlen) {
		/*
		 * Not enough header space in the first fragment.
		 * That's not ok for us.
		 */
		return (NULL);
	}

	iov[0].iov_len -= hlen;
	if (iov[0].iov_len == 0) {
		*iovcnt -= 1;
		if (*iovcnt == 0) {
			/*
			 * Only space for the header. That's not
			 * enough for us.
			 */
			return (NULL);
		}
		riov = &iov[1];
	} else {
		iov[0].iov_base = (void *)((uintptr_t)iov[0].iov_base + hlen);
		riov = &iov[0];
	}

	return (riov);
}

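/*
 * Illustrative example (added, not from the original source): trimming a
 * 10-byte header from a chain whose first fragment holds exactly the header
 * drops that fragment entirely, while a larger first fragment is simply
 * advanced in place:
 *
 *   { {hdr, 10}, {data, 1500} }, cnt = 2  ->  riov = &iov[1], cnt = 1
 *   { {hdr+data, 1510} },        cnt = 1  ->  riov = &iov[0], iov_len = 1500
 */
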
struct virtio_mrg_rxbuf_info {
	uint16_t idx;
	uint16_t pad;
	uint32_t len;
};

static void
pci_vtnet_rx(struct pci_vtnet_softc *sc)
{
	int prepend_hdr_len = sc->vhdrlen - sc->be_vhdrlen;
	struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS];
	struct iovec iov[VTNET_MAXSEGS + 1];
	struct vqueue_info *vq;
	struct vi_req req;

	vq = &sc->vsc_queues[VTNET_RXQ];

	/* Features must be negotiated */
	if (!sc->features_negotiated) {
		return;
	}

	for (;;) {
		struct virtio_net_rxhdr *hdr;
		uint32_t riov_bytes;
		struct iovec *riov;
		uint32_t ulen;
		int riov_len;
		int n_chains;
		ssize_t rlen;
		ssize_t plen;

		plen = netbe_peek_recvlen(sc->vsc_be);
		if (plen <= 0) {
			/*
			 * No more packets (plen == 0), or backend errored
			 * (plen < 0). Interrupt if needed and stop.
			 */
			vq_endchains(vq, /*used_all_avail=*/0);
			return;
		}
		plen += prepend_hdr_len;

		/*
		 * Get a descriptor chain to store the next ingress
		 * packet. In case of mergeable rx buffers, get as
		 * many chains as necessary in order to make room
		 * for plen bytes.
		 */
		riov_bytes = 0;
		riov_len = 0;
		riov = iov;
		n_chains = 0;
		do {
			int n = vq_getchain(vq, riov, VTNET_MAXSEGS - riov_len,
			    &req);
			info[n_chains].idx = req.idx;

			if (n == 0) {
				/*
				 * No rx buffers. Enable RX kicks and double
				 * check.
				 */
				vq_kick_enable(vq);
				if (!vq_has_descs(vq)) {
					/*
					 * Still no buffers. Return the unused
					 * chains (if any), interrupt if needed
					 * (including for NOTIFY_ON_EMPTY), and
					 * disable the backend until the next
					 * kick.
					 */
					vq_retchains(vq, n_chains);
					vq_endchains(vq, /*used_all_avail=*/1);
					netbe_rx_disable(sc->vsc_be);
					return;
				}

				/* More rx buffers found, so keep going. */
				vq_kick_disable(vq);
				continue;
			}
			assert(n >= 1 && riov_len + n <= VTNET_MAXSEGS);
			riov_len += n;
			if (!sc->rx_merge) {
				n_chains = 1;
				break;
			}
			info[n_chains].len = (uint32_t)count_iov(riov, n);
			riov_bytes += info[n_chains].len;
			riov += n;
			n_chains++;
		} while (riov_bytes < plen && riov_len < VTNET_MAXSEGS);

		riov = iov;
		hdr = riov[0].iov_base;
		if (prepend_hdr_len > 0) {
			/*
			 * The frontend uses a virtio-net header, but the
			 * backend does not. We need to prepend a zeroed
			 * header.
			 */
			riov = iov_trim_hdr(riov, &riov_len, prepend_hdr_len);
			if (riov == NULL) {
				/*
				 * The first collected chain is nonsensical,
				 * as it is not even enough to store the
				 * virtio-net header. Just drop it.
				 */
				vq_relchain(vq, info[0].idx, 0);
				vq_retchains(vq, n_chains - 1);
				continue;
			}
			memset(hdr, 0, prepend_hdr_len);
		}

		rlen = netbe_recv(sc->vsc_be, riov, riov_len);
		if (rlen != plen - prepend_hdr_len) {
			/*
			 * If this happens it means there is something
			 * wrong with the backend (e.g., some other
			 * process is stealing our packets).
			 */
			WPRINTF(("netbe_recv: expected %zd bytes, "
			    "got %zd", plen - prepend_hdr_len, rlen));
			vq_retchains(vq, n_chains);
			continue;
		}

		ulen = (uint32_t)plen;

		/*
		 * Publish the used buffers to the guest, reporting the
		 * number of bytes that we wrote.
		 */
		if (!sc->rx_merge) {
			vq_relchain(vq, info[0].idx, ulen);
		} else {
			uint32_t iolen;
			int i = 0;

			do {
				iolen = info[i].len;
				if (iolen > ulen) {
					iolen = ulen;
				}
				vq_relchain_prepare(vq, info[i].idx, iolen);
				ulen -= iolen;
				i++;
			} while (ulen > 0);

			hdr->vrh_bufs = i;
			vq_relchain_publish(vq);
			assert(i == n_chains);
		}
	}

}

/*
 * Called when there is read activity on the backend file descriptor.
 * Each buffer posted by the guest is assumed to be able to contain
 * an entire ethernet frame + rx header.
 */
static void
pci_vtnet_rx_callback(int fd __unused, enum ev_type type __unused, void *param)
{
	struct pci_vtnet_softc *sc = param;

	pthread_mutex_lock(&sc->rx_mtx);
	pci_vtnet_rx(sc);
	pthread_mutex_unlock(&sc->rx_mtx);

}

/* Called on RX kick. */
static void
pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *sc = vsc;

	/*
	 * A qnotify means that the rx process can now begin.
	 * Enable RX only if features are negotiated.
	 */
	pthread_mutex_lock(&sc->rx_mtx);
	if (!sc->features_negotiated) {
		pthread_mutex_unlock(&sc->rx_mtx);
		return;
	}

	vq_kick_disable(vq);
	netbe_rx_enable(sc->vsc_be);
	pthread_mutex_unlock(&sc->rx_mtx);
}

/* TX virtqueue processing, called by the TX thread. */
static void
pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
{
	struct iovec iov[VTNET_MAXSEGS + 1];
	struct iovec *siov = iov;
	struct vi_req req;
	ssize_t len;
	int n;

	/*
	 * Obtain chain of descriptors. The first descriptor also
	 * contains the virtio-net header.
	 */
	n = vq_getchain(vq, iov, VTNET_MAXSEGS, &req);
	assert(n >= 1 && n <= VTNET_MAXSEGS);

	if (sc->vhdrlen != sc->be_vhdrlen) {
		/*
		 * The frontend uses a virtio-net header, but the backend
		 * does not. We simply strip the header and ignore it, as
		 * it should be zero-filled.
		 */
		siov = iov_trim_hdr(siov, &n, sc->vhdrlen);
	}

	if (siov == NULL) {
		/* The chain is nonsensical. Just drop it. */
		len = 0;
	} else {
		len = netbe_send(sc->vsc_be, siov, n);
		if (len < 0) {
			/*
			 * If send failed, report that 0 bytes
			 * were read.
			 */
			len = 0;
		}
	}

	/*
	 * Return the processed chain to the guest, reporting
	 * the number of bytes that we read.
	 */
	vq_relchain(vq, req.idx, len);
}

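/*
 * Summary (added comment, not from the original source): TX work is split
 * between two contexts. pci_vtnet_ping_txq() runs on the vCPU notification
 * path and only wakes the dedicated TX thread via tx_cond, while
 * pci_vtnet_tx_thread() drains the queue by calling pci_vtnet_proctx() with
 * tx_mtx dropped and tx_in_progress set, so that reset and pause can wait
 * for it to go idle.
 */
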
/* Called on TX kick. */
static void
pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *sc = vsc;

	/*
	 * Any ring entries to process?
	 */
	if (!vq_has_descs(vq))
		return;

	/* Signal the tx thread for processing */
	pthread_mutex_lock(&sc->tx_mtx);
	vq_kick_disable(vq);
	if (sc->tx_in_progress == 0)
		pthread_cond_signal(&sc->tx_cond);
	pthread_mutex_unlock(&sc->tx_mtx);
}

/*
 * Thread which will handle processing of TX desc
 */
static void *
pci_vtnet_tx_thread(void *param)
{
	struct pci_vtnet_softc *sc = param;
	struct vqueue_info *vq;
	int error;

	vq = &sc->vsc_queues[VTNET_TXQ];

	/*
	 * Wait until the tx queue pointers are initialised and the
	 * first tx is signaled.
	 */
	pthread_mutex_lock(&sc->tx_mtx);
	error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
	assert(error == 0);

	for (;;) {
		/* note - tx mutex is locked here */
		while (sc->resetting || !vq_has_descs(vq)) {
			vq_kick_enable(vq);
			if (!sc->resetting && vq_has_descs(vq))
				break;

			sc->tx_in_progress = 0;
			error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
			assert(error == 0);
		}
		vq_kick_disable(vq);
		sc->tx_in_progress = 1;
		pthread_mutex_unlock(&sc->tx_mtx);

		do {
			/*
			 * Run through entries, placing them into
			 * iovecs and sending when an end-of-packet
			 * is found
			 */
			pci_vtnet_proctx(sc, vq);
		} while (vq_has_descs(vq));

		/*
		 * Generate an interrupt if needed.
		 */
		vq_endchains(vq, /*used_all_avail=*/1);

		pthread_mutex_lock(&sc->tx_mtx);
	}
}

#ifdef notyet
static void
pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
{

	DPRINTF(("vtnet: control qnotify!"));
}
#endif

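/*
 * Illustrative usage (added comment, not from the original source): the
 * device is typically configured from the bhyve command line, e.g.
 *
 *   bhyve -s 5,virtio-net,tap0,mac=58:9c:fc:00:00:01,mtu=9000 ...
 *
 * which ends up as the "backend", "mac" and "mtu" nvlist nodes parsed by
 * pci_vtnet_init() below: "backend" selects the net backend, "mac"
 * overrides the generated address and "mtu" advertises VIRTIO_NET_F_MTU
 * to the guest.
 */
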
static int
pci_vtnet_init(struct pci_devinst *pi, nvlist_t *nvl)
{
	struct pci_vtnet_softc *sc;
	const char *value;
	char tname[MAXCOMLEN + 1];
	unsigned long mtu = ETHERMTU;
	int err;

	/*
	 * Allocate data structures for further virtio initializations.
	 * sc also contains a copy of vtnet_vi_consts, since capabilities
	 * change depending on the backend.
	 */
	sc = calloc(1, sizeof(struct pci_vtnet_softc));

	sc->vsc_consts = vtnet_vi_consts;
	pthread_mutex_init(&sc->vsc_mtx, NULL);

	sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
	sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
#ifdef notyet
	sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
#endif

	value = get_config_value_node(nvl, "mac");
	if (value != NULL) {
		err = net_parsemac(value, sc->vsc_config.mac);
		if (err) {
			free(sc);
			return (err);
		}
	} else
		net_genmac(pi, sc->vsc_config.mac);

	value = get_config_value_node(nvl, "mtu");
	if (value != NULL) {
		err = net_parsemtu(value, &mtu);
		if (err) {
			free(sc);
			return (err);
		}

		if (mtu < VTNET_MIN_MTU || mtu > VTNET_MAX_MTU) {
			err = EINVAL;
			errno = EINVAL;
			free(sc);
			return (err);
		}
		sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MTU;
	}
	sc->vsc_config.mtu = mtu;

	/* Permit interfaces without a configured backend. */
	if (get_config_value_node(nvl, "backend") != NULL) {
		err = netbe_init(&sc->vsc_be, nvl, pci_vtnet_rx_callback, sc);
		if (err) {
			free(sc);
			return (err);
		}
	}

	sc->vsc_consts.vc_hv_caps |= VIRTIO_NET_F_MRG_RXBUF |
	    netbe_get_cap(sc->vsc_be);

	/*
	 * Since we do not actually support multiqueue,
	 * set the maximum virtqueue pairs to 1.
	 */
	sc->vsc_config.max_virtqueue_pairs = 1;

	/* initialize config space */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_NETWORK);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);

	/* Link is always up. */
	sc->vsc_config.status = 1;

	vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues);
	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;

	/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
	if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) {
		free(sc);
		return (1);
	}

	/* use BAR 0 to map config regs in IO space */
	vi_set_io_bar(&sc->vsc_vs, 0);

	sc->resetting = 0;

	sc->rx_merge = 0;
	sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
	pthread_mutex_init(&sc->rx_mtx, NULL);

	/*
	 * Initialize tx semaphore & spawn TX processing thread.
	 * As of now, only one thread for TX desc processing is
	 * spawned.
	 */
	sc->tx_in_progress = 0;
	pthread_mutex_init(&sc->tx_mtx, NULL);
	pthread_cond_init(&sc->tx_cond, NULL);
	pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
	snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
	    pi->pi_func);
	pthread_set_name_np(sc->tx_tid, tname);

	return (0);
}

static int
pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
{
	struct pci_vtnet_softc *sc = vsc;
	void *ptr;

	if (offset < (int)sizeof(sc->vsc_config.mac)) {
		assert(offset + size <= (int)sizeof(sc->vsc_config.mac));
		/*
		 * The driver is allowed to change the MAC address
		 */
		ptr = &sc->vsc_config.mac[offset];
		memcpy(ptr, &value, size);
	} else {
		/* silently ignore other writes */
		DPRINTF(("vtnet: write to readonly reg %d", offset));
	}

	return (0);
}

static int
pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
{
	struct pci_vtnet_softc *sc = vsc;
	void *ptr;

	ptr = (uint8_t *)&sc->vsc_config + offset;
	memcpy(retval, ptr, size);
	return (0);
}

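/*
 * Added note (not from the original source): the legacy virtio-net header
 * is 10 bytes; when VIRTIO_NET_F_MRG_RXBUF is negotiated it grows by a
 * 16-bit num_buffers field (vrh_bufs above) to 12 bytes. That is why
 * pci_vtnet_neg_features() below switches vhdrlen between
 * sizeof(struct virtio_net_rxhdr) and sizeof(struct virtio_net_rxhdr) - 2.
 */
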
static void
pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
{
	struct pci_vtnet_softc *sc = vsc;

	sc->vsc_features = negotiated_features;

	if (negotiated_features & VIRTIO_NET_F_MRG_RXBUF) {
		sc->vhdrlen = sizeof(struct virtio_net_rxhdr);
		sc->rx_merge = 1;
	} else {
		/*
		 * Without mergeable rx buffers, virtio-net header is 2
		 * bytes shorter than sizeof(struct virtio_net_rxhdr).
		 */
		sc->vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
		sc->rx_merge = 0;
	}

	/* Tell the backend to enable some capabilities it has advertised. */
	netbe_set_cap(sc->vsc_be, negotiated_features, sc->vhdrlen);
	sc->be_vhdrlen = netbe_get_vnet_hdr_len(sc->vsc_be);
	assert(sc->be_vhdrlen == 0 || sc->be_vhdrlen == sc->vhdrlen);

	pthread_mutex_lock(&sc->rx_mtx);
	sc->features_negotiated = true;
	pthread_mutex_unlock(&sc->rx_mtx);
}

#ifdef BHYVE_SNAPSHOT
static void
pci_vtnet_pause(void *vsc)
{
	struct pci_vtnet_softc *sc = vsc;

	DPRINTF(("vtnet: device pause requested !\n"));

	/* Acquire the RX lock to block RX processing. */
	pthread_mutex_lock(&sc->rx_mtx);

	/* Wait for the transmit thread to finish its processing. */
	pthread_mutex_lock(&sc->tx_mtx);
	while (sc->tx_in_progress) {
		pthread_mutex_unlock(&sc->tx_mtx);
		usleep(10000);
		pthread_mutex_lock(&sc->tx_mtx);
	}
}

static void
pci_vtnet_resume(void *vsc)
{
	struct pci_vtnet_softc *sc = vsc;

	DPRINTF(("vtnet: device resume requested !\n"));

	pthread_mutex_unlock(&sc->tx_mtx);
	/* The RX lock should have been acquired in vtnet_pause. */
	pthread_mutex_unlock(&sc->rx_mtx);
}

static int
pci_vtnet_snapshot(void *vsc, struct vm_snapshot_meta *meta)
{
	int ret;
	struct pci_vtnet_softc *sc = vsc;

	DPRINTF(("vtnet: device snapshot requested !\n"));

	/*
	 * Queues and consts should have been saved by the more generic
	 * vi_pci_snapshot function. We need to save only our features and
	 * config.
	 */

	SNAPSHOT_VAR_OR_LEAVE(sc->vsc_features, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->features_negotiated, meta, ret, done);

	/* Force reapply negotiated features at restore time */
	if (meta->op == VM_SNAPSHOT_RESTORE &&
	    sc->features_negotiated) {
		pci_vtnet_neg_features(sc, sc->vsc_features);
		netbe_rx_enable(sc->vsc_be);
	}

	SNAPSHOT_VAR_OR_LEAVE(sc->vsc_config, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->rx_merge, meta, ret, done);

	SNAPSHOT_VAR_OR_LEAVE(sc->vhdrlen, meta, ret, done);
	SNAPSHOT_VAR_OR_LEAVE(sc->be_vhdrlen, meta, ret, done);

done:
	return (ret);
}
#endif

static const struct pci_devemu pci_de_vnet = {
	.pe_emu =		"virtio-net",
	.pe_init =		pci_vtnet_init,
	.pe_legacy_config =	netbe_legacy_config,
	.pe_barwrite =		vi_pci_write,
	.pe_barread =		vi_pci_read,
#ifdef BHYVE_SNAPSHOT
	.pe_snapshot =		vi_pci_snapshot,
	.pe_pause =		vi_pci_pause,
	.pe_resume =		vi_pci_resume,
#endif
};
PCI_EMUL_SET(pci_de_vnet);