/*-
 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
 *
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/select.h>
#include <sys/uio.h>
#include <sys/ioctl.h>
#include <net/ethernet.h>
#include <net/if.h>	/* IFNAMSIZ */

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <pthread_np.h>

#include "bhyverun.h"
#include "debug.h"
#include "pci_emul.h"
#include "mevent.h"
#include "virtio.h"
#include "net_utils.h"
#include "net_backends.h"
#include "iov.h"

#define VTNET_RINGSZ	1024

#define VTNET_MAXSEGS	256

#define VTNET_MAX_PKT_LEN	(65536 + 64)

#define VTNET_S_HOSTCAPS      \
  ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \
    VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)

/*
 * PCI config-space "registers"
 */
struct virtio_net_config {
	uint8_t  mac[6];
	uint16_t status;
} __packed;
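
/*
 * Note on the layout above: the guest reads the MAC address and link
 * status through the virtio config window (see pci_vtnet_cfgread()
 * below); bit 0 of 'status' corresponds to VIRTIO_NET_S_LINK_UP in
 * the virtio specification.
 */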

/*
 * Queue definitions.
 */
#define VTNET_RXQ	0
#define VTNET_TXQ	1
#define VTNET_CTLQ	2	/* NB: not yet supported */

#define VTNET_MAXQ	3

/*
 * Debug printf
 */
static int pci_vtnet_debug;
#define DPRINTF(params) if (pci_vtnet_debug) PRINTLN params
#define WPRINTF(params) PRINTLN params

/*
 * Per-device softc
 */
struct pci_vtnet_softc {
	struct virtio_softc vsc_vs;
	struct vqueue_info vsc_queues[VTNET_MAXQ - 1];
	pthread_mutex_t vsc_mtx;

	net_backend_t	*vsc_be;

	int		resetting;	/* protected by tx_mtx */

	uint64_t	vsc_features;	/* negotiated features */

	pthread_mutex_t	rx_mtx;
	int		rx_merge;	/* merged rx bufs in use */

	pthread_t 	tx_tid;
	pthread_mutex_t	tx_mtx;
	pthread_cond_t	tx_cond;
	int		tx_in_progress;

	struct virtio_net_config vsc_config;
	struct virtio_consts vsc_consts;
};

static void pci_vtnet_reset(void *);
/* static void pci_vtnet_notify(void *, struct vqueue_info *); */
static int pci_vtnet_cfgread(void *, int, int, uint32_t *);
static int pci_vtnet_cfgwrite(void *, int, int, uint32_t);
static void pci_vtnet_neg_features(void *, uint64_t);

static struct virtio_consts vtnet_vi_consts = {
	"vtnet",		/* our name */
	VTNET_MAXQ - 1,		/* we currently support 2 virtqueues */
	sizeof(struct virtio_net_config), /* config reg size */
	pci_vtnet_reset,	/* reset */
	NULL,			/* device-wide qnotify -- not used */
	pci_vtnet_cfgread,	/* read PCI config */
	pci_vtnet_cfgwrite,	/* write PCI config */
	pci_vtnet_neg_features,	/* apply negotiated features */
	VTNET_S_HOSTCAPS,	/* our capabilities */
};

static void
pci_vtnet_reset(void *vsc)
{
	struct pci_vtnet_softc *sc = vsc;

	DPRINTF(("vtnet: device reset requested!"));

	/* Acquire the RX lock to block RX processing. */
	pthread_mutex_lock(&sc->rx_mtx);

	/*
	 * Make sure receive operation is disabled at least until we
	 * re-negotiate the features, since receive operation depends
	 * on the value of sc->rx_merge and the header length, which
	 * are both set in pci_vtnet_neg_features().
	 * Receive operation will be enabled again once the guest adds
	 * the first receive buffers and kicks us.
	 */
	netbe_rx_disable(sc->vsc_be);

	/* Set sc->resetting and give a chance to the TX thread to stop. */
	pthread_mutex_lock(&sc->tx_mtx);
	sc->resetting = 1;
	while (sc->tx_in_progress) {
		pthread_mutex_unlock(&sc->tx_mtx);
		usleep(10000);
		pthread_mutex_lock(&sc->tx_mtx);
	}

	/*
	 * Now reset rings, MSI-X vectors, and negotiated capabilities.
	 * Do that with the TX lock held, since we need to reset
	 * sc->resetting.
	 */
	vi_reset_dev(&sc->vsc_vs);

	sc->resetting = 0;
	pthread_mutex_unlock(&sc->tx_mtx);
	pthread_mutex_unlock(&sc->rx_mtx);
}

struct virtio_mrg_rxbuf_info {
	uint16_t idx;
	uint16_t pad;
	uint32_t len;
};
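
/*
 * Mergeable rx buffers (VIRTIO_NET_F_MRG_RXBUF): pci_vtnet_rx() below
 * keeps one virtio_mrg_rxbuf_info entry per descriptor chain it pulls
 * off the ring. When a received packet spans several chains, the number
 * of chains actually consumed is written into the vrh_bufs field of the
 * virtio-net header at the front of the first chain, so the guest knows
 * how many buffers to stitch back together; e.g. a 7000-byte LRO packet
 * landing in 4096-byte buffers spans two chains and gets vrh_bufs = 2.
 */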

static void
pci_vtnet_rx(struct pci_vtnet_softc *sc)
{
	struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS];
	struct iovec iov[VTNET_MAXSEGS + 1];
	struct vqueue_info *vq;
	uint32_t cur_iov_bytes;
	struct iovec *cur_iov;
	uint16_t cur_iov_len;
	uint32_t ulen;
	int n_chains;
	int len;

	vq = &sc->vsc_queues[VTNET_RXQ];
	for (;;) {
		/*
		 * Get a descriptor chain to store the next ingress
		 * packet. In case of mergeable rx buffers, get as
		 * many chains as necessary in order to make room
		 * for a maximum sized LRO packet.
		 */
		cur_iov_bytes = 0;
		cur_iov_len = 0;
		cur_iov = iov;
		n_chains = 0;
		do {
			int n = vq_getchain(vq, &info[n_chains].idx, cur_iov,
			    VTNET_MAXSEGS - cur_iov_len, NULL);

			if (n == 0) {
				/*
				 * No rx buffers. Enable RX kicks and double
				 * check.
				 */
				vq_kick_enable(vq);
				if (!vq_has_descs(vq)) {
					/*
					 * Still no buffers. Return the unused
					 * chains (if any), interrupt if needed
					 * (including for NOTIFY_ON_EMPTY), and
					 * disable the backend until the next
					 * kick.
					 */
					vq_retchains(vq, n_chains);
					vq_endchains(vq, /*used_all_avail=*/1);
					netbe_rx_disable(sc->vsc_be);
					return;
				}

				/* More rx buffers found, so keep going. */
				vq_kick_disable(vq);
				continue;
			}
			assert(n >= 1 && cur_iov_len + n <= VTNET_MAXSEGS);
			cur_iov_len += n;
			if (!sc->rx_merge) {
				n_chains = 1;
				break;
			}
			info[n_chains].len = (uint32_t)count_iov(cur_iov, n);
			cur_iov_bytes += info[n_chains].len;
			cur_iov += n;
			n_chains++;
		} while (cur_iov_bytes < VTNET_MAX_PKT_LEN &&
		    cur_iov_len < VTNET_MAXSEGS);

		len = netbe_recv(sc->vsc_be, iov, cur_iov_len);

		if (len <= 0) {
			/*
			 * No more packets (len == 0), or backend errored
			 * (err < 0). Return unused available buffers
			 * and stop.
			 */
			vq_retchains(vq, n_chains);
			/* Interrupt if needed/appropriate and stop. */
			vq_endchains(vq, /*used_all_avail=*/0);
			return;
		}

		ulen = (uint32_t)len;	/* avoid too many casts below */

		/* Publish the used buffers to the guest. */
		if (!sc->rx_merge) {
			vq_relchain(vq, info[0].idx, ulen);
		} else {
			struct virtio_net_rxhdr *hdr = iov[0].iov_base;
			uint32_t iolen;
			int i = 0;

			assert(iov[0].iov_len >= sizeof(*hdr));

			do {
				iolen = info[i].len;
				if (iolen > ulen) {
					iolen = ulen;
				}
				vq_relchain_prepare(vq, info[i].idx, iolen);
				ulen -= iolen;
				i++;
				assert(i <= n_chains);
			} while (ulen > 0);

			hdr->vrh_bufs = i;
			vq_relchain_publish(vq);
			vq_retchains(vq, n_chains - i);
		}
	}
}

/*
 * Called when there is read activity on the backend file descriptor.
 * Each buffer posted by the guest is assumed to be able to contain
 * an entire ethernet frame + rx header.
 */
static void
pci_vtnet_rx_callback(int fd, enum ev_type type, void *param)
{
	struct pci_vtnet_softc *sc = param;

	pthread_mutex_lock(&sc->rx_mtx);
	pci_vtnet_rx(sc);
	pthread_mutex_unlock(&sc->rx_mtx);
}

/* Called on RX kick. */
static void
pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *sc = vsc;

	/*
	 * A qnotify means that the rx process can now begin.
	 */
	pthread_mutex_lock(&sc->rx_mtx);
	vq_kick_disable(vq);
	netbe_rx_enable(sc->vsc_be);
	pthread_mutex_unlock(&sc->rx_mtx);
}
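
/*
 * A note on the RX handoff above: while the backend is enabled, guest
 * kicks on the RX ring are suppressed (vq_kick_disable) because
 * pci_vtnet_rx() drains the ring itself; kicks are re-enabled only when
 * the ring runs dry, so the next buffer posted by the guest restarts
 * reception through pci_vtnet_ping_rxq().
 */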

/* TX virtqueue processing, called by the TX thread. */
static void
pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq)
{
	struct iovec iov[VTNET_MAXSEGS + 1];
	uint16_t idx;
	ssize_t len;
	int n;

	/*
	 * Obtain chain of descriptors. The first descriptor also
	 * contains the virtio-net header.
	 */
	n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL);
	assert(n >= 1 && n <= VTNET_MAXSEGS);

	len = netbe_send(sc->vsc_be, iov, n);

	/* chain is processed, release it and set len */
	vq_relchain(vq, idx, len > 0 ? len : 0);
}

/* Called on TX kick. */
static void
pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq)
{
	struct pci_vtnet_softc *sc = vsc;

	/*
	 * Any ring entries to process?
	 */
	if (!vq_has_descs(vq))
		return;

	/* Signal the tx thread for processing */
	pthread_mutex_lock(&sc->tx_mtx);
	vq_kick_disable(vq);
	if (sc->tx_in_progress == 0)
		pthread_cond_signal(&sc->tx_cond);
	pthread_mutex_unlock(&sc->tx_mtx);
}

/*
 * Thread which handles processing of TX descriptors.
 */
static void *
pci_vtnet_tx_thread(void *param)
{
	struct pci_vtnet_softc *sc = param;
	struct vqueue_info *vq;
	int error;

	vq = &sc->vsc_queues[VTNET_TXQ];

	/*
	 * Wait until the TX queue pointers are initialised and the
	 * first TX is signaled.
	 */
	pthread_mutex_lock(&sc->tx_mtx);
	error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
	assert(error == 0);

	for (;;) {
		/* note - tx mutex is locked here */
		while (sc->resetting || !vq_has_descs(vq)) {
			vq_kick_enable(vq);
			if (!sc->resetting && vq_has_descs(vq))
				break;

			sc->tx_in_progress = 0;
			error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
			assert(error == 0);
		}
		vq_kick_disable(vq);
		sc->tx_in_progress = 1;
		pthread_mutex_unlock(&sc->tx_mtx);

		do {
			/*
			 * Run through entries, placing them into
			 * iovecs and sending when an end-of-packet
			 * is found.
			 */
			pci_vtnet_proctx(sc, vq);
		} while (vq_has_descs(vq));

		/*
		 * Generate an interrupt if needed.
		 */
		vq_endchains(vq, /*used_all_avail=*/1);

		pthread_mutex_lock(&sc->tx_mtx);
	}
}
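
/*
 * TX handoff summary: pci_vtnet_ping_txq() wakes the thread above via
 * tx_cond, and tx_in_progress (always updated under tx_mtx) tells
 * pci_vtnet_reset() when it is safe to reset the rings, since the
 * thread drops tx_mtx while it is actually transmitting.
 */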

#ifdef notyet
static void
pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq)
{

	DPRINTF(("vtnet: control qnotify!"));
}
#endif

static int
pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	struct pci_vtnet_softc *sc;
	char tname[MAXCOMLEN + 1];
	int mac_provided;

	/*
	 * Allocate data structures for further virtio initializations.
	 * sc also contains a copy of vtnet_vi_consts, since capabilities
	 * change depending on the backend.
	 */
	sc = calloc(1, sizeof(struct pci_vtnet_softc));

	sc->vsc_consts = vtnet_vi_consts;
	pthread_mutex_init(&sc->vsc_mtx, NULL);

	sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq;
	sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq;
#ifdef notyet
	sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ;
	sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq;
#endif

	/*
	 * Attempt to open the backend device and read the MAC address
	 * if specified.
	 */
	mac_provided = 0;
	if (opts != NULL) {
		char *devname;
		char *vtopts;
		int err;

		devname = vtopts = strdup(opts);
		(void) strsep(&vtopts, ",");

		if (vtopts != NULL) {
			err = net_parsemac(vtopts, sc->vsc_config.mac);
			if (err != 0) {
				free(devname);
				free(sc);
				return (err);
			}
			mac_provided = 1;
		}

		err = netbe_init(&sc->vsc_be, devname, pci_vtnet_rx_callback,
		    sc);
		free(devname);
		if (err) {
			free(sc);
			return (err);
		}
		sc->vsc_consts.vc_hv_caps |= netbe_get_cap(sc->vsc_be);
	}

	if (!mac_provided) {
		net_genmac(pi, sc->vsc_config.mac);
	}

	/* initialize config space */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);

	/* Link is up if we managed to open backend device. */
	sc->vsc_config.status = (opts == NULL || sc->vsc_be);

	vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues);
	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;

	/* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */
	if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) {
		free(sc);
		return (1);
	}

	/* use BAR 0 to map config regs in IO space */
	vi_set_io_bar(&sc->vsc_vs, 0);

	sc->resetting = 0;

	sc->rx_merge = 0;
	pthread_mutex_init(&sc->rx_mtx, NULL);

	/*
	 * Initialize tx semaphore & spawn TX processing thread.
	 * As of now, only one thread for TX desc processing is
	 * spawned.
	 */
	sc->tx_in_progress = 0;
	pthread_mutex_init(&sc->tx_mtx, NULL);
	pthread_cond_init(&sc->tx_cond, NULL);
	pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
	snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot,
	    pi->pi_func);
	pthread_set_name_np(sc->tx_tid, tname);

	return (0);
}

static int
pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value)
{
	struct pci_vtnet_softc *sc = vsc;
	void *ptr;

	if (offset < (int)sizeof(sc->vsc_config.mac)) {
		assert(offset + size <= (int)sizeof(sc->vsc_config.mac));
		/*
		 * The driver is allowed to change the MAC address
		 */
		ptr = &sc->vsc_config.mac[offset];
		memcpy(ptr, &value, size);
	} else {
		/* silently ignore other writes */
		DPRINTF(("vtnet: write to readonly reg %d", offset));
	}

	return (0);
}

static int
pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval)
{
	struct pci_vtnet_softc *sc = vsc;
	void *ptr;

	ptr = (uint8_t *)&sc->vsc_config + offset;
	memcpy(retval, ptr, size);
	return (0);
}
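
/*
 * Called by the virtio layer once the guest has acked its feature
 * subset. Both the rx merge state and the virtio-net header length
 * depend on the outcome, which is why pci_vtnet_reset() keeps receive
 * operation disabled until this runs again.
 */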
static void
pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features)
{
	struct pci_vtnet_softc *sc = vsc;
	unsigned int rx_vhdrlen;

	sc->vsc_features = negotiated_features;

	if (negotiated_features & VIRTIO_NET_F_MRG_RXBUF) {
		rx_vhdrlen = sizeof(struct virtio_net_rxhdr);
		sc->rx_merge = 1;
	} else {
		/*
		 * Without mergeable rx buffers, virtio-net header is 2
		 * bytes shorter than sizeof(struct virtio_net_rxhdr).
		 */
		rx_vhdrlen = sizeof(struct virtio_net_rxhdr) - 2;
		sc->rx_merge = 0;
	}

	/* Tell the backend to enable some capabilities it has advertised. */
	netbe_set_cap(sc->vsc_be, negotiated_features, rx_vhdrlen);
}

static struct pci_devemu pci_de_vnet = {
	.pe_emu = 	"virtio-net",
	.pe_init =	pci_vtnet_init,
	.pe_barwrite =	vi_pci_write,
	.pe_barread =	vi_pci_read
};
PCI_EMUL_SET(pci_de_vnet);
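
/*
 * Example usage (illustrative, not part of this file): the emulation
 * registered above is selected from the bhyve command line with a slot
 * specification such as
 *
 *   bhyve -s 2:0,virtio-net,tap0,mac=00:a0:98:11:22:33 ... vmname
 *
 * where "tap0" is the backend device name handed to netbe_init() and
 * the optional "mac=" part is consumed by net_parsemac().
 */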