1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/linker_set.h> 36 #include <sys/select.h> 37 #include <sys/uio.h> 38 #include <sys/ioctl.h> 39 #include <net/ethernet.h> 40 #include <net/if.h> /* IFNAMSIZ */ 41 42 #include <err.h> 43 #include <errno.h> 44 #include <fcntl.h> 45 #include <stdio.h> 46 #include <stdlib.h> 47 #include <stdint.h> 48 #include <string.h> 49 #include <strings.h> 50 #include <unistd.h> 51 #include <assert.h> 52 #include <pthread.h> 53 #include <pthread_np.h> 54 55 #include "bhyverun.h" 56 #include "pci_emul.h" 57 #include "mevent.h" 58 #include "virtio.h" 59 #include "net_utils.h" 60 #include "net_backends.h" 61 #include "iov.h" 62 63 #define VTNET_RINGSZ 1024 64 65 #define VTNET_MAXSEGS 256 66 67 #define VTNET_MAX_PKT_LEN (65536 + 64) 68 69 #define VTNET_S_HOSTCAPS \ 70 ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \ 71 VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC) 72 73 /* 74 * PCI config-space "registers" 75 */ 76 struct virtio_net_config { 77 uint8_t mac[6]; 78 uint16_t status; 79 } __packed; 80 81 /* 82 * Queue definitions. 83 */ 84 #define VTNET_RXQ 0 85 #define VTNET_TXQ 1 86 #define VTNET_CTLQ 2 /* NB: not yet supported */ 87 88 #define VTNET_MAXQ 3 89 90 /* 91 * Debug printf 92 */ 93 static int pci_vtnet_debug; 94 #define DPRINTF(params) if (pci_vtnet_debug) printf params 95 #define WPRINTF(params) printf params 96 97 /* 98 * Per-device softc 99 */ 100 struct pci_vtnet_softc { 101 struct virtio_softc vsc_vs; 102 struct vqueue_info vsc_queues[VTNET_MAXQ - 1]; 103 pthread_mutex_t vsc_mtx; 104 105 net_backend_t *vsc_be; 106 107 int resetting; /* protected by tx_mtx */ 108 109 uint64_t vsc_features; /* negotiated features */ 110 111 pthread_mutex_t rx_mtx; 112 int rx_merge; /* merged rx bufs in use */ 113 114 pthread_t tx_tid; 115 pthread_mutex_t tx_mtx; 116 pthread_cond_t tx_cond; 117 int tx_in_progress; 118 119 struct virtio_net_config vsc_config; 120 struct virtio_consts vsc_consts; 121 }; 122 123 static void pci_vtnet_reset(void *); 124 /* static void pci_vtnet_notify(void *, struct vqueue_info *); */ 125 static int pci_vtnet_cfgread(void *, int, int, uint32_t *); 126 static int pci_vtnet_cfgwrite(void *, int, int, uint32_t); 127 static void pci_vtnet_neg_features(void *, uint64_t); 128 129 static struct virtio_consts vtnet_vi_consts = { 130 "vtnet", /* our name */ 131 VTNET_MAXQ - 1, /* we currently support 2 virtqueues */ 132 sizeof(struct virtio_net_config), /* config reg size */ 133 pci_vtnet_reset, /* reset */ 134 NULL, /* device-wide qnotify -- not used */ 135 pci_vtnet_cfgread, /* read PCI config */ 136 pci_vtnet_cfgwrite, /* write PCI config */ 137 pci_vtnet_neg_features, /* apply negotiated features */ 138 VTNET_S_HOSTCAPS, /* our capabilities */ 139 }; 140 141 static void 142 pci_vtnet_reset(void *vsc) 143 { 144 struct pci_vtnet_softc *sc = vsc; 145 146 DPRINTF(("vtnet: device reset requested !\n\r")); 147 148 /* Acquire the RX lock to block RX processing. */ 149 pthread_mutex_lock(&sc->rx_mtx); 150 151 /* 152 * Make sure receive operation is disabled at least until we 153 * re-negotiate the features, since receive operation depends 154 * on the value of sc->rx_merge and the header length, which 155 * are both set in pci_vtnet_neg_features(). 156 * Receive operation will be enabled again once the guest adds 157 * the first receive buffers and kicks us. 158 */ 159 netbe_rx_disable(sc->vsc_be); 160 161 /* Set sc->resetting and give a chance to the TX thread to stop. */ 162 pthread_mutex_lock(&sc->tx_mtx); 163 sc->resetting = 1; 164 while (sc->tx_in_progress) { 165 pthread_mutex_unlock(&sc->tx_mtx); 166 usleep(10000); 167 pthread_mutex_lock(&sc->tx_mtx); 168 } 169 170 /* 171 * Now reset rings, MSI-X vectors, and negotiated capabilities. 172 * Do that with the TX lock held, since we need to reset 173 * sc->resetting. 174 */ 175 vi_reset_dev(&sc->vsc_vs); 176 177 sc->resetting = 0; 178 pthread_mutex_unlock(&sc->tx_mtx); 179 pthread_mutex_unlock(&sc->rx_mtx); 180 } 181 182 struct virtio_mrg_rxbuf_info { 183 uint16_t idx; 184 uint16_t pad; 185 uint32_t len; 186 }; 187 188 static void 189 pci_vtnet_rx(struct pci_vtnet_softc *sc) 190 { 191 struct virtio_mrg_rxbuf_info info[VTNET_MAXSEGS]; 192 struct iovec iov[VTNET_MAXSEGS + 1]; 193 struct vqueue_info *vq; 194 uint32_t cur_iov_bytes; 195 struct iovec *cur_iov; 196 uint16_t cur_iov_len; 197 uint32_t ulen; 198 int n_chains; 199 int len; 200 201 vq = &sc->vsc_queues[VTNET_RXQ]; 202 for (;;) { 203 /* 204 * Get a descriptor chain to store the next ingress 205 * packet. In case of mergeable rx buffers, get as 206 * many chains as necessary in order to make room 207 * for a maximum sized LRO packet. 208 */ 209 cur_iov_bytes = 0; 210 cur_iov_len = 0; 211 cur_iov = iov; 212 n_chains = 0; 213 do { 214 int n = vq_getchain(vq, &info[n_chains].idx, cur_iov, 215 VTNET_MAXSEGS - cur_iov_len, NULL); 216 217 if (n == 0) { 218 /* 219 * No rx buffers. Enable RX kicks and double 220 * check. 221 */ 222 vq_kick_enable(vq); 223 if (!vq_has_descs(vq)) { 224 /* 225 * Still no buffers. Return the unused 226 * chains (if any), interrupt if needed 227 * (including for NOTIFY_ON_EMPTY), and 228 * disable the backend until the next 229 * kick. 230 */ 231 vq_retchains(vq, n_chains); 232 vq_endchains(vq, /*used_all_avail=*/1); 233 netbe_rx_disable(sc->vsc_be); 234 return; 235 } 236 237 /* More rx buffers found, so keep going. */ 238 vq_kick_disable(vq); 239 continue; 240 } 241 assert(n >= 1 && cur_iov_len + n <= VTNET_MAXSEGS); 242 cur_iov_len += n; 243 if (!sc->rx_merge) { 244 n_chains = 1; 245 break; 246 } 247 info[n_chains].len = (uint32_t)count_iov(cur_iov, n); 248 cur_iov_bytes += info[n_chains].len; 249 cur_iov += n; 250 n_chains++; 251 } while (cur_iov_bytes < VTNET_MAX_PKT_LEN && 252 cur_iov_len < VTNET_MAXSEGS); 253 254 len = netbe_recv(sc->vsc_be, iov, cur_iov_len); 255 256 if (len <= 0) { 257 /* 258 * No more packets (len == 0), or backend errored 259 * (err < 0). Return unused available buffers 260 * and stop. 261 */ 262 vq_retchains(vq, n_chains); 263 /* Interrupt if needed/appropriate and stop. */ 264 vq_endchains(vq, /*used_all_avail=*/0); 265 return; 266 } 267 268 ulen = (uint32_t)len; /* avoid too many casts below */ 269 270 /* Publish the used buffers to the guest. */ 271 if (!sc->rx_merge) { 272 vq_relchain(vq, info[0].idx, ulen); 273 } else { 274 struct virtio_net_rxhdr *hdr = iov[0].iov_base; 275 uint32_t iolen; 276 int i = 0; 277 278 assert(iov[0].iov_len >= sizeof(*hdr)); 279 280 do { 281 iolen = info[i].len; 282 if (iolen > ulen) { 283 iolen = ulen; 284 } 285 vq_relchain_prepare(vq, info[i].idx, iolen); 286 ulen -= iolen; 287 i++; 288 assert(i <= n_chains); 289 } while (ulen > 0); 290 291 hdr->vrh_bufs = i; 292 vq_relchain_publish(vq); 293 vq_retchains(vq, n_chains - i); 294 } 295 } 296 297 } 298 299 /* 300 * Called when there is read activity on the backend file descriptor. 301 * Each buffer posted by the guest is assumed to be able to contain 302 * an entire ethernet frame + rx header. 303 */ 304 static void 305 pci_vtnet_rx_callback(int fd, enum ev_type type, void *param) 306 { 307 struct pci_vtnet_softc *sc = param; 308 309 pthread_mutex_lock(&sc->rx_mtx); 310 pci_vtnet_rx(sc); 311 pthread_mutex_unlock(&sc->rx_mtx); 312 313 } 314 315 /* Called on RX kick. */ 316 static void 317 pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) 318 { 319 struct pci_vtnet_softc *sc = vsc; 320 321 /* 322 * A qnotify means that the rx process can now begin. 323 */ 324 pthread_mutex_lock(&sc->rx_mtx); 325 vq_kick_disable(vq); 326 netbe_rx_enable(sc->vsc_be); 327 pthread_mutex_unlock(&sc->rx_mtx); 328 } 329 330 /* TX virtqueue processing, called by the TX thread. */ 331 static void 332 pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq) 333 { 334 struct iovec iov[VTNET_MAXSEGS + 1]; 335 uint16_t idx; 336 ssize_t len; 337 int n; 338 339 /* 340 * Obtain chain of descriptors. The first descriptor also 341 * contains the virtio-net header. 342 */ 343 n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); 344 assert(n >= 1 && n <= VTNET_MAXSEGS); 345 346 len = netbe_send(sc->vsc_be, iov, n); 347 348 /* chain is processed, release it and set len */ 349 vq_relchain(vq, idx, len > 0 ? len : 0); 350 } 351 352 /* Called on TX kick. */ 353 static void 354 pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq) 355 { 356 struct pci_vtnet_softc *sc = vsc; 357 358 /* 359 * Any ring entries to process? 360 */ 361 if (!vq_has_descs(vq)) 362 return; 363 364 /* Signal the tx thread for processing */ 365 pthread_mutex_lock(&sc->tx_mtx); 366 vq_kick_disable(vq); 367 if (sc->tx_in_progress == 0) 368 pthread_cond_signal(&sc->tx_cond); 369 pthread_mutex_unlock(&sc->tx_mtx); 370 } 371 372 /* 373 * Thread which will handle processing of TX desc 374 */ 375 static void * 376 pci_vtnet_tx_thread(void *param) 377 { 378 struct pci_vtnet_softc *sc = param; 379 struct vqueue_info *vq; 380 int error; 381 382 vq = &sc->vsc_queues[VTNET_TXQ]; 383 384 /* 385 * Let us wait till the tx queue pointers get initialised & 386 * first tx signaled 387 */ 388 pthread_mutex_lock(&sc->tx_mtx); 389 error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 390 assert(error == 0); 391 392 for (;;) { 393 /* note - tx mutex is locked here */ 394 while (sc->resetting || !vq_has_descs(vq)) { 395 vq_kick_enable(vq); 396 if (!sc->resetting && vq_has_descs(vq)) 397 break; 398 399 sc->tx_in_progress = 0; 400 error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 401 assert(error == 0); 402 } 403 vq_kick_disable(vq); 404 sc->tx_in_progress = 1; 405 pthread_mutex_unlock(&sc->tx_mtx); 406 407 do { 408 /* 409 * Run through entries, placing them into 410 * iovecs and sending when an end-of-packet 411 * is found 412 */ 413 pci_vtnet_proctx(sc, vq); 414 } while (vq_has_descs(vq)); 415 416 /* 417 * Generate an interrupt if needed. 418 */ 419 vq_endchains(vq, /*used_all_avail=*/1); 420 421 pthread_mutex_lock(&sc->tx_mtx); 422 } 423 } 424 425 #ifdef notyet 426 static void 427 pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq) 428 { 429 430 DPRINTF(("vtnet: control qnotify!\n\r")); 431 } 432 #endif 433 434 static int 435 pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 436 { 437 struct pci_vtnet_softc *sc; 438 char tname[MAXCOMLEN + 1]; 439 int mac_provided; 440 441 /* 442 * Allocate data structures for further virtio initializations. 443 * sc also contains a copy of vtnet_vi_consts, since capabilities 444 * change depending on the backend. 445 */ 446 sc = calloc(1, sizeof(struct pci_vtnet_softc)); 447 448 sc->vsc_consts = vtnet_vi_consts; 449 pthread_mutex_init(&sc->vsc_mtx, NULL); 450 451 sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ; 452 sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq; 453 sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ; 454 sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq; 455 #ifdef notyet 456 sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ; 457 sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq; 458 #endif 459 460 /* 461 * Attempt to open the backend device and read the MAC address 462 * if specified. 463 */ 464 mac_provided = 0; 465 if (opts != NULL) { 466 char *devname; 467 char *vtopts; 468 int err; 469 470 devname = vtopts = strdup(opts); 471 (void) strsep(&vtopts, ","); 472 473 if (vtopts != NULL) { 474 err = net_parsemac(vtopts, sc->vsc_config.mac); 475 if (err != 0) { 476 free(devname); 477 free(sc); 478 return (err); 479 } 480 mac_provided = 1; 481 } 482 483 err = netbe_init(&sc->vsc_be, devname, pci_vtnet_rx_callback, 484 sc); 485 free(devname); 486 if (err) { 487 free(sc); 488 return (err); 489 } 490 sc->vsc_consts.vc_hv_caps |= netbe_get_cap(sc->vsc_be); 491 } 492 493 if (!mac_provided) { 494 net_genmac(pi, sc->vsc_config.mac); 495 } 496 497 /* initialize config space */ 498 pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); 499 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 500 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); 501 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); 502 pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); 503 504 /* Link is up if we managed to open backend device. */ 505 sc->vsc_config.status = (opts == NULL || sc->vsc_be); 506 507 vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues); 508 sc->vsc_vs.vs_mtx = &sc->vsc_mtx; 509 510 /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */ 511 if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) { 512 free(sc); 513 return (1); 514 } 515 516 /* use BAR 0 to map config regs in IO space */ 517 vi_set_io_bar(&sc->vsc_vs, 0); 518 519 sc->resetting = 0; 520 521 sc->rx_merge = 0; 522 pthread_mutex_init(&sc->rx_mtx, NULL); 523 524 /* 525 * Initialize tx semaphore & spawn TX processing thread. 526 * As of now, only one thread for TX desc processing is 527 * spawned. 528 */ 529 sc->tx_in_progress = 0; 530 pthread_mutex_init(&sc->tx_mtx, NULL); 531 pthread_cond_init(&sc->tx_cond, NULL); 532 pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); 533 snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot, 534 pi->pi_func); 535 pthread_set_name_np(sc->tx_tid, tname); 536 537 return (0); 538 } 539 540 static int 541 pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value) 542 { 543 struct pci_vtnet_softc *sc = vsc; 544 void *ptr; 545 546 if (offset < (int)sizeof(sc->vsc_config.mac)) { 547 assert(offset + size <= (int)sizeof(sc->vsc_config.mac)); 548 /* 549 * The driver is allowed to change the MAC address 550 */ 551 ptr = &sc->vsc_config.mac[offset]; 552 memcpy(ptr, &value, size); 553 } else { 554 /* silently ignore other writes */ 555 DPRINTF(("vtnet: write to readonly reg %d\n\r", offset)); 556 } 557 558 return (0); 559 } 560 561 static int 562 pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval) 563 { 564 struct pci_vtnet_softc *sc = vsc; 565 void *ptr; 566 567 ptr = (uint8_t *)&sc->vsc_config + offset; 568 memcpy(retval, ptr, size); 569 return (0); 570 } 571 572 static void 573 pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features) 574 { 575 struct pci_vtnet_softc *sc = vsc; 576 unsigned int rx_vhdrlen; 577 578 sc->vsc_features = negotiated_features; 579 580 if (negotiated_features & VIRTIO_NET_F_MRG_RXBUF) { 581 rx_vhdrlen = sizeof(struct virtio_net_rxhdr); 582 sc->rx_merge = 1; 583 } else { 584 /* 585 * Without mergeable rx buffers, virtio-net header is 2 586 * bytes shorter than sizeof(struct virtio_net_rxhdr). 587 */ 588 rx_vhdrlen = sizeof(struct virtio_net_rxhdr) - 2; 589 sc->rx_merge = 0; 590 } 591 592 /* Tell the backend to enable some capabilities it has advertised. */ 593 netbe_set_cap(sc->vsc_be, negotiated_features, rx_vhdrlen); 594 } 595 596 static struct pci_devemu pci_de_vnet = { 597 .pe_emu = "virtio-net", 598 .pe_init = pci_vtnet_init, 599 .pe_barwrite = vi_pci_write, 600 .pe_barread = vi_pci_read 601 }; 602 PCI_EMUL_SET(pci_de_vnet); 603