1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/select.h>
#include <sys/uio.h>
#include <sys/ioctl.h>
#include <net/ethernet.h>
#include <net/if.h>	/* IFNAMSIZ */

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <pthread_np.h>

#include "bhyverun.h"
#include "pci_emul.h"
#include "mevent.h"
#include "virtio.h"
#include "net_utils.h"
#include "net_backends.h"

/* Size (in descriptors) of each virtqueue. */
#define VTNET_RINGSZ	1024

/* Maximum number of iovec segments pulled from one descriptor chain. */
#define VTNET_MAXSEGS	256

/* Host capabilities advertised to the guest before backend caps are added. */
#define VTNET_S_HOSTCAPS      \
  ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \
    VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)

/*
 * PCI config-space "registers"
 */
struct virtio_net_config {
	uint8_t  mac[6];	/* device MAC address */
	uint16_t status;	/* link status: non-zero = link up */
} __packed;

/*
 * Queue definitions.
80 */ 81 #define VTNET_RXQ 0 82 #define VTNET_TXQ 1 83 #define VTNET_CTLQ 2 /* NB: not yet supported */ 84 85 #define VTNET_MAXQ 3 86 87 /* 88 * Debug printf 89 */ 90 static int pci_vtnet_debug; 91 #define DPRINTF(params) if (pci_vtnet_debug) printf params 92 #define WPRINTF(params) printf params 93 94 /* 95 * Per-device softc 96 */ 97 struct pci_vtnet_softc { 98 struct virtio_softc vsc_vs; 99 struct vqueue_info vsc_queues[VTNET_MAXQ - 1]; 100 pthread_mutex_t vsc_mtx; 101 102 net_backend_t *vsc_be; 103 104 int resetting; /* protected by tx_mtx */ 105 106 uint64_t vsc_features; /* negotiated features */ 107 108 pthread_mutex_t rx_mtx; 109 unsigned int rx_vhdrlen; 110 int rx_merge; /* merged rx bufs in use */ 111 112 pthread_t tx_tid; 113 pthread_mutex_t tx_mtx; 114 pthread_cond_t tx_cond; 115 int tx_in_progress; 116 117 struct virtio_net_config vsc_config; 118 struct virtio_consts vsc_consts; 119 }; 120 121 static void pci_vtnet_reset(void *); 122 /* static void pci_vtnet_notify(void *, struct vqueue_info *); */ 123 static int pci_vtnet_cfgread(void *, int, int, uint32_t *); 124 static int pci_vtnet_cfgwrite(void *, int, int, uint32_t); 125 static void pci_vtnet_neg_features(void *, uint64_t); 126 127 static struct virtio_consts vtnet_vi_consts = { 128 "vtnet", /* our name */ 129 VTNET_MAXQ - 1, /* we currently support 2 virtqueues */ 130 sizeof(struct virtio_net_config), /* config reg size */ 131 pci_vtnet_reset, /* reset */ 132 NULL, /* device-wide qnotify -- not used */ 133 pci_vtnet_cfgread, /* read PCI config */ 134 pci_vtnet_cfgwrite, /* write PCI config */ 135 pci_vtnet_neg_features, /* apply negotiated features */ 136 VTNET_S_HOSTCAPS, /* our capabilities */ 137 }; 138 139 static void 140 pci_vtnet_reset(void *vsc) 141 { 142 struct pci_vtnet_softc *sc = vsc; 143 144 DPRINTF(("vtnet: device reset requested !\n")); 145 146 /* Acquire the RX lock to block RX processing. 
*/ 147 pthread_mutex_lock(&sc->rx_mtx); 148 149 /* Set sc->resetting and give a chance to the TX thread to stop. */ 150 pthread_mutex_lock(&sc->tx_mtx); 151 sc->resetting = 1; 152 while (sc->tx_in_progress) { 153 pthread_mutex_unlock(&sc->tx_mtx); 154 usleep(10000); 155 pthread_mutex_lock(&sc->tx_mtx); 156 } 157 158 sc->rx_merge = 1; 159 sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr); 160 161 /* 162 * Now reset rings, MSI-X vectors, and negotiated capabilities. 163 * Do that with the TX lock held, since we need to reset 164 * sc->resetting. 165 */ 166 vi_reset_dev(&sc->vsc_vs); 167 168 sc->resetting = 0; 169 pthread_mutex_unlock(&sc->tx_mtx); 170 pthread_mutex_unlock(&sc->rx_mtx); 171 } 172 173 static void 174 pci_vtnet_rx(struct pci_vtnet_softc *sc) 175 { 176 struct iovec iov[VTNET_MAXSEGS + 1]; 177 struct vqueue_info *vq; 178 int len, n; 179 uint16_t idx; 180 181 vq = &sc->vsc_queues[VTNET_RXQ]; 182 for (;;) { 183 /* 184 * Check for available rx buffers. 185 */ 186 if (!vq_has_descs(vq)) { 187 /* No rx buffers. Enable RX kicks and double check. */ 188 vq_kick_enable(vq); 189 if (!vq_has_descs(vq)) { 190 /* 191 * Still no buffers. Interrupt if needed 192 * (including for NOTIFY_ON_EMPTY), and 193 * disable the backend until the next kick. 194 */ 195 vq_endchains(vq, /*used_all_avail=*/1); 196 netbe_rx_disable(sc->vsc_be); 197 return; 198 } 199 200 /* More rx buffers found, so keep going. */ 201 vq_kick_disable(vq); 202 } 203 204 /* 205 * Get descriptor chain. 206 */ 207 n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); 208 assert(n >= 1 && n <= VTNET_MAXSEGS); 209 210 len = netbe_recv(sc->vsc_be, iov, n); 211 212 if (len <= 0) { 213 /* 214 * No more packets (len == 0), or backend errored 215 * (err < 0). Return unused available buffers 216 * and stop. 217 */ 218 vq_retchain(vq); 219 /* Interrupt if needed/appropriate and stop. 
*/ 220 vq_endchains(vq, /*used_all_avail=*/0); 221 return; 222 } 223 224 /* Publish the info to the guest */ 225 vq_relchain(vq, idx, (uint32_t)len); 226 } 227 228 } 229 230 /* 231 * Called when there is read activity on the backend file descriptor. 232 * Each buffer posted by the guest is assumed to be able to contain 233 * an entire ethernet frame + rx header. 234 */ 235 static void 236 pci_vtnet_rx_callback(int fd, enum ev_type type, void *param) 237 { 238 struct pci_vtnet_softc *sc = param; 239 240 pthread_mutex_lock(&sc->rx_mtx); 241 pci_vtnet_rx(sc); 242 pthread_mutex_unlock(&sc->rx_mtx); 243 244 } 245 246 /* Called on RX kick. */ 247 static void 248 pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) 249 { 250 struct pci_vtnet_softc *sc = vsc; 251 252 /* 253 * A qnotify means that the rx process can now begin. 254 */ 255 pthread_mutex_lock(&sc->rx_mtx); 256 vq_kick_disable(vq); 257 netbe_rx_enable(sc->vsc_be); 258 pthread_mutex_unlock(&sc->rx_mtx); 259 } 260 261 /* TX virtqueue processing, called by the TX thread. */ 262 static void 263 pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq) 264 { 265 struct iovec iov[VTNET_MAXSEGS + 1]; 266 uint16_t idx; 267 ssize_t len; 268 int n; 269 270 /* 271 * Obtain chain of descriptors. The first descriptor also 272 * contains the virtio-net header. 273 */ 274 n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); 275 assert(n >= 1 && n <= VTNET_MAXSEGS); 276 277 len = netbe_send(sc->vsc_be, iov, n); 278 279 /* chain is processed, release it and set len */ 280 vq_relchain(vq, idx, len > 0 ? len : 0); 281 } 282 283 /* Called on TX kick. */ 284 static void 285 pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq) 286 { 287 struct pci_vtnet_softc *sc = vsc; 288 289 /* 290 * Any ring entries to process? 
291 */ 292 if (!vq_has_descs(vq)) 293 return; 294 295 /* Signal the tx thread for processing */ 296 pthread_mutex_lock(&sc->tx_mtx); 297 vq_kick_disable(vq); 298 if (sc->tx_in_progress == 0) 299 pthread_cond_signal(&sc->tx_cond); 300 pthread_mutex_unlock(&sc->tx_mtx); 301 } 302 303 /* 304 * Thread which will handle processing of TX desc 305 */ 306 static void * 307 pci_vtnet_tx_thread(void *param) 308 { 309 struct pci_vtnet_softc *sc = param; 310 struct vqueue_info *vq; 311 int error; 312 313 vq = &sc->vsc_queues[VTNET_TXQ]; 314 315 /* 316 * Let us wait till the tx queue pointers get initialised & 317 * first tx signaled 318 */ 319 pthread_mutex_lock(&sc->tx_mtx); 320 error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 321 assert(error == 0); 322 323 for (;;) { 324 /* note - tx mutex is locked here */ 325 while (sc->resetting || !vq_has_descs(vq)) { 326 vq_kick_enable(vq); 327 if (!sc->resetting && vq_has_descs(vq)) 328 break; 329 330 sc->tx_in_progress = 0; 331 error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 332 assert(error == 0); 333 } 334 vq_kick_disable(vq); 335 sc->tx_in_progress = 1; 336 pthread_mutex_unlock(&sc->tx_mtx); 337 338 do { 339 /* 340 * Run through entries, placing them into 341 * iovecs and sending when an end-of-packet 342 * is found 343 */ 344 pci_vtnet_proctx(sc, vq); 345 } while (vq_has_descs(vq)); 346 347 /* 348 * Generate an interrupt if needed. 349 */ 350 vq_endchains(vq, /*used_all_avail=*/1); 351 352 pthread_mutex_lock(&sc->tx_mtx); 353 } 354 } 355 356 #ifdef notyet 357 static void 358 pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq) 359 { 360 361 DPRINTF(("vtnet: control qnotify!\n\r")); 362 } 363 #endif 364 365 static int 366 pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 367 { 368 struct pci_vtnet_softc *sc; 369 char tname[MAXCOMLEN + 1]; 370 int mac_provided; 371 372 /* 373 * Allocate data structures for further virtio initializations. 
374 * sc also contains a copy of vtnet_vi_consts, since capabilities 375 * change depending on the backend. 376 */ 377 sc = calloc(1, sizeof(struct pci_vtnet_softc)); 378 379 sc->vsc_consts = vtnet_vi_consts; 380 pthread_mutex_init(&sc->vsc_mtx, NULL); 381 382 sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ; 383 sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq; 384 sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ; 385 sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq; 386 #ifdef notyet 387 sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ; 388 sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq; 389 #endif 390 391 /* 392 * Attempt to open the backend device and read the MAC address 393 * if specified. 394 */ 395 mac_provided = 0; 396 if (opts != NULL) { 397 char *devname; 398 char *vtopts; 399 int err; 400 401 devname = vtopts = strdup(opts); 402 (void) strsep(&vtopts, ","); 403 404 if (vtopts != NULL) { 405 err = net_parsemac(vtopts, sc->vsc_config.mac); 406 if (err != 0) { 407 free(devname); 408 free(sc); 409 return (err); 410 } 411 mac_provided = 1; 412 } 413 414 err = netbe_init(&sc->vsc_be, devname, pci_vtnet_rx_callback, 415 sc); 416 free(devname); 417 if (err) { 418 free(sc); 419 return (err); 420 } 421 sc->vsc_consts.vc_hv_caps |= netbe_get_cap(sc->vsc_be); 422 } 423 424 if (!mac_provided) { 425 net_genmac(pi, sc->vsc_config.mac); 426 } 427 428 /* initialize config space */ 429 pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); 430 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 431 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); 432 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); 433 pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); 434 435 /* Link is up if we managed to open backend device. 
*/ 436 sc->vsc_config.status = (opts == NULL || sc->vsc_be); 437 438 vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues); 439 sc->vsc_vs.vs_mtx = &sc->vsc_mtx; 440 441 /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */ 442 if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) { 443 free(sc); 444 return (1); 445 } 446 447 /* use BAR 0 to map config regs in IO space */ 448 vi_set_io_bar(&sc->vsc_vs, 0); 449 450 sc->resetting = 0; 451 452 sc->rx_merge = 1; 453 sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr); 454 pthread_mutex_init(&sc->rx_mtx, NULL); 455 456 /* 457 * Initialize tx semaphore & spawn TX processing thread. 458 * As of now, only one thread for TX desc processing is 459 * spawned. 460 */ 461 sc->tx_in_progress = 0; 462 pthread_mutex_init(&sc->tx_mtx, NULL); 463 pthread_cond_init(&sc->tx_cond, NULL); 464 pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); 465 snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot, 466 pi->pi_func); 467 pthread_set_name_np(sc->tx_tid, tname); 468 469 return (0); 470 } 471 472 static int 473 pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value) 474 { 475 struct pci_vtnet_softc *sc = vsc; 476 void *ptr; 477 478 if (offset < (int)sizeof(sc->vsc_config.mac)) { 479 assert(offset + size <= (int)sizeof(sc->vsc_config.mac)); 480 /* 481 * The driver is allowed to change the MAC address 482 */ 483 ptr = &sc->vsc_config.mac[offset]; 484 memcpy(ptr, &value, size); 485 } else { 486 /* silently ignore other writes */ 487 DPRINTF(("vtnet: write to readonly reg %d\n\r", offset)); 488 } 489 490 return (0); 491 } 492 493 static int 494 pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval) 495 { 496 struct pci_vtnet_softc *sc = vsc; 497 void *ptr; 498 499 ptr = (uint8_t *)&sc->vsc_config + offset; 500 memcpy(retval, ptr, size); 501 return (0); 502 } 503 504 static void 505 pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features) 506 { 507 struct 
pci_vtnet_softc *sc = vsc; 508 509 sc->vsc_features = negotiated_features; 510 511 if (!(negotiated_features & VIRTIO_NET_F_MRG_RXBUF)) { 512 sc->rx_merge = 0; 513 /* Without mergeable rx buffers, virtio-net header is 2 514 * bytes shorter than sizeof(struct virtio_net_rxhdr). */ 515 sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr) - 2; 516 } 517 518 /* Tell the backend to enable some capabilities it has advertised. */ 519 netbe_set_cap(sc->vsc_be, negotiated_features, sc->rx_vhdrlen); 520 } 521 522 static struct pci_devemu pci_de_vnet = { 523 .pe_emu = "virtio-net", 524 .pe_init = pci_vtnet_init, 525 .pe_barwrite = vi_pci_write, 526 .pe_barread = vi_pci_read 527 }; 528 PCI_EMUL_SET(pci_de_vnet); 529