1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/select.h>
#include <sys/uio.h>
#include <sys/ioctl.h>
#include <net/ethernet.h>
#include <net/if.h>	/* IFNAMSIZ */

#include <err.h>
#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <pthread_np.h>

#include "bhyverun.h"
#include "pci_emul.h"
#include "mevent.h"
#include "virtio.h"
#include "net_utils.h"
#include "net_backends.h"

/* Size (in descriptors) of each virtqueue exposed to the guest. */
#define VTNET_RINGSZ	1024

/* Maximum number of iovec segments collected from one descriptor chain. */
#define VTNET_MAXSEGS	256

/*
 * Host capabilities advertised to the guest, independent of what the
 * network backend may add on top (see netbe_get_cap() in the init path).
 */
#define VTNET_S_HOSTCAPS      \
  ( VIRTIO_NET_F_MAC | VIRTIO_NET_F_STATUS | \
    VIRTIO_F_NOTIFY_ON_EMPTY | VIRTIO_RING_F_INDIRECT_DESC)

/*
 * PCI config-space "registers"
 */
struct virtio_net_config {
	uint8_t  mac[6];	/* device MAC address */
	uint16_t status;	/* link status (non-zero = link up) */
} __packed;

/*
 * Queue definitions.
80 */ 81 #define VTNET_RXQ 0 82 #define VTNET_TXQ 1 83 #define VTNET_CTLQ 2 /* NB: not yet supported */ 84 85 #define VTNET_MAXQ 3 86 87 /* 88 * Debug printf 89 */ 90 static int pci_vtnet_debug; 91 #define DPRINTF(params) if (pci_vtnet_debug) printf params 92 #define WPRINTF(params) printf params 93 94 /* 95 * Per-device softc 96 */ 97 struct pci_vtnet_softc { 98 struct virtio_softc vsc_vs; 99 struct vqueue_info vsc_queues[VTNET_MAXQ - 1]; 100 pthread_mutex_t vsc_mtx; 101 102 net_backend_t *vsc_be; 103 104 int vsc_rx_ready; 105 int resetting; /* protected by tx_mtx */ 106 107 uint64_t vsc_features; /* negotiated features */ 108 109 pthread_mutex_t rx_mtx; 110 unsigned int rx_vhdrlen; 111 int rx_merge; /* merged rx bufs in use */ 112 113 pthread_t tx_tid; 114 pthread_mutex_t tx_mtx; 115 pthread_cond_t tx_cond; 116 int tx_in_progress; 117 118 struct virtio_net_config vsc_config; 119 struct virtio_consts vsc_consts; 120 }; 121 122 static void pci_vtnet_reset(void *); 123 /* static void pci_vtnet_notify(void *, struct vqueue_info *); */ 124 static int pci_vtnet_cfgread(void *, int, int, uint32_t *); 125 static int pci_vtnet_cfgwrite(void *, int, int, uint32_t); 126 static void pci_vtnet_neg_features(void *, uint64_t); 127 128 static struct virtio_consts vtnet_vi_consts = { 129 "vtnet", /* our name */ 130 VTNET_MAXQ - 1, /* we currently support 2 virtqueues */ 131 sizeof(struct virtio_net_config), /* config reg size */ 132 pci_vtnet_reset, /* reset */ 133 NULL, /* device-wide qnotify -- not used */ 134 pci_vtnet_cfgread, /* read PCI config */ 135 pci_vtnet_cfgwrite, /* write PCI config */ 136 pci_vtnet_neg_features, /* apply negotiated features */ 137 VTNET_S_HOSTCAPS, /* our capabilities */ 138 }; 139 140 static void 141 pci_vtnet_reset(void *vsc) 142 { 143 struct pci_vtnet_softc *sc = vsc; 144 145 DPRINTF(("vtnet: device reset requested !\n")); 146 147 /* Acquire the RX lock to block RX processing. 
*/ 148 pthread_mutex_lock(&sc->rx_mtx); 149 150 /* Set sc->resetting and give a chance to the TX thread to stop. */ 151 pthread_mutex_lock(&sc->tx_mtx); 152 sc->resetting = 1; 153 while (sc->tx_in_progress) { 154 pthread_mutex_unlock(&sc->tx_mtx); 155 usleep(10000); 156 pthread_mutex_lock(&sc->tx_mtx); 157 } 158 159 sc->vsc_rx_ready = 0; 160 sc->rx_merge = 1; 161 sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr); 162 163 /* 164 * Now reset rings, MSI-X vectors, and negotiated capabilities. 165 * Do that with the TX lock held, since we need to reset 166 * sc->resetting. 167 */ 168 vi_reset_dev(&sc->vsc_vs); 169 170 sc->resetting = 0; 171 pthread_mutex_unlock(&sc->tx_mtx); 172 pthread_mutex_unlock(&sc->rx_mtx); 173 } 174 175 static void 176 pci_vtnet_rx(struct pci_vtnet_softc *sc) 177 { 178 struct iovec iov[VTNET_MAXSEGS + 1]; 179 struct vqueue_info *vq; 180 int len, n; 181 uint16_t idx; 182 183 if (!sc->vsc_rx_ready) { 184 /* 185 * The rx ring has not yet been set up. 186 * Drop the packet and try later. 187 */ 188 netbe_rx_discard(sc->vsc_be); 189 return; 190 } 191 192 /* 193 * Check for available rx buffers 194 */ 195 vq = &sc->vsc_queues[VTNET_RXQ]; 196 if (!vq_has_descs(vq)) { 197 /* 198 * No available rx buffers. Drop the packet and try later. 199 * Interrupt on empty, if that's negotiated. 200 */ 201 netbe_rx_discard(sc->vsc_be); 202 vq_endchains(vq, /*used_all_avail=*/1); 203 return; 204 } 205 206 do { 207 /* 208 * Get descriptor chain. 209 */ 210 n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); 211 assert(n >= 1 && n <= VTNET_MAXSEGS); 212 213 len = netbe_recv(sc->vsc_be, iov, n); 214 215 if (len <= 0) { 216 /* 217 * No more packets (len == 0), or backend errored 218 * (err < 0). Return unused available buffers. 219 */ 220 vq_retchain(vq); 221 /* Interrupt if needed/appropriate and stop. 
*/ 222 vq_endchains(vq, /*used_all_avail=*/0); 223 return; 224 } 225 226 /* Publish the info to the guest */ 227 vq_relchain(vq, idx, (uint32_t)len); 228 } while (vq_has_descs(vq)); 229 230 /* Interrupt if needed, including for NOTIFY_ON_EMPTY. */ 231 vq_endchains(vq, /*used_all_avail=*/1); 232 } 233 234 /* 235 * Called when there is read activity on the backend file descriptor. 236 * Each buffer posted by the guest is assumed to be able to contain 237 * an entire ethernet frame + rx header. 238 */ 239 static void 240 pci_vtnet_rx_callback(int fd, enum ev_type type, void *param) 241 { 242 struct pci_vtnet_softc *sc = param; 243 244 pthread_mutex_lock(&sc->rx_mtx); 245 pci_vtnet_rx(sc); 246 pthread_mutex_unlock(&sc->rx_mtx); 247 248 } 249 250 /* Called on RX kick. */ 251 static void 252 pci_vtnet_ping_rxq(void *vsc, struct vqueue_info *vq) 253 { 254 struct pci_vtnet_softc *sc = vsc; 255 256 /* 257 * A qnotify means that the rx process can now begin 258 */ 259 pthread_mutex_lock(&sc->rx_mtx); 260 if (sc->vsc_rx_ready == 0) { 261 sc->vsc_rx_ready = 1; 262 vq_kick_disable(vq); 263 } 264 pthread_mutex_unlock(&sc->rx_mtx); 265 } 266 267 /* TX virtqueue processing, called by the TX thread. */ 268 static void 269 pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vqueue_info *vq) 270 { 271 struct iovec iov[VTNET_MAXSEGS + 1]; 272 uint16_t idx; 273 ssize_t len; 274 int n; 275 276 /* 277 * Obtain chain of descriptors. The first descriptor also 278 * contains the virtio-net header. 279 */ 280 n = vq_getchain(vq, &idx, iov, VTNET_MAXSEGS, NULL); 281 assert(n >= 1 && n <= VTNET_MAXSEGS); 282 283 len = netbe_send(sc->vsc_be, iov, n); 284 285 /* chain is processed, release it and set len */ 286 vq_relchain(vq, idx, len > 0 ? len : 0); 287 } 288 289 /* Called on TX kick. */ 290 static void 291 pci_vtnet_ping_txq(void *vsc, struct vqueue_info *vq) 292 { 293 struct pci_vtnet_softc *sc = vsc; 294 295 /* 296 * Any ring entries to process? 
297 */ 298 if (!vq_has_descs(vq)) 299 return; 300 301 /* Signal the tx thread for processing */ 302 pthread_mutex_lock(&sc->tx_mtx); 303 vq_kick_disable(vq); 304 if (sc->tx_in_progress == 0) 305 pthread_cond_signal(&sc->tx_cond); 306 pthread_mutex_unlock(&sc->tx_mtx); 307 } 308 309 /* 310 * Thread which will handle processing of TX desc 311 */ 312 static void * 313 pci_vtnet_tx_thread(void *param) 314 { 315 struct pci_vtnet_softc *sc = param; 316 struct vqueue_info *vq; 317 int error; 318 319 vq = &sc->vsc_queues[VTNET_TXQ]; 320 321 /* 322 * Let us wait till the tx queue pointers get initialised & 323 * first tx signaled 324 */ 325 pthread_mutex_lock(&sc->tx_mtx); 326 error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 327 assert(error == 0); 328 329 for (;;) { 330 /* note - tx mutex is locked here */ 331 while (sc->resetting || !vq_has_descs(vq)) { 332 vq_kick_enable(vq); 333 if (!sc->resetting && vq_has_descs(vq)) 334 break; 335 336 sc->tx_in_progress = 0; 337 error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 338 assert(error == 0); 339 } 340 vq_kick_disable(vq); 341 sc->tx_in_progress = 1; 342 pthread_mutex_unlock(&sc->tx_mtx); 343 344 do { 345 /* 346 * Run through entries, placing them into 347 * iovecs and sending when an end-of-packet 348 * is found 349 */ 350 pci_vtnet_proctx(sc, vq); 351 } while (vq_has_descs(vq)); 352 353 /* 354 * Generate an interrupt if needed. 355 */ 356 vq_endchains(vq, /*used_all_avail=*/1); 357 358 pthread_mutex_lock(&sc->tx_mtx); 359 } 360 } 361 362 #ifdef notyet 363 static void 364 pci_vtnet_ping_ctlq(void *vsc, struct vqueue_info *vq) 365 { 366 367 DPRINTF(("vtnet: control qnotify!\n\r")); 368 } 369 #endif 370 371 static int 372 pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 373 { 374 struct pci_vtnet_softc *sc; 375 char tname[MAXCOMLEN + 1]; 376 int mac_provided; 377 378 /* 379 * Allocate data structures for further virtio initializations. 
380 * sc also contains a copy of vtnet_vi_consts, since capabilities 381 * change depending on the backend. 382 */ 383 sc = calloc(1, sizeof(struct pci_vtnet_softc)); 384 385 sc->vsc_consts = vtnet_vi_consts; 386 pthread_mutex_init(&sc->vsc_mtx, NULL); 387 388 sc->vsc_queues[VTNET_RXQ].vq_qsize = VTNET_RINGSZ; 389 sc->vsc_queues[VTNET_RXQ].vq_notify = pci_vtnet_ping_rxq; 390 sc->vsc_queues[VTNET_TXQ].vq_qsize = VTNET_RINGSZ; 391 sc->vsc_queues[VTNET_TXQ].vq_notify = pci_vtnet_ping_txq; 392 #ifdef notyet 393 sc->vsc_queues[VTNET_CTLQ].vq_qsize = VTNET_RINGSZ; 394 sc->vsc_queues[VTNET_CTLQ].vq_notify = pci_vtnet_ping_ctlq; 395 #endif 396 397 /* 398 * Attempt to open the backend device and read the MAC address 399 * if specified. 400 */ 401 mac_provided = 0; 402 if (opts != NULL) { 403 char *devname; 404 char *vtopts; 405 int err; 406 407 devname = vtopts = strdup(opts); 408 (void) strsep(&vtopts, ","); 409 410 if (vtopts != NULL) { 411 err = net_parsemac(vtopts, sc->vsc_config.mac); 412 if (err != 0) { 413 free(devname); 414 free(sc); 415 return (err); 416 } 417 mac_provided = 1; 418 } 419 420 err = netbe_init(&sc->vsc_be, devname, pci_vtnet_rx_callback, 421 sc); 422 free(devname); 423 if (err) { 424 free(sc); 425 return (err); 426 } 427 sc->vsc_consts.vc_hv_caps |= netbe_get_cap(sc->vsc_be); 428 } 429 430 if (!mac_provided) { 431 net_genmac(pi, sc->vsc_config.mac); 432 } 433 434 /* initialize config space */ 435 pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); 436 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 437 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); 438 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); 439 pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); 440 441 /* Link is up if we managed to open backend device. 
*/ 442 sc->vsc_config.status = (opts == NULL || sc->vsc_be); 443 444 vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues); 445 sc->vsc_vs.vs_mtx = &sc->vsc_mtx; 446 447 /* use BAR 1 to map MSI-X table and PBA, if we're using MSI-X */ 448 if (vi_intr_init(&sc->vsc_vs, 1, fbsdrun_virtio_msix())) { 449 free(sc); 450 return (1); 451 } 452 453 /* use BAR 0 to map config regs in IO space */ 454 vi_set_io_bar(&sc->vsc_vs, 0); 455 456 sc->resetting = 0; 457 458 sc->rx_merge = 1; 459 sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr); 460 pthread_mutex_init(&sc->rx_mtx, NULL); 461 462 /* 463 * Initialize tx semaphore & spawn TX processing thread. 464 * As of now, only one thread for TX desc processing is 465 * spawned. 466 */ 467 sc->tx_in_progress = 0; 468 pthread_mutex_init(&sc->tx_mtx, NULL); 469 pthread_cond_init(&sc->tx_cond, NULL); 470 pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); 471 snprintf(tname, sizeof(tname), "vtnet-%d:%d tx", pi->pi_slot, 472 pi->pi_func); 473 pthread_set_name_np(sc->tx_tid, tname); 474 475 return (0); 476 } 477 478 static int 479 pci_vtnet_cfgwrite(void *vsc, int offset, int size, uint32_t value) 480 { 481 struct pci_vtnet_softc *sc = vsc; 482 void *ptr; 483 484 if (offset < (int)sizeof(sc->vsc_config.mac)) { 485 assert(offset + size <= (int)sizeof(sc->vsc_config.mac)); 486 /* 487 * The driver is allowed to change the MAC address 488 */ 489 ptr = &sc->vsc_config.mac[offset]; 490 memcpy(ptr, &value, size); 491 } else { 492 /* silently ignore other writes */ 493 DPRINTF(("vtnet: write to readonly reg %d\n\r", offset)); 494 } 495 496 return (0); 497 } 498 499 static int 500 pci_vtnet_cfgread(void *vsc, int offset, int size, uint32_t *retval) 501 { 502 struct pci_vtnet_softc *sc = vsc; 503 void *ptr; 504 505 ptr = (uint8_t *)&sc->vsc_config + offset; 506 memcpy(retval, ptr, size); 507 return (0); 508 } 509 510 static void 511 pci_vtnet_neg_features(void *vsc, uint64_t negotiated_features) 512 { 513 struct 
pci_vtnet_softc *sc = vsc; 514 515 sc->vsc_features = negotiated_features; 516 517 if (!(negotiated_features & VIRTIO_NET_F_MRG_RXBUF)) { 518 sc->rx_merge = 0; 519 /* Without mergeable rx buffers, virtio-net header is 2 520 * bytes shorter than sizeof(struct virtio_net_rxhdr). */ 521 sc->rx_vhdrlen = sizeof(struct virtio_net_rxhdr) - 2; 522 } 523 524 /* Tell the backend to enable some capabilities it has advertised. */ 525 netbe_set_cap(sc->vsc_be, negotiated_features, sc->rx_vhdrlen); 526 } 527 528 static struct pci_devemu pci_de_vnet = { 529 .pe_emu = "virtio-net", 530 .pe_init = pci_vtnet_init, 531 .pe_barwrite = vi_pci_write, 532 .pe_barread = vi_pci_read 533 }; 534 PCI_EMUL_SET(pci_de_vnet); 535