1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/linker_set.h> 34 #include <sys/select.h> 35 #include <sys/uio.h> 36 #include <sys/ioctl.h> 37 #include <net/ethernet.h> 38 39 #include <errno.h> 40 #include <fcntl.h> 41 #include <stdio.h> 42 #include <stdlib.h> 43 #include <stdint.h> 44 #include <string.h> 45 #include <strings.h> 46 #include <unistd.h> 47 #include <assert.h> 48 #include <md5.h> 49 #include <pthread.h> 50 #include <pthread_np.h> 51 52 #include "bhyverun.h" 53 #include "pci_emul.h" 54 #include "mevent.h" 55 #include "virtio.h" 56 57 #define VTNET_RINGSZ 1024 58 59 #define VTNET_MAXSEGS 32 60 61 /* 62 * PCI config-space register offsets 63 */ 64 #define VTNET_R_CFG0 24 65 #define VTNET_R_CFG1 25 66 #define VTNET_R_CFG2 26 67 #define VTNET_R_CFG3 27 68 #define VTNET_R_CFG4 28 69 #define VTNET_R_CFG5 29 70 #define VTNET_R_CFG6 30 71 #define VTNET_R_CFG7 31 72 #define VTNET_R_MAX 31 73 74 #define VTNET_REGSZ VTNET_R_MAX+1 75 76 /* 77 * Host capabilities 78 */ 79 #define VTNET_S_HOSTCAPS \ 80 ( 0x00000020 | /* host supplies MAC */ \ 81 0x00008000 | /* host can merge Rx buffers */ \ 82 0x00010000 | /* config status available */ \ 83 VIRTIO_F_NOTIFY_ON_EMPTY) 84 85 /* 86 * Queue definitions. 87 */ 88 #define VTNET_RXQ 0 89 #define VTNET_TXQ 1 90 #define VTNET_CTLQ 2 91 92 #define VTNET_MAXQ 3 93 94 static int use_msix = 1; 95 96 struct vring_hqueue { 97 /* Internal state */ 98 uint16_t hq_size; 99 uint16_t hq_cur_aidx; /* trails behind 'avail_idx' */ 100 101 /* Host-context pointers to the queue */ 102 struct virtio_desc *hq_dtable; 103 uint16_t *hq_avail_flags; 104 uint16_t *hq_avail_idx; /* monotonically increasing */ 105 uint16_t *hq_avail_ring; 106 107 uint16_t *hq_used_flags; 108 uint16_t *hq_used_idx; /* monotonically increasing */ 109 struct virtio_used *hq_used_ring; 110 }; 111 112 /* 113 * Fixed network header size 114 */ 115 struct virtio_net_rxhdr { 116 uint8_t vrh_flags; 117 uint8_t vrh_gso_type; 118 uint16_t vrh_hdr_len; 119 uint16_t vrh_gso_size; 120 uint16_t vrh_csum_start; 121 uint16_t vrh_csum_offset; 122 uint16_t vrh_bufs; 123 } __packed; 124 125 /* 126 * Debug printf 127 */ 128 static int pci_vtnet_debug; 129 #define DPRINTF(params) if (pci_vtnet_debug) printf params 130 #define WPRINTF(params) printf params 131 132 /* 133 * Per-device softc 134 */ 135 struct pci_vtnet_softc { 136 struct pci_devinst *vsc_pi; 137 pthread_mutex_t vsc_mtx; 138 struct mevent *vsc_mevp; 139 140 int vsc_curq; 141 int vsc_status; 142 int vsc_isr; 143 int vsc_tapfd; 144 int vsc_rx_ready; 145 int resetting; 146 147 uint32_t vsc_features; 148 uint8_t vsc_macaddr[6]; 149 150 uint64_t vsc_pfn[VTNET_MAXQ]; 151 struct vring_hqueue vsc_hq[VTNET_MAXQ]; 152 uint16_t vsc_msix_table_idx[VTNET_MAXQ]; 153 154 pthread_mutex_t rx_mtx; 155 int rx_in_progress; 156 157 pthread_t tx_tid; 158 pthread_mutex_t tx_mtx; 159 pthread_cond_t tx_cond; 160 int tx_in_progress; 161 }; 162 #define vtnet_ctx(sc) ((sc)->vsc_pi->pi_vmctx) 163 #define notify_on_empty(sc) ((sc)->vsc_features & VIRTIO_F_NOTIFY_ON_EMPTY) 164 165 /* 166 * Return the size of IO BAR that maps virtio header and device specific 167 * region. The size would vary depending on whether MSI-X is enabled or 168 * not. 169 */ 170 static uint64_t 171 pci_vtnet_iosize(struct pci_devinst *pi) 172 { 173 if (pci_msix_enabled(pi)) 174 return (VTNET_REGSZ); 175 else 176 return (VTNET_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX)); 177 } 178 179 /* 180 * Return the number of available descriptors in the vring taking care 181 * of the 16-bit index wraparound. 182 */ 183 static int 184 hq_num_avail(struct vring_hqueue *hq) 185 { 186 uint16_t ndesc; 187 188 /* 189 * We're just computing (a-b) mod 2^16 190 * 191 * The only glitch here is that in standard C, 192 * uint16_t promotes to (signed) int when int has 193 * more than 16 bits (pretty much always now), so 194 * we have to force it back to unsigned. 195 */ 196 ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx; 197 198 assert(ndesc <= hq->hq_size); 199 200 return (ndesc); 201 } 202 203 static uint16_t 204 pci_vtnet_qsize(int qnum) 205 { 206 /* XXX no ctl queue currently */ 207 if (qnum == VTNET_CTLQ) { 208 return (0); 209 } 210 211 /* XXX fixed currently. Maybe different for tx/rx/ctl */ 212 return (VTNET_RINGSZ); 213 } 214 215 static void 216 pci_vtnet_ring_reset(struct pci_vtnet_softc *sc, int ring) 217 { 218 struct vring_hqueue *hq; 219 220 assert(ring < VTNET_MAXQ); 221 222 hq = &sc->vsc_hq[ring]; 223 224 /* 225 * Reset all soft state 226 */ 227 hq->hq_cur_aidx = 0; 228 } 229 230 /* 231 * If the transmit thread is active then stall until it is done. 232 */ 233 static void 234 pci_vtnet_txwait(struct pci_vtnet_softc *sc) 235 { 236 237 pthread_mutex_lock(&sc->tx_mtx); 238 while (sc->tx_in_progress) { 239 pthread_mutex_unlock(&sc->tx_mtx); 240 usleep(10000); 241 pthread_mutex_lock(&sc->tx_mtx); 242 } 243 pthread_mutex_unlock(&sc->tx_mtx); 244 } 245 246 /* 247 * If the receive thread is active then stall until it is done. 248 */ 249 static void 250 pci_vtnet_rxwait(struct pci_vtnet_softc *sc) 251 { 252 253 pthread_mutex_lock(&sc->rx_mtx); 254 while (sc->rx_in_progress) { 255 pthread_mutex_unlock(&sc->rx_mtx); 256 usleep(10000); 257 pthread_mutex_lock(&sc->rx_mtx); 258 } 259 pthread_mutex_unlock(&sc->rx_mtx); 260 } 261 262 static void 263 pci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value) 264 { 265 int i; 266 267 if (value == 0) { 268 DPRINTF(("vtnet: device reset requested !\n")); 269 270 sc->resetting = 1; 271 272 /* 273 * Wait for the transmit and receive threads to finish their 274 * processing. 275 */ 276 pci_vtnet_txwait(sc); 277 pci_vtnet_rxwait(sc); 278 279 sc->vsc_rx_ready = 0; 280 pci_vtnet_ring_reset(sc, VTNET_RXQ); 281 pci_vtnet_ring_reset(sc, VTNET_TXQ); 282 283 for (i = 0; i < VTNET_MAXQ; i++) 284 sc->vsc_msix_table_idx[i] = VIRTIO_MSI_NO_VECTOR; 285 286 sc->vsc_isr = 0; 287 sc->vsc_features = 0; 288 289 sc->resetting = 0; 290 } 291 292 sc->vsc_status = value; 293 } 294 295 static void 296 vtnet_generate_interrupt(struct pci_vtnet_softc *sc, int qidx) 297 { 298 299 if (use_msix) { 300 pci_generate_msix(sc->vsc_pi, sc->vsc_msix_table_idx[qidx]); 301 } else { 302 sc->vsc_isr |= 1; 303 pci_generate_msi(sc->vsc_pi, 0); 304 } 305 } 306 307 /* 308 * Called to send a buffer chain out to the tap device 309 */ 310 static void 311 pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt, 312 int len) 313 { 314 char pad[60]; 315 316 if (sc->vsc_tapfd == -1) 317 return; 318 319 /* 320 * If the length is < 60, pad out to that and add the 321 * extra zero'd segment to the iov. It is guaranteed that 322 * there is always an extra iov available by the caller. 323 */ 324 if (len < 60) { 325 memset(pad, 0, 60 - len); 326 iov[iovcnt].iov_base = pad; 327 iov[iovcnt].iov_len = 60 - len; 328 iovcnt++; 329 } 330 (void) writev(sc->vsc_tapfd, iov, iovcnt); 331 } 332 333 /* 334 * Called when there is read activity on the tap file descriptor. 335 * Each buffer posted by the guest is assumed to be able to contain 336 * an entire ethernet frame + rx header. 337 * MP note: the dummybuf is only used for discarding frames, so there 338 * is no need for it to be per-vtnet or locked. 339 */ 340 static uint8_t dummybuf[2048]; 341 342 static void 343 pci_vtnet_tap_rx(struct pci_vtnet_softc *sc) 344 { 345 struct virtio_desc *vd; 346 struct virtio_used *vu; 347 struct vring_hqueue *hq; 348 struct virtio_net_rxhdr *vrx; 349 uint8_t *buf; 350 int i; 351 int len; 352 int ndescs; 353 int didx, uidx, aidx; /* descriptor, avail and used index */ 354 355 /* 356 * Should never be called without a valid tap fd 357 */ 358 assert(sc->vsc_tapfd != -1); 359 360 /* 361 * But, will be called when the rx ring hasn't yet 362 * been set up or the guest is resetting the device. 363 */ 364 if (!sc->vsc_rx_ready || sc->resetting) { 365 /* 366 * Drop the packet and try later. 367 */ 368 (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); 369 return; 370 } 371 372 /* 373 * Calculate the number of available rx buffers 374 */ 375 hq = &sc->vsc_hq[VTNET_RXQ]; 376 377 ndescs = hq_num_avail(hq); 378 379 if (ndescs == 0) { 380 /* 381 * Drop the packet and try later 382 */ 383 (void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf)); 384 385 if (notify_on_empty(sc)) 386 vtnet_generate_interrupt(sc, VTNET_RXQ); 387 388 return; 389 } 390 391 aidx = hq->hq_cur_aidx; 392 uidx = *hq->hq_used_idx; 393 for (i = 0; i < ndescs; i++) { 394 /* 395 * 'aidx' indexes into the an array of descriptor indexes 396 */ 397 didx = hq->hq_avail_ring[aidx % hq->hq_size]; 398 assert(didx >= 0 && didx < hq->hq_size); 399 400 vd = &hq->hq_dtable[didx]; 401 402 /* 403 * Get a pointer to the rx header, and use the 404 * data immediately following it for the packet buffer. 405 */ 406 vrx = paddr_guest2host(vtnet_ctx(sc), vd->vd_addr, vd->vd_len); 407 buf = (uint8_t *)(vrx + 1); 408 409 len = read(sc->vsc_tapfd, buf, 410 vd->vd_len - sizeof(struct virtio_net_rxhdr)); 411 412 if (len < 0 && errno == EWOULDBLOCK) { 413 break; 414 } 415 416 /* 417 * The only valid field in the rx packet header is the 418 * number of buffers, which is always 1 without TSO 419 * support. 420 */ 421 memset(vrx, 0, sizeof(struct virtio_net_rxhdr)); 422 vrx->vrh_bufs = 1; 423 424 /* 425 * Write this descriptor into the used ring 426 */ 427 vu = &hq->hq_used_ring[uidx % hq->hq_size]; 428 vu->vu_idx = didx; 429 vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr); 430 uidx++; 431 aidx++; 432 } 433 434 /* 435 * Update the used pointer, and signal an interrupt if allowed 436 */ 437 *hq->hq_used_idx = uidx; 438 hq->hq_cur_aidx = aidx; 439 440 if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) 441 vtnet_generate_interrupt(sc, VTNET_RXQ); 442 } 443 444 static void 445 pci_vtnet_tap_callback(int fd, enum ev_type type, void *param) 446 { 447 struct pci_vtnet_softc *sc = param; 448 449 pthread_mutex_lock(&sc->rx_mtx); 450 sc->rx_in_progress = 1; 451 pci_vtnet_tap_rx(sc); 452 sc->rx_in_progress = 0; 453 pthread_mutex_unlock(&sc->rx_mtx); 454 455 } 456 457 static void 458 pci_vtnet_ping_rxq(struct pci_vtnet_softc *sc) 459 { 460 /* 461 * A qnotify means that the rx process can now begin 462 */ 463 if (sc->vsc_rx_ready == 0) { 464 sc->vsc_rx_ready = 1; 465 } 466 } 467 468 static void 469 pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vring_hqueue *hq) 470 { 471 struct iovec iov[VTNET_MAXSEGS + 1]; 472 struct virtio_desc *vd; 473 struct virtio_used *vu; 474 int i; 475 int plen; 476 int tlen; 477 int uidx, aidx, didx; 478 479 uidx = *hq->hq_used_idx; 480 aidx = hq->hq_cur_aidx; 481 didx = hq->hq_avail_ring[aidx % hq->hq_size]; 482 assert(didx >= 0 && didx < hq->hq_size); 483 484 vd = &hq->hq_dtable[didx]; 485 486 /* 487 * Run through the chain of descriptors, ignoring the 488 * first header descriptor. However, include the header 489 * length in the total length that will be put into the 490 * used queue. 491 */ 492 tlen = vd->vd_len; 493 vd = &hq->hq_dtable[vd->vd_next]; 494 495 for (i = 0, plen = 0; 496 i < VTNET_MAXSEGS; 497 i++, vd = &hq->hq_dtable[vd->vd_next]) { 498 iov[i].iov_base = paddr_guest2host(vtnet_ctx(sc), 499 vd->vd_addr, vd->vd_len); 500 iov[i].iov_len = vd->vd_len; 501 plen += vd->vd_len; 502 tlen += vd->vd_len; 503 504 if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0) 505 break; 506 } 507 assert(i < VTNET_MAXSEGS); 508 509 DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, i + 1)); 510 pci_vtnet_tap_tx(sc, iov, i + 1, plen); 511 512 /* 513 * Return this chain back to the host 514 */ 515 vu = &hq->hq_used_ring[uidx % hq->hq_size]; 516 vu->vu_idx = didx; 517 vu->vu_tlen = tlen; 518 hq->hq_cur_aidx = aidx + 1; 519 *hq->hq_used_idx = uidx + 1; 520 } 521 522 static void 523 pci_vtnet_ping_txq(struct pci_vtnet_softc *sc) 524 { 525 struct vring_hqueue *hq = &sc->vsc_hq[VTNET_TXQ]; 526 int ndescs; 527 528 /* 529 * Calculate number of ring entries to process 530 */ 531 ndescs = hq_num_avail(hq); 532 533 if (ndescs == 0) 534 return; 535 536 /* Signal the tx thread for processing */ 537 pthread_mutex_lock(&sc->tx_mtx); 538 if (sc->tx_in_progress == 0) 539 pthread_cond_signal(&sc->tx_cond); 540 pthread_mutex_unlock(&sc->tx_mtx); 541 } 542 543 /* 544 * Thread which will handle processing of TX desc 545 */ 546 static void * 547 pci_vtnet_tx_thread(void *param) 548 { 549 struct pci_vtnet_softc *sc = (struct pci_vtnet_softc *) param; 550 struct vring_hqueue *hq; 551 int i, ndescs, error; 552 553 hq = &sc->vsc_hq[VTNET_TXQ]; 554 555 /* 556 * Let us wait till the tx queue pointers get initialised & 557 * first tx signaled 558 */ 559 pthread_mutex_lock(&sc->tx_mtx); 560 error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 561 assert(error == 0); 562 563 for (;;) { 564 pthread_mutex_lock(&sc->tx_mtx); 565 for (;;) { 566 if (sc->resetting) 567 ndescs = 0; 568 else 569 ndescs = hq_num_avail(hq); 570 571 if (ndescs != 0) 572 break; 573 574 sc->tx_in_progress = 0; 575 error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx); 576 assert(error == 0); 577 } 578 sc->tx_in_progress = 1; 579 pthread_mutex_unlock(&sc->tx_mtx); 580 581 while (ndescs > 0) { 582 /* 583 * Run through all the entries, placing them into 584 * iovecs and sending when an end-of-packet is found 585 */ 586 for (i = 0; i < ndescs; i++) 587 pci_vtnet_proctx(sc, hq); 588 589 ndescs = hq_num_avail(hq); 590 } 591 592 /* 593 * Generate an interrupt if needed. 594 */ 595 if (notify_on_empty(sc) || 596 (*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0) 597 vtnet_generate_interrupt(sc, VTNET_TXQ); 598 } 599 } 600 601 static void 602 pci_vtnet_ping_ctlq(struct pci_vtnet_softc *sc) 603 { 604 605 DPRINTF(("vtnet: control qnotify!\n\r")); 606 } 607 608 static void 609 pci_vtnet_ring_init(struct pci_vtnet_softc *sc, uint64_t pfn) 610 { 611 struct vring_hqueue *hq; 612 int qnum = sc->vsc_curq; 613 614 assert(qnum < VTNET_MAXQ); 615 616 sc->vsc_pfn[qnum] = pfn << VRING_PFN; 617 618 /* 619 * Set up host pointers to the various parts of the 620 * queue 621 */ 622 hq = &sc->vsc_hq[qnum]; 623 hq->hq_size = pci_vtnet_qsize(qnum); 624 625 hq->hq_dtable = paddr_guest2host(vtnet_ctx(sc), pfn << VRING_PFN, 626 vring_size(hq->hq_size)); 627 hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size); 628 hq->hq_avail_idx = hq->hq_avail_flags + 1; 629 hq->hq_avail_ring = hq->hq_avail_flags + 2; 630 hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring, 631 VRING_ALIGN); 632 hq->hq_used_idx = hq->hq_used_flags + 1; 633 hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2); 634 635 /* 636 * Initialize queue indexes 637 */ 638 hq->hq_cur_aidx = 0; 639 } 640 641 static int 642 pci_vtnet_parsemac(char *mac_str, uint8_t *mac_addr) 643 { 644 struct ether_addr *ea; 645 char *tmpstr; 646 char zero_addr[ETHER_ADDR_LEN] = { 0, 0, 0, 0, 0, 0 }; 647 648 tmpstr = strsep(&mac_str,"="); 649 650 if ((mac_str != NULL) && (!strcmp(tmpstr,"mac"))) { 651 ea = ether_aton(mac_str); 652 653 if (ea == NULL || ETHER_IS_MULTICAST(ea->octet) || 654 memcmp(ea->octet, zero_addr, ETHER_ADDR_LEN) == 0) { 655 fprintf(stderr, "Invalid MAC %s\n", mac_str); 656 return (EINVAL); 657 } else 658 memcpy(mac_addr, ea->octet, ETHER_ADDR_LEN); 659 } 660 661 return (0); 662 } 663 664 665 static int 666 pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts) 667 { 668 MD5_CTX mdctx; 669 unsigned char digest[16]; 670 char nstr[80]; 671 char tname[MAXCOMLEN + 1]; 672 struct pci_vtnet_softc *sc; 673 const char *env_msi; 674 char *devname; 675 char *vtopts; 676 int mac_provided; 677 678 sc = malloc(sizeof(struct pci_vtnet_softc)); 679 memset(sc, 0, sizeof(struct pci_vtnet_softc)); 680 681 pi->pi_arg = sc; 682 sc->vsc_pi = pi; 683 684 pthread_mutex_init(&sc->vsc_mtx, NULL); 685 686 /* 687 * Use MSI if set by user 688 */ 689 if ((env_msi = getenv("BHYVE_USE_MSI")) != NULL) { 690 if (strcasecmp(env_msi, "yes") == 0) 691 use_msix = 0; 692 } 693 694 /* 695 * Attempt to open the tap device and read the MAC address 696 * if specified 697 */ 698 mac_provided = 0; 699 sc->vsc_tapfd = -1; 700 if (opts != NULL) { 701 char tbuf[80]; 702 int err; 703 704 devname = vtopts = strdup(opts); 705 (void) strsep(&vtopts, ","); 706 707 if (vtopts != NULL) { 708 err = pci_vtnet_parsemac(vtopts, sc->vsc_macaddr); 709 if (err != 0) { 710 free(devname); 711 return (err); 712 } 713 mac_provided = 1; 714 } 715 716 strcpy(tbuf, "/dev/"); 717 strlcat(tbuf, devname, sizeof(tbuf)); 718 719 free(devname); 720 721 sc->vsc_tapfd = open(tbuf, O_RDWR); 722 if (sc->vsc_tapfd == -1) { 723 WPRINTF(("open of tap device %s failed\n", tbuf)); 724 } else { 725 /* 726 * Set non-blocking and register for read 727 * notifications with the event loop 728 */ 729 int opt = 1; 730 if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) { 731 WPRINTF(("tap device O_NONBLOCK failed\n")); 732 close(sc->vsc_tapfd); 733 sc->vsc_tapfd = -1; 734 } 735 736 sc->vsc_mevp = mevent_add(sc->vsc_tapfd, 737 EVF_READ, 738 pci_vtnet_tap_callback, 739 sc); 740 if (sc->vsc_mevp == NULL) { 741 WPRINTF(("Could not register event\n")); 742 close(sc->vsc_tapfd); 743 sc->vsc_tapfd = -1; 744 } 745 } 746 } 747 748 /* 749 * The default MAC address is the standard NetApp OUI of 00-a0-98, 750 * followed by an MD5 of the PCI slot/func number and dev name 751 */ 752 if (!mac_provided) { 753 snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot, 754 pi->pi_func, vmname); 755 756 MD5Init(&mdctx); 757 MD5Update(&mdctx, nstr, strlen(nstr)); 758 MD5Final(digest, &mdctx); 759 760 sc->vsc_macaddr[0] = 0x00; 761 sc->vsc_macaddr[1] = 0xa0; 762 sc->vsc_macaddr[2] = 0x98; 763 sc->vsc_macaddr[3] = digest[0]; 764 sc->vsc_macaddr[4] = digest[1]; 765 sc->vsc_macaddr[5] = digest[2]; 766 } 767 768 /* initialize config space */ 769 pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); 770 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 771 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); 772 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET); 773 774 if (use_msix) { 775 /* MSI-X support */ 776 int i; 777 778 for (i = 0; i < VTNET_MAXQ; i++) 779 sc->vsc_msix_table_idx[i] = VIRTIO_MSI_NO_VECTOR; 780 781 /* 782 * BAR 1 used to map MSI-X table and PBA 783 */ 784 if (pci_emul_add_msixcap(pi, VTNET_MAXQ, 1)) 785 return (1); 786 } else { 787 /* MSI support */ 788 pci_emul_add_msicap(pi, 1); 789 } 790 791 pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTNET_REGSZ); 792 793 sc->resetting = 0; 794 795 sc->rx_in_progress = 0; 796 pthread_mutex_init(&sc->rx_mtx, NULL); 797 798 /* 799 * Initialize tx semaphore & spawn TX processing thread 800 * As of now, only one thread for TX desc processing is 801 * spawned. 802 */ 803 sc->tx_in_progress = 0; 804 pthread_mutex_init(&sc->tx_mtx, NULL); 805 pthread_cond_init(&sc->tx_cond, NULL); 806 pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc); 807 snprintf(tname, sizeof(tname), "%s vtnet%d tx", vmname, pi->pi_slot); 808 pthread_set_name_np(sc->tx_tid, tname); 809 810 return (0); 811 } 812 813 /* 814 * Function pointer array to handle queue notifications 815 */ 816 static void (*pci_vtnet_qnotify[VTNET_MAXQ])(struct pci_vtnet_softc *) = { 817 pci_vtnet_ping_rxq, 818 pci_vtnet_ping_txq, 819 pci_vtnet_ping_ctlq 820 }; 821 822 static uint64_t 823 vtnet_adjust_offset(struct pci_devinst *pi, uint64_t offset) 824 { 825 /* 826 * Device specific offsets used by guest would change based on 827 * whether MSI-X capability is enabled or not 828 */ 829 if (!pci_msix_enabled(pi)) { 830 if (offset >= VTCFG_R_MSIX) 831 return (offset + (VTCFG_R_CFG1 - VTCFG_R_MSIX)); 832 } 833 834 return (offset); 835 } 836 837 static void 838 pci_vtnet_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 839 int baridx, uint64_t offset, int size, uint64_t value) 840 { 841 struct pci_vtnet_softc *sc = pi->pi_arg; 842 void *ptr; 843 844 if (use_msix) { 845 if (baridx == pci_msix_table_bar(pi) || 846 baridx == pci_msix_pba_bar(pi)) { 847 pci_emul_msix_twrite(pi, offset, size, value); 848 return; 849 } 850 } 851 852 assert(baridx == 0); 853 854 if (offset + size > pci_vtnet_iosize(pi)) { 855 DPRINTF(("vtnet_write: 2big, offset %ld size %d\n", 856 offset, size)); 857 return; 858 } 859 860 pthread_mutex_lock(&sc->vsc_mtx); 861 862 offset = vtnet_adjust_offset(pi, offset); 863 864 switch (offset) { 865 case VTCFG_R_GUESTCAP: 866 assert(size == 4); 867 sc->vsc_features = value & VTNET_S_HOSTCAPS; 868 break; 869 case VTCFG_R_PFN: 870 assert(size == 4); 871 pci_vtnet_ring_init(sc, value); 872 break; 873 case VTCFG_R_QSEL: 874 assert(size == 2); 875 assert(value < VTNET_MAXQ); 876 sc->vsc_curq = value; 877 break; 878 case VTCFG_R_QNOTIFY: 879 assert(size == 2); 880 assert(value < VTNET_MAXQ); 881 (*pci_vtnet_qnotify[value])(sc); 882 break; 883 case VTCFG_R_STATUS: 884 assert(size == 1); 885 pci_vtnet_update_status(sc, value); 886 break; 887 case VTCFG_R_CFGVEC: 888 assert(size == 2); 889 sc->vsc_msix_table_idx[VTNET_CTLQ] = value; 890 break; 891 case VTCFG_R_QVEC: 892 assert(size == 2); 893 assert(sc->vsc_curq != VTNET_CTLQ); 894 sc->vsc_msix_table_idx[sc->vsc_curq] = value; 895 break; 896 case VTNET_R_CFG0: 897 case VTNET_R_CFG1: 898 case VTNET_R_CFG2: 899 case VTNET_R_CFG3: 900 case VTNET_R_CFG4: 901 case VTNET_R_CFG5: 902 assert((size + offset) <= (VTNET_R_CFG5 + 1)); 903 ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0]; 904 /* 905 * The driver is allowed to change the MAC address 906 */ 907 sc->vsc_macaddr[offset - VTNET_R_CFG0] = value; 908 if (size == 1) { 909 *(uint8_t *) ptr = value; 910 } else if (size == 2) { 911 *(uint16_t *) ptr = value; 912 } else { 913 *(uint32_t *) ptr = value; 914 } 915 break; 916 case VTCFG_R_HOSTCAP: 917 case VTCFG_R_QNUM: 918 case VTCFG_R_ISR: 919 case VTNET_R_CFG6: 920 case VTNET_R_CFG7: 921 DPRINTF(("vtnet: write to readonly reg %ld\n\r", offset)); 922 break; 923 default: 924 DPRINTF(("vtnet: unknown i/o write offset %ld\n\r", offset)); 925 value = 0; 926 break; 927 } 928 929 pthread_mutex_unlock(&sc->vsc_mtx); 930 } 931 932 uint64_t 933 pci_vtnet_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi, 934 int baridx, uint64_t offset, int size) 935 { 936 struct pci_vtnet_softc *sc = pi->pi_arg; 937 void *ptr; 938 uint64_t value; 939 940 if (use_msix) { 941 if (baridx == pci_msix_table_bar(pi) || 942 baridx == pci_msix_pba_bar(pi)) { 943 return (pci_emul_msix_tread(pi, offset, size)); 944 } 945 } 946 947 assert(baridx == 0); 948 949 if (offset + size > pci_vtnet_iosize(pi)) { 950 DPRINTF(("vtnet_read: 2big, offset %ld size %d\n", 951 offset, size)); 952 return (0); 953 } 954 955 pthread_mutex_lock(&sc->vsc_mtx); 956 957 offset = vtnet_adjust_offset(pi, offset); 958 959 switch (offset) { 960 case VTCFG_R_HOSTCAP: 961 assert(size == 4); 962 value = VTNET_S_HOSTCAPS; 963 break; 964 case VTCFG_R_GUESTCAP: 965 assert(size == 4); 966 value = sc->vsc_features; /* XXX never read ? */ 967 break; 968 case VTCFG_R_PFN: 969 assert(size == 4); 970 value = sc->vsc_pfn[sc->vsc_curq] >> VRING_PFN; 971 break; 972 case VTCFG_R_QNUM: 973 assert(size == 2); 974 value = pci_vtnet_qsize(sc->vsc_curq); 975 break; 976 case VTCFG_R_QSEL: 977 assert(size == 2); 978 value = sc->vsc_curq; /* XXX never read ? */ 979 break; 980 case VTCFG_R_QNOTIFY: 981 assert(size == 2); 982 value = sc->vsc_curq; /* XXX never read ? */ 983 break; 984 case VTCFG_R_STATUS: 985 assert(size == 1); 986 value = sc->vsc_status; 987 break; 988 case VTCFG_R_ISR: 989 assert(size == 1); 990 value = sc->vsc_isr; 991 sc->vsc_isr = 0; /* a read clears this flag */ 992 break; 993 case VTCFG_R_CFGVEC: 994 assert(size == 2); 995 value = sc->vsc_msix_table_idx[VTNET_CTLQ]; 996 break; 997 case VTCFG_R_QVEC: 998 assert(size == 2); 999 assert(sc->vsc_curq != VTNET_CTLQ); 1000 value = sc->vsc_msix_table_idx[sc->vsc_curq]; 1001 break; 1002 case VTNET_R_CFG0: 1003 case VTNET_R_CFG1: 1004 case VTNET_R_CFG2: 1005 case VTNET_R_CFG3: 1006 case VTNET_R_CFG4: 1007 case VTNET_R_CFG5: 1008 assert((size + offset) <= (VTNET_R_CFG5 + 1)); 1009 ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0]; 1010 if (size == 1) { 1011 value = *(uint8_t *) ptr; 1012 } else if (size == 2) { 1013 value = *(uint16_t *) ptr; 1014 } else { 1015 value = *(uint32_t *) ptr; 1016 } 1017 break; 1018 case VTNET_R_CFG6: 1019 assert(size != 4); 1020 value = 0x01; /* XXX link always up */ 1021 break; 1022 case VTNET_R_CFG7: 1023 assert(size == 1); 1024 value = 0; /* XXX link status in LSB */ 1025 break; 1026 default: 1027 DPRINTF(("vtnet: unknown i/o read offset %ld\n\r", offset)); 1028 value = 0; 1029 break; 1030 } 1031 1032 pthread_mutex_unlock(&sc->vsc_mtx); 1033 1034 return (value); 1035 } 1036 1037 struct pci_devemu pci_de_vnet = { 1038 .pe_emu = "virtio-net", 1039 .pe_init = pci_vtnet_init, 1040 .pe_barwrite = pci_vtnet_write, 1041 .pe_barread = pci_vtnet_read 1042 }; 1043 PCI_EMUL_SET(pci_de_vnet); 1044