/*-
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 *
 * $FreeBSD$
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/select.h>
#include <sys/uio.h>
#include <sys/ioctl.h>

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>
#include <md5.h>
#include <pthread.h>
#include <pthread_np.h>

#include "bhyverun.h"
#include "pci_emul.h"
#include "mevent.h"
#include "virtio.h"

#define VTNET_RINGSZ	1024

#define VTNET_MAXSEGS	32

/*
 * Device-specific config register offsets within the virtio I/O BAR
 * (MSI-X layout; adjusted by vtnet_adjust_offset() when MSI-X is disabled)
 */
#define VTNET_R_CFG0	24
#define VTNET_R_CFG1	25
#define VTNET_R_CFG2	26
#define VTNET_R_CFG3	27
#define VTNET_R_CFG4	28
#define VTNET_R_CFG5	29
#define VTNET_R_CFG6	30
#define VTNET_R_CFG7	31
#define VTNET_R_MAX	31

#define VTNET_REGSZ	VTNET_R_MAX+1

/*
 * Host capabilities
 */
#define VTNET_S_HOSTCAPS      \
  ( 0x00000020 |	/* host supplies MAC */ \
    0x00008000 |	/* host can merge Rx buffers */ \
    0x00010000 |	/* config status available */ \
    VIRTIO_F_NOTIFY_ON_EMPTY)

/*
 * Queue definitions.
 */
#define VTNET_RXQ	0
#define VTNET_TXQ	1
#define VTNET_CTLQ	2

#define VTNET_MAXQ	3

static int use_msix = 1;

struct vring_hqueue {
	/* Internal state */
	uint16_t	hq_size;
	uint16_t	hq_cur_aidx;		/* trails behind 'avail_idx' */

	/* Host-context pointers to the queue */
	struct virtio_desc *hq_dtable;
	uint16_t	*hq_avail_flags;
	uint16_t	*hq_avail_idx;		/* monotonically increasing */
	uint16_t	*hq_avail_ring;

	uint16_t	*hq_used_flags;
	uint16_t	*hq_used_idx;		/* monotonically increasing */
	struct virtio_used *hq_used_ring;
};

/*
 * Fixed network header size
 */
struct virtio_net_rxhdr {
	uint8_t		vrh_flags;
	uint8_t		vrh_gso_type;
	uint16_t	vrh_hdr_len;
	uint16_t	vrh_gso_size;
	uint16_t	vrh_csum_start;
	uint16_t	vrh_csum_offset;
	uint16_t	vrh_bufs;
} __packed;

/*
 * Debug printf
 */
static int pci_vtnet_debug;
#define DPRINTF(params) if (pci_vtnet_debug) printf params
#define WPRINTF(params) printf params

/*
 * Per-device softc
 */
struct pci_vtnet_softc {
	struct pci_devinst *vsc_pi;
	pthread_mutex_t vsc_mtx;
	struct mevent	*vsc_mevp;

	int		vsc_curq;
	int		vsc_status;
	int		vsc_isr;
	int		vsc_tapfd;
	int		vsc_rx_ready;
	int		resetting;

	uint32_t	vsc_features;
	uint8_t		vsc_macaddr[6];

	uint64_t	vsc_pfn[VTNET_MAXQ];
	struct vring_hqueue vsc_hq[VTNET_MAXQ];
	uint16_t	vsc_msix_table_idx[VTNET_MAXQ];

	pthread_mutex_t	rx_mtx;
	int		rx_in_progress;

	pthread_t	tx_tid;
	pthread_mutex_t	tx_mtx;
	pthread_cond_t	tx_cond;
	int		tx_in_progress;
};
#define	vtnet_ctx(sc)		((sc)->vsc_pi->pi_vmctx)
#define	notify_on_empty(sc)	((sc)->vsc_features & VIRTIO_F_NOTIFY_ON_EMPTY)

/*
 * Return the size of IO BAR that maps virtio header and device specific
 * region. The size would vary depending on whether MSI-X is enabled or
 * not.
 */
static uint64_t
pci_vtnet_iosize(struct pci_devinst *pi)
{
	if (pci_msix_enabled(pi))
		return (VTNET_REGSZ);
	else
		return (VTNET_REGSZ - (VTCFG_R_CFG1 - VTCFG_R_MSIX));
}

/*
 * Return the number of available descriptors in the vring taking care
 * of the 16-bit index wraparound.
 */
static int
hq_num_avail(struct vring_hqueue *hq)
{
	uint16_t ndesc;

	/*
	 * We're just computing (a-b) mod 2^16
	 *
	 * The only glitch here is that in standard C,
	 * uint16_t promotes to (signed) int when int has
	 * more than 16 bits (pretty much always now), so
	 * we have to force it back to unsigned.
	 */
	ndesc = (unsigned)*hq->hq_avail_idx - (unsigned)hq->hq_cur_aidx;

	assert(ndesc <= hq->hq_size);

	return (ndesc);
}

static uint16_t
pci_vtnet_qsize(int qnum)
{
	/* XXX no ctl queue currently */
	if (qnum == VTNET_CTLQ) {
		return (0);
	}

	/* XXX fixed currently. Maybe different for tx/rx/ctl */
	return (VTNET_RINGSZ);
}

static void
pci_vtnet_ring_reset(struct pci_vtnet_softc *sc, int ring)
{
	struct vring_hqueue *hq;

	assert(ring < VTNET_MAXQ);

	hq = &sc->vsc_hq[ring];

	/*
	 * Reset all soft state
	 */
	hq->hq_cur_aidx = 0;
}

/*
 * If the transmit thread is active then stall until it is done.
 */
static void
pci_vtnet_txwait(struct pci_vtnet_softc *sc)
{

	pthread_mutex_lock(&sc->tx_mtx);
	while (sc->tx_in_progress) {
		pthread_mutex_unlock(&sc->tx_mtx);
		usleep(10000);
		pthread_mutex_lock(&sc->tx_mtx);
	}
	pthread_mutex_unlock(&sc->tx_mtx);
}

/*
 * If the receive thread is active then stall until it is done.
 */
static void
pci_vtnet_rxwait(struct pci_vtnet_softc *sc)
{

	pthread_mutex_lock(&sc->rx_mtx);
	while (sc->rx_in_progress) {
		pthread_mutex_unlock(&sc->rx_mtx);
		usleep(10000);
		pthread_mutex_lock(&sc->rx_mtx);
	}
	pthread_mutex_unlock(&sc->rx_mtx);
}

static void
pci_vtnet_update_status(struct pci_vtnet_softc *sc, uint32_t value)
{
	int i;

	if (value == 0) {
		DPRINTF(("vtnet: device reset requested !\n"));

		sc->resetting = 1;

		/*
		 * Wait for the transmit and receive threads to finish their
		 * processing.
		 */
		pci_vtnet_txwait(sc);
		pci_vtnet_rxwait(sc);

		sc->vsc_rx_ready = 0;
		pci_vtnet_ring_reset(sc, VTNET_RXQ);
		pci_vtnet_ring_reset(sc, VTNET_TXQ);

		for (i = 0; i < VTNET_MAXQ; i++)
			sc->vsc_msix_table_idx[i] = VIRTIO_MSI_NO_VECTOR;

		sc->vsc_isr = 0;
		sc->vsc_features = 0;

		sc->resetting = 0;
	}

	sc->vsc_status = value;
}

static void
vtnet_generate_interrupt(struct pci_vtnet_softc *sc, int qidx)
{

	if (use_msix) {
		pci_generate_msix(sc->vsc_pi, sc->vsc_msix_table_idx[qidx]);
	} else {
		sc->vsc_isr |= 1;
		pci_generate_msi(sc->vsc_pi, 0);
	}
}

/*
 * Called to send a buffer chain out to the tap device
 */
static void
pci_vtnet_tap_tx(struct pci_vtnet_softc *sc, struct iovec *iov, int iovcnt,
		 int len)
{
	char pad[60];

	if (sc->vsc_tapfd == -1)
		return;

	/*
	 * If the length is < 60, pad out to that and add the
	 * extra zero'd segment to the iov. It is guaranteed that
	 * there is always an extra iov available by the caller.
	 */
	if (len < 60) {
		memset(pad, 0, 60 - len);
		iov[iovcnt].iov_base = pad;
		iov[iovcnt].iov_len = 60 - len;
		iovcnt++;
	}
	(void) writev(sc->vsc_tapfd, iov, iovcnt);
}

/*
 * Called when there is read activity on the tap file descriptor.
 * Each buffer posted by the guest is assumed to be able to contain
 * an entire ethernet frame + rx header.
 * MP note: the dummybuf is only used for discarding frames, so there
 * is no need for it to be per-vtnet or locked.
 */
static uint8_t dummybuf[2048];

static void
pci_vtnet_tap_rx(struct pci_vtnet_softc *sc)
{
	struct virtio_desc *vd;
	struct virtio_used *vu;
	struct vring_hqueue *hq;
	struct virtio_net_rxhdr *vrx;
	uint8_t *buf;
	int i;
	int len;
	int ndescs;
	int didx, uidx, aidx;	/* descriptor, avail and used index */

	/*
	 * Should never be called without a valid tap fd
	 */
	assert(sc->vsc_tapfd != -1);

	/*
	 * But, will be called when the rx ring hasn't yet
	 * been set up or the guest is resetting the device.
	 */
	if (!sc->vsc_rx_ready || sc->resetting) {
		/*
		 * Drop the packet and try later.
		 */
		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));
		return;
	}

	/*
	 * Calculate the number of available rx buffers
	 */
	hq = &sc->vsc_hq[VTNET_RXQ];

	ndescs = hq_num_avail(hq);

	if (ndescs == 0) {
		/*
		 * Drop the packet and try later
		 */
		(void) read(sc->vsc_tapfd, dummybuf, sizeof(dummybuf));

		if (notify_on_empty(sc))
			vtnet_generate_interrupt(sc, VTNET_RXQ);

		return;
	}

	aidx = hq->hq_cur_aidx;
	uidx = *hq->hq_used_idx;
	for (i = 0; i < ndescs; i++) {
		/*
		 * 'aidx' indexes into an array of descriptor indexes
		 */
		didx = hq->hq_avail_ring[aidx % hq->hq_size];
		assert(didx >= 0 && didx < hq->hq_size);

		vd = &hq->hq_dtable[didx];

		/*
		 * Get a pointer to the rx header, and use the
		 * data immediately following it for the packet buffer.
		 */
		vrx = paddr_guest2host(vtnet_ctx(sc), vd->vd_addr, vd->vd_len);
		buf = (uint8_t *)(vrx + 1);

		len = read(sc->vsc_tapfd, buf,
			   vd->vd_len - sizeof(struct virtio_net_rxhdr));

		if (len < 0 && errno == EWOULDBLOCK) {
			break;
		}

		/*
		 * The only valid field in the rx packet header is the
		 * number of buffers, which is always 1 without TSO
		 * support.
		 */
		memset(vrx, 0, sizeof(struct virtio_net_rxhdr));
		vrx->vrh_bufs = 1;

		/*
		 * Write this descriptor into the used ring
		 */
		vu = &hq->hq_used_ring[uidx % hq->hq_size];
		vu->vu_idx = didx;
		vu->vu_tlen = len + sizeof(struct virtio_net_rxhdr);
		uidx++;
		aidx++;
	}

	/*
	 * Update the used pointer, and signal an interrupt if allowed
	 */
	*hq->hq_used_idx = uidx;
	hq->hq_cur_aidx = aidx;

	if ((*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0)
		vtnet_generate_interrupt(sc, VTNET_RXQ);
}

static void
pci_vtnet_tap_callback(int fd, enum ev_type type, void *param)
{
	struct pci_vtnet_softc *sc = param;

	pthread_mutex_lock(&sc->rx_mtx);
	sc->rx_in_progress = 1;
	pci_vtnet_tap_rx(sc);
	sc->rx_in_progress = 0;
	pthread_mutex_unlock(&sc->rx_mtx);
}

static void
pci_vtnet_ping_rxq(struct pci_vtnet_softc *sc)
{
	/*
	 * A qnotify means that the rx process can now begin
	 */
	if (sc->vsc_rx_ready == 0) {
		sc->vsc_rx_ready = 1;
	}
}

static void
pci_vtnet_proctx(struct pci_vtnet_softc *sc, struct vring_hqueue *hq)
{
	struct iovec iov[VTNET_MAXSEGS + 1];
	struct virtio_desc *vd;
	struct virtio_used *vu;
	int i;
	int plen;
	int tlen;
	int uidx, aidx, didx;

	uidx = *hq->hq_used_idx;
	aidx = hq->hq_cur_aidx;
	didx = hq->hq_avail_ring[aidx % hq->hq_size];
	assert(didx >= 0 && didx < hq->hq_size);

	vd = &hq->hq_dtable[didx];

	/*
	 * Run through the chain of descriptors, ignoring the
	 * first header descriptor. However, include the header
	 * length in the total length that will be put into the
	 * used queue.
	 */
	tlen = vd->vd_len;
	vd = &hq->hq_dtable[vd->vd_next];

	for (i = 0, plen = 0;
	     i < VTNET_MAXSEGS;
	     i++, vd = &hq->hq_dtable[vd->vd_next]) {
		iov[i].iov_base = paddr_guest2host(vtnet_ctx(sc),
				    vd->vd_addr, vd->vd_len);
		iov[i].iov_len = vd->vd_len;
		plen += vd->vd_len;
		tlen += vd->vd_len;

		if ((vd->vd_flags & VRING_DESC_F_NEXT) == 0)
			break;
	}
	assert(i < VTNET_MAXSEGS);

	DPRINTF(("virtio: packet send, %d bytes, %d segs\n\r", plen, i + 1));
	pci_vtnet_tap_tx(sc, iov, i + 1, plen);

	/*
	 * Return this chain back to the host
	 */
	vu = &hq->hq_used_ring[uidx % hq->hq_size];
	vu->vu_idx = didx;
	vu->vu_tlen = tlen;
	hq->hq_cur_aidx = aidx + 1;
	*hq->hq_used_idx = uidx + 1;
}

static void
pci_vtnet_ping_txq(struct pci_vtnet_softc *sc)
{
	struct vring_hqueue *hq = &sc->vsc_hq[VTNET_TXQ];
	int ndescs;

	/*
	 * Calculate number of ring entries to process
	 */
	ndescs = hq_num_avail(hq);

	if (ndescs == 0)
		return;

	/* Signal the tx thread for processing */
	pthread_mutex_lock(&sc->tx_mtx);
	if (sc->tx_in_progress == 0)
		pthread_cond_signal(&sc->tx_cond);
	pthread_mutex_unlock(&sc->tx_mtx);
}

/*
 * Thread which will handle processing of TX desc
 */
static void *
pci_vtnet_tx_thread(void *param)
{
	struct pci_vtnet_softc *sc = (struct pci_vtnet_softc *) param;
	struct vring_hqueue *hq;
	int i, ndescs, error;

	hq = &sc->vsc_hq[VTNET_TXQ];

	/*
	 * Wait until the tx queue pointers have been initialised and
	 * the first tx has been signaled.
	 */
	pthread_mutex_lock(&sc->tx_mtx);
	error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
	assert(error == 0);
	/*
	 * pthread_cond_wait() returns with tx_mtx held; drop it before
	 * entering the main loop, which re-acquires it on each pass.
	 */
	pthread_mutex_unlock(&sc->tx_mtx);

	for (;;) {
		pthread_mutex_lock(&sc->tx_mtx);
		for (;;) {
			if (sc->resetting)
				ndescs = 0;
			else
				ndescs = hq_num_avail(hq);

			if (ndescs != 0)
				break;

			sc->tx_in_progress = 0;
			error = pthread_cond_wait(&sc->tx_cond, &sc->tx_mtx);
			assert(error == 0);
		}
		sc->tx_in_progress = 1;
		pthread_mutex_unlock(&sc->tx_mtx);

		while (ndescs > 0) {
			/*
			 * Run through all the entries, placing them into
			 * iovecs and sending when an end-of-packet is found
			 */
			for (i = 0; i < ndescs; i++)
				pci_vtnet_proctx(sc, hq);

			ndescs = hq_num_avail(hq);
		}

		/*
		 * Generate an interrupt if needed.
		 */
		if (notify_on_empty(sc) ||
		    (*hq->hq_avail_flags & VRING_AVAIL_F_NO_INTERRUPT) == 0)
			vtnet_generate_interrupt(sc, VTNET_TXQ);
	}
}

static void
pci_vtnet_ping_ctlq(struct pci_vtnet_softc *sc)
{

	DPRINTF(("vtnet: control qnotify!\n\r"));
}

static void
pci_vtnet_ring_init(struct pci_vtnet_softc *sc, uint64_t pfn)
{
	struct vring_hqueue *hq;
	int qnum = sc->vsc_curq;

	assert(qnum < VTNET_MAXQ);

	sc->vsc_pfn[qnum] = pfn << VRING_PFN;

	/*
	 * Set up host pointers to the various parts of the
	 * queue
	 */
	hq = &sc->vsc_hq[qnum];
	hq->hq_size = pci_vtnet_qsize(qnum);

	hq->hq_dtable = paddr_guest2host(vtnet_ctx(sc), pfn << VRING_PFN,
					 vring_size(hq->hq_size));
	hq->hq_avail_flags = (uint16_t *)(hq->hq_dtable + hq->hq_size);
	hq->hq_avail_idx = hq->hq_avail_flags + 1;
	hq->hq_avail_ring = hq->hq_avail_flags + 2;
	hq->hq_used_flags = (uint16_t *)roundup2((uintptr_t)hq->hq_avail_ring,
						 VRING_ALIGN);
	hq->hq_used_idx = hq->hq_used_flags + 1;
	hq->hq_used_ring = (struct virtio_used *)(hq->hq_used_flags + 2);

	/*
	 * Initialize queue indexes
	 */
	hq->hq_cur_aidx = 0;
}

static int
pci_vtnet_init(struct vmctx *ctx, struct pci_devinst *pi, char *opts)
{
	MD5_CTX mdctx;
	unsigned char digest[16];
	char nstr[80];
	char tname[MAXCOMLEN + 1];
	struct pci_vtnet_softc *sc;
	const char *env_msi;

	sc = malloc(sizeof(struct pci_vtnet_softc));
	memset(sc, 0, sizeof(struct pci_vtnet_softc));

	pi->pi_arg = sc;
	sc->vsc_pi = pi;

	pthread_mutex_init(&sc->vsc_mtx, NULL);

	/*
	 * Use MSI if set by user
	 */
	if ((env_msi = getenv("BHYVE_USE_MSI")) != NULL) {
		if (strcasecmp(env_msi, "yes") == 0)
			use_msix = 0;
	}

	/*
	 * Attempt to open the tap device
	 */
	sc->vsc_tapfd = -1;
	if (opts != NULL) {
		char tbuf[80];

		strcpy(tbuf, "/dev/");
		strlcat(tbuf, opts, sizeof(tbuf));

		sc->vsc_tapfd = open(tbuf, O_RDWR);
		if (sc->vsc_tapfd == -1) {
			WPRINTF(("open of tap device %s failed\n", tbuf));
		} else {
			/*
			 * Set non-blocking and register for read
			 * notifications with the event loop
			 */
			int opt = 1;
			if (ioctl(sc->vsc_tapfd, FIONBIO, &opt) < 0) {
				WPRINTF(("tap device O_NONBLOCK failed\n"));
				close(sc->vsc_tapfd);
				sc->vsc_tapfd = -1;
			}

			sc->vsc_mevp = mevent_add(sc->vsc_tapfd,
						  EVF_READ,
						  pci_vtnet_tap_callback,
						  sc);
			if (sc->vsc_mevp == NULL) {
				WPRINTF(("Could not register event\n"));
				close(sc->vsc_tapfd);
				sc->vsc_tapfd = -1;
			}
		}
	}

	/*
	 * The MAC address is the standard NetApp OUI of 00-a0-98,
	 * followed by an MD5 of the vm name. The slot/func number is
	 * prepended to this for slots other than 1:0, so that
	 * a bootloader can netboot from the equivalent of slot 1.
	 */
	if (pi->pi_slot == 1 && pi->pi_func == 0) {
		strncpy(nstr, vmname, sizeof(nstr));
	} else {
		snprintf(nstr, sizeof(nstr), "%d-%d-%s", pi->pi_slot,
		    pi->pi_func, vmname);
	}

	MD5Init(&mdctx);
	MD5Update(&mdctx, nstr, strlen(nstr));
	MD5Final(digest, &mdctx);

	sc->vsc_macaddr[0] = 0x00;
	sc->vsc_macaddr[1] = 0xa0;
	sc->vsc_macaddr[2] = 0x98;
	sc->vsc_macaddr[3] = digest[0];
	sc->vsc_macaddr[4] = digest[1];
	sc->vsc_macaddr[5] = digest[2];

	/* initialize config space */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_TYPE_NET);

	if (use_msix) {
		/* MSI-X support */
		int i;

		for (i = 0; i < VTNET_MAXQ; i++)
			sc->vsc_msix_table_idx[i] = VIRTIO_MSI_NO_VECTOR;

		/*
		 * BAR 1 used to map MSI-X table and PBA
		 */
		if (pci_emul_add_msixcap(pi, VTNET_MAXQ, 1))
			return (1);
	} else {
		/* MSI support */
		pci_emul_add_msicap(pi, 1);
	}

	pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VTNET_REGSZ);

	sc->resetting = 0;

	sc->rx_in_progress = 0;
	pthread_mutex_init(&sc->rx_mtx, NULL);

	/*
	 * Initialize tx semaphore & spawn TX processing thread
	 * As of now, only one thread for TX desc processing is
	 * spawned.
	 */
	sc->tx_in_progress = 0;
	pthread_mutex_init(&sc->tx_mtx, NULL);
	pthread_cond_init(&sc->tx_cond, NULL);
	pthread_create(&sc->tx_tid, NULL, pci_vtnet_tx_thread, (void *)sc);
	snprintf(tname, sizeof(tname), "%s vtnet%d tx", vmname, pi->pi_slot);
	pthread_set_name_np(sc->tx_tid, tname);

	return (0);
}

/*
 * Function pointer array to handle queue notifications
 */
static void (*pci_vtnet_qnotify[VTNET_MAXQ])(struct pci_vtnet_softc *) = {
	pci_vtnet_ping_rxq,
	pci_vtnet_ping_txq,
	pci_vtnet_ping_ctlq
};

static uint64_t
vtnet_adjust_offset(struct pci_devinst *pi, uint64_t offset)
{
	/*
	 * Device specific offsets used by guest would change based on
	 * whether MSI-X capability is enabled or not
	 */
	if (!pci_msix_enabled(pi)) {
		if (offset >= VTCFG_R_MSIX)
			return (offset + (VTCFG_R_CFG1 - VTCFG_R_MSIX));
	}

	return (offset);
}

static void
pci_vtnet_write(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
		int baridx, uint64_t offset, int size, uint64_t value)
{
	struct pci_vtnet_softc *sc = pi->pi_arg;
	void *ptr;

	if (use_msix) {
		if (baridx == pci_msix_table_bar(pi) ||
		    baridx == pci_msix_pba_bar(pi)) {
			pci_emul_msix_twrite(pi, offset, size, value);
			return;
		}
	}

	assert(baridx == 0);

	if (offset + size > pci_vtnet_iosize(pi)) {
		DPRINTF(("vtnet_write: 2big, offset %ld size %d\n",
			 offset, size));
		return;
	}

	pthread_mutex_lock(&sc->vsc_mtx);

	offset = vtnet_adjust_offset(pi, offset);

	switch (offset) {
	case VTCFG_R_GUESTCAP:
		assert(size == 4);
		sc->vsc_features = value & VTNET_S_HOSTCAPS;
		break;
	case VTCFG_R_PFN:
		assert(size == 4);
		pci_vtnet_ring_init(sc, value);
		break;
	case VTCFG_R_QSEL:
		assert(size == 2);
		assert(value < VTNET_MAXQ);
		sc->vsc_curq = value;
		break;
	case VTCFG_R_QNOTIFY:
		assert(size == 2);
		assert(value < VTNET_MAXQ);
		(*pci_vtnet_qnotify[value])(sc);
		break;
	case VTCFG_R_STATUS:
		assert(size == 1);
		pci_vtnet_update_status(sc, value);
		break;
	case VTCFG_R_CFGVEC:
		assert(size == 2);
		sc->vsc_msix_table_idx[VTNET_CTLQ] = value;
		break;
	case VTCFG_R_QVEC:
		assert(size == 2);
		assert(sc->vsc_curq != VTNET_CTLQ);
		sc->vsc_msix_table_idx[sc->vsc_curq] = value;
		break;
	case VTNET_R_CFG0:
	case VTNET_R_CFG1:
	case VTNET_R_CFG2:
	case VTNET_R_CFG3:
	case VTNET_R_CFG4:
	case VTNET_R_CFG5:
		assert((size + offset) <= (VTNET_R_CFG5 + 1));
		ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0];
		/*
		 * The driver is allowed to change the MAC address
		 */
		sc->vsc_macaddr[offset - VTNET_R_CFG0] = value;
		if (size == 1) {
			*(uint8_t *) ptr = value;
		} else if (size == 2) {
			*(uint16_t *) ptr = value;
		} else {
			*(uint32_t *) ptr = value;
		}
		break;
	case VTCFG_R_HOSTCAP:
	case VTCFG_R_QNUM:
	case VTCFG_R_ISR:
	case VTNET_R_CFG6:
	case VTNET_R_CFG7:
		DPRINTF(("vtnet: write to readonly reg %ld\n\r", offset));
		break;
	default:
		DPRINTF(("vtnet: unknown i/o write offset %ld\n\r", offset));
		value = 0;
		break;
	}

	pthread_mutex_unlock(&sc->vsc_mtx);
}

uint64_t
pci_vtnet_read(struct vmctx *ctx, int vcpu, struct pci_devinst *pi,
	       int baridx, uint64_t offset, int size)
{
	struct pci_vtnet_softc *sc = pi->pi_arg;
	void *ptr;
	uint64_t value;

	if (use_msix) {
		if (baridx == pci_msix_table_bar(pi) ||
		    baridx == pci_msix_pba_bar(pi)) {
			return (pci_emul_msix_tread(pi, offset, size));
		}
	}

	assert(baridx == 0);

	if (offset + size > pci_vtnet_iosize(pi)) {
		DPRINTF(("vtnet_read: 2big, offset %ld size %d\n",
			 offset, size));
		return (0);
	}

	pthread_mutex_lock(&sc->vsc_mtx);

	offset = vtnet_adjust_offset(pi, offset);

	switch (offset) {
	case VTCFG_R_HOSTCAP:
		assert(size == 4);
		value = VTNET_S_HOSTCAPS;
		break;
	case VTCFG_R_GUESTCAP:
		assert(size == 4);
		value = sc->vsc_features;	/* XXX never read ? */
		break;
	case VTCFG_R_PFN:
		assert(size == 4);
		value = sc->vsc_pfn[sc->vsc_curq] >> VRING_PFN;
		break;
	case VTCFG_R_QNUM:
		assert(size == 2);
		value = pci_vtnet_qsize(sc->vsc_curq);
		break;
	case VTCFG_R_QSEL:
		assert(size == 2);
		value = sc->vsc_curq;	/* XXX never read ? */
		break;
	case VTCFG_R_QNOTIFY:
		assert(size == 2);
		value = sc->vsc_curq;	/* XXX never read ? */
		break;
	case VTCFG_R_STATUS:
		assert(size == 1);
		value = sc->vsc_status;
		break;
	case VTCFG_R_ISR:
		assert(size == 1);
		value = sc->vsc_isr;
		sc->vsc_isr = 0;	/* a read clears this flag */
		break;
	case VTCFG_R_CFGVEC:
		assert(size == 2);
		value = sc->vsc_msix_table_idx[VTNET_CTLQ];
		break;
	case VTCFG_R_QVEC:
		assert(size == 2);
		assert(sc->vsc_curq != VTNET_CTLQ);
		value = sc->vsc_msix_table_idx[sc->vsc_curq];
		break;
	case VTNET_R_CFG0:
	case VTNET_R_CFG1:
	case VTNET_R_CFG2:
	case VTNET_R_CFG3:
	case VTNET_R_CFG4:
	case VTNET_R_CFG5:
		assert((size + offset) <= (VTNET_R_CFG5 + 1));
		ptr = &sc->vsc_macaddr[offset - VTNET_R_CFG0];
		if (size == 1) {
			value = *(uint8_t *) ptr;
		} else if (size == 2) {
			value = *(uint16_t *) ptr;
		} else {
			value = *(uint32_t *) ptr;
		}
		break;
	case VTNET_R_CFG6:
		assert(size != 4);
		value = 0x01;	/* XXX link always up */
		break;
	case VTNET_R_CFG7:
		assert(size == 1);
		value = 0;	/* XXX link status in LSB */
		break;
	default:
		DPRINTF(("vtnet: unknown i/o read offset %ld\n\r", offset));
		value = 0;
		break;
	}

	pthread_mutex_unlock(&sc->vsc_mtx);

	return (value);
}

struct pci_devemu pci_de_vnet = {
	.pe_emu =	"virtio-net",
	.pe_init =	pci_vtnet_init,
	.pe_barwrite =	pci_vtnet_write,
	.pe_barread =	pci_vtnet_read
};
PCI_EMUL_SET(pci_de_vnet);