/*
 * Copyright (c) 2011 NetApp, Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */
/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 *
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association.
 */

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/ioctl.h>
#include <sys/uio.h>
#include <sys/viona_io.h>

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <signal.h>
#include <stdbool.h>
#include <poll.h>
#include <libdladm.h>
#include <libdllink.h>
#include <libdlvnic.h>

#include <machine/vmm.h>
#include <vmmapi.h>

#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#include "pci_emul.h"
#include "virtio.h"
#include "iov.h"
#include "virtio_net.h"

#define	VIONA_RINGSZ		1024
#define	VIONA_CTLQ_SIZE		64
#define	VIONA_CTLQ_MAXSEGS	32

/*
 * PCI config-space register offsets
 */
#define	VIONA_R_CFG0	24
#define	VIONA_R_CFG1	25
#define	VIONA_R_CFG2	26
#define	VIONA_R_CFG3	27
#define	VIONA_R_CFG4	28
#define	VIONA_R_CFG5	29
#define	VIONA_R_CFG6	30
#define	VIONA_R_CFG7	31
#define	VIONA_R_MAX	31

#define	VIONA_REGSZ	(VIONA_R_MAX + 1)
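/*
 * VIONA_R_CFG0-5 back the 6-byte MAC address and VIONA_R_CFG6-7 the
 * 16-bit status word of the virtio-net device-specific configuration,
 * which follows the virtio header in BAR0.  The offsets above assume the
 * larger MSI-X-enabled header; viona_adjust_offset() compensates when
 * MSI-X is disabled.
 */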
/*
 * Queue definitions.
 */
#define	VIONA_RXQ	0
#define	VIONA_TXQ	1
#define	VIONA_CTLQ	2

#define	VIONA_MAXQ	3

/*
 * Supplementary host capabilities provided in the userspace component.
 */
#define	VIONA_S_HOSTCAPS_USERSPACE	( \
	VIRTIO_NET_F_CTRL_VQ |		\
	VIRTIO_NET_F_CTRL_RX)

/*
 * Debug printf
 */
static volatile int pci_viona_debug;
#define	DPRINTF(fmt, arg...)				\
	do {						\
		if (pci_viona_debug) {			\
			FPRINTLN(stdout, fmt, ##arg);	\
			fflush(stdout);			\
		}					\
	} while (0)
#define	WPRINTF(fmt, arg...)	FPRINTLN(stderr, fmt, ##arg)
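/*
 * Debug output is disabled by default; it can be enabled by setting the
 * "viona.debug" config value (see pci_viona_init()), e.g. by passing
 * "-o viona.debug=true" on an illustrative bhyve invocation.
 */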
/*
 * Per-device softc
 */
struct pci_viona_softc {
	struct virtio_softc	vsc_vs;
	struct virtio_consts	vsc_consts;
	struct vqueue_info	vsc_queues[VIONA_MAXQ];
	pthread_mutex_t		vsc_mtx;

	datalink_id_t		vsc_linkid;
	int			vsc_vnafd;

	/* Configurable parameters */
	char			vsc_linkname[MAXLINKNAMELEN];
	uint32_t		vsc_feature_mask;
	uint16_t		vsc_vq_size;

	uint8_t			vsc_macaddr[6];

	bool			vsc_resetting;
	bool			vsc_msix_active;

	viona_promisc_t		vsc_promisc;		/* Current promisc mode */
	bool			vsc_promisc_promisc;	/* PROMISC enabled */
	bool			vsc_promisc_allmulti;	/* ALLMULTI enabled */
	bool			vsc_promisc_umac;	/* unicast MACs sent */
	bool			vsc_promisc_mmac;	/* multicast MACs sent */
};

static struct virtio_consts viona_vi_consts = {
	.vc_name		= "viona",
	.vc_nvq			= VIONA_MAXQ,
	/*
	 * We use the common bhyve virtio framework so that we can call
	 * the utility functions to work with the queues handled in userspace.
	 * The framework PCI read/write functions are not used, so these
	 * callbacks will not be invoked.
	 */
	.vc_cfgsize		= 0,
	.vc_reset		= NULL,
	.vc_qnotify		= NULL,
	.vc_cfgread		= NULL,
	.vc_cfgwrite		= NULL,
	.vc_apply_features	= NULL,
	/*
	 * The following field is populated using the response from the
	 * viona driver during initialisation, augmented with the additional
	 * capabilities emulated in userspace.
	 */
	.vc_hv_caps		= 0,
};

/*
 * Return the size of the I/O BAR that maps the virtio header and the
 * device-specific region.  The size varies depending on whether MSI-X is
 * enabled or not.
 */
static uint64_t
pci_viona_iosize(struct pci_devinst *pi)
{
	if (pci_msix_enabled(pi)) {
		return (VIONA_REGSZ);
	} else {
		return (VIONA_REGSZ -
		    (VIRTIO_PCI_CONFIG_OFF(1) - VIRTIO_PCI_CONFIG_OFF(0)));
	}
}

static uint16_t
pci_viona_qsize(struct pci_viona_softc *sc, int qnum)
{
	if (qnum == VIONA_CTLQ)
		return (VIONA_CTLQ_SIZE);

	return (sc->vsc_vq_size);
}

static void
pci_viona_ring_reset(struct pci_viona_softc *sc, int ring)
{
	assert(ring < VIONA_MAXQ);

	switch (ring) {
	case VIONA_RXQ:
	case VIONA_TXQ:
		break;
	case VIONA_CTLQ:
	default:
		return;
	}

	for (;;) {
		int res;

		res = ioctl(sc->vsc_vnafd, VNA_IOC_RING_RESET, ring);
		if (res == 0) {
			break;
		} else if (errno != EINTR) {
			WPRINTF("ioctl viona ring %d reset failed %d",
			    ring, errno);
			return;
		}
	}
}

static void
pci_viona_update_status(struct pci_viona_softc *sc, uint32_t value)
{
	if (value == 0) {
		DPRINTF("viona: device reset requested!");

		vi_reset_dev(&sc->vsc_vs);
		pci_viona_ring_reset(sc, VIONA_RXQ);
		pci_viona_ring_reset(sc, VIONA_TXQ);
	}

	sc->vsc_vs.vs_status = value;
}

static const char *
pci_viona_promisc_descr(viona_promisc_t mode)
{
	switch (mode) {
	case VIONA_PROMISC_NONE:
		return ("none");
	case VIONA_PROMISC_MULTI:
		return ("multicast");
	case VIONA_PROMISC_ALL:
		return ("all");
	default:
		abort();
	}
}

static int
pci_viona_eval_promisc(struct pci_viona_softc *sc)
{
	viona_promisc_t mode = VIONA_PROMISC_NONE;
	int err = 0;

	/*
	 * If the guest has explicitly requested promiscuous mode or has sent a
	 * non-empty unicast MAC address table, then set viona to promiscuous
	 * mode.  Otherwise, if the guest has explicitly requested multicast
	 * promiscuity or has sent a non-empty multicast MAC address table,
	 * then set viona to multicast promiscuous mode.
	 */
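	/*
	 * For example, a guest that enables ALLMULTI while leaving the
	 * unicast table empty yields VIONA_PROMISC_MULTI, while any
	 * non-empty unicast table escalates the device to
	 * VIONA_PROMISC_ALL.
	 */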
	if (sc->vsc_promisc_promisc || sc->vsc_promisc_umac)
		mode = VIONA_PROMISC_ALL;
	else if (sc->vsc_promisc_allmulti || sc->vsc_promisc_mmac)
		mode = VIONA_PROMISC_MULTI;

	if (mode != sc->vsc_promisc) {
		DPRINTF("viona: setting promiscuous mode to %d (%s)",
		    mode, pci_viona_promisc_descr(mode));
		DPRINTF(" promisc=%u, umac=%u, allmulti=%u, mmac=%u",
		    sc->vsc_promisc_promisc, sc->vsc_promisc_umac,
		    sc->vsc_promisc_allmulti, sc->vsc_promisc_mmac);

		err = ioctl(sc->vsc_vnafd, VNA_IOC_SET_PROMISC, mode);
		if (err == 0)
			sc->vsc_promisc = mode;
		else
			WPRINTF("ioctl viona set promisc failed %d", errno);
	}

	return (err);
}

static uint8_t
pci_viona_control_rx(struct vqueue_info *vq, const virtio_net_ctrl_hdr_t *hdr,
    struct iovec *iov, size_t niov)
{
	struct pci_viona_softc *sc = (struct pci_viona_softc *)vq->vq_vs;
	uint8_t v;

	if (iov[0].iov_len != sizeof (uint8_t) || niov != 1) {
		EPRINTLN("viona: bad control RX data");
		return (VIRTIO_NET_CQ_ERR);
	}

	v = *(uint8_t *)iov[0].iov_base;

	switch (hdr->vnch_command) {
	case VIRTIO_NET_CTRL_RX_PROMISC:
		DPRINTF("viona: ctrl RX promisc %d", v);
		sc->vsc_promisc_promisc = (v != 0);
		break;
	case VIRTIO_NET_CTRL_RX_ALLMULTI:
		DPRINTF("viona: ctrl RX allmulti %d", v);
		sc->vsc_promisc_allmulti = (v != 0);
		break;
	default:
		/*
		 * VIRTIO_NET_F_CTRL_RX_EXTRA was not offered, so no other
		 * commands are expected.
		 */
		EPRINTLN("viona: unrecognised RX control cmd %u",
		    hdr->vnch_command);
		return (VIRTIO_NET_CQ_ERR);
	}

	if (pci_viona_eval_promisc(sc) == 0)
		return (VIRTIO_NET_CQ_OK);
	return (VIRTIO_NET_CQ_ERR);
}

static void
pci_viona_control_mac_dump(const char *tag, const struct iovec *iov)
{
	virtio_net_ctrl_mac_t *table = (virtio_net_ctrl_mac_t *)iov->iov_base;
	ether_addr_t *mac = &table->vncm_mac;

	DPRINTF("-- %s MAC TABLE (entries: %u)", tag, table->vncm_entries);

	if (table->vncm_entries * ETHERADDRL !=
	    iov->iov_len - sizeof (table->vncm_entries)) {
		DPRINTF(" Bad table size %zu", iov->iov_len);
		return;
	}

	for (uint32_t i = 0; i < table->vncm_entries; i++) {
		DPRINTF(" [%2u] %s", i, ether_ntoa((struct ether_addr *)mac));
		mac++;
	}
}

static uint8_t
pci_viona_control_mac(struct vqueue_info *vq, const virtio_net_ctrl_hdr_t *hdr,
    struct iovec *iov, size_t niov)
{
	struct pci_viona_softc *sc = (struct pci_viona_softc *)vq->vq_vs;

	switch (hdr->vnch_command) {
	case VIRTIO_NET_CTRL_MAC_TABLE_SET: {
		virtio_net_ctrl_mac_t *table;

		DPRINTF("viona: ctrl MAC table set");

		if (niov != 2) {
			EPRINTLN("viona: bad control MAC data");
			return (VIRTIO_NET_CQ_ERR);
		}

		/*
		 * We advertise VIRTIO_NET_F_CTRL_RX and therefore need to
		 * accept VIRTIO_NET_CTRL_MAC, but we don't support passing
		 * changes in the MAC address lists down to viona.
		 * Instead, we set flags to indicate whether the guest has
		 * sent any MAC addresses for each table, and use these to
		 * determine the resulting promiscuous mode; see
		 * pci_viona_eval_promisc() above.
		 */
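		/*
		 * Each table is a virtio_net_ctrl_mac_t: a 32-bit
		 * vncm_entries count followed by that many 6-byte MAC
		 * addresses.  iov[0] carries the unicast table and iov[1]
		 * the multicast table.
		 */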
		/* Unicast MAC table */
		table = (virtio_net_ctrl_mac_t *)iov[0].iov_base;
		sc->vsc_promisc_umac = (table->vncm_entries != 0);
		if (pci_viona_debug)
			pci_viona_control_mac_dump("UNICAST", &iov[0]);

		/* Multicast MAC table */
		table = (virtio_net_ctrl_mac_t *)iov[1].iov_base;
		sc->vsc_promisc_mmac = (table->vncm_entries != 0);
		if (pci_viona_debug)
			pci_viona_control_mac_dump("MULTICAST", &iov[1]);

		break;
	}
	case VIRTIO_NET_CTRL_MAC_ADDR_SET:
		/* disallow setting the primary filter MAC address */
		DPRINTF("viona: ctrl MAC addr set %zu", niov);
		return (VIRTIO_NET_CQ_ERR);
	default:
		EPRINTLN("viona: unrecognised MAC control cmd %u",
		    hdr->vnch_command);
		return (VIRTIO_NET_CQ_ERR);
	}

	if (pci_viona_eval_promisc(sc) == 0)
		return (VIRTIO_NET_CQ_OK);
	return (VIRTIO_NET_CQ_ERR);
}

static void
pci_viona_control(struct vqueue_info *vq)
{
	struct iovec iov[VIONA_CTLQ_MAXSEGS + 1];
	const virtio_net_ctrl_hdr_t *hdr;
	struct iovec *siov = iov;
	struct vi_req req = { 0 };
	uint8_t *ackp;
	size_t nsiov;
	uint32_t len;
	int n;

	n = vq_getchain(vq, iov, VIONA_CTLQ_MAXSEGS, &req);

	assert(n >= 1 && n <= VIONA_CTLQ_MAXSEGS);

	/*
	 * Since we have not negotiated VIRTIO_F_ANY_LAYOUT, we expect the
	 * control message to be laid out in at least three descriptors as
	 * follows:
	 *	header	- sizeof (virtio_net_ctrl_hdr_t)
	 *	data[]	- at least one descriptor, varying size
	 *	ack	- uint8_t, flagged as writable
	 * Check the incoming message to make sure it matches this layout and
	 * drop the entire chain if it does not.
	 */
	if (n < 3 || req.writable != 1 || req.readable + 1 != n ||
	    iov[req.readable].iov_len != sizeof (uint8_t)) {
		EPRINTLN("viona: bad control chain, len=%d, w=%d, r=%d",
		    n, req.writable, req.readable);
		goto drop;
	}

	hdr = (const virtio_net_ctrl_hdr_t *)iov[0].iov_base;
	if (iov[0].iov_len < sizeof (virtio_net_ctrl_hdr_t)) {
		EPRINTLN("viona: control header too short: %zu",
		    iov[0].iov_len);
		goto drop;
	}

	/*
	 * Writable iovecs start at iov[req.readable], and we've already
	 * checked that there is only one writable, that it's at the end, and
	 * that it's the right size; it's the acknowledgement byte.
	 */
	ackp = (uint8_t *)iov[req.readable].iov_base;

	siov = &iov[1];
	nsiov = n - 2;

	switch (hdr->vnch_class) {
	case VIRTIO_NET_CTRL_RX:
		*ackp = pci_viona_control_rx(vq, hdr, siov, nsiov);
		break;
	case VIRTIO_NET_CTRL_MAC:
		*ackp = pci_viona_control_mac(vq, hdr, siov, nsiov);
		break;
	default:
		EPRINTLN("viona: unrecognised control class %u, cmd %u",
		    hdr->vnch_class, hdr->vnch_command);
		*ackp = VIRTIO_NET_CQ_ERR;
		break;
	}

drop:
	len = 0;
	for (uint_t i = 0; i < n; i++)
		len += iov[i].iov_len;

	vq_relchain(vq, req.idx, len);
}

static void
pci_viona_process_ctrlq(struct vqueue_info *vq)
{
	for (;;) {
		vq_kick_disable(vq);

		while (vq_has_descs(vq))
			pci_viona_control(vq);

		vq_kick_enable(vq);

		/*
		 * One more check in case a late addition raced with
		 * re-enabling kicks.  Note that vq_kick_enable() includes a
		 * memory barrier.
		 */
		if (!vq_has_descs(vq))
			break;
	}

	vq_endchains(vq, /* used_all_avail= */ 1);
}
static void *
pci_viona_poll_thread(void *param)
{
	struct pci_viona_softc *sc = param;
	pollfd_t pollset;
	const int fd = sc->vsc_vnafd;

	pollset.fd = fd;
	pollset.events = POLLRDBAND;

	for (;;) {
		if (poll(&pollset, 1, -1) < 0) {
			if (errno == EINTR || errno == EAGAIN) {
				continue;
			} else {
				WPRINTF("pci_viona_poll_thread poll() error %d",
				    errno);
				break;
			}
		}
		if (pollset.revents & POLLRDBAND) {
			vioc_intr_poll_t vip;
			uint_t i;
			int res;
			bool assert_lintr = false;
			const bool do_msix = pci_msix_enabled(sc->vsc_vs.vs_pi);

			res = ioctl(fd, VNA_IOC_INTR_POLL, &vip);
			for (i = 0; res > 0 && i < VIONA_VQ_MAX; i++) {
				if (vip.vip_status[i] == 0) {
					continue;
				}
				if (do_msix) {
					pci_generate_msix(sc->vsc_vs.vs_pi,
					    sc->vsc_queues[i].vq_msix_idx);
				} else {
					assert_lintr = true;
				}
				res = ioctl(fd, VNA_IOC_RING_INTR_CLR, i);
				if (res != 0) {
					WPRINTF("ioctl viona vq %d intr "
					    "clear failed %d", i, errno);
				}
			}
			if (assert_lintr) {
				pthread_mutex_lock(&sc->vsc_mtx);
				sc->vsc_vs.vs_isr |= VIRTIO_PCI_ISR_INTR;
				pci_lintr_assert(sc->vsc_vs.vs_pi);
				pthread_mutex_unlock(&sc->vsc_mtx);
			}
		}
	}

	pthread_exit(NULL);
}

static void
pci_viona_ring_init(struct pci_viona_softc *sc, uint64_t pfn)
{
	int qnum = sc->vsc_vs.vs_curq;
	vioc_ring_init_t vna_ri;
	int error;

	assert(qnum < VIONA_MAXQ);

	if (qnum == VIONA_CTLQ) {
		vi_vq_init(&sc->vsc_vs, pfn);
		return;
	}

	sc->vsc_queues[qnum].vq_pfn = (pfn << VRING_PFN);
	vna_ri.ri_index = qnum;
	vna_ri.ri_qsize = pci_viona_qsize(sc, qnum);
	vna_ri.ri_qaddr = (pfn << VRING_PFN);
	error = ioctl(sc->vsc_vnafd, VNA_IOC_RING_INIT, &vna_ri);

	if (error != 0) {
		WPRINTF("ioctl viona ring %u init failed %d", qnum, errno);
	}
}

static int
pci_viona_viona_init(struct vmctx *ctx, struct pci_viona_softc *sc)
{
	vioc_create_t vna_create;
	int error;

	sc->vsc_vnafd = open("/dev/viona", O_RDWR | O_EXCL);
	if (sc->vsc_vnafd == -1) {
		WPRINTF("open viona ctl failed: %d", errno);
		return (-1);
	}

	vna_create.c_linkid = sc->vsc_linkid;
	vna_create.c_vmfd = vm_get_device_fd(ctx);
	error = ioctl(sc->vsc_vnafd, VNA_IOC_CREATE, &vna_create);
	if (error != 0) {
		(void) close(sc->vsc_vnafd);
		WPRINTF("ioctl viona create failed %d", errno);
		return (-1);
	}

	return (0);
}
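/*
 * Example (illustrative) device configuration, assuming a vnic named
 * "vnic0" exists on the host:
 *
 *	-s 4:0,virtio-net-viona,vnic0,vqsize=2048
 *
 * pci_viona_legacy_config() below translates the comma-separated legacy
 * option string into the nvlist consumed by pci_viona_parse_opts().
 */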
static int
pci_viona_legacy_config(nvlist_t *nvl, const char *opt)
{
	char *config, *name, *tofree, *value;

	if (opt == NULL)
		return (0);

	config = tofree = strdup(opt);
	while ((name = strsep(&config, ",")) != NULL) {
		value = strchr(name, '=');
		if (value != NULL) {
			*value++ = '\0';
			set_config_value_node(nvl, name, value);
		} else {
			set_config_value_node(nvl, "vnic", name);
		}
	}
	free(tofree);
	return (0);
}

static int
pci_viona_parse_opts(struct pci_viona_softc *sc, nvlist_t *nvl)
{
	const char *value;
	int err = 0;

	sc->vsc_vq_size = VIONA_RINGSZ;
	sc->vsc_feature_mask = 0;
	sc->vsc_linkname[0] = '\0';

	value = get_config_value_node(nvl, "feature_mask");
	if (value != NULL) {
		long num;

		errno = 0;
		num = strtol(value, NULL, 0);
		if (errno != 0 || num < 0) {
			fprintf(stderr,
			    "viona: invalid feature_mask '%s'\n", value);
		} else {
			sc->vsc_feature_mask = num;
		}
	}

	value = get_config_value_node(nvl, "vqsize");
	if (value != NULL) {
		long num;

		errno = 0;
		num = strtol(value, NULL, 0);
		if (errno != 0) {
			fprintf(stderr,
			    "viona: invalid vqsize '%s'\n", value);
			err = -1;
		} else if (num <= 2 || num > 32768) {
			fprintf(stderr,
			    "viona: vqsize %ld out of range\n", num);
			err = -1;
		} else if ((1 << (ffs(num) - 1)) != num) {
			fprintf(stderr,
			    "viona: vqsize %ld is not a power of 2\n", num);
			err = -1;
		} else {
			sc->vsc_vq_size = num;
		}
	}

	value = get_config_value_node(nvl, "vnic");
	if (value == NULL) {
		fprintf(stderr, "viona: vnic name required\n");
		err = -1;
	} else {
		(void) strlcpy(sc->vsc_linkname, value, MAXLINKNAMELEN);
	}

	DPRINTF("viona=%p dev=%s vqsize=%x feature_mask=%x", sc,
	    sc->vsc_linkname, sc->vsc_vq_size, sc->vsc_feature_mask);
	return (err);
}
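/*
 * Device initialisation: resolve the configured vnic through libdladm to
 * obtain its link id and MAC address, bind a viona instance to that link
 * and to this VM, start the interrupt poll thread, and then set up the
 * PCI identity, MSI-X capability, and legacy virtio I/O BAR.
 */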
static int
pci_viona_init(struct pci_devinst *pi, nvlist_t *nvl)
{
	dladm_handle_t handle;
	dladm_status_t status;
	dladm_vnic_attr_t attr;
	char errmsg[DLADM_STRSIZE];
	char tname[MAXCOMLEN + 1];
	int error, i;
	struct pci_viona_softc *sc;
	const char *vnic;
	pthread_t tid;

	if (get_config_bool_default("viona.debug", false))
		pci_viona_debug = 1;

	vnic = get_config_value_node(nvl, "vnic");
	if (vnic == NULL) {
		WPRINTF("virtio-viona: vnic required");
		return (1);
	}

	sc = malloc(sizeof (struct pci_viona_softc));
	memset(sc, 0, sizeof (struct pci_viona_softc));

	if (pci_viona_parse_opts(sc, nvl) != 0) {
		free(sc);
		return (1);
	}

	if ((status = dladm_open(&handle)) != DLADM_STATUS_OK) {
		WPRINTF("could not open /dev/dld");
		free(sc);
		return (1);
	}

	if ((status = dladm_name2info(handle, sc->vsc_linkname, &sc->vsc_linkid,
	    NULL, NULL, NULL)) != DLADM_STATUS_OK) {
		WPRINTF("dladm_name2info() for %s failed: %s", vnic,
		    dladm_status2str(status, errmsg));
		dladm_close(handle);
		free(sc);
		return (1);
	}

	if ((status = dladm_vnic_info(handle, sc->vsc_linkid, &attr,
	    DLADM_OPT_ACTIVE)) != DLADM_STATUS_OK) {
		WPRINTF("dladm_vnic_info() for %s failed: %s", vnic,
		    dladm_status2str(status, errmsg));
		dladm_close(handle);
		free(sc);
		return (1);
	}

	memcpy(sc->vsc_macaddr, attr.va_mac_addr, ETHERADDRL);

	dladm_close(handle);

	error = pci_viona_viona_init(pi->pi_vmctx, sc);
	if (error != 0) {
		free(sc);
		return (1);
	}

	error = pthread_create(&tid, NULL, pci_viona_poll_thread, sc);
	assert(error == 0);
	snprintf(tname, sizeof (tname), "vionapoll:%s", vnic);
	pthread_set_name_np(tid, tname);

	/* initialize config space */
	pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET);
	pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR);
	pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK);
	pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_NETWORK);
	pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR);

	sc->vsc_consts = viona_vi_consts;
	pthread_mutex_init(&sc->vsc_mtx, NULL);

	/*
	 * The RX and TX queues are handled in the kernel component of
	 * viona; however, the control queue is emulated in userspace.
	 */
	sc->vsc_queues[VIONA_CTLQ].vq_qsize = pci_viona_qsize(sc, VIONA_CTLQ);

	vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues);
	sc->vsc_vs.vs_mtx = &sc->vsc_mtx;

	/*
	 * Guests that do not support CTRL_RX_MAC still generally need to
	 * receive multicast packets.  Guests that do support this feature
	 * will end up setting this flag indirectly via messages on the
	 * control queue, but it does not hurt to default to multicast
	 * promiscuity here, and it is what older versions of viona did.
	 */
	sc->vsc_promisc_mmac = true;
	pci_viona_eval_promisc(sc);

	/* MSI-X support */
	for (i = 0; i < VIONA_MAXQ; i++)
		sc->vsc_queues[i].vq_msix_idx = VIRTIO_MSI_NO_VECTOR;

	/* BAR 1 used to map MSI-X table and PBA */
	if (pci_emul_add_msixcap(pi, VIONA_MAXQ, 1)) {
		free(sc);
		return (1);
	}

	/* BAR 0 for legacy-style virtio register access. */
	error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VIONA_REGSZ);
	if (error != 0) {
		WPRINTF("could not allocate virtio BAR");
		free(sc);
		return (1);
	}

	/*
	 * A legacy interrupt is needed for virtio compliance, even though
	 * MSI-X operation is _strongly_ suggested for adequate performance.
	 */
	pci_lintr_request(pi);

	return (0);
}

static uint64_t
viona_adjust_offset(struct pci_devinst *pi, uint64_t offset)
{
	/*
	 * The device-specific offsets used by the guest change based on
	 * whether the MSI-X capability is enabled or not.
	 */
	if (!pci_msix_enabled(pi)) {
		if (offset >= VIRTIO_PCI_CONFIG_OFF(0)) {
			return (offset + (VIRTIO_PCI_CONFIG_OFF(1) -
			    VIRTIO_PCI_CONFIG_OFF(0)));
		}
	}

	return (offset);
}
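/*
 * For example, with MSI-X disabled the device-specific region starts at
 * VIRTIO_PCI_CONFIG_OFF(0) (offset 20) rather than at offset 24, so a
 * guest access at offset 20 is remapped to VIONA_R_CFG0 (24) before being
 * dispatched by pci_viona_read()/pci_viona_write().
 */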
static void
pci_viona_ring_set_msix(struct pci_devinst *pi, uint_t ring)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	struct msix_table_entry mte;
	uint16_t tab_index;
	vioc_ring_msi_t vrm;
	int res;

	if (ring == VIONA_CTLQ)
		return;

	assert(ring <= VIONA_VQ_TX);

	vrm.rm_index = ring;
	vrm.rm_addr = 0;
	vrm.rm_msg = 0;
	tab_index = sc->vsc_queues[ring].vq_msix_idx;

	if (tab_index != VIRTIO_MSI_NO_VECTOR && sc->vsc_msix_active) {
		mte = pi->pi_msix.table[tab_index];
		if ((mte.vector_control & PCIM_MSIX_VCTRL_MASK) == 0) {
			vrm.rm_addr = mte.addr;
			vrm.rm_msg = mte.msg_data;
		}
	}

	res = ioctl(sc->vsc_vnafd, VNA_IOC_RING_SET_MSI, &vrm);
	if (res != 0) {
		WPRINTF("ioctl viona set_msi %d failed %d", ring, errno);
	}
}

static void
pci_viona_lintrupdate(struct pci_devinst *pi)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	bool msix_on = false;

	pthread_mutex_lock(&sc->vsc_mtx);
	msix_on = pci_msix_enabled(pi) && (pi->pi_msix.function_mask == 0);
	if ((sc->vsc_msix_active && !msix_on) ||
	    (msix_on && !sc->vsc_msix_active)) {
		uint_t i;

		sc->vsc_msix_active = msix_on;
		/* Update in-kernel ring configs */
		for (i = 0; i <= VIONA_VQ_TX; i++) {
			pci_viona_ring_set_msix(pi, i);
		}
	}
	pthread_mutex_unlock(&sc->vsc_mtx);
}

static void
pci_viona_msix_update(struct pci_devinst *pi, uint64_t offset)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	uint_t tab_index, i;

	pthread_mutex_lock(&sc->vsc_mtx);
	if (!sc->vsc_msix_active) {
		pthread_mutex_unlock(&sc->vsc_mtx);
		return;
	}

	/*
	 * Rather than update every possible MSI-X vector, cheat and use the
	 * offset to calculate the entry within the table.  Since this should
	 * only be called when a write to the table succeeds, the index should
	 * be valid.
	 */
	tab_index = offset / MSIX_TABLE_ENTRY_SIZE;

	for (i = 0; i <= VIONA_VQ_TX; i++) {
		if (sc->vsc_queues[i].vq_msix_idx != tab_index) {
			continue;
		}
		pci_viona_ring_set_msix(pi, i);
	}

	pthread_mutex_unlock(&sc->vsc_mtx);
}

static void
pci_viona_qnotify(struct pci_viona_softc *sc, int ring)
{
	int error;

	switch (ring) {
	case VIONA_TXQ:
	case VIONA_RXQ:
		error = ioctl(sc->vsc_vnafd, VNA_IOC_RING_KICK, ring);
		if (error != 0) {
			WPRINTF("ioctl viona ring %d kick failed %d",
			    ring, errno);
		}
		break;
	case VIONA_CTLQ: {
		struct vqueue_info *vq = &sc->vsc_queues[VIONA_CTLQ];

		if (vq_has_descs(vq))
			pci_viona_process_ctrlq(vq);
		break;
	}
	}
}

static void
pci_viona_baraddr(struct pci_devinst *pi, int baridx, int enabled,
    uint64_t address)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	uint64_t ioport;
	int error;

	if (baridx != 0)
		return;

	if (enabled == 0) {
		error = ioctl(sc->vsc_vnafd, VNA_IOC_SET_NOTIFY_IOP, 0);
		if (error != 0)
			WPRINTF("uninstall ioport hook failed %d", errno);
		return;
	}

	/*
	 * Install an ioport hook for virtqueue notification.
	 * This is part of the virtio common configuration area, so the
	 * address does not change with MSI-X status.
	 */
	ioport = address + VIRTIO_PCI_QUEUE_NOTIFY;
	error = ioctl(sc->vsc_vnafd, VNA_IOC_SET_NOTIFY_IOP, ioport);
	if (error != 0) {
		WPRINTF("install ioport hook at %lx failed %d",
		    ioport, errno);
	}
}
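/*
 * BAR0 register access.  Note that when a feature_mask has been configured,
 * the masked bits are cleared both from the host features reported to the
 * guest and from the guest features passed down to the kernel driver,
 * allowing individual virtio features to be administratively disabled.
 */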
static void
pci_viona_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size,
    uint64_t value)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	void *ptr;
	int err = 0;

	if (baridx == pci_msix_table_bar(pi) ||
	    baridx == pci_msix_pba_bar(pi)) {
		if (pci_emul_msix_twrite(pi, offset, size, value) == 0) {
			pci_viona_msix_update(pi, offset);
		}
		return;
	}

	assert(baridx == 0);

	if (offset + size > pci_viona_iosize(pi)) {
		DPRINTF("viona_write: 2big, offset %ld size %d",
		    offset, size);
		return;
	}

	pthread_mutex_lock(&sc->vsc_mtx);

	offset = viona_adjust_offset(pi, offset);

	switch (offset) {
	case VIRTIO_PCI_GUEST_FEATURES:
		assert(size == 4);
		value &= ~(sc->vsc_feature_mask);
		err = ioctl(sc->vsc_vnafd, VNA_IOC_SET_FEATURES, &value);
		if (err != 0) {
			WPRINTF("ioctl feature negotiation returned err = %d",
			    errno);
		} else {
			sc->vsc_vs.vs_negotiated_caps = value;
		}
		break;
	case VIRTIO_PCI_QUEUE_PFN:
		assert(size == 4);
		pci_viona_ring_init(sc, value);
		break;
	case VIRTIO_PCI_QUEUE_SEL:
		assert(size == 2);
		assert(value < VIONA_MAXQ);
		sc->vsc_vs.vs_curq = value;
		break;
	case VIRTIO_PCI_QUEUE_NOTIFY:
		assert(size == 2);
		assert(value < VIONA_MAXQ);
		pci_viona_qnotify(sc, value);
		break;
	case VIRTIO_PCI_STATUS:
		assert(size == 1);
		pci_viona_update_status(sc, value);
		break;
	case VIRTIO_MSI_CONFIG_VECTOR:
		assert(size == 2);
		sc->vsc_vs.vs_msix_cfg_idx = value;
		break;
	case VIRTIO_MSI_QUEUE_VECTOR:
		assert(size == 2);
		assert(sc->vsc_vs.vs_curq < VIONA_MAXQ);
		sc->vsc_queues[sc->vsc_vs.vs_curq].vq_msix_idx = value;
		pci_viona_ring_set_msix(pi, sc->vsc_vs.vs_curq);
		break;
	case VIONA_R_CFG0:
	case VIONA_R_CFG1:
	case VIONA_R_CFG2:
	case VIONA_R_CFG3:
	case VIONA_R_CFG4:
	case VIONA_R_CFG5:
		assert((size + offset) <= (VIONA_R_CFG5 + 1));
		ptr = &sc->vsc_macaddr[offset - VIONA_R_CFG0];
		/*
		 * The driver is allowed to change the MAC address.
		 */
		if (size == 1) {
			*(uint8_t *)ptr = value;
		} else if (size == 2) {
			*(uint16_t *)ptr = value;
		} else {
			*(uint32_t *)ptr = value;
		}
		break;
	case VIRTIO_PCI_HOST_FEATURES:
	case VIRTIO_PCI_QUEUE_NUM:
	case VIRTIO_PCI_ISR:
	case VIONA_R_CFG6:
	case VIONA_R_CFG7:
		DPRINTF("viona: write to readonly reg %ld", offset);
		break;
	default:
		DPRINTF("viona: unknown i/o write offset %ld", offset);
		value = 0;
		break;
	}

	pthread_mutex_unlock(&sc->vsc_mtx);
}

static uint64_t
pci_viona_read(struct pci_devinst *pi, int baridx, uint64_t offset, int size)
{
	struct pci_viona_softc *sc = pi->pi_arg;
	void *ptr;
	uint64_t value;
	int err = 0;

	if (baridx == pci_msix_table_bar(pi) ||
	    baridx == pci_msix_pba_bar(pi)) {
		return (pci_emul_msix_tread(pi, offset, size));
	}

	assert(baridx == 0);

	if (offset + size > pci_viona_iosize(pi)) {
		DPRINTF("viona_read: 2big, offset %ld size %d",
		    offset, size);
		return (0);
	}

	pthread_mutex_lock(&sc->vsc_mtx);

	offset = viona_adjust_offset(pi, offset);

	switch (offset) {
	case VIRTIO_PCI_HOST_FEATURES:
		assert(size == 4);
		err = ioctl(sc->vsc_vnafd, VNA_IOC_GET_FEATURES, &value);
		if (err != 0) {
			WPRINTF("ioctl get host features returned err = %d",
			    errno);
		}
		value |= VIONA_S_HOSTCAPS_USERSPACE;
		value &= ~sc->vsc_feature_mask;
		sc->vsc_consts.vc_hv_caps = value;
		break;
	case VIRTIO_PCI_GUEST_FEATURES:
		assert(size == 4);
		value = sc->vsc_vs.vs_negotiated_caps;	/* XXX never read ? */
		break;
	case VIRTIO_PCI_QUEUE_PFN:
		assert(size == 4);
		value = sc->vsc_queues[sc->vsc_vs.vs_curq].vq_pfn >> VRING_PFN;
		break;
	case VIRTIO_PCI_QUEUE_NUM:
		assert(size == 2);
		value = pci_viona_qsize(sc, sc->vsc_vs.vs_curq);
		break;
	case VIRTIO_PCI_QUEUE_SEL:
		assert(size == 2);
		value = sc->vsc_vs.vs_curq;	/* XXX never read ? */
		break;
	case VIRTIO_PCI_QUEUE_NOTIFY:
		assert(size == 2);
		value = sc->vsc_vs.vs_curq;	/* XXX never read ? */
		break;
	case VIRTIO_PCI_STATUS:
		assert(size == 1);
		value = sc->vsc_vs.vs_status;
		break;
	case VIRTIO_PCI_ISR:
		assert(size == 1);
		value = sc->vsc_vs.vs_isr;
		sc->vsc_vs.vs_isr = 0;	/* a read clears this flag */
		if (value != 0) {
			pci_lintr_deassert(pi);
		}
		break;
	case VIRTIO_MSI_CONFIG_VECTOR:
		assert(size == 2);
		value = sc->vsc_vs.vs_msix_cfg_idx;
		break;
	case VIRTIO_MSI_QUEUE_VECTOR:
		assert(size == 2);
		assert(sc->vsc_vs.vs_curq < VIONA_MAXQ);
		value = sc->vsc_queues[sc->vsc_vs.vs_curq].vq_msix_idx;
		break;
	case VIONA_R_CFG0:
	case VIONA_R_CFG1:
	case VIONA_R_CFG2:
	case VIONA_R_CFG3:
	case VIONA_R_CFG4:
	case VIONA_R_CFG5:
		assert((size + offset) <= (VIONA_R_CFG5 + 1));
		ptr = &sc->vsc_macaddr[offset - VIONA_R_CFG0];
		if (size == 1) {
			value = *(uint8_t *)ptr;
		} else if (size == 2) {
			value = *(uint16_t *)ptr;
		} else {
			value = *(uint32_t *)ptr;
		}
		break;
	case VIONA_R_CFG6:
		assert(size != 4);
		value = 0x01;	/* XXX link always up */
		break;
	case VIONA_R_CFG7:
		assert(size == 1);
		value = 0;	/* XXX link status in LSB */
		break;
	default:
		DPRINTF("viona: unknown i/o read offset %ld", offset);
		value = 0;
		break;
	}

	pthread_mutex_unlock(&sc->vsc_mtx);

	return (value);
}
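/*
 * Register the emulation with the bhyve PCI framework via its linker set;
 * "virtio-net-viona" is the device name used in slot configuration.
 */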
struct pci_devemu pci_de_viona = {
	.pe_emu =		"virtio-net-viona",
	.pe_init =		pci_viona_init,
	.pe_legacy_config =	pci_viona_legacy_config,
	.pe_barwrite =		pci_viona_write,
	.pe_barread =		pci_viona_read,
	.pe_baraddr =		pci_viona_baraddr,
	.pe_lintrupdate =	pci_viona_lintrupdate
};
PCI_EMUL_SET(pci_de_viona);