1 /* 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 /* 27 * This file and its contents are supplied under the terms of the 28 * Common Development and Distribution License ("CDDL"), version 1.0. 29 * You may only use this file in accordance with the terms of version 30 * 1.0 of the CDDL. 31 * 32 * A full copy of the text of the CDDL should have accompanied this 33 * source. A copy of the CDDL is also available via the Internet at 34 * http://www.illumos.org/license/CDDL. 35 * 36 * Copyright 2015 Pluribus Networks Inc. 37 * Copyright 2019 Joyent, Inc. 38 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association. 39 * Copyright 2025 Oxide Computer Company 40 */ 41 42 43 #include <sys/param.h> 44 #include <sys/linker_set.h> 45 #include <sys/ioctl.h> 46 #include <sys/uio.h> 47 #include <sys/viona_io.h> 48 49 #include <errno.h> 50 #include <fcntl.h> 51 #include <stdio.h> 52 #include <stdlib.h> 53 #include <stdint.h> 54 #include <string.h> 55 #include <strings.h> 56 #include <unistd.h> 57 #include <assert.h> 58 #include <pthread.h> 59 #include <signal.h> 60 #include <stdbool.h> 61 #include <poll.h> 62 #include <libdladm.h> 63 #include <libdllink.h> 64 #include <libdlvnic.h> 65 66 #include <machine/vmm.h> 67 #include <vmmapi.h> 68 69 #include "bhyverun.h" 70 #include "config.h" 71 #include "debug.h" 72 #include "pci_emul.h" 73 #include "virtio.h" 74 #include "iov.h" 75 #include "virtio_net.h" 76 77 #define VIONA_RINGSZ 1024 78 #define VIONA_CTLQ_SIZE 64 79 #define VIONA_CTLQ_MAXSEGS 32 80 81 /* 82 * PCI config-space register offsets 83 */ 84 #define VIONA_R_CFG0 24 85 #define VIONA_R_CFG1 25 86 #define VIONA_R_CFG2 26 87 #define VIONA_R_CFG3 27 88 #define VIONA_R_CFG4 28 89 #define VIONA_R_CFG5 29 90 #define VIONA_R_CFG6 30 91 #define VIONA_R_CFG7 31 92 #define VIONA_R_MAX 31 93 94 #define VIONA_REGSZ (VIONA_R_MAX + 1) 95 96 /* 97 * Queue definitions. 98 */ 99 #define VIONA_RXQ 0 100 #define VIONA_TXQ 1 101 #define VIONA_CTLQ 2 102 103 #define VIONA_MAXQ 3 104 105 /* 106 * Supplementary host capabilities provided in the userspace component. 107 */ 108 #define VIONA_S_HOSTCAPS_USERSPACE ( \ 109 VIRTIO_NET_F_CTRL_VQ | \ 110 VIRTIO_NET_F_CTRL_RX) 111 112 /* 113 * Debug printf 114 */ 115 static volatile int pci_viona_debug; 116 #define DPRINTF(fmt, arg...) \ 117 do { \ 118 if (pci_viona_debug) { \ 119 FPRINTLN(stdout, fmt, ##arg); \ 120 fflush(stdout); \ 121 } \ 122 } while (0) 123 #define WPRINTF(fmt, arg...) FPRINTLN(stderr, fmt, ##arg) 124 125 /* 126 * Per-device softc 127 */ 128 struct pci_viona_softc { 129 struct virtio_softc vsc_vs; 130 struct virtio_consts vsc_consts; 131 struct vqueue_info vsc_queues[VIONA_MAXQ]; 132 pthread_mutex_t vsc_mtx; 133 134 datalink_id_t vsc_linkid; 135 int vsc_vnafd; 136 137 /* Configurable parameters */ 138 char vsc_linkname[MAXLINKNAMELEN]; 139 uint32_t vsc_feature_mask; 140 uint16_t vsc_vq_size; 141 142 uint8_t vsc_macaddr[6]; 143 uint16_t vsc_mtu; 144 145 bool vsc_resetting; 146 bool vsc_msix_active; 147 148 viona_promisc_t vsc_promisc; /* Current promisc mode */ 149 bool vsc_promisc_promisc; /* PROMISC enabled */ 150 bool vsc_promisc_allmulti; /* ALLMULTI enabled */ 151 bool vsc_promisc_umac; /* unicast MACs sent */ 152 bool vsc_promisc_mmac; /* multicast MACs sent */ 153 }; 154 155 static struct virtio_consts viona_vi_consts = { 156 .vc_name = "viona", 157 .vc_nvq = VIONA_MAXQ, 158 /* 159 * We use the common bhyve virtio framework so that we can call 160 * the utility functions to work with the queues handled in userspace. 161 * The framework PCI read/write functions are not used so these 162 * callbacks will not be invoked. 163 */ 164 .vc_cfgsize = 0, 165 .vc_reset = NULL, 166 .vc_qnotify = NULL, 167 .vc_cfgread = NULL, 168 .vc_cfgwrite = NULL, 169 .vc_apply_features = NULL, 170 /* 171 * The following field is populated using the response from the 172 * viona driver during initialisation, augmented with the additional 173 * capabilities emulated in userspace. 174 */ 175 .vc_hv_caps = 0, 176 }; 177 178 /* 179 * Return the size of IO BAR that maps virtio header and device specific 180 * region. The size would vary depending on whether MSI-X is enabled or 181 * not. 182 */ 183 static uint64_t 184 pci_viona_iosize(struct pci_devinst *pi) 185 { 186 if (pci_msix_enabled(pi)) { 187 return (VIONA_REGSZ); 188 } else { 189 return (VIONA_REGSZ - 190 (VIRTIO_PCI_CONFIG_OFF(1) - VIRTIO_PCI_CONFIG_OFF(0))); 191 } 192 } 193 194 static uint16_t 195 pci_viona_qsize(struct pci_viona_softc *sc, int qnum) 196 { 197 if (qnum == VIONA_CTLQ) 198 return (VIONA_CTLQ_SIZE); 199 200 return (sc->vsc_vq_size); 201 } 202 203 static void 204 pci_viona_ring_reset(struct pci_viona_softc *sc, int ring) 205 { 206 assert(ring < VIONA_MAXQ); 207 208 switch (ring) { 209 case VIONA_RXQ: 210 case VIONA_TXQ: 211 break; 212 case VIONA_CTLQ: 213 default: 214 return; 215 } 216 217 for (;;) { 218 int res; 219 220 res = ioctl(sc->vsc_vnafd, VNA_IOC_RING_RESET, ring); 221 if (res == 0) { 222 break; 223 } else if (errno != EINTR) { 224 WPRINTF("ioctl viona ring %d reset failed %d", 225 ring, errno); 226 return; 227 } 228 } 229 } 230 231 static void 232 pci_viona_update_status(struct pci_viona_softc *sc, uint32_t value) 233 { 234 235 if (value == 0) { 236 DPRINTF("viona: device reset requested !"); 237 238 vi_reset_dev(&sc->vsc_vs); 239 pci_viona_ring_reset(sc, VIONA_RXQ); 240 pci_viona_ring_reset(sc, VIONA_TXQ); 241 } 242 243 sc->vsc_vs.vs_status = value; 244 } 245 246 static const char * 247 pci_viona_promisc_descr(viona_promisc_t mode) 248 { 249 switch (mode) { 250 case VIONA_PROMISC_NONE: 251 return ("none"); 252 case VIONA_PROMISC_MULTI: 253 return ("multicast"); 254 case VIONA_PROMISC_ALL: 255 return ("all"); 256 default: 257 abort(); 258 } 259 } 260 261 static int 262 pci_viona_eval_promisc(struct pci_viona_softc *sc) 263 { 264 viona_promisc_t mode = VIONA_PROMISC_NONE; 265 int err = 0; 266 267 /* 268 * If the guest has explicitly requested promiscuous mode or has sent a 269 * non-empty unicast MAC address table, then set viona to promiscuous 270 * mode. Otherwise, if the guest has explicitly requested multicast 271 * promiscuity or has sent a non-empty multicast MAC address table, 272 * then set viona to multicast promiscuous mode. 273 */ 274 if (sc->vsc_promisc_promisc || sc->vsc_promisc_umac) 275 mode = VIONA_PROMISC_ALL; 276 else if (sc->vsc_promisc_allmulti || sc->vsc_promisc_mmac) 277 mode = VIONA_PROMISC_MULTI; 278 279 if (mode != sc->vsc_promisc) { 280 DPRINTF("viona: setting promiscuous mode to %d (%s)", 281 mode, pci_viona_promisc_descr(mode)); 282 DPRINTF(" promisc=%u, umac=%u, allmulti=%u, mmac=%u", 283 sc->vsc_promisc_promisc, sc->vsc_promisc_umac, 284 sc->vsc_promisc_allmulti, sc->vsc_promisc_mmac); 285 286 err = ioctl(sc->vsc_vnafd, VNA_IOC_SET_PROMISC, mode); 287 if (err == 0) 288 sc->vsc_promisc = mode; 289 else 290 WPRINTF("ioctl viona set promisc failed %d", errno); 291 } 292 293 return (err); 294 } 295 296 static uint8_t 297 pci_viona_control_rx(struct vqueue_info *vq, const virtio_net_ctrl_hdr_t *hdr, 298 struct iovec *iov, size_t niov) 299 { 300 struct pci_viona_softc *sc = (struct pci_viona_softc *)vq->vq_vs; 301 uint8_t v; 302 303 if (iov[0].iov_len != sizeof (uint8_t) || niov != 1) { 304 EPRINTLN("viona: bad control RX data"); 305 return (VIRTIO_NET_CQ_ERR); 306 } 307 308 v = *(uint8_t *)iov[0].iov_base; 309 310 switch (hdr->vnch_command) { 311 case VIRTIO_NET_CTRL_RX_PROMISC: 312 DPRINTF("viona: ctrl RX promisc %d", v); 313 sc->vsc_promisc_promisc = (v != 0); 314 break; 315 case VIRTIO_NET_CTRL_RX_ALLMULTI: 316 DPRINTF("viona: ctrl RX allmulti %d", v); 317 sc->vsc_promisc_allmulti = (v != 0); 318 break; 319 default: 320 /* 321 * VIRTIO_NET_F_CTRL_RX_EXTRA was not offered so no other 322 * commands are expected. 323 */ 324 EPRINTLN("viona: unrecognised RX control cmd %u", 325 hdr->vnch_command); 326 return (VIRTIO_NET_CQ_ERR); 327 } 328 329 if (pci_viona_eval_promisc(sc) == 0) 330 return (VIRTIO_NET_CQ_OK); 331 return (VIRTIO_NET_CQ_ERR); 332 } 333 334 static void 335 pci_viona_control_mac_dump(const char *tag, const struct iovec *iov) 336 { 337 virtio_net_ctrl_mac_t *table = (virtio_net_ctrl_mac_t *)iov->iov_base; 338 ether_addr_t *mac = &table->vncm_mac; 339 340 DPRINTF("-- %s MAC TABLE (entries: %u)", tag, table->vncm_entries); 341 342 if (table->vncm_entries * ETHERADDRL != 343 iov->iov_len - sizeof (table->vncm_entries)) { 344 DPRINTF(" Bad table size %u", iov->iov_len); 345 return; 346 } 347 348 for (uint32_t i = 0; i < table->vncm_entries; i++) { 349 DPRINTF(" [%2d] %s", i, ether_ntoa((struct ether_addr *)mac)); 350 mac++; 351 } 352 } 353 354 static uint8_t 355 pci_viona_control_mac(struct vqueue_info *vq, const virtio_net_ctrl_hdr_t *hdr, 356 struct iovec *iov, size_t niov) 357 { 358 struct pci_viona_softc *sc = (struct pci_viona_softc *)vq->vq_vs; 359 360 switch (hdr->vnch_command) { 361 case VIRTIO_NET_CTRL_MAC_TABLE_SET: { 362 virtio_net_ctrl_mac_t *table; 363 364 DPRINTF("viona: ctrl MAC table set"); 365 366 if (niov != 2) { 367 EPRINTLN("viona: bad control MAC data"); 368 return (VIRTIO_NET_CQ_ERR); 369 } 370 371 /* 372 * We advertise VIRTIO_NET_F_CTRL_RX and therefore need to 373 * accept VIRTIO_NET_CTRL_MAC, but we don't support passing 374 * changes in the MAC address lists down to viona. 375 * Instead, we set flags to indicate if the guest has sent 376 * any MAC addresses for each table, and use these to determine 377 * the resulting promiscuous mode, see pci_viona_eval_promisc() 378 * above. 379 */ 380 381 /* Unicast MAC table */ 382 table = (virtio_net_ctrl_mac_t *)iov[0].iov_base; 383 sc->vsc_promisc_umac = (table->vncm_entries != 0); 384 if (pci_viona_debug) 385 pci_viona_control_mac_dump("UNICAST", &iov[0]); 386 387 /* Multicast MAC table */ 388 table = (virtio_net_ctrl_mac_t *)iov[1].iov_base; 389 sc->vsc_promisc_mmac = (table->vncm_entries != 0); 390 if (pci_viona_debug) 391 pci_viona_control_mac_dump("MULTICAST", &iov[1]); 392 393 break; 394 } 395 case VIRTIO_NET_CTRL_MAC_ADDR_SET: 396 /* disallow setting the primary filter MAC address */ 397 DPRINTF("viona: ctrl MAC addr set %d", niov); 398 return (VIRTIO_NET_CQ_ERR); 399 default: 400 EPRINTLN("viona: unrecognised MAC control cmd %u", 401 hdr->vnch_command); 402 return (VIRTIO_NET_CQ_ERR); 403 } 404 405 if (pci_viona_eval_promisc(sc) == 0) 406 return (VIRTIO_NET_CQ_OK); 407 return (VIRTIO_NET_CQ_ERR); 408 } 409 410 static void 411 pci_viona_control(struct vqueue_info *vq) 412 { 413 struct iovec iov[VIONA_CTLQ_MAXSEGS + 1]; 414 const virtio_net_ctrl_hdr_t *hdr; 415 struct iovec *siov = iov; 416 struct vi_req req = { 0 }; 417 uint8_t *ackp; 418 size_t nsiov; 419 uint32_t len; 420 int n; 421 422 n = vq_getchain(vq, iov, VIONA_CTLQ_MAXSEGS, &req); 423 424 assert(n >= 1 && n <= VIONA_CTLQ_MAXSEGS); 425 426 /* 427 * Since we have not negotiated VIRTIO_F_ANY_LAYOUT, we expect the 428 * control message to be laid out in at least three descriptors as 429 * follows: 430 * header - sizeof (virtio_net_ctrl_hdr_t) 431 * data[] - at least one descriptor, varying size 432 * ack - uint8_t, flagged as writable 433 * Check the incoming message to make sure it matches this layout and 434 * drop the entire chain if not. 435 */ 436 if (n < 3 || req.writable != 1 || req.readable + 1 != n || 437 iov[req.readable].iov_len != sizeof (uint8_t)) { 438 EPRINTLN("viona: bad control chain, len=%d, w=%d, r=%d", 439 n, req.writable, req.readable); 440 goto drop; 441 } 442 443 hdr = (const virtio_net_ctrl_hdr_t *)iov[0].iov_base; 444 if (iov[0].iov_len < sizeof (virtio_net_ctrl_hdr_t)) { 445 EPRINTLN("viona: control header too short: %u", iov[0].iov_len); 446 goto drop; 447 } 448 449 /* 450 * Writable iovecs start at iov[req.readable], and we've already 451 * checked that there is only one writable, it's at the end, and the 452 * right size; it's the acknowledgement byte. 453 */ 454 ackp = (uint8_t *)iov[req.readable].iov_base; 455 456 siov = &iov[1]; 457 nsiov = n - 2; 458 459 switch (hdr->vnch_class) { 460 case VIRTIO_NET_CTRL_RX: 461 *ackp = pci_viona_control_rx(vq, hdr, siov, nsiov); 462 break; 463 case VIRTIO_NET_CTRL_MAC: 464 *ackp = pci_viona_control_mac(vq, hdr, siov, nsiov); 465 break; 466 default: 467 EPRINTLN("viona: unrecognised control class %u, cmd %u", 468 hdr->vnch_class, hdr->vnch_command); 469 *ackp = VIRTIO_NET_CQ_ERR; 470 break; 471 } 472 473 drop: 474 len = 0; 475 for (uint_t i = 0; i < n; i++) 476 len += iov[i].iov_len; 477 478 vq_relchain(vq, req.idx, len); 479 } 480 481 static void 482 pci_viona_process_ctrlq(struct vqueue_info *vq) 483 { 484 for (;;) { 485 vq_kick_disable(vq); 486 487 while (vq_has_descs(vq)) 488 pci_viona_control(vq); 489 490 vq_kick_enable(vq); 491 492 /* 493 * One more check in case a late addition raced with 494 * re-enabling kicks. Note that vq_kick_enable() includes a 495 * memory barrier. 496 */ 497 498 if (!vq_has_descs(vq)) 499 break; 500 } 501 502 vq_endchains(vq, /* used_all_avail= */1); 503 } 504 505 static void * 506 pci_viona_poll_thread(void *param) 507 { 508 struct pci_viona_softc *sc = param; 509 pollfd_t pollset; 510 const int fd = sc->vsc_vnafd; 511 512 pollset.fd = fd; 513 pollset.events = POLLRDBAND; 514 515 for (;;) { 516 if (poll(&pollset, 1, -1) < 0) { 517 if (errno == EINTR || errno == EAGAIN) { 518 continue; 519 } else { 520 WPRINTF("pci_viona_poll_thread poll() error %d", 521 errno); 522 break; 523 } 524 } 525 if (pollset.revents & POLLRDBAND) { 526 vioc_intr_poll_t vip; 527 uint_t i; 528 int res; 529 bool assert_lintr = false; 530 const bool do_msix = pci_msix_enabled(sc->vsc_vs.vs_pi); 531 532 res = ioctl(fd, VNA_IOC_INTR_POLL, &vip); 533 for (i = 0; res > 0 && i < VIONA_VQ_MAX; i++) { 534 if (vip.vip_status[i] == 0) { 535 continue; 536 } 537 if (do_msix) { 538 pci_generate_msix(sc->vsc_vs.vs_pi, 539 sc->vsc_queues[i].vq_msix_idx); 540 } else { 541 assert_lintr = true; 542 } 543 res = ioctl(fd, VNA_IOC_RING_INTR_CLR, i); 544 if (res != 0) { 545 WPRINTF("ioctl viona vq %d intr " 546 "clear failed %d", i, errno); 547 } 548 } 549 if (assert_lintr) { 550 pthread_mutex_lock(&sc->vsc_mtx); 551 sc->vsc_vs.vs_isr |= VIRTIO_PCI_ISR_INTR; 552 pci_lintr_assert(sc->vsc_vs.vs_pi); 553 pthread_mutex_unlock(&sc->vsc_mtx); 554 } 555 } 556 } 557 558 pthread_exit(NULL); 559 } 560 561 static void 562 pci_viona_ring_init(struct pci_viona_softc *sc, uint64_t pfn) 563 { 564 int qnum = sc->vsc_vs.vs_curq; 565 vioc_ring_init_t vna_ri; 566 int error; 567 568 assert(qnum < VIONA_MAXQ); 569 570 if (qnum == VIONA_CTLQ) { 571 vi_vq_init(&sc->vsc_vs, pfn); 572 return; 573 } 574 575 sc->vsc_queues[qnum].vq_pfn = (pfn << VRING_PFN); 576 vna_ri.ri_index = qnum; 577 vna_ri.ri_qsize = pci_viona_qsize(sc, qnum); 578 vna_ri.ri_qaddr = (pfn << VRING_PFN); 579 error = ioctl(sc->vsc_vnafd, VNA_IOC_RING_INIT, &vna_ri); 580 581 if (error != 0) { 582 WPRINTF("ioctl viona ring %u init failed %d", qnum, errno); 583 } 584 } 585 586 static int 587 pci_viona_viona_init(struct vmctx *ctx, struct pci_viona_softc *sc) 588 { 589 vioc_create_t vna_create; 590 int error; 591 592 sc->vsc_vnafd = open("/dev/viona", O_RDWR | O_EXCL); 593 if (sc->vsc_vnafd == -1) { 594 WPRINTF("open viona ctl failed: %d", errno); 595 return (-1); 596 } 597 598 vna_create.c_linkid = sc->vsc_linkid; 599 vna_create.c_vmfd = vm_get_device_fd(ctx); 600 error = ioctl(sc->vsc_vnafd, VNA_IOC_CREATE, &vna_create); 601 if (error != 0) { 602 (void) close(sc->vsc_vnafd); 603 WPRINTF("ioctl viona create failed %d", errno); 604 return (-1); 605 } 606 607 return (0); 608 } 609 610 static int 611 pci_viona_legacy_config(nvlist_t *nvl, const char *opt) 612 { 613 char *config, *name, *tofree, *value; 614 615 if (opt == NULL) 616 return (0); 617 618 config = tofree = strdup(opt); 619 while ((name = strsep(&config, ",")) != NULL) { 620 value = strchr(name, '='); 621 if (value != NULL) { 622 *value++ = '\0'; 623 set_config_value_node(nvl, name, value); 624 } else { 625 set_config_value_node(nvl, "vnic", name); 626 } 627 } 628 free(tofree); 629 return (0); 630 } 631 632 static int 633 pci_viona_parse_opts(struct pci_viona_softc *sc, nvlist_t *nvl) 634 { 635 const char *value; 636 int err = 0; 637 638 sc->vsc_vq_size = VIONA_RINGSZ; 639 sc->vsc_feature_mask = 0; 640 sc->vsc_linkname[0] = '\0'; 641 642 value = get_config_value_node(nvl, "feature_mask"); 643 if (value != NULL) { 644 long num; 645 646 errno = 0; 647 num = strtol(value, NULL, 0); 648 if (errno != 0 || num < 0) { 649 fprintf(stderr, 650 "viona: invalid mask '%s'", value); 651 } else { 652 sc->vsc_feature_mask = num; 653 } 654 } 655 656 value = get_config_value_node(nvl, "vqsize"); 657 if (value != NULL) { 658 long num; 659 660 errno = 0; 661 num = strtol(value, NULL, 0); 662 if (errno != 0) { 663 fprintf(stderr, 664 "viona: invalid vsqize '%s'", value); 665 err = -1; 666 } else if (num <= 2 || num > 32768) { 667 fprintf(stderr, 668 "viona: vqsize out of range", num); 669 err = -1; 670 } else if ((1 << (ffs(num) - 1)) != num) { 671 fprintf(stderr, 672 "viona: vqsize must be power of 2", num); 673 err = -1; 674 } else { 675 sc->vsc_vq_size = num; 676 } 677 } 678 679 value = get_config_value_node(nvl, "vnic"); 680 if (value == NULL) { 681 fprintf(stderr, "viona: vnic name required"); 682 err = -1; 683 } else { 684 (void) strlcpy(sc->vsc_linkname, value, MAXLINKNAMELEN); 685 } 686 687 DPRINTF("viona=%p dev=%s vqsize=%x feature_mask=%x", sc, 688 sc->vsc_linkname, sc->vsc_vq_size, sc->vsc_feature_mask); 689 return (err); 690 } 691 692 static uint16_t 693 pci_viona_query_mtu(dladm_handle_t handle, datalink_id_t linkid) 694 { 695 char buf[DLADM_PROP_VAL_MAX]; 696 char *propval = buf; 697 uint_t propcnt = 1; 698 699 if (dladm_get_linkprop(handle, linkid, DLADM_PROP_VAL_CURRENT, "mtu", 700 &propval, &propcnt) == DLADM_STATUS_OK && propcnt == 1) { 701 ulong_t parsed = strtoul(buf, NULL, 10); 702 703 /* 704 * The virtio spec notes that for devices implementing 705 * VIRTIO_NET_F_MTU, that the noted MTU MUST be between 706 * 68-65535, inclusive. Although the viona device does not 707 * offer that feature today (the reporting of the MTU to the 708 * guest), we can still use those bounds for how we configure 709 * the limits of the in-kernel emulation. 710 */ 711 if (parsed >= 68 && parsed <= 65535) { 712 return (parsed); 713 } 714 } 715 716 /* Default to 1500 if query is unsuccessful */ 717 return (1500); 718 } 719 720 static int 721 pci_viona_init(struct pci_devinst *pi, nvlist_t *nvl) 722 { 723 dladm_handle_t handle; 724 dladm_status_t status; 725 dladm_vnic_attr_t attr; 726 char errmsg[DLADM_STRSIZE]; 727 char tname[MAXCOMLEN + 1]; 728 int error, i; 729 struct pci_viona_softc *sc; 730 const char *vnic; 731 pthread_t tid; 732 733 if (get_config_bool_default("viona.debug", false)) 734 pci_viona_debug = 1; 735 736 vnic = get_config_value_node(nvl, "vnic"); 737 if (vnic == NULL) { 738 WPRINTF("virtio-viona: vnic required"); 739 return (1); 740 } 741 742 sc = malloc(sizeof (struct pci_viona_softc)); 743 memset(sc, 0, sizeof (struct pci_viona_softc)); 744 745 if (pci_viona_parse_opts(sc, nvl) != 0) { 746 free(sc); 747 return (1); 748 } 749 750 if ((status = dladm_open(&handle)) != DLADM_STATUS_OK) { 751 WPRINTF("could not open /dev/dld"); 752 free(sc); 753 return (1); 754 } 755 756 if ((status = dladm_name2info(handle, sc->vsc_linkname, &sc->vsc_linkid, 757 NULL, NULL, NULL)) != DLADM_STATUS_OK) { 758 WPRINTF("dladm_name2info() for %s failed: %s", vnic, 759 dladm_status2str(status, errmsg)); 760 dladm_close(handle); 761 free(sc); 762 return (1); 763 } 764 765 if ((status = dladm_vnic_info(handle, sc->vsc_linkid, &attr, 766 DLADM_OPT_ACTIVE)) != DLADM_STATUS_OK) { 767 WPRINTF("dladm_vnic_info() for %s failed: %s", vnic, 768 dladm_status2str(status, errmsg)); 769 dladm_close(handle); 770 free(sc); 771 return (1); 772 } 773 memcpy(sc->vsc_macaddr, attr.va_mac_addr, ETHERADDRL); 774 sc->vsc_mtu = pci_viona_query_mtu(handle, sc->vsc_linkid); 775 776 dladm_close(handle); 777 778 error = pci_viona_viona_init(pi->pi_vmctx, sc); 779 if (error != 0) { 780 free(sc); 781 return (1); 782 } 783 784 if (ioctl(sc->vsc_vnafd, VNA_IOC_SET_MTU, sc->vsc_mtu) != 0) { 785 WPRINTF("error setting viona MTU(%u): %s", sc->vsc_mtu, 786 strerror(errno)); 787 } 788 789 error = pthread_create(&tid, NULL, pci_viona_poll_thread, sc); 790 assert(error == 0); 791 snprintf(tname, sizeof (tname), "vionapoll:%s", vnic); 792 pthread_set_name_np(tid, tname); 793 794 /* initialize config space */ 795 pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); 796 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 797 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); 798 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_NETWORK); 799 pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); 800 801 sc->vsc_consts = viona_vi_consts; 802 pthread_mutex_init(&sc->vsc_mtx, NULL); 803 804 /* 805 * The RX and TX queues are handled in the kernel component of 806 * viona; however The control queue is emulated in userspace. 807 */ 808 sc->vsc_queues[VIONA_CTLQ].vq_qsize = pci_viona_qsize(sc, VIONA_CTLQ); 809 810 vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues); 811 sc->vsc_vs.vs_mtx = &sc->vsc_mtx; 812 813 /* 814 * Guests that do not support CTRL_RX_MAC still generally need to 815 * receive multicast packets. Guests that do support this feature will 816 * end up setting this flag indirectly via messages on the control 817 * queue but it does not hurt to default to multicast promiscuity here 818 * and it is what older version of viona did. 819 */ 820 sc->vsc_promisc_mmac = true; 821 pci_viona_eval_promisc(sc); 822 823 /* MSI-X support */ 824 for (i = 0; i < VIONA_MAXQ; i++) 825 sc->vsc_queues[i].vq_msix_idx = VIRTIO_MSI_NO_VECTOR; 826 827 /* BAR 1 used to map MSI-X table and PBA */ 828 if (pci_emul_add_msixcap(pi, VIONA_MAXQ, 1)) { 829 free(sc); 830 return (1); 831 } 832 833 /* BAR 0 for legacy-style virtio register access. */ 834 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VIONA_REGSZ); 835 if (error != 0) { 836 WPRINTF("could not allocate virtio BAR"); 837 free(sc); 838 return (1); 839 } 840 841 /* 842 * Need a legacy interrupt for virtio compliance, even though MSI-X 843 * operation is _strongly_ suggested for adequate performance. 844 */ 845 pci_lintr_request(pi); 846 847 return (0); 848 } 849 850 static uint64_t 851 viona_adjust_offset(struct pci_devinst *pi, uint64_t offset) 852 { 853 /* 854 * Device specific offsets used by guest would change based on 855 * whether MSI-X capability is enabled or not 856 */ 857 if (!pci_msix_enabled(pi)) { 858 if (offset >= VIRTIO_PCI_CONFIG_OFF(0)) { 859 return (offset + (VIRTIO_PCI_CONFIG_OFF(1) - 860 VIRTIO_PCI_CONFIG_OFF(0))); 861 } 862 } 863 864 return (offset); 865 } 866 867 static void 868 pci_viona_ring_set_msix(struct pci_devinst *pi, uint_t ring) 869 { 870 struct pci_viona_softc *sc = pi->pi_arg; 871 struct msix_table_entry mte; 872 uint16_t tab_index; 873 vioc_ring_msi_t vrm; 874 int res; 875 876 if (ring == VIONA_CTLQ) 877 return; 878 879 assert(ring <= VIONA_VQ_TX); 880 881 vrm.rm_index = ring; 882 vrm.rm_addr = 0; 883 vrm.rm_msg = 0; 884 tab_index = sc->vsc_queues[ring].vq_msix_idx; 885 886 if (tab_index != VIRTIO_MSI_NO_VECTOR && sc->vsc_msix_active) { 887 mte = pi->pi_msix.table[tab_index]; 888 if ((mte.vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 889 vrm.rm_addr = mte.addr; 890 vrm.rm_msg = mte.msg_data; 891 } 892 } 893 894 res = ioctl(sc->vsc_vnafd, VNA_IOC_RING_SET_MSI, &vrm); 895 if (res != 0) { 896 WPRINTF("ioctl viona set_msi %d failed %d", ring, errno); 897 } 898 } 899 900 static void 901 pci_viona_lintrupdate(struct pci_devinst *pi) 902 { 903 struct pci_viona_softc *sc = pi->pi_arg; 904 bool msix_on = false; 905 906 pthread_mutex_lock(&sc->vsc_mtx); 907 msix_on = pci_msix_enabled(pi) && (pi->pi_msix.function_mask == 0); 908 if ((sc->vsc_msix_active && !msix_on) || 909 (msix_on && !sc->vsc_msix_active)) { 910 uint_t i; 911 912 sc->vsc_msix_active = msix_on; 913 /* Update in-kernel ring configs */ 914 for (i = 0; i <= VIONA_VQ_TX; i++) { 915 pci_viona_ring_set_msix(pi, i); 916 } 917 } 918 pthread_mutex_unlock(&sc->vsc_mtx); 919 } 920 921 static void 922 pci_viona_msix_update(struct pci_devinst *pi, uint64_t offset) 923 { 924 struct pci_viona_softc *sc = pi->pi_arg; 925 uint_t tab_index, i; 926 927 pthread_mutex_lock(&sc->vsc_mtx); 928 if (!sc->vsc_msix_active) { 929 pthread_mutex_unlock(&sc->vsc_mtx); 930 return; 931 } 932 933 /* 934 * Rather than update every possible MSI-X vector, cheat and use the 935 * offset to calculate the entry within the table. Since this should 936 * only be called when a write to the table succeeds, the index should 937 * be valid. 938 */ 939 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 940 941 for (i = 0; i <= VIONA_VQ_TX; i++) { 942 if (sc->vsc_queues[i].vq_msix_idx != tab_index) { 943 continue; 944 } 945 pci_viona_ring_set_msix(pi, i); 946 } 947 948 pthread_mutex_unlock(&sc->vsc_mtx); 949 } 950 951 static void 952 pci_viona_qnotify(struct pci_viona_softc *sc, int ring) 953 { 954 int error; 955 956 switch (ring) { 957 case VIONA_TXQ: 958 case VIONA_RXQ: 959 error = ioctl(sc->vsc_vnafd, VNA_IOC_RING_KICK, ring); 960 if (error != 0) { 961 WPRINTF("ioctl viona ring %d kick failed %d", 962 ring, errno); 963 } 964 break; 965 case VIONA_CTLQ: { 966 struct vqueue_info *vq = &sc->vsc_queues[VIONA_CTLQ]; 967 968 if (vq_has_descs(vq)) 969 pci_viona_process_ctrlq(vq); 970 break; 971 } 972 } 973 } 974 975 static void 976 pci_viona_baraddr(struct pci_devinst *pi, int baridx, int enabled, 977 uint64_t address) 978 { 979 struct pci_viona_softc *sc = pi->pi_arg; 980 uint64_t ioport; 981 int error; 982 983 if (baridx != 0) 984 return; 985 986 if (enabled == 0) { 987 error = ioctl(sc->vsc_vnafd, VNA_IOC_SET_NOTIFY_IOP, 0); 988 if (error != 0) 989 WPRINTF("uninstall ioport hook failed %d", errno); 990 return; 991 } 992 993 /* 994 * Install ioport hook for virtqueue notification. 995 * This is part of the virtio common configuration area so the 996 * address does not change with MSI-X status. 997 */ 998 ioport = address + VIRTIO_PCI_QUEUE_NOTIFY; 999 error = ioctl(sc->vsc_vnafd, VNA_IOC_SET_NOTIFY_IOP, ioport); 1000 if (error != 0) { 1001 WPRINTF("install ioport hook at %x failed %d", 1002 ioport, errno); 1003 } 1004 } 1005 1006 static void 1007 pci_viona_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size, 1008 uint64_t value) 1009 { 1010 struct pci_viona_softc *sc = pi->pi_arg; 1011 void *ptr; 1012 int err = 0; 1013 1014 if (baridx == pci_msix_table_bar(pi) || 1015 baridx == pci_msix_pba_bar(pi)) { 1016 if (pci_emul_msix_twrite(pi, offset, size, value) == 0) { 1017 pci_viona_msix_update(pi, offset); 1018 } 1019 return; 1020 } 1021 1022 assert(baridx == 0); 1023 1024 if (offset + size > pci_viona_iosize(pi)) { 1025 DPRINTF("viona_write: 2big, offset %ld size %d", 1026 offset, size); 1027 return; 1028 } 1029 1030 pthread_mutex_lock(&sc->vsc_mtx); 1031 1032 offset = viona_adjust_offset(pi, offset); 1033 1034 switch (offset) { 1035 case VIRTIO_PCI_GUEST_FEATURES: 1036 assert(size == 4); 1037 value &= ~(sc->vsc_feature_mask); 1038 err = ioctl(sc->vsc_vnafd, VNA_IOC_SET_FEATURES, &value); 1039 if (err != 0) { 1040 WPRINTF("ioctl feature negotiation returned err = %d", 1041 errno); 1042 } else { 1043 sc->vsc_vs.vs_negotiated_caps = value; 1044 } 1045 break; 1046 case VIRTIO_PCI_QUEUE_PFN: 1047 assert(size == 4); 1048 pci_viona_ring_init(sc, value); 1049 break; 1050 case VIRTIO_PCI_QUEUE_SEL: 1051 assert(size == 2); 1052 assert(value < VIONA_MAXQ); 1053 sc->vsc_vs.vs_curq = value; 1054 break; 1055 case VIRTIO_PCI_QUEUE_NOTIFY: 1056 assert(size == 2); 1057 assert(value < VIONA_MAXQ); 1058 pci_viona_qnotify(sc, value); 1059 break; 1060 case VIRTIO_PCI_STATUS: 1061 assert(size == 1); 1062 pci_viona_update_status(sc, value); 1063 break; 1064 case VIRTIO_MSI_CONFIG_VECTOR: 1065 assert(size == 2); 1066 sc->vsc_vs.vs_msix_cfg_idx = value; 1067 break; 1068 case VIRTIO_MSI_QUEUE_VECTOR: 1069 assert(size == 2); 1070 assert(sc->vsc_vs.vs_curq < VIONA_MAXQ); 1071 sc->vsc_queues[sc->vsc_vs.vs_curq].vq_msix_idx = value; 1072 pci_viona_ring_set_msix(pi, sc->vsc_vs.vs_curq); 1073 break; 1074 case VIONA_R_CFG0: 1075 case VIONA_R_CFG1: 1076 case VIONA_R_CFG2: 1077 case VIONA_R_CFG3: 1078 case VIONA_R_CFG4: 1079 case VIONA_R_CFG5: 1080 assert((size + offset) <= (VIONA_R_CFG5 + 1)); 1081 ptr = &sc->vsc_macaddr[offset - VIONA_R_CFG0]; 1082 /* 1083 * The driver is allowed to change the MAC address 1084 */ 1085 sc->vsc_macaddr[offset - VIONA_R_CFG0] = value; 1086 if (size == 1) { 1087 *(uint8_t *)ptr = value; 1088 } else if (size == 2) { 1089 *(uint16_t *)ptr = value; 1090 } else { 1091 *(uint32_t *)ptr = value; 1092 } 1093 break; 1094 case VIRTIO_PCI_HOST_FEATURES: 1095 case VIRTIO_PCI_QUEUE_NUM: 1096 case VIRTIO_PCI_ISR: 1097 case VIONA_R_CFG6: 1098 case VIONA_R_CFG7: 1099 DPRINTF("viona: write to readonly reg %ld", offset); 1100 break; 1101 default: 1102 DPRINTF("viona: unknown i/o write offset %ld", offset); 1103 value = 0; 1104 break; 1105 } 1106 1107 pthread_mutex_unlock(&sc->vsc_mtx); 1108 } 1109 1110 static uint64_t 1111 pci_viona_read(struct pci_devinst *pi, int baridx, uint64_t offset, int size) 1112 { 1113 struct pci_viona_softc *sc = pi->pi_arg; 1114 void *ptr; 1115 uint64_t value; 1116 int err = 0; 1117 1118 if (baridx == pci_msix_table_bar(pi) || 1119 baridx == pci_msix_pba_bar(pi)) { 1120 return (pci_emul_msix_tread(pi, offset, size)); 1121 } 1122 1123 assert(baridx == 0); 1124 1125 if (offset + size > pci_viona_iosize(pi)) { 1126 DPRINTF("viona_read: 2big, offset %ld size %d", 1127 offset, size); 1128 return (0); 1129 } 1130 1131 pthread_mutex_lock(&sc->vsc_mtx); 1132 1133 offset = viona_adjust_offset(pi, offset); 1134 1135 switch (offset) { 1136 case VIRTIO_PCI_HOST_FEATURES: 1137 assert(size == 4); 1138 err = ioctl(sc->vsc_vnafd, VNA_IOC_GET_FEATURES, &value); 1139 if (err != 0) { 1140 WPRINTF("ioctl get host features returned err = %d", 1141 errno); 1142 } 1143 value |= VIONA_S_HOSTCAPS_USERSPACE; 1144 value &= ~sc->vsc_feature_mask; 1145 sc->vsc_consts.vc_hv_caps = value; 1146 break; 1147 case VIRTIO_PCI_GUEST_FEATURES: 1148 assert(size == 4); 1149 value = sc->vsc_vs.vs_negotiated_caps; /* XXX never read ? */ 1150 break; 1151 case VIRTIO_PCI_QUEUE_PFN: 1152 assert(size == 4); 1153 value = sc->vsc_queues[sc->vsc_vs.vs_curq].vq_pfn >> VRING_PFN; 1154 break; 1155 case VIRTIO_PCI_QUEUE_NUM: 1156 assert(size == 2); 1157 value = pci_viona_qsize(sc, sc->vsc_vs.vs_curq); 1158 break; 1159 case VIRTIO_PCI_QUEUE_SEL: 1160 assert(size == 2); 1161 value = sc->vsc_vs.vs_curq; /* XXX never read ? */ 1162 break; 1163 case VIRTIO_PCI_QUEUE_NOTIFY: 1164 assert(size == 2); 1165 value = sc->vsc_vs.vs_curq; /* XXX never read ? */ 1166 break; 1167 case VIRTIO_PCI_STATUS: 1168 assert(size == 1); 1169 value = sc->vsc_vs.vs_status; 1170 break; 1171 case VIRTIO_PCI_ISR: 1172 assert(size == 1); 1173 value = sc->vsc_vs.vs_isr; 1174 sc->vsc_vs.vs_isr = 0; /* a read clears this flag */ 1175 if (value != 0) { 1176 pci_lintr_deassert(pi); 1177 } 1178 break; 1179 case VIRTIO_MSI_CONFIG_VECTOR: 1180 assert(size == 2); 1181 value = sc->vsc_vs.vs_msix_cfg_idx; 1182 break; 1183 case VIRTIO_MSI_QUEUE_VECTOR: 1184 assert(size == 2); 1185 assert(sc->vsc_vs.vs_curq < VIONA_MAXQ); 1186 value = sc->vsc_queues[sc->vsc_vs.vs_curq].vq_msix_idx; 1187 break; 1188 case VIONA_R_CFG0: 1189 case VIONA_R_CFG1: 1190 case VIONA_R_CFG2: 1191 case VIONA_R_CFG3: 1192 case VIONA_R_CFG4: 1193 case VIONA_R_CFG5: 1194 assert((size + offset) <= (VIONA_R_CFG5 + 1)); 1195 ptr = &sc->vsc_macaddr[offset - VIONA_R_CFG0]; 1196 if (size == 1) { 1197 value = *(uint8_t *)ptr; 1198 } else if (size == 2) { 1199 value = *(uint16_t *)ptr; 1200 } else { 1201 value = *(uint32_t *)ptr; 1202 } 1203 break; 1204 case VIONA_R_CFG6: 1205 assert(size != 4); 1206 value = 0x01; /* XXX link always up */ 1207 break; 1208 case VIONA_R_CFG7: 1209 assert(size == 1); 1210 value = 0; /* XXX link status in LSB */ 1211 break; 1212 default: 1213 DPRINTF("viona: unknown i/o read offset %ld", offset); 1214 value = 0; 1215 break; 1216 } 1217 1218 pthread_mutex_unlock(&sc->vsc_mtx); 1219 1220 return (value); 1221 } 1222 1223 struct pci_devemu pci_de_viona = { 1224 .pe_emu = "virtio-net-viona", 1225 .pe_init = pci_viona_init, 1226 .pe_legacy_config = pci_viona_legacy_config, 1227 .pe_barwrite = pci_viona_write, 1228 .pe_barread = pci_viona_read, 1229 .pe_baraddr = pci_viona_baraddr, 1230 .pe_lintrupdate = pci_viona_lintrupdate 1231 }; 1232 PCI_EMUL_SET(pci_de_viona); 1233