1 /* 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 /* 27 * This file and its contents are supplied under the terms of the 28 * Common Development and Distribution License ("CDDL"), version 1.0. 29 * You may only use this file in accordance with the terms of version 30 * 1.0 of the CDDL. 31 * 32 * A full copy of the text of the CDDL should have accompanied this 33 * source. A copy of the CDDL is also available via the Internet at 34 * http://www.illumos.org/license/CDDL. 35 * 36 * Copyright 2015 Pluribus Networks Inc. 37 * Copyright 2019 Joyent, Inc. 38 * Copyright 2022 OmniOS Community Edition (OmniOSce) Association. 
 */

#include <sys/cdefs.h>

#include <sys/param.h>
#include <sys/linker_set.h>
#include <sys/ioctl.h>
#include <sys/uio.h>
#include <sys/viona_io.h>

#include <errno.h>
#include <fcntl.h>
#include <stdio.h>
#include <stdlib.h>
#include <stdint.h>
#include <string.h>
#include <strings.h>
#include <unistd.h>
#include <assert.h>
#include <pthread.h>
#include <signal.h>
#include <stdbool.h>
#include <poll.h>
#include <libdladm.h>
#include <libdllink.h>
#include <libdlvnic.h>

#include <machine/vmm.h>
#include <vmmapi.h>

#include "bhyverun.h"
#include "config.h"
#include "debug.h"
#include "pci_emul.h"
#include "virtio.h"
#include "iov.h"
#include "virtio_net.h"

/* Default size of the kernel-managed RX/TX rings (entries). */
#define	VIONA_RINGSZ		1024
/* Size of the userspace-emulated control queue (entries). */
#define	VIONA_CTLQ_SIZE		64
/* Maximum descriptor segments accepted in a single control-queue chain. */
#define	VIONA_CTLQ_MAXSEGS	32

/*
 * PCI config-space register offsets
 *
 * CFG0-CFG5 hold the MAC address; CFG6/CFG7 are the (read-only) link
 * status bytes.
 */
#define	VIONA_R_CFG0	24
#define	VIONA_R_CFG1	25
#define	VIONA_R_CFG2	26
#define	VIONA_R_CFG3	27
#define	VIONA_R_CFG4	28
#define	VIONA_R_CFG5	29
#define	VIONA_R_CFG6	30
#define	VIONA_R_CFG7	31
#define	VIONA_R_MAX	31

#define	VIONA_REGSZ	(VIONA_R_MAX + 1)

/*
 * Queue definitions.
 */
#define	VIONA_RXQ	0
#define	VIONA_TXQ	1
#define	VIONA_CTLQ	2

#define	VIONA_MAXQ	3

/*
 * Supplementary host capabilities provided in the userspace component.
 * (The RX/TX data path capabilities come from the in-kernel viona driver;
 * the control queue is emulated here, so its feature bits are added here.)
 */
#define	VIONA_S_HOSTCAPS_USERSPACE	(		\
	VIRTIO_NET_F_CTRL_VQ |				\
	VIRTIO_NET_F_CTRL_RX)

/*
 * Debug printf
 */
static volatile int pci_viona_debug;
#define	DPRINTF(fmt, arg...)						\
	do {								\
		if (pci_viona_debug) {					\
			FPRINTLN(stdout, fmt, ##arg);			\
			fflush(stdout);					\
		}							\
	} while (0)
#define	WPRINTF(fmt, arg...)	FPRINTLN(stderr, fmt, ##arg)

/*
 * Per-device softc
 */
struct pci_viona_softc {
	struct virtio_softc	vsc_vs;
	struct virtio_consts	vsc_consts;
	struct vqueue_info	vsc_queues[VIONA_MAXQ];
	pthread_mutex_t		vsc_mtx;

	datalink_id_t	vsc_linkid;
	/* fd for the /dev/viona instance backing this device */
	int		vsc_vnafd;

	/* Configurable parameters */
	char		vsc_linkname[MAXLINKNAMELEN];
	uint32_t	vsc_feature_mask;
	uint16_t	vsc_vq_size;

	uint8_t		vsc_macaddr[6];

	bool		vsc_resetting;
	bool		vsc_msix_active;

	viona_promisc_t	vsc_promisc;		/* Current promisc mode */
	bool		vsc_promisc_promisc;	/* PROMISC enabled */
	bool		vsc_promisc_allmulti;	/* ALLMULTI enabled */
	bool		vsc_promisc_umac;	/* unicast MACs sent */
	bool		vsc_promisc_mmac;	/* multicast MACs sent */
};

static struct virtio_consts viona_vi_consts = {
	.vc_name		= "viona",
	.vc_nvq			= VIONA_MAXQ,
	/*
	 * We use the common bhyve virtio framework so that we can call
	 * the utility functions to work with the queues handled in userspace.
	 * The framework PCI read/write functions are not used so these
	 * callbacks will not be invoked.
	 */
	.vc_cfgsize		= 0,
	.vc_reset		= NULL,
	.vc_qnotify		= NULL,
	.vc_cfgread		= NULL,
	.vc_cfgwrite		= NULL,
	.vc_apply_features	= NULL,
	/*
	 * The following field is populated using the response from the
	 * viona driver during initialisation, augmented with the additional
	 * capabilities emulated in userspace.
	 */
	.vc_hv_caps		= 0,
};

/*
 * Return the size of IO BAR that maps virtio header and device specific
 * region. The size would vary depending on whether MSI-X is enabled or
 * not
181 */ 182 static uint64_t 183 pci_viona_iosize(struct pci_devinst *pi) 184 { 185 if (pci_msix_enabled(pi)) { 186 return (VIONA_REGSZ); 187 } else { 188 return (VIONA_REGSZ - 189 (VIRTIO_PCI_CONFIG_OFF(1) - VIRTIO_PCI_CONFIG_OFF(0))); 190 } 191 } 192 193 static uint16_t 194 pci_viona_qsize(struct pci_viona_softc *sc, int qnum) 195 { 196 if (qnum == VIONA_CTLQ) 197 return (VIONA_CTLQ_SIZE); 198 199 return (sc->vsc_vq_size); 200 } 201 202 static void 203 pci_viona_ring_reset(struct pci_viona_softc *sc, int ring) 204 { 205 assert(ring < VIONA_MAXQ); 206 207 switch (ring) { 208 case VIONA_RXQ: 209 case VIONA_TXQ: 210 break; 211 case VIONA_CTLQ: 212 default: 213 return; 214 } 215 216 for (;;) { 217 int res; 218 219 res = ioctl(sc->vsc_vnafd, VNA_IOC_RING_RESET, ring); 220 if (res == 0) { 221 break; 222 } else if (errno != EINTR) { 223 WPRINTF("ioctl viona ring %d reset failed %d", 224 ring, errno); 225 return; 226 } 227 } 228 } 229 230 static void 231 pci_viona_update_status(struct pci_viona_softc *sc, uint32_t value) 232 { 233 234 if (value == 0) { 235 DPRINTF("viona: device reset requested !"); 236 237 vi_reset_dev(&sc->vsc_vs); 238 pci_viona_ring_reset(sc, VIONA_RXQ); 239 pci_viona_ring_reset(sc, VIONA_TXQ); 240 } 241 242 sc->vsc_vs.vs_status = value; 243 } 244 245 static const char * 246 pci_viona_promisc_descr(viona_promisc_t mode) 247 { 248 switch (mode) { 249 case VIONA_PROMISC_NONE: 250 return ("none"); 251 case VIONA_PROMISC_MULTI: 252 return ("multicast"); 253 case VIONA_PROMISC_ALL: 254 return ("all"); 255 default: 256 abort(); 257 } 258 } 259 260 static int 261 pci_viona_eval_promisc(struct pci_viona_softc *sc) 262 { 263 viona_promisc_t mode = VIONA_PROMISC_NONE; 264 int err = 0; 265 266 /* 267 * If the guest has explicitly requested promiscuous mode or has sent a 268 * non-empty unicast MAC address table, then set viona to promiscuous 269 * mode. 
Otherwise, if the guest has explicitly requested multicast 270 * promiscuity or has sent a non-empty multicast MAC address table, 271 * then set viona to multicast promiscuous mode. 272 */ 273 if (sc->vsc_promisc_promisc || sc->vsc_promisc_umac) 274 mode = VIONA_PROMISC_ALL; 275 else if (sc->vsc_promisc_allmulti || sc->vsc_promisc_mmac) 276 mode = VIONA_PROMISC_MULTI; 277 278 if (mode != sc->vsc_promisc) { 279 DPRINTF("viona: setting promiscuous mode to %d (%s)", 280 mode, pci_viona_promisc_descr(mode)); 281 DPRINTF(" promisc=%u, umac=%u, allmulti=%u, mmac=%u", 282 sc->vsc_promisc_promisc, sc->vsc_promisc_umac, 283 sc->vsc_promisc_allmulti, sc->vsc_promisc_mmac); 284 285 err = ioctl(sc->vsc_vnafd, VNA_IOC_SET_PROMISC, mode); 286 if (err == 0) 287 sc->vsc_promisc = mode; 288 else 289 WPRINTF("ioctl viona set promisc failed %d", errno); 290 } 291 292 return (err); 293 } 294 295 static uint8_t 296 pci_viona_control_rx(struct vqueue_info *vq, const virtio_net_ctrl_hdr_t *hdr, 297 struct iovec *iov, size_t niov) 298 { 299 struct pci_viona_softc *sc = (struct pci_viona_softc *)vq->vq_vs; 300 uint8_t v; 301 302 if (iov[0].iov_len != sizeof (uint8_t) || niov != 1) { 303 EPRINTLN("viona: bad control RX data"); 304 return (VIRTIO_NET_CQ_ERR); 305 } 306 307 v = *(uint8_t *)iov[0].iov_base; 308 309 switch (hdr->vnch_command) { 310 case VIRTIO_NET_CTRL_RX_PROMISC: 311 DPRINTF("viona: ctrl RX promisc %d", v); 312 sc->vsc_promisc_promisc = (v != 0); 313 break; 314 case VIRTIO_NET_CTRL_RX_ALLMULTI: 315 DPRINTF("viona: ctrl RX allmulti %d", v); 316 sc->vsc_promisc_allmulti = (v != 0); 317 break; 318 default: 319 /* 320 * VIRTIO_NET_F_CTRL_RX_EXTRA was not offered so no other 321 * commands are expected. 
322 */ 323 EPRINTLN("viona: unrecognised RX control cmd %u", 324 hdr->vnch_command); 325 return (VIRTIO_NET_CQ_ERR); 326 } 327 328 if (pci_viona_eval_promisc(sc) == 0) 329 return (VIRTIO_NET_CQ_OK); 330 return (VIRTIO_NET_CQ_ERR); 331 } 332 333 static void 334 pci_viona_control_mac_dump(const char *tag, const struct iovec *iov) 335 { 336 virtio_net_ctrl_mac_t *table = (virtio_net_ctrl_mac_t *)iov->iov_base; 337 ether_addr_t *mac = &table->vncm_mac; 338 339 DPRINTF("-- %s MAC TABLE (entries: %u)", tag, table->vncm_entries); 340 341 if (table->vncm_entries * ETHERADDRL != 342 iov->iov_len - sizeof (table->vncm_entries)) { 343 DPRINTF(" Bad table size %u", iov->iov_len); 344 return; 345 } 346 347 for (uint32_t i = 0; i < table->vncm_entries; i++) { 348 DPRINTF(" [%2d] %s", i, ether_ntoa((struct ether_addr *)mac)); 349 mac++; 350 } 351 } 352 353 static uint8_t 354 pci_viona_control_mac(struct vqueue_info *vq, const virtio_net_ctrl_hdr_t *hdr, 355 struct iovec *iov, size_t niov) 356 { 357 struct pci_viona_softc *sc = (struct pci_viona_softc *)vq->vq_vs; 358 359 switch (hdr->vnch_command) { 360 case VIRTIO_NET_CTRL_MAC_TABLE_SET: { 361 virtio_net_ctrl_mac_t *table; 362 363 DPRINTF("viona: ctrl MAC table set"); 364 365 if (niov != 2) { 366 EPRINTLN("viona: bad control MAC data"); 367 return (VIRTIO_NET_CQ_ERR); 368 } 369 370 /* 371 * We advertise VIRTIO_NET_F_CTRL_RX and therefore need to 372 * accept VIRTIO_NET_CTRL_MAC, but we don't support passing 373 * changes in the MAC address lists down to viona. 374 * Instead, we set flags to indicate if the guest has sent 375 * any MAC addresses for each table, and use these to determine 376 * the resulting promiscuous mode, see pci_viona_eval_promisc() 377 * above. 
378 */ 379 380 /* Unicast MAC table */ 381 table = (virtio_net_ctrl_mac_t *)iov[0].iov_base; 382 sc->vsc_promisc_umac = (table->vncm_entries != 0); 383 if (pci_viona_debug) 384 pci_viona_control_mac_dump("UNICAST", &iov[0]); 385 386 /* Multicast MAC table */ 387 table = (virtio_net_ctrl_mac_t *)iov[1].iov_base; 388 sc->vsc_promisc_mmac = (table->vncm_entries != 0); 389 if (pci_viona_debug) 390 pci_viona_control_mac_dump("MULTICAST", &iov[1]); 391 392 break; 393 } 394 case VIRTIO_NET_CTRL_MAC_ADDR_SET: 395 /* disallow setting the primary filter MAC address */ 396 DPRINTF("viona: ctrl MAC addr set %d", niov); 397 return (VIRTIO_NET_CQ_ERR); 398 default: 399 EPRINTLN("viona: unrecognised MAC control cmd %u", 400 hdr->vnch_command); 401 return (VIRTIO_NET_CQ_ERR); 402 } 403 404 if (pci_viona_eval_promisc(sc) == 0) 405 return (VIRTIO_NET_CQ_OK); 406 return (VIRTIO_NET_CQ_ERR); 407 } 408 409 static void 410 pci_viona_control(struct vqueue_info *vq) 411 { 412 struct iovec iov[VIONA_CTLQ_MAXSEGS + 1]; 413 const virtio_net_ctrl_hdr_t *hdr; 414 struct iovec *siov = iov; 415 struct vi_req req = { 0 }; 416 uint8_t *ackp; 417 size_t nsiov; 418 uint32_t len; 419 int n; 420 421 n = vq_getchain(vq, iov, VIONA_CTLQ_MAXSEGS, &req); 422 423 assert(n >= 1 && n <= VIONA_CTLQ_MAXSEGS); 424 425 /* 426 * Since we have not negotiated VIRTIO_F_ANY_LAYOUT, we expect the 427 * control message to be laid out in at least three descriptors as 428 * follows: 429 * header - sizeof (virtio_net_ctrl_hdr_t) 430 * data[] - at least one descriptor, varying size 431 * ack - uint8_t, flagged as writable 432 * Check the incoming message to make sure it matches this layout and 433 * drop the entire chain if not. 
434 */ 435 if (n < 3 || req.writable != 1 || req.readable + 1 != n || 436 iov[req.readable].iov_len != sizeof (uint8_t)) { 437 EPRINTLN("viona: bad control chain, len=%d, w=%d, r=%d", 438 n, req.writable, req.readable); 439 goto drop; 440 } 441 442 hdr = (const virtio_net_ctrl_hdr_t *)iov[0].iov_base; 443 if (iov[0].iov_len < sizeof (virtio_net_ctrl_hdr_t)) { 444 EPRINTLN("viona: control header too short: %u", iov[0].iov_len); 445 goto drop; 446 } 447 448 /* 449 * Writable iovecs start at iov[req.readable], and we've already 450 * checked that there is only one writable, it's at the end, and the 451 * right size; it's the acknowledgement byte. 452 */ 453 ackp = (uint8_t *)iov[req.readable].iov_base; 454 455 siov = &iov[1]; 456 nsiov = n - 2; 457 458 switch (hdr->vnch_class) { 459 case VIRTIO_NET_CTRL_RX: 460 *ackp = pci_viona_control_rx(vq, hdr, siov, nsiov); 461 break; 462 case VIRTIO_NET_CTRL_MAC: 463 *ackp = pci_viona_control_mac(vq, hdr, siov, nsiov); 464 break; 465 default: 466 EPRINTLN("viona: unrecognised control class %u, cmd %u", 467 hdr->vnch_class, hdr->vnch_command); 468 *ackp = VIRTIO_NET_CQ_ERR; 469 break; 470 } 471 472 drop: 473 len = 0; 474 for (uint_t i = 0; i < n; i++) 475 len += iov[i].iov_len; 476 477 vq_relchain(vq, req.idx, len); 478 } 479 480 static void 481 pci_viona_process_ctrlq(struct vqueue_info *vq) 482 { 483 for (;;) { 484 vq_kick_disable(vq); 485 486 while (vq_has_descs(vq)) 487 pci_viona_control(vq); 488 489 vq_kick_enable(vq); 490 491 /* 492 * One more check in case a late addition raced with 493 * re-enabling kicks. Note that vq_kick_enable() includes a 494 * memory barrier. 
495 */ 496 497 if (!vq_has_descs(vq)) 498 break; 499 } 500 501 vq_endchains(vq, /* used_all_avail= */1); 502 } 503 504 static void * 505 pci_viona_poll_thread(void *param) 506 { 507 struct pci_viona_softc *sc = param; 508 pollfd_t pollset; 509 const int fd = sc->vsc_vnafd; 510 511 pollset.fd = fd; 512 pollset.events = POLLRDBAND; 513 514 for (;;) { 515 if (poll(&pollset, 1, -1) < 0) { 516 if (errno == EINTR || errno == EAGAIN) { 517 continue; 518 } else { 519 WPRINTF("pci_viona_poll_thread poll() error %d", 520 errno); 521 break; 522 } 523 } 524 if (pollset.revents & POLLRDBAND) { 525 vioc_intr_poll_t vip; 526 uint_t i; 527 int res; 528 bool assert_lintr = false; 529 const bool do_msix = pci_msix_enabled(sc->vsc_vs.vs_pi); 530 531 res = ioctl(fd, VNA_IOC_INTR_POLL, &vip); 532 for (i = 0; res > 0 && i < VIONA_VQ_MAX; i++) { 533 if (vip.vip_status[i] == 0) { 534 continue; 535 } 536 if (do_msix) { 537 pci_generate_msix(sc->vsc_vs.vs_pi, 538 sc->vsc_queues[i].vq_msix_idx); 539 } else { 540 assert_lintr = true; 541 } 542 res = ioctl(fd, VNA_IOC_RING_INTR_CLR, i); 543 if (res != 0) { 544 WPRINTF("ioctl viona vq %d intr " 545 "clear failed %d", i, errno); 546 } 547 } 548 if (assert_lintr) { 549 pthread_mutex_lock(&sc->vsc_mtx); 550 sc->vsc_vs.vs_isr |= VIRTIO_PCI_ISR_INTR; 551 pci_lintr_assert(sc->vsc_vs.vs_pi); 552 pthread_mutex_unlock(&sc->vsc_mtx); 553 } 554 } 555 } 556 557 pthread_exit(NULL); 558 } 559 560 static void 561 pci_viona_ring_init(struct pci_viona_softc *sc, uint64_t pfn) 562 { 563 int qnum = sc->vsc_vs.vs_curq; 564 vioc_ring_init_t vna_ri; 565 int error; 566 567 assert(qnum < VIONA_MAXQ); 568 569 if (qnum == VIONA_CTLQ) { 570 vi_vq_init(&sc->vsc_vs, pfn); 571 return; 572 } 573 574 sc->vsc_queues[qnum].vq_pfn = (pfn << VRING_PFN); 575 vna_ri.ri_index = qnum; 576 vna_ri.ri_qsize = pci_viona_qsize(sc, qnum); 577 vna_ri.ri_qaddr = (pfn << VRING_PFN); 578 error = ioctl(sc->vsc_vnafd, VNA_IOC_RING_INIT, &vna_ri); 579 580 if (error != 0) { 581 WPRINTF("ioctl 
viona ring %u init failed %d", qnum, errno); 582 } 583 } 584 585 static int 586 pci_viona_viona_init(struct vmctx *ctx, struct pci_viona_softc *sc) 587 { 588 vioc_create_t vna_create; 589 int error; 590 591 sc->vsc_vnafd = open("/dev/viona", O_RDWR | O_EXCL); 592 if (sc->vsc_vnafd == -1) { 593 WPRINTF("open viona ctl failed: %d", errno); 594 return (-1); 595 } 596 597 vna_create.c_linkid = sc->vsc_linkid; 598 vna_create.c_vmfd = vm_get_device_fd(ctx); 599 error = ioctl(sc->vsc_vnafd, VNA_IOC_CREATE, &vna_create); 600 if (error != 0) { 601 (void) close(sc->vsc_vnafd); 602 WPRINTF("ioctl viona create failed %d", errno); 603 return (-1); 604 } 605 606 return (0); 607 } 608 609 static int 610 pci_viona_legacy_config(nvlist_t *nvl, const char *opt) 611 { 612 char *config, *name, *tofree, *value; 613 614 if (opt == NULL) 615 return (0); 616 617 config = tofree = strdup(opt); 618 while ((name = strsep(&config, ",")) != NULL) { 619 value = strchr(name, '='); 620 if (value != NULL) { 621 *value++ = '\0'; 622 set_config_value_node(nvl, name, value); 623 } else { 624 set_config_value_node(nvl, "vnic", name); 625 } 626 } 627 free(tofree); 628 return (0); 629 } 630 631 static int 632 pci_viona_parse_opts(struct pci_viona_softc *sc, nvlist_t *nvl) 633 { 634 const char *value; 635 int err = 0; 636 637 sc->vsc_vq_size = VIONA_RINGSZ; 638 sc->vsc_feature_mask = 0; 639 sc->vsc_linkname[0] = '\0'; 640 641 value = get_config_value_node(nvl, "feature_mask"); 642 if (value != NULL) { 643 long num; 644 645 errno = 0; 646 num = strtol(value, NULL, 0); 647 if (errno != 0 || num < 0) { 648 fprintf(stderr, 649 "viona: invalid mask '%s'", value); 650 } else { 651 sc->vsc_feature_mask = num; 652 } 653 } 654 655 value = get_config_value_node(nvl, "vqsize"); 656 if (value != NULL) { 657 long num; 658 659 errno = 0; 660 num = strtol(value, NULL, 0); 661 if (errno != 0) { 662 fprintf(stderr, 663 "viona: invalid vsqize '%s'", value); 664 err = -1; 665 } else if (num <= 2 || num > 32768) { 666 
fprintf(stderr, 667 "viona: vqsize out of range", num); 668 err = -1; 669 } else if ((1 << (ffs(num) - 1)) != num) { 670 fprintf(stderr, 671 "viona: vqsize must be power of 2", num); 672 err = -1; 673 } else { 674 sc->vsc_vq_size = num; 675 } 676 } 677 678 value = get_config_value_node(nvl, "vnic"); 679 if (value == NULL) { 680 fprintf(stderr, "viona: vnic name required"); 681 err = -1; 682 } else { 683 (void) strlcpy(sc->vsc_linkname, value, MAXLINKNAMELEN); 684 } 685 686 DPRINTF("viona=%p dev=%s vqsize=%x feature_mask=%x", sc, 687 sc->vsc_linkname, sc->vsc_vq_size, sc->vsc_feature_mask); 688 return (err); 689 } 690 691 static int 692 pci_viona_init(struct pci_devinst *pi, nvlist_t *nvl) 693 { 694 dladm_handle_t handle; 695 dladm_status_t status; 696 dladm_vnic_attr_t attr; 697 char errmsg[DLADM_STRSIZE]; 698 char tname[MAXCOMLEN + 1]; 699 int error, i; 700 struct pci_viona_softc *sc; 701 const char *vnic; 702 pthread_t tid; 703 704 if (get_config_bool_default("viona.debug", false)) 705 pci_viona_debug = 1; 706 707 vnic = get_config_value_node(nvl, "vnic"); 708 if (vnic == NULL) { 709 WPRINTF("virtio-viona: vnic required"); 710 return (1); 711 } 712 713 sc = malloc(sizeof (struct pci_viona_softc)); 714 memset(sc, 0, sizeof (struct pci_viona_softc)); 715 716 if (pci_viona_parse_opts(sc, nvl) != 0) { 717 free(sc); 718 return (1); 719 } 720 721 if ((status = dladm_open(&handle)) != DLADM_STATUS_OK) { 722 WPRINTF("could not open /dev/dld"); 723 free(sc); 724 return (1); 725 } 726 727 if ((status = dladm_name2info(handle, sc->vsc_linkname, &sc->vsc_linkid, 728 NULL, NULL, NULL)) != DLADM_STATUS_OK) { 729 WPRINTF("dladm_name2info() for %s failed: %s", vnic, 730 dladm_status2str(status, errmsg)); 731 dladm_close(handle); 732 free(sc); 733 return (1); 734 } 735 736 if ((status = dladm_vnic_info(handle, sc->vsc_linkid, &attr, 737 DLADM_OPT_ACTIVE)) != DLADM_STATUS_OK) { 738 WPRINTF("dladm_vnic_info() for %s failed: %s", vnic, 739 dladm_status2str(status, errmsg)); 740 
dladm_close(handle); 741 free(sc); 742 return (1); 743 } 744 745 memcpy(sc->vsc_macaddr, attr.va_mac_addr, ETHERADDRL); 746 747 dladm_close(handle); 748 749 error = pci_viona_viona_init(pi->pi_vmctx, sc); 750 if (error != 0) { 751 free(sc); 752 return (1); 753 } 754 755 error = pthread_create(&tid, NULL, pci_viona_poll_thread, sc); 756 assert(error == 0); 757 snprintf(tname, sizeof (tname), "vionapoll:%s", vnic); 758 pthread_set_name_np(tid, tname); 759 760 /* initialize config space */ 761 pci_set_cfgdata16(pi, PCIR_DEVICE, VIRTIO_DEV_NET); 762 pci_set_cfgdata16(pi, PCIR_VENDOR, VIRTIO_VENDOR); 763 pci_set_cfgdata8(pi, PCIR_CLASS, PCIC_NETWORK); 764 pci_set_cfgdata16(pi, PCIR_SUBDEV_0, VIRTIO_ID_NETWORK); 765 pci_set_cfgdata16(pi, PCIR_SUBVEND_0, VIRTIO_VENDOR); 766 767 sc->vsc_consts = viona_vi_consts; 768 pthread_mutex_init(&sc->vsc_mtx, NULL); 769 770 /* 771 * The RX and TX queues are handled in the kernel component of 772 * viona; however The control queue is emulated in userspace. 773 */ 774 sc->vsc_queues[VIONA_CTLQ].vq_qsize = pci_viona_qsize(sc, VIONA_CTLQ); 775 776 vi_softc_linkup(&sc->vsc_vs, &sc->vsc_consts, sc, pi, sc->vsc_queues); 777 sc->vsc_vs.vs_mtx = &sc->vsc_mtx; 778 779 /* 780 * Guests that do not support CTRL_RX_MAC still generally need to 781 * receive multicast packets. Guests that do support this feature will 782 * end up setting this flag indirectly via messages on the control 783 * queue but it does not hurt to default to multicast promiscuity here 784 * and it is what older version of viona did. 785 */ 786 sc->vsc_promisc_mmac = true; 787 pci_viona_eval_promisc(sc); 788 789 /* MSI-X support */ 790 for (i = 0; i < VIONA_MAXQ; i++) 791 sc->vsc_queues[i].vq_msix_idx = VIRTIO_MSI_NO_VECTOR; 792 793 /* BAR 1 used to map MSI-X table and PBA */ 794 if (pci_emul_add_msixcap(pi, VIONA_MAXQ, 1)) { 795 free(sc); 796 return (1); 797 } 798 799 /* BAR 0 for legacy-style virtio register access. 
*/ 800 error = pci_emul_alloc_bar(pi, 0, PCIBAR_IO, VIONA_REGSZ); 801 if (error != 0) { 802 WPRINTF("could not allocate virtio BAR"); 803 free(sc); 804 return (1); 805 } 806 807 /* 808 * Need a legacy interrupt for virtio compliance, even though MSI-X 809 * operation is _strongly_ suggested for adequate performance. 810 */ 811 pci_lintr_request(pi); 812 813 return (0); 814 } 815 816 static uint64_t 817 viona_adjust_offset(struct pci_devinst *pi, uint64_t offset) 818 { 819 /* 820 * Device specific offsets used by guest would change based on 821 * whether MSI-X capability is enabled or not 822 */ 823 if (!pci_msix_enabled(pi)) { 824 if (offset >= VIRTIO_PCI_CONFIG_OFF(0)) { 825 return (offset + (VIRTIO_PCI_CONFIG_OFF(1) - 826 VIRTIO_PCI_CONFIG_OFF(0))); 827 } 828 } 829 830 return (offset); 831 } 832 833 static void 834 pci_viona_ring_set_msix(struct pci_devinst *pi, uint_t ring) 835 { 836 struct pci_viona_softc *sc = pi->pi_arg; 837 struct msix_table_entry mte; 838 uint16_t tab_index; 839 vioc_ring_msi_t vrm; 840 int res; 841 842 if (ring == VIONA_CTLQ) 843 return; 844 845 assert(ring <= VIONA_VQ_TX); 846 847 vrm.rm_index = ring; 848 vrm.rm_addr = 0; 849 vrm.rm_msg = 0; 850 tab_index = sc->vsc_queues[ring].vq_msix_idx; 851 852 if (tab_index != VIRTIO_MSI_NO_VECTOR && sc->vsc_msix_active) { 853 mte = pi->pi_msix.table[tab_index]; 854 if ((mte.vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 855 vrm.rm_addr = mte.addr; 856 vrm.rm_msg = mte.msg_data; 857 } 858 } 859 860 res = ioctl(sc->vsc_vnafd, VNA_IOC_RING_SET_MSI, &vrm); 861 if (res != 0) { 862 WPRINTF("ioctl viona set_msi %d failed %d", ring, errno); 863 } 864 } 865 866 static void 867 pci_viona_lintrupdate(struct pci_devinst *pi) 868 { 869 struct pci_viona_softc *sc = pi->pi_arg; 870 bool msix_on = false; 871 872 pthread_mutex_lock(&sc->vsc_mtx); 873 msix_on = pci_msix_enabled(pi) && (pi->pi_msix.function_mask == 0); 874 if ((sc->vsc_msix_active && !msix_on) || 875 (msix_on && !sc->vsc_msix_active)) { 876 uint_t i; 
877 878 sc->vsc_msix_active = msix_on; 879 /* Update in-kernel ring configs */ 880 for (i = 0; i <= VIONA_VQ_TX; i++) { 881 pci_viona_ring_set_msix(pi, i); 882 } 883 } 884 pthread_mutex_unlock(&sc->vsc_mtx); 885 } 886 887 static void 888 pci_viona_msix_update(struct pci_devinst *pi, uint64_t offset) 889 { 890 struct pci_viona_softc *sc = pi->pi_arg; 891 uint_t tab_index, i; 892 893 pthread_mutex_lock(&sc->vsc_mtx); 894 if (!sc->vsc_msix_active) { 895 pthread_mutex_unlock(&sc->vsc_mtx); 896 return; 897 } 898 899 /* 900 * Rather than update every possible MSI-X vector, cheat and use the 901 * offset to calculate the entry within the table. Since this should 902 * only be called when a write to the table succeeds, the index should 903 * be valid. 904 */ 905 tab_index = offset / MSIX_TABLE_ENTRY_SIZE; 906 907 for (i = 0; i <= VIONA_VQ_TX; i++) { 908 if (sc->vsc_queues[i].vq_msix_idx != tab_index) { 909 continue; 910 } 911 pci_viona_ring_set_msix(pi, i); 912 } 913 914 pthread_mutex_unlock(&sc->vsc_mtx); 915 } 916 917 static void 918 pci_viona_qnotify(struct pci_viona_softc *sc, int ring) 919 { 920 int error; 921 922 switch (ring) { 923 case VIONA_TXQ: 924 case VIONA_RXQ: 925 error = ioctl(sc->vsc_vnafd, VNA_IOC_RING_KICK, ring); 926 if (error != 0) { 927 WPRINTF("ioctl viona ring %d kick failed %d", 928 ring, errno); 929 } 930 break; 931 case VIONA_CTLQ: { 932 struct vqueue_info *vq = &sc->vsc_queues[VIONA_CTLQ]; 933 934 if (vq_has_descs(vq)) 935 pci_viona_process_ctrlq(vq); 936 break; 937 } 938 } 939 } 940 941 static void 942 pci_viona_baraddr(struct pci_devinst *pi, int baridx, int enabled, 943 uint64_t address) 944 { 945 struct pci_viona_softc *sc = pi->pi_arg; 946 uint64_t ioport; 947 int error; 948 949 if (baridx != 0) 950 return; 951 952 if (enabled == 0) { 953 error = ioctl(sc->vsc_vnafd, VNA_IOC_SET_NOTIFY_IOP, 0); 954 if (error != 0) 955 WPRINTF("uninstall ioport hook failed %d", errno); 956 return; 957 } 958 959 /* 960 * Install ioport hook for virtqueue 
notification. 961 * This is part of the virtio common configuration area so the 962 * address does not change with MSI-X status. 963 */ 964 ioport = address + VIRTIO_PCI_QUEUE_NOTIFY; 965 error = ioctl(sc->vsc_vnafd, VNA_IOC_SET_NOTIFY_IOP, ioport); 966 if (error != 0) { 967 WPRINTF("install ioport hook at %x failed %d", 968 ioport, errno); 969 } 970 } 971 972 static void 973 pci_viona_write(struct pci_devinst *pi, int baridx, uint64_t offset, int size, 974 uint64_t value) 975 { 976 struct pci_viona_softc *sc = pi->pi_arg; 977 void *ptr; 978 int err = 0; 979 980 if (baridx == pci_msix_table_bar(pi) || 981 baridx == pci_msix_pba_bar(pi)) { 982 if (pci_emul_msix_twrite(pi, offset, size, value) == 0) { 983 pci_viona_msix_update(pi, offset); 984 } 985 return; 986 } 987 988 assert(baridx == 0); 989 990 if (offset + size > pci_viona_iosize(pi)) { 991 DPRINTF("viona_write: 2big, offset %ld size %d", 992 offset, size); 993 return; 994 } 995 996 pthread_mutex_lock(&sc->vsc_mtx); 997 998 offset = viona_adjust_offset(pi, offset); 999 1000 switch (offset) { 1001 case VIRTIO_PCI_GUEST_FEATURES: 1002 assert(size == 4); 1003 value &= ~(sc->vsc_feature_mask); 1004 err = ioctl(sc->vsc_vnafd, VNA_IOC_SET_FEATURES, &value); 1005 if (err != 0) { 1006 WPRINTF("ioctl feature negotiation returned err = %d", 1007 errno); 1008 } else { 1009 sc->vsc_vs.vs_negotiated_caps = value; 1010 } 1011 break; 1012 case VIRTIO_PCI_QUEUE_PFN: 1013 assert(size == 4); 1014 pci_viona_ring_init(sc, value); 1015 break; 1016 case VIRTIO_PCI_QUEUE_SEL: 1017 assert(size == 2); 1018 assert(value < VIONA_MAXQ); 1019 sc->vsc_vs.vs_curq = value; 1020 break; 1021 case VIRTIO_PCI_QUEUE_NOTIFY: 1022 assert(size == 2); 1023 assert(value < VIONA_MAXQ); 1024 pci_viona_qnotify(sc, value); 1025 break; 1026 case VIRTIO_PCI_STATUS: 1027 assert(size == 1); 1028 pci_viona_update_status(sc, value); 1029 break; 1030 case VIRTIO_MSI_CONFIG_VECTOR: 1031 assert(size == 2); 1032 sc->vsc_vs.vs_msix_cfg_idx = value; 1033 break; 1034 
case VIRTIO_MSI_QUEUE_VECTOR: 1035 assert(size == 2); 1036 assert(sc->vsc_vs.vs_curq < VIONA_MAXQ); 1037 sc->vsc_queues[sc->vsc_vs.vs_curq].vq_msix_idx = value; 1038 pci_viona_ring_set_msix(pi, sc->vsc_vs.vs_curq); 1039 break; 1040 case VIONA_R_CFG0: 1041 case VIONA_R_CFG1: 1042 case VIONA_R_CFG2: 1043 case VIONA_R_CFG3: 1044 case VIONA_R_CFG4: 1045 case VIONA_R_CFG5: 1046 assert((size + offset) <= (VIONA_R_CFG5 + 1)); 1047 ptr = &sc->vsc_macaddr[offset - VIONA_R_CFG0]; 1048 /* 1049 * The driver is allowed to change the MAC address 1050 */ 1051 sc->vsc_macaddr[offset - VIONA_R_CFG0] = value; 1052 if (size == 1) { 1053 *(uint8_t *)ptr = value; 1054 } else if (size == 2) { 1055 *(uint16_t *)ptr = value; 1056 } else { 1057 *(uint32_t *)ptr = value; 1058 } 1059 break; 1060 case VIRTIO_PCI_HOST_FEATURES: 1061 case VIRTIO_PCI_QUEUE_NUM: 1062 case VIRTIO_PCI_ISR: 1063 case VIONA_R_CFG6: 1064 case VIONA_R_CFG7: 1065 DPRINTF("viona: write to readonly reg %ld", offset); 1066 break; 1067 default: 1068 DPRINTF("viona: unknown i/o write offset %ld", offset); 1069 value = 0; 1070 break; 1071 } 1072 1073 pthread_mutex_unlock(&sc->vsc_mtx); 1074 } 1075 1076 static uint64_t 1077 pci_viona_read(struct pci_devinst *pi, int baridx, uint64_t offset, int size) 1078 { 1079 struct pci_viona_softc *sc = pi->pi_arg; 1080 void *ptr; 1081 uint64_t value; 1082 int err = 0; 1083 1084 if (baridx == pci_msix_table_bar(pi) || 1085 baridx == pci_msix_pba_bar(pi)) { 1086 return (pci_emul_msix_tread(pi, offset, size)); 1087 } 1088 1089 assert(baridx == 0); 1090 1091 if (offset + size > pci_viona_iosize(pi)) { 1092 DPRINTF("viona_read: 2big, offset %ld size %d", 1093 offset, size); 1094 return (0); 1095 } 1096 1097 pthread_mutex_lock(&sc->vsc_mtx); 1098 1099 offset = viona_adjust_offset(pi, offset); 1100 1101 switch (offset) { 1102 case VIRTIO_PCI_HOST_FEATURES: 1103 assert(size == 4); 1104 err = ioctl(sc->vsc_vnafd, VNA_IOC_GET_FEATURES, &value); 1105 if (err != 0) { 1106 WPRINTF("ioctl get host 
features returned err = %d", 1107 errno); 1108 } 1109 value |= VIONA_S_HOSTCAPS_USERSPACE; 1110 value &= ~sc->vsc_feature_mask; 1111 sc->vsc_consts.vc_hv_caps = value; 1112 break; 1113 case VIRTIO_PCI_GUEST_FEATURES: 1114 assert(size == 4); 1115 value = sc->vsc_vs.vs_negotiated_caps; /* XXX never read ? */ 1116 break; 1117 case VIRTIO_PCI_QUEUE_PFN: 1118 assert(size == 4); 1119 value = sc->vsc_queues[sc->vsc_vs.vs_curq].vq_pfn >> VRING_PFN; 1120 break; 1121 case VIRTIO_PCI_QUEUE_NUM: 1122 assert(size == 2); 1123 value = pci_viona_qsize(sc, sc->vsc_vs.vs_curq); 1124 break; 1125 case VIRTIO_PCI_QUEUE_SEL: 1126 assert(size == 2); 1127 value = sc->vsc_vs.vs_curq; /* XXX never read ? */ 1128 break; 1129 case VIRTIO_PCI_QUEUE_NOTIFY: 1130 assert(size == 2); 1131 value = sc->vsc_vs.vs_curq; /* XXX never read ? */ 1132 break; 1133 case VIRTIO_PCI_STATUS: 1134 assert(size == 1); 1135 value = sc->vsc_vs.vs_status; 1136 break; 1137 case VIRTIO_PCI_ISR: 1138 assert(size == 1); 1139 value = sc->vsc_vs.vs_isr; 1140 sc->vsc_vs.vs_isr = 0; /* a read clears this flag */ 1141 if (value != 0) { 1142 pci_lintr_deassert(pi); 1143 } 1144 break; 1145 case VIRTIO_MSI_CONFIG_VECTOR: 1146 assert(size == 2); 1147 value = sc->vsc_vs.vs_msix_cfg_idx; 1148 break; 1149 case VIRTIO_MSI_QUEUE_VECTOR: 1150 assert(size == 2); 1151 assert(sc->vsc_vs.vs_curq < VIONA_MAXQ); 1152 value = sc->vsc_queues[sc->vsc_vs.vs_curq].vq_msix_idx; 1153 break; 1154 case VIONA_R_CFG0: 1155 case VIONA_R_CFG1: 1156 case VIONA_R_CFG2: 1157 case VIONA_R_CFG3: 1158 case VIONA_R_CFG4: 1159 case VIONA_R_CFG5: 1160 assert((size + offset) <= (VIONA_R_CFG5 + 1)); 1161 ptr = &sc->vsc_macaddr[offset - VIONA_R_CFG0]; 1162 if (size == 1) { 1163 value = *(uint8_t *)ptr; 1164 } else if (size == 2) { 1165 value = *(uint16_t *)ptr; 1166 } else { 1167 value = *(uint32_t *)ptr; 1168 } 1169 break; 1170 case VIONA_R_CFG6: 1171 assert(size != 4); 1172 value = 0x01; /* XXX link always up */ 1173 break; 1174 case VIONA_R_CFG7: 1175 
assert(size == 1); 1176 value = 0; /* XXX link status in LSB */ 1177 break; 1178 default: 1179 DPRINTF("viona: unknown i/o read offset %ld", offset); 1180 value = 0; 1181 break; 1182 } 1183 1184 pthread_mutex_unlock(&sc->vsc_mtx); 1185 1186 return (value); 1187 } 1188 1189 struct pci_devemu pci_de_viona = { 1190 .pe_emu = "virtio-net-viona", 1191 .pe_init = pci_viona_init, 1192 .pe_legacy_config = pci_viona_legacy_config, 1193 .pe_barwrite = pci_viona_write, 1194 .pe_barread = pci_viona_read, 1195 .pe_baraddr = pci_viona_baraddr, 1196 .pe_lintrupdate = pci_viona_lintrupdate 1197 }; 1198 PCI_EMUL_SET(pci_de_viona); 1199