1 /*- 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD 3 * 4 * Copyright (c) 2011 NetApp, Inc. 5 * All rights reserved. 6 * 7 * Redistribution and use in source and binary forms, with or without 8 * modification, are permitted provided that the following conditions 9 * are met: 10 * 1. Redistributions of source code must retain the above copyright 11 * notice, this list of conditions and the following disclaimer. 12 * 2. Redistributions in binary form must reproduce the above copyright 13 * notice, this list of conditions and the following disclaimer in the 14 * documentation and/or other materials provided with the distribution. 15 * 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 26 * SUCH DAMAGE. 27 * 28 * $FreeBSD$ 29 */ 30 31 #include <sys/cdefs.h> 32 __FBSDID("$FreeBSD$"); 33 34 #include <sys/param.h> 35 #include <sys/systm.h> 36 #include <sys/kernel.h> 37 #include <sys/malloc.h> 38 #include <sys/module.h> 39 #include <sys/bus.h> 40 #include <sys/pciio.h> 41 #include <sys/rman.h> 42 #include <sys/smp.h> 43 #include <sys/sysctl.h> 44 45 #include <dev/pci/pcivar.h> 46 #include <dev/pci/pcireg.h> 47 48 #include <machine/resource.h> 49 50 #include <machine/vmm.h> 51 #include <machine/vmm_dev.h> 52 53 #include "vmm_lapic.h" 54 #include "vmm_ktr.h" 55 56 #include "iommu.h" 57 #include "ppt.h" 58 59 /* XXX locking */ 60 61 #define MAX_MSIMSGS 32 62 63 /* 64 * If the MSI-X table is located in the middle of a BAR then that MMIO 65 * region gets split into two segments - one segment above the MSI-X table 66 * and the other segment below the MSI-X table - with a hole in place of 67 * the MSI-X table so accesses to it can be trapped and emulated. 68 * 69 * So, allocate a MMIO segment for each BAR register + 1 additional segment. 70 */ 71 #define MAX_MMIOSEGS ((PCIR_MAX_BAR_0 + 1) + 1) 72 73 MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources"); 74 75 struct pptintr_arg { /* pptintr(pptintr_arg) */ 76 struct pptdev *pptdev; 77 uint64_t addr; 78 uint64_t msg_data; 79 }; 80 81 struct pptseg { 82 vm_paddr_t gpa; 83 size_t len; 84 int wired; 85 }; 86 87 struct pptdev { 88 device_t dev; 89 struct vm *vm; /* owner of this device */ 90 TAILQ_ENTRY(pptdev) next; 91 struct pptseg mmio[MAX_MMIOSEGS]; 92 struct { 93 int num_msgs; /* guest state */ 94 95 int startrid; /* host state */ 96 struct resource *res[MAX_MSIMSGS]; 97 void *cookie[MAX_MSIMSGS]; 98 struct pptintr_arg arg[MAX_MSIMSGS]; 99 } msi; 100 101 struct { 102 int num_msgs; 103 int startrid; 104 int msix_table_rid; 105 int msix_pba_rid; 106 struct resource *msix_table_res; 107 struct resource *msix_pba_res; 108 struct resource **res; 109 void **cookie; 110 struct pptintr_arg *arg; 111 } msix; 112 }; 113 114 SYSCTL_DECL(_hw_vmm); 115 SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW, 0, "bhyve passthru devices"); 116 117 static int num_pptdevs; 118 SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0, 119 "number of pci passthru devices"); 120 121 static TAILQ_HEAD(, pptdev) pptdev_list = TAILQ_HEAD_INITIALIZER(pptdev_list); 122 123 static int 124 ppt_probe(device_t dev) 125 { 126 int bus, slot, func; 127 struct pci_devinfo *dinfo; 128 129 dinfo = (struct pci_devinfo *)device_get_ivars(dev); 130 131 bus = pci_get_bus(dev); 132 slot = pci_get_slot(dev); 133 func = pci_get_function(dev); 134 135 /* 136 * To qualify as a pci passthrough device a device must: 137 * - be allowed by administrator to be used in this role 138 * - be an endpoint device 139 */ 140 if ((dinfo->cfg.hdrtype & PCIM_HDRTYPE) != PCIM_HDRTYPE_NORMAL) 141 return (ENXIO); 142 else if (vmm_is_pptdev(bus, slot, func)) 143 return (0); 144 else 145 /* 146 * Returning BUS_PROBE_NOWILDCARD here matches devices that the 147 * SR-IOV infrastructure specified as "ppt" passthrough devices. 148 * All normal devices that did not have "ppt" specified as their 149 * driver will not be matched by this. 150 */ 151 return (BUS_PROBE_NOWILDCARD); 152 } 153 154 static int 155 ppt_attach(device_t dev) 156 { 157 struct pptdev *ppt; 158 159 ppt = device_get_softc(dev); 160 161 iommu_remove_device(iommu_host_domain(), pci_get_rid(dev)); 162 num_pptdevs++; 163 TAILQ_INSERT_TAIL(&pptdev_list, ppt, next); 164 ppt->dev = dev; 165 166 if (bootverbose) 167 device_printf(dev, "attached\n"); 168 169 return (0); 170 } 171 172 static int 173 ppt_detach(device_t dev) 174 { 175 struct pptdev *ppt; 176 177 ppt = device_get_softc(dev); 178 179 if (ppt->vm != NULL) 180 return (EBUSY); 181 num_pptdevs--; 182 TAILQ_REMOVE(&pptdev_list, ppt, next); 183 pci_disable_busmaster(dev); 184 iommu_add_device(iommu_host_domain(), pci_get_rid(dev)); 185 186 return (0); 187 } 188 189 static device_method_t ppt_methods[] = { 190 /* Device interface */ 191 DEVMETHOD(device_probe, ppt_probe), 192 DEVMETHOD(device_attach, ppt_attach), 193 DEVMETHOD(device_detach, ppt_detach), 194 {0, 0} 195 }; 196 197 static devclass_t ppt_devclass; 198 DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, sizeof(struct pptdev)); 199 DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL); 200 201 static struct pptdev * 202 ppt_find(int bus, int slot, int func) 203 { 204 device_t dev; 205 struct pptdev *ppt; 206 int b, s, f; 207 208 TAILQ_FOREACH(ppt, &pptdev_list, next) { 209 dev = ppt->dev; 210 b = pci_get_bus(dev); 211 s = pci_get_slot(dev); 212 f = pci_get_function(dev); 213 if (bus == b && slot == s && func == f) 214 return (ppt); 215 } 216 return (NULL); 217 } 218 219 static void 220 ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt) 221 { 222 int i; 223 struct pptseg *seg; 224 225 for (i = 0; i < MAX_MMIOSEGS; i++) { 226 seg = &ppt->mmio[i]; 227 if (seg->len == 0) 228 continue; 229 (void)vm_unmap_mmio(vm, seg->gpa, seg->len); 230 bzero(seg, sizeof(struct pptseg)); 231 } 232 } 233 234 static void 235 ppt_teardown_msi(struct pptdev *ppt) 236 { 237 int i, rid; 238 void *cookie; 239 struct resource *res; 240 241 if (ppt->msi.num_msgs == 0) 242 return; 243 244 for (i = 0; i < ppt->msi.num_msgs; i++) { 245 rid = ppt->msi.startrid + i; 246 res = ppt->msi.res[i]; 247 cookie = ppt->msi.cookie[i]; 248 249 if (cookie != NULL) 250 bus_teardown_intr(ppt->dev, res, cookie); 251 252 if (res != NULL) 253 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); 254 255 ppt->msi.res[i] = NULL; 256 ppt->msi.cookie[i] = NULL; 257 } 258 259 if (ppt->msi.startrid == 1) 260 pci_release_msi(ppt->dev); 261 262 ppt->msi.num_msgs = 0; 263 } 264 265 static void 266 ppt_teardown_msix_intr(struct pptdev *ppt, int idx) 267 { 268 int rid; 269 struct resource *res; 270 void *cookie; 271 272 rid = ppt->msix.startrid + idx; 273 res = ppt->msix.res[idx]; 274 cookie = ppt->msix.cookie[idx]; 275 276 if (cookie != NULL) 277 bus_teardown_intr(ppt->dev, res, cookie); 278 279 if (res != NULL) 280 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); 281 282 ppt->msix.res[idx] = NULL; 283 ppt->msix.cookie[idx] = NULL; 284 } 285 286 static void 287 ppt_teardown_msix(struct pptdev *ppt) 288 { 289 int i; 290 291 if (ppt->msix.num_msgs == 0) 292 return; 293 294 for (i = 0; i < ppt->msix.num_msgs; i++) 295 ppt_teardown_msix_intr(ppt, i); 296 297 free(ppt->msix.res, M_PPTMSIX); 298 free(ppt->msix.cookie, M_PPTMSIX); 299 free(ppt->msix.arg, M_PPTMSIX); 300 301 pci_release_msi(ppt->dev); 302 303 if (ppt->msix.msix_table_res) { 304 bus_release_resource(ppt->dev, SYS_RES_MEMORY, 305 ppt->msix.msix_table_rid, 306 ppt->msix.msix_table_res); 307 ppt->msix.msix_table_res = NULL; 308 ppt->msix.msix_table_rid = 0; 309 } 310 if (ppt->msix.msix_pba_res) { 311 bus_release_resource(ppt->dev, SYS_RES_MEMORY, 312 ppt->msix.msix_pba_rid, 313 ppt->msix.msix_pba_res); 314 ppt->msix.msix_pba_res = NULL; 315 ppt->msix.msix_pba_rid = 0; 316 } 317 318 ppt->msix.num_msgs = 0; 319 } 320 321 int 322 ppt_avail_devices(void) 323 { 324 325 return (num_pptdevs); 326 } 327 328 int 329 ppt_assigned_devices(struct vm *vm) 330 { 331 struct pptdev *ppt; 332 int num; 333 334 num = 0; 335 TAILQ_FOREACH(ppt, &pptdev_list, next) { 336 if (ppt->vm == vm) 337 num++; 338 } 339 return (num); 340 } 341 342 bool 343 ppt_is_mmio(struct vm *vm, vm_paddr_t gpa) 344 { 345 int i; 346 struct pptdev *ppt; 347 struct pptseg *seg; 348 349 TAILQ_FOREACH(ppt, &pptdev_list, next) { 350 if (ppt->vm != vm) 351 continue; 352 353 for (i = 0; i < MAX_MMIOSEGS; i++) { 354 seg = &ppt->mmio[i]; 355 if (seg->len == 0) 356 continue; 357 if (gpa >= seg->gpa && gpa < seg->gpa + seg->len) 358 return (true); 359 } 360 } 361 362 return (false); 363 } 364 365 static void 366 ppt_pci_reset(device_t dev) 367 { 368 369 if (pcie_flr(dev, 370 max(pcie_get_max_completion_timeout(dev) / 1000, 10), true)) 371 return; 372 373 pci_power_reset(dev); 374 } 375 376 int 377 ppt_assign_device(struct vm *vm, int bus, int slot, int func) 378 { 379 struct pptdev *ppt; 380 381 ppt = ppt_find(bus, slot, func); 382 if (ppt != NULL) { 383 /* 384 * If this device is owned by a different VM then we 385 * cannot change its owner. 386 */ 387 if (ppt->vm != NULL && ppt->vm != vm) 388 return (EBUSY); 389 390 pci_save_state(ppt->dev); 391 ppt_pci_reset(ppt->dev); 392 pci_restore_state(ppt->dev); 393 ppt->vm = vm; 394 iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); 395 return (0); 396 } 397 return (ENOENT); 398 } 399 400 int 401 ppt_unassign_device(struct vm *vm, int bus, int slot, int func) 402 { 403 struct pptdev *ppt; 404 405 ppt = ppt_find(bus, slot, func); 406 if (ppt != NULL) { 407 /* 408 * If this device is not owned by this 'vm' then bail out. 409 */ 410 if (ppt->vm != vm) 411 return (EBUSY); 412 413 pci_save_state(ppt->dev); 414 ppt_pci_reset(ppt->dev); 415 pci_restore_state(ppt->dev); 416 ppt_unmap_mmio(vm, ppt); 417 ppt_teardown_msi(ppt); 418 ppt_teardown_msix(ppt); 419 iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); 420 ppt->vm = NULL; 421 return (0); 422 } 423 return (ENOENT); 424 } 425 426 int 427 ppt_unassign_all(struct vm *vm) 428 { 429 struct pptdev *ppt; 430 int bus, slot, func; 431 device_t dev; 432 433 TAILQ_FOREACH(ppt, &pptdev_list, next) { 434 if (ppt->vm == vm) { 435 dev = ppt->dev; 436 bus = pci_get_bus(dev); 437 slot = pci_get_slot(dev); 438 func = pci_get_function(dev); 439 vm_unassign_pptdev(vm, bus, slot, func); 440 } 441 } 442 443 return (0); 444 } 445 446 int 447 ppt_map_mmio(struct vm *vm, int bus, int slot, int func, 448 vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 449 { 450 int i, error; 451 struct pptseg *seg; 452 struct pptdev *ppt; 453 454 ppt = ppt_find(bus, slot, func); 455 if (ppt != NULL) { 456 if (ppt->vm != vm) 457 return (EBUSY); 458 459 for (i = 0; i < MAX_MMIOSEGS; i++) { 460 seg = &ppt->mmio[i]; 461 if (seg->len == 0) { 462 error = vm_map_mmio(vm, gpa, len, hpa); 463 if (error == 0) { 464 seg->gpa = gpa; 465 seg->len = len; 466 } 467 return (error); 468 } 469 } 470 return (ENOSPC); 471 } 472 return (ENOENT); 473 } 474 475 static int 476 pptintr(void *arg) 477 { 478 struct pptdev *ppt; 479 struct pptintr_arg *pptarg; 480 481 pptarg = arg; 482 ppt = pptarg->pptdev; 483 484 if (ppt->vm != NULL) 485 lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data); 486 else { 487 /* 488 * XXX 489 * This is not expected to happen - panic? 490 */ 491 } 492 493 /* 494 * For legacy interrupts give other filters a chance in case 495 * the interrupt was not generated by the passthrough device. 496 */ 497 if (ppt->msi.startrid == 0) 498 return (FILTER_STRAY); 499 else 500 return (FILTER_HANDLED); 501 } 502 503 int 504 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, 505 uint64_t addr, uint64_t msg, int numvec) 506 { 507 int i, rid, flags; 508 int msi_count, startrid, error, tmp; 509 struct pptdev *ppt; 510 511 if (numvec < 0 || numvec > MAX_MSIMSGS) 512 return (EINVAL); 513 514 ppt = ppt_find(bus, slot, func); 515 if (ppt == NULL) 516 return (ENOENT); 517 if (ppt->vm != vm) /* Make sure we own this device */ 518 return (EBUSY); 519 520 /* Free any allocated resources */ 521 ppt_teardown_msi(ppt); 522 523 if (numvec == 0) /* nothing more to do */ 524 return (0); 525 526 flags = RF_ACTIVE; 527 msi_count = pci_msi_count(ppt->dev); 528 if (msi_count == 0) { 529 startrid = 0; /* legacy interrupt */ 530 msi_count = 1; 531 flags |= RF_SHAREABLE; 532 } else 533 startrid = 1; /* MSI */ 534 535 /* 536 * The device must be capable of supporting the number of vectors 537 * the guest wants to allocate. 538 */ 539 if (numvec > msi_count) 540 return (EINVAL); 541 542 /* 543 * Make sure that we can allocate all the MSI vectors that are needed 544 * by the guest. 545 */ 546 if (startrid == 1) { 547 tmp = numvec; 548 error = pci_alloc_msi(ppt->dev, &tmp); 549 if (error) 550 return (error); 551 else if (tmp != numvec) { 552 pci_release_msi(ppt->dev); 553 return (ENOSPC); 554 } else { 555 /* success */ 556 } 557 } 558 559 ppt->msi.startrid = startrid; 560 561 /* 562 * Allocate the irq resource and attach it to the interrupt handler. 563 */ 564 for (i = 0; i < numvec; i++) { 565 ppt->msi.num_msgs = i + 1; 566 ppt->msi.cookie[i] = NULL; 567 568 rid = startrid + i; 569 ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, 570 &rid, flags); 571 if (ppt->msi.res[i] == NULL) 572 break; 573 574 ppt->msi.arg[i].pptdev = ppt; 575 ppt->msi.arg[i].addr = addr; 576 ppt->msi.arg[i].msg_data = msg + i; 577 578 error = bus_setup_intr(ppt->dev, ppt->msi.res[i], 579 INTR_TYPE_NET | INTR_MPSAFE, 580 pptintr, NULL, &ppt->msi.arg[i], 581 &ppt->msi.cookie[i]); 582 if (error != 0) 583 break; 584 } 585 586 if (i < numvec) { 587 ppt_teardown_msi(ppt); 588 return (ENXIO); 589 } 590 591 return (0); 592 } 593 594 int 595 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func, 596 int idx, uint64_t addr, uint64_t msg, uint32_t vector_control) 597 { 598 struct pptdev *ppt; 599 struct pci_devinfo *dinfo; 600 int numvec, alloced, rid, error; 601 size_t res_size, cookie_size, arg_size; 602 603 ppt = ppt_find(bus, slot, func); 604 if (ppt == NULL) 605 return (ENOENT); 606 if (ppt->vm != vm) /* Make sure we own this device */ 607 return (EBUSY); 608 609 dinfo = device_get_ivars(ppt->dev); 610 if (!dinfo) 611 return (ENXIO); 612 613 /* 614 * First-time configuration: 615 * Allocate the MSI-X table 616 * Allocate the IRQ resources 617 * Set up some variables in ppt->msix 618 */ 619 if (ppt->msix.num_msgs == 0) { 620 numvec = pci_msix_count(ppt->dev); 621 if (numvec <= 0) 622 return (EINVAL); 623 624 ppt->msix.startrid = 1; 625 ppt->msix.num_msgs = numvec; 626 627 res_size = numvec * sizeof(ppt->msix.res[0]); 628 cookie_size = numvec * sizeof(ppt->msix.cookie[0]); 629 arg_size = numvec * sizeof(ppt->msix.arg[0]); 630 631 ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO); 632 ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX, 633 M_WAITOK | M_ZERO); 634 ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO); 635 636 rid = dinfo->cfg.msix.msix_table_bar; 637 ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev, 638 SYS_RES_MEMORY, &rid, RF_ACTIVE); 639 640 if (ppt->msix.msix_table_res == NULL) { 641 ppt_teardown_msix(ppt); 642 return (ENOSPC); 643 } 644 ppt->msix.msix_table_rid = rid; 645 646 if (dinfo->cfg.msix.msix_table_bar != 647 dinfo->cfg.msix.msix_pba_bar) { 648 rid = dinfo->cfg.msix.msix_pba_bar; 649 ppt->msix.msix_pba_res = bus_alloc_resource_any( 650 ppt->dev, SYS_RES_MEMORY, &rid, RF_ACTIVE); 651 652 if (ppt->msix.msix_pba_res == NULL) { 653 ppt_teardown_msix(ppt); 654 return (ENOSPC); 655 } 656 ppt->msix.msix_pba_rid = rid; 657 } 658 659 alloced = numvec; 660 error = pci_alloc_msix(ppt->dev, &alloced); 661 if (error || alloced != numvec) { 662 ppt_teardown_msix(ppt); 663 return (error == 0 ? ENOSPC: error); 664 } 665 } 666 667 if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 668 /* Tear down the IRQ if it's already set up */ 669 ppt_teardown_msix_intr(ppt, idx); 670 671 /* Allocate the IRQ resource */ 672 ppt->msix.cookie[idx] = NULL; 673 rid = ppt->msix.startrid + idx; 674 ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, 675 &rid, RF_ACTIVE); 676 if (ppt->msix.res[idx] == NULL) 677 return (ENXIO); 678 679 ppt->msix.arg[idx].pptdev = ppt; 680 ppt->msix.arg[idx].addr = addr; 681 ppt->msix.arg[idx].msg_data = msg; 682 683 /* Setup the MSI-X interrupt */ 684 error = bus_setup_intr(ppt->dev, ppt->msix.res[idx], 685 INTR_TYPE_NET | INTR_MPSAFE, 686 pptintr, NULL, &ppt->msix.arg[idx], 687 &ppt->msix.cookie[idx]); 688 689 if (error != 0) { 690 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]); 691 ppt->msix.cookie[idx] = NULL; 692 ppt->msix.res[idx] = NULL; 693 return (ENXIO); 694 } 695 } else { 696 /* Masked, tear it down if it's already been set up */ 697 ppt_teardown_msix_intr(ppt, idx); 698 } 699 700 return (0); 701 } 702