1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/kernel.h> 35 #include <sys/malloc.h> 36 #include <sys/module.h> 37 #include <sys/bus.h> 38 #include <sys/pciio.h> 39 #include <sys/rman.h> 40 #include <sys/smp.h> 41 #include <sys/sysctl.h> 42 43 #include <dev/pci/pcivar.h> 44 #include <dev/pci/pcireg.h> 45 46 #include <machine/resource.h> 47 48 #include <machine/vmm.h> 49 #include <machine/vmm_dev.h> 50 51 #include "vmm_lapic.h" 52 #include "vmm_ktr.h" 53 54 #include "iommu.h" 55 #include "ppt.h" 56 57 /* XXX locking */ 58 59 #define MAX_PPTDEVS (sizeof(pptdevs) / sizeof(pptdevs[0])) 60 #define MAX_MSIMSGS 32 61 62 /* 63 * If the MSI-X table is located in the middle of a BAR then that MMIO 64 * region gets split into two segments - one segment above the MSI-X table 65 * and the other segment below the MSI-X table - with a hole in place of 66 * the MSI-X table so accesses to it can be trapped and emulated. 67 * 68 * So, allocate a MMIO segment for each BAR register + 1 additional segment. 69 */ 70 #define MAX_MMIOSEGS ((PCIR_MAX_BAR_0 + 1) + 1) 71 72 MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources"); 73 74 struct pptintr_arg { /* pptintr(pptintr_arg) */ 75 struct pptdev *pptdev; 76 uint64_t addr; 77 uint64_t msg_data; 78 }; 79 80 static struct pptdev { 81 device_t dev; 82 struct vm *vm; /* owner of this device */ 83 struct vm_memory_segment mmio[MAX_MMIOSEGS]; 84 struct { 85 int num_msgs; /* guest state */ 86 87 int startrid; /* host state */ 88 struct resource *res[MAX_MSIMSGS]; 89 void *cookie[MAX_MSIMSGS]; 90 struct pptintr_arg arg[MAX_MSIMSGS]; 91 } msi; 92 93 struct { 94 int num_msgs; 95 int startrid; 96 int msix_table_rid; 97 struct resource *msix_table_res; 98 struct resource **res; 99 void **cookie; 100 struct pptintr_arg *arg; 101 } msix; 102 } pptdevs[64]; 103 104 SYSCTL_DECL(_hw_vmm); 105 SYSCTL_NODE(_hw_vmm, OID_AUTO, ppt, CTLFLAG_RW, 0, "bhyve passthru devices"); 106 107 static int num_pptdevs; 108 SYSCTL_INT(_hw_vmm_ppt, OID_AUTO, devices, CTLFLAG_RD, &num_pptdevs, 0, 109 "number of pci passthru devices"); 110 111 static int 112 ppt_probe(device_t dev) 113 { 114 int bus, slot, func; 115 struct pci_devinfo *dinfo; 116 117 dinfo = (struct pci_devinfo *)device_get_ivars(dev); 118 119 bus = pci_get_bus(dev); 120 slot = pci_get_slot(dev); 121 func = pci_get_function(dev); 122 123 /* 124 * To qualify as a pci passthrough device a device must: 125 * - be allowed by administrator to be used in this role 126 * - be an endpoint device 127 */ 128 if (vmm_is_pptdev(bus, slot, func) && 129 (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL) 130 return (0); 131 else 132 return (ENXIO); 133 } 134 135 static int 136 ppt_attach(device_t dev) 137 { 138 int n; 139 140 if (num_pptdevs >= MAX_PPTDEVS) { 141 printf("ppt_attach: maximum number of pci passthrough devices " 142 "exceeded\n"); 143 return (ENXIO); 144 } 145 146 n = num_pptdevs++; 147 pptdevs[n].dev = dev; 148 149 if (bootverbose) 150 device_printf(dev, "attached\n"); 151 152 return (0); 153 } 154 155 static int 156 ppt_detach(device_t dev) 157 { 158 /* 159 * XXX check whether there are any pci passthrough devices assigned 160 * to guests before we allow this driver to detach. 161 */ 162 163 return (0); 164 } 165 166 static device_method_t ppt_methods[] = { 167 /* Device interface */ 168 DEVMETHOD(device_probe, ppt_probe), 169 DEVMETHOD(device_attach, ppt_attach), 170 DEVMETHOD(device_detach, ppt_detach), 171 {0, 0} 172 }; 173 174 static devclass_t ppt_devclass; 175 DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0); 176 DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL); 177 178 static struct pptdev * 179 ppt_find(int bus, int slot, int func) 180 { 181 device_t dev; 182 int i, b, s, f; 183 184 for (i = 0; i < num_pptdevs; i++) { 185 dev = pptdevs[i].dev; 186 b = pci_get_bus(dev); 187 s = pci_get_slot(dev); 188 f = pci_get_function(dev); 189 if (bus == b && slot == s && func == f) 190 return (&pptdevs[i]); 191 } 192 return (NULL); 193 } 194 195 static void 196 ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt) 197 { 198 int i; 199 struct vm_memory_segment *seg; 200 201 for (i = 0; i < MAX_MMIOSEGS; i++) { 202 seg = &ppt->mmio[i]; 203 if (seg->len == 0) 204 continue; 205 (void)vm_unmap_mmio(vm, seg->gpa, seg->len); 206 bzero(seg, sizeof(struct vm_memory_segment)); 207 } 208 } 209 210 static void 211 ppt_teardown_msi(struct pptdev *ppt) 212 { 213 int i, rid; 214 void *cookie; 215 struct resource *res; 216 217 if (ppt->msi.num_msgs == 0) 218 return; 219 220 for (i = 0; i < ppt->msi.num_msgs; i++) { 221 rid = ppt->msi.startrid + i; 222 res = ppt->msi.res[i]; 223 cookie = ppt->msi.cookie[i]; 224 225 if (cookie != NULL) 226 bus_teardown_intr(ppt->dev, res, cookie); 227 228 if (res != NULL) 229 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); 230 231 ppt->msi.res[i] = NULL; 232 ppt->msi.cookie[i] = NULL; 233 } 234 235 if (ppt->msi.startrid == 1) 236 pci_release_msi(ppt->dev); 237 238 ppt->msi.num_msgs = 0; 239 } 240 241 static void 242 ppt_teardown_msix_intr(struct pptdev *ppt, int idx) 243 { 244 int rid; 245 struct resource *res; 246 void *cookie; 247 248 rid = ppt->msix.startrid + idx; 249 res = ppt->msix.res[idx]; 250 cookie = ppt->msix.cookie[idx]; 251 252 if (cookie != NULL) 253 bus_teardown_intr(ppt->dev, res, cookie); 254 255 if (res != NULL) 256 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); 257 258 ppt->msix.res[idx] = NULL; 259 ppt->msix.cookie[idx] = NULL; 260 } 261 262 static void 263 ppt_teardown_msix(struct pptdev *ppt) 264 { 265 int i; 266 267 if (ppt->msix.num_msgs == 0) 268 return; 269 270 for (i = 0; i < ppt->msix.num_msgs; i++) 271 ppt_teardown_msix_intr(ppt, i); 272 273 if (ppt->msix.msix_table_res) { 274 bus_release_resource(ppt->dev, SYS_RES_MEMORY, 275 ppt->msix.msix_table_rid, 276 ppt->msix.msix_table_res); 277 ppt->msix.msix_table_res = NULL; 278 ppt->msix.msix_table_rid = 0; 279 } 280 281 free(ppt->msix.res, M_PPTMSIX); 282 free(ppt->msix.cookie, M_PPTMSIX); 283 free(ppt->msix.arg, M_PPTMSIX); 284 285 pci_release_msi(ppt->dev); 286 287 ppt->msix.num_msgs = 0; 288 } 289 290 int 291 ppt_avail_devices(void) 292 { 293 294 return (num_pptdevs); 295 } 296 297 int 298 ppt_assigned_devices(struct vm *vm) 299 { 300 int i, num; 301 302 num = 0; 303 for (i = 0; i < num_pptdevs; i++) { 304 if (pptdevs[i].vm == vm) 305 num++; 306 } 307 return (num); 308 } 309 310 boolean_t 311 ppt_is_mmio(struct vm *vm, vm_paddr_t gpa) 312 { 313 int i, n; 314 struct pptdev *ppt; 315 struct vm_memory_segment *seg; 316 317 for (n = 0; n < num_pptdevs; n++) { 318 ppt = &pptdevs[n]; 319 if (ppt->vm != vm) 320 continue; 321 322 for (i = 0; i < MAX_MMIOSEGS; i++) { 323 seg = &ppt->mmio[i]; 324 if (seg->len == 0) 325 continue; 326 if (gpa >= seg->gpa && gpa < seg->gpa + seg->len) 327 return (TRUE); 328 } 329 } 330 331 return (FALSE); 332 } 333 334 int 335 ppt_assign_device(struct vm *vm, int bus, int slot, int func) 336 { 337 struct pptdev *ppt; 338 339 ppt = ppt_find(bus, slot, func); 340 if (ppt != NULL) { 341 /* 342 * If this device is owned by a different VM then we 343 * cannot change its owner. 344 */ 345 if (ppt->vm != NULL && ppt->vm != vm) 346 return (EBUSY); 347 348 ppt->vm = vm; 349 iommu_add_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); 350 return (0); 351 } 352 return (ENOENT); 353 } 354 355 int 356 ppt_unassign_device(struct vm *vm, int bus, int slot, int func) 357 { 358 struct pptdev *ppt; 359 360 ppt = ppt_find(bus, slot, func); 361 if (ppt != NULL) { 362 /* 363 * If this device is not owned by this 'vm' then bail out. 364 */ 365 if (ppt->vm != vm) 366 return (EBUSY); 367 ppt_unmap_mmio(vm, ppt); 368 ppt_teardown_msi(ppt); 369 ppt_teardown_msix(ppt); 370 iommu_remove_device(vm_iommu_domain(vm), pci_get_rid(ppt->dev)); 371 ppt->vm = NULL; 372 return (0); 373 } 374 return (ENOENT); 375 } 376 377 int 378 ppt_unassign_all(struct vm *vm) 379 { 380 int i, bus, slot, func; 381 device_t dev; 382 383 for (i = 0; i < num_pptdevs; i++) { 384 if (pptdevs[i].vm == vm) { 385 dev = pptdevs[i].dev; 386 bus = pci_get_bus(dev); 387 slot = pci_get_slot(dev); 388 func = pci_get_function(dev); 389 vm_unassign_pptdev(vm, bus, slot, func); 390 } 391 } 392 393 return (0); 394 } 395 396 int 397 ppt_map_mmio(struct vm *vm, int bus, int slot, int func, 398 vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 399 { 400 int i, error; 401 struct vm_memory_segment *seg; 402 struct pptdev *ppt; 403 404 ppt = ppt_find(bus, slot, func); 405 if (ppt != NULL) { 406 if (ppt->vm != vm) 407 return (EBUSY); 408 409 for (i = 0; i < MAX_MMIOSEGS; i++) { 410 seg = &ppt->mmio[i]; 411 if (seg->len == 0) { 412 error = vm_map_mmio(vm, gpa, len, hpa); 413 if (error == 0) { 414 seg->gpa = gpa; 415 seg->len = len; 416 } 417 return (error); 418 } 419 } 420 return (ENOSPC); 421 } 422 return (ENOENT); 423 } 424 425 static int 426 pptintr(void *arg) 427 { 428 struct pptdev *ppt; 429 struct pptintr_arg *pptarg; 430 431 pptarg = arg; 432 ppt = pptarg->pptdev; 433 434 if (ppt->vm != NULL) 435 lapic_intr_msi(ppt->vm, pptarg->addr, pptarg->msg_data); 436 else { 437 /* 438 * XXX 439 * This is not expected to happen - panic? 440 */ 441 } 442 443 /* 444 * For legacy interrupts give other filters a chance in case 445 * the interrupt was not generated by the passthrough device. 446 */ 447 if (ppt->msi.startrid == 0) 448 return (FILTER_STRAY); 449 else 450 return (FILTER_HANDLED); 451 } 452 453 int 454 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, 455 uint64_t addr, uint64_t msg, int numvec) 456 { 457 int i, rid, flags; 458 int msi_count, startrid, error, tmp; 459 struct pptdev *ppt; 460 461 if (numvec < 0 || numvec > MAX_MSIMSGS) 462 return (EINVAL); 463 464 ppt = ppt_find(bus, slot, func); 465 if (ppt == NULL) 466 return (ENOENT); 467 if (ppt->vm != vm) /* Make sure we own this device */ 468 return (EBUSY); 469 470 /* Free any allocated resources */ 471 ppt_teardown_msi(ppt); 472 473 if (numvec == 0) /* nothing more to do */ 474 return (0); 475 476 flags = RF_ACTIVE; 477 msi_count = pci_msi_count(ppt->dev); 478 if (msi_count == 0) { 479 startrid = 0; /* legacy interrupt */ 480 msi_count = 1; 481 flags |= RF_SHAREABLE; 482 } else 483 startrid = 1; /* MSI */ 484 485 /* 486 * The device must be capable of supporting the number of vectors 487 * the guest wants to allocate. 488 */ 489 if (numvec > msi_count) 490 return (EINVAL); 491 492 /* 493 * Make sure that we can allocate all the MSI vectors that are needed 494 * by the guest. 495 */ 496 if (startrid == 1) { 497 tmp = numvec; 498 error = pci_alloc_msi(ppt->dev, &tmp); 499 if (error) 500 return (error); 501 else if (tmp != numvec) { 502 pci_release_msi(ppt->dev); 503 return (ENOSPC); 504 } else { 505 /* success */ 506 } 507 } 508 509 ppt->msi.startrid = startrid; 510 511 /* 512 * Allocate the irq resource and attach it to the interrupt handler. 513 */ 514 for (i = 0; i < numvec; i++) { 515 ppt->msi.num_msgs = i + 1; 516 ppt->msi.cookie[i] = NULL; 517 518 rid = startrid + i; 519 ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, 520 &rid, flags); 521 if (ppt->msi.res[i] == NULL) 522 break; 523 524 ppt->msi.arg[i].pptdev = ppt; 525 ppt->msi.arg[i].addr = addr; 526 ppt->msi.arg[i].msg_data = msg + i; 527 528 error = bus_setup_intr(ppt->dev, ppt->msi.res[i], 529 INTR_TYPE_NET | INTR_MPSAFE, 530 pptintr, NULL, &ppt->msi.arg[i], 531 &ppt->msi.cookie[i]); 532 if (error != 0) 533 break; 534 } 535 536 if (i < numvec) { 537 ppt_teardown_msi(ppt); 538 return (ENXIO); 539 } 540 541 return (0); 542 } 543 544 int 545 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func, 546 int idx, uint64_t addr, uint64_t msg, uint32_t vector_control) 547 { 548 struct pptdev *ppt; 549 struct pci_devinfo *dinfo; 550 int numvec, alloced, rid, error; 551 size_t res_size, cookie_size, arg_size; 552 553 ppt = ppt_find(bus, slot, func); 554 if (ppt == NULL) 555 return (ENOENT); 556 if (ppt->vm != vm) /* Make sure we own this device */ 557 return (EBUSY); 558 559 dinfo = device_get_ivars(ppt->dev); 560 if (!dinfo) 561 return (ENXIO); 562 563 /* 564 * First-time configuration: 565 * Allocate the MSI-X table 566 * Allocate the IRQ resources 567 * Set up some variables in ppt->msix 568 */ 569 if (ppt->msix.num_msgs == 0) { 570 numvec = pci_msix_count(ppt->dev); 571 if (numvec <= 0) 572 return (EINVAL); 573 574 ppt->msix.startrid = 1; 575 ppt->msix.num_msgs = numvec; 576 577 res_size = numvec * sizeof(ppt->msix.res[0]); 578 cookie_size = numvec * sizeof(ppt->msix.cookie[0]); 579 arg_size = numvec * sizeof(ppt->msix.arg[0]); 580 581 ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO); 582 ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX, 583 M_WAITOK | M_ZERO); 584 ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO); 585 586 rid = dinfo->cfg.msix.msix_table_bar; 587 ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev, 588 SYS_RES_MEMORY, &rid, RF_ACTIVE); 589 590 if (ppt->msix.msix_table_res == NULL) { 591 ppt_teardown_msix(ppt); 592 return (ENOSPC); 593 } 594 ppt->msix.msix_table_rid = rid; 595 596 alloced = numvec; 597 error = pci_alloc_msix(ppt->dev, &alloced); 598 if (error || alloced != numvec) { 599 ppt_teardown_msix(ppt); 600 return (error == 0 ? ENOSPC: error); 601 } 602 } 603 604 if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 605 /* Tear down the IRQ if it's already set up */ 606 ppt_teardown_msix_intr(ppt, idx); 607 608 /* Allocate the IRQ resource */ 609 ppt->msix.cookie[idx] = NULL; 610 rid = ppt->msix.startrid + idx; 611 ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, 612 &rid, RF_ACTIVE); 613 if (ppt->msix.res[idx] == NULL) 614 return (ENXIO); 615 616 ppt->msix.arg[idx].pptdev = ppt; 617 ppt->msix.arg[idx].addr = addr; 618 ppt->msix.arg[idx].msg_data = msg; 619 620 /* Setup the MSI-X interrupt */ 621 error = bus_setup_intr(ppt->dev, ppt->msix.res[idx], 622 INTR_TYPE_NET | INTR_MPSAFE, 623 pptintr, NULL, &ppt->msix.arg[idx], 624 &ppt->msix.cookie[idx]); 625 626 if (error != 0) { 627 bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]); 628 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]); 629 ppt->msix.cookie[idx] = NULL; 630 ppt->msix.res[idx] = NULL; 631 return (ENXIO); 632 } 633 } else { 634 /* Masked, tear it down if it's already been set up */ 635 ppt_teardown_msix_intr(ppt, idx); 636 } 637 638 return (0); 639 } 640