1 /*- 2 * Copyright (c) 2011 NetApp, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 * 26 * $FreeBSD$ 27 */ 28 29 #include <sys/cdefs.h> 30 __FBSDID("$FreeBSD$"); 31 32 #include <sys/param.h> 33 #include <sys/systm.h> 34 #include <sys/kernel.h> 35 #include <sys/malloc.h> 36 #include <sys/module.h> 37 #include <sys/bus.h> 38 #include <sys/pciio.h> 39 #include <sys/rman.h> 40 #include <sys/smp.h> 41 42 #include <dev/pci/pcivar.h> 43 #include <dev/pci/pcireg.h> 44 45 #include <machine/resource.h> 46 47 #include <machine/vmm.h> 48 #include <machine/vmm_dev.h> 49 50 #include "vmm_lapic.h" 51 #include "vmm_ktr.h" 52 53 #include "iommu.h" 54 #include "ppt.h" 55 56 /* XXX locking */ 57 58 #define MAX_PPTDEVS (sizeof(pptdevs) / sizeof(pptdevs[0])) 59 #define MAX_MSIMSGS 32 60 61 /* 62 * If the MSI-X table is located in the middle of a BAR then that MMIO 63 * region gets split into two segments - one segment above the MSI-X table 64 * and the other segment below the MSI-X table - with a hole in place of 65 * the MSI-X table so accesses to it can be trapped and emulated. 66 * 67 * So, allocate a MMIO segment for each BAR register + 1 additional segment. 68 */ 69 #define MAX_MMIOSEGS ((PCIR_MAX_BAR_0 + 1) + 1) 70 71 MALLOC_DEFINE(M_PPTMSIX, "pptmsix", "Passthru MSI-X resources"); 72 73 struct pptintr_arg { /* pptintr(pptintr_arg) */ 74 struct pptdev *pptdev; 75 int vec; 76 int vcpu; 77 }; 78 79 static struct pptdev { 80 device_t dev; 81 struct vm *vm; /* owner of this device */ 82 struct vm_memory_segment mmio[MAX_MMIOSEGS]; 83 struct { 84 int num_msgs; /* guest state */ 85 86 int startrid; /* host state */ 87 struct resource *res[MAX_MSIMSGS]; 88 void *cookie[MAX_MSIMSGS]; 89 struct pptintr_arg arg[MAX_MSIMSGS]; 90 } msi; 91 92 struct { 93 int num_msgs; 94 int startrid; 95 int msix_table_rid; 96 struct resource *msix_table_res; 97 struct resource **res; 98 void **cookie; 99 struct pptintr_arg *arg; 100 } msix; 101 } pptdevs[64]; 102 103 static int num_pptdevs; 104 105 static int 106 ppt_probe(device_t dev) 107 { 108 int bus, slot, func; 109 struct pci_devinfo *dinfo; 110 111 dinfo = (struct pci_devinfo *)device_get_ivars(dev); 112 113 bus = pci_get_bus(dev); 114 slot = pci_get_slot(dev); 115 func = pci_get_function(dev); 116 117 /* 118 * To qualify as a pci passthrough device a device must: 119 * - be allowed by administrator to be used in this role 120 * - be an endpoint device 121 */ 122 if (vmm_is_pptdev(bus, slot, func) && 123 (dinfo->cfg.hdrtype & PCIM_HDRTYPE) == PCIM_HDRTYPE_NORMAL) 124 return (0); 125 else 126 return (ENXIO); 127 } 128 129 static int 130 ppt_attach(device_t dev) 131 { 132 int n; 133 134 if (num_pptdevs >= MAX_PPTDEVS) { 135 printf("ppt_attach: maximum number of pci passthrough devices " 136 "exceeded\n"); 137 return (ENXIO); 138 } 139 140 n = num_pptdevs++; 141 pptdevs[n].dev = dev; 142 143 if (bootverbose) 144 device_printf(dev, "attached\n"); 145 146 return (0); 147 } 148 149 static int 150 ppt_detach(device_t dev) 151 { 152 /* 153 * XXX check whether there are any pci passthrough devices assigned 154 * to guests before we allow this driver to detach. 155 */ 156 157 return (0); 158 } 159 160 static device_method_t ppt_methods[] = { 161 /* Device interface */ 162 DEVMETHOD(device_probe, ppt_probe), 163 DEVMETHOD(device_attach, ppt_attach), 164 DEVMETHOD(device_detach, ppt_detach), 165 {0, 0} 166 }; 167 168 static devclass_t ppt_devclass; 169 DEFINE_CLASS_0(ppt, ppt_driver, ppt_methods, 0); 170 DRIVER_MODULE(ppt, pci, ppt_driver, ppt_devclass, NULL, NULL); 171 172 static struct pptdev * 173 ppt_find(int bus, int slot, int func) 174 { 175 device_t dev; 176 int i, b, s, f; 177 178 for (i = 0; i < num_pptdevs; i++) { 179 dev = pptdevs[i].dev; 180 b = pci_get_bus(dev); 181 s = pci_get_slot(dev); 182 f = pci_get_function(dev); 183 if (bus == b && slot == s && func == f) 184 return (&pptdevs[i]); 185 } 186 return (NULL); 187 } 188 189 static void 190 ppt_unmap_mmio(struct vm *vm, struct pptdev *ppt) 191 { 192 int i; 193 struct vm_memory_segment *seg; 194 195 for (i = 0; i < MAX_MMIOSEGS; i++) { 196 seg = &ppt->mmio[i]; 197 if (seg->len == 0) 198 continue; 199 (void)vm_unmap_mmio(vm, seg->gpa, seg->len); 200 bzero(seg, sizeof(struct vm_memory_segment)); 201 } 202 } 203 204 static void 205 ppt_teardown_msi(struct pptdev *ppt) 206 { 207 int i, rid; 208 void *cookie; 209 struct resource *res; 210 211 if (ppt->msi.num_msgs == 0) 212 return; 213 214 for (i = 0; i < ppt->msi.num_msgs; i++) { 215 rid = ppt->msi.startrid + i; 216 res = ppt->msi.res[i]; 217 cookie = ppt->msi.cookie[i]; 218 219 if (cookie != NULL) 220 bus_teardown_intr(ppt->dev, res, cookie); 221 222 if (res != NULL) 223 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); 224 225 ppt->msi.res[i] = NULL; 226 ppt->msi.cookie[i] = NULL; 227 } 228 229 if (ppt->msi.startrid == 1) 230 pci_release_msi(ppt->dev); 231 232 ppt->msi.num_msgs = 0; 233 } 234 235 static void 236 ppt_teardown_msix_intr(struct pptdev *ppt, int idx) 237 { 238 int rid; 239 struct resource *res; 240 void *cookie; 241 242 rid = ppt->msix.startrid + idx; 243 res = ppt->msix.res[idx]; 244 cookie = ppt->msix.cookie[idx]; 245 246 if (cookie != NULL) 247 bus_teardown_intr(ppt->dev, res, cookie); 248 249 if (res != NULL) 250 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, res); 251 252 ppt->msix.res[idx] = NULL; 253 ppt->msix.cookie[idx] = NULL; 254 } 255 256 static void 257 ppt_teardown_msix(struct pptdev *ppt) 258 { 259 int i; 260 261 if (ppt->msix.num_msgs == 0) 262 return; 263 264 for (i = 0; i < ppt->msix.num_msgs; i++) 265 ppt_teardown_msix_intr(ppt, i); 266 267 if (ppt->msix.msix_table_res) { 268 bus_release_resource(ppt->dev, SYS_RES_MEMORY, 269 ppt->msix.msix_table_rid, 270 ppt->msix.msix_table_res); 271 ppt->msix.msix_table_res = NULL; 272 ppt->msix.msix_table_rid = 0; 273 } 274 275 free(ppt->msix.res, M_PPTMSIX); 276 free(ppt->msix.cookie, M_PPTMSIX); 277 free(ppt->msix.arg, M_PPTMSIX); 278 279 pci_release_msi(ppt->dev); 280 281 ppt->msix.num_msgs = 0; 282 } 283 284 int 285 ppt_num_devices(struct vm *vm) 286 { 287 int i, num; 288 289 num = 0; 290 for (i = 0; i < num_pptdevs; i++) { 291 if (pptdevs[i].vm == vm) 292 num++; 293 } 294 return (num); 295 } 296 297 boolean_t 298 ppt_is_mmio(struct vm *vm, vm_paddr_t gpa) 299 { 300 int i, n; 301 struct pptdev *ppt; 302 struct vm_memory_segment *seg; 303 304 for (n = 0; n < num_pptdevs; n++) { 305 ppt = &pptdevs[n]; 306 if (ppt->vm != vm) 307 continue; 308 309 for (i = 0; i < MAX_MMIOSEGS; i++) { 310 seg = &ppt->mmio[i]; 311 if (seg->len == 0) 312 continue; 313 if (gpa >= seg->gpa && gpa < seg->gpa + seg->len) 314 return (TRUE); 315 } 316 } 317 318 return (FALSE); 319 } 320 321 int 322 ppt_assign_device(struct vm *vm, int bus, int slot, int func) 323 { 324 struct pptdev *ppt; 325 326 ppt = ppt_find(bus, slot, func); 327 if (ppt != NULL) { 328 /* 329 * If this device is owned by a different VM then we 330 * cannot change its owner. 331 */ 332 if (ppt->vm != NULL && ppt->vm != vm) 333 return (EBUSY); 334 335 ppt->vm = vm; 336 iommu_add_device(vm_iommu_domain(vm), bus, slot, func); 337 return (0); 338 } 339 return (ENOENT); 340 } 341 342 int 343 ppt_unassign_device(struct vm *vm, int bus, int slot, int func) 344 { 345 struct pptdev *ppt; 346 347 ppt = ppt_find(bus, slot, func); 348 if (ppt != NULL) { 349 /* 350 * If this device is not owned by this 'vm' then bail out. 351 */ 352 if (ppt->vm != vm) 353 return (EBUSY); 354 ppt_unmap_mmio(vm, ppt); 355 ppt_teardown_msi(ppt); 356 ppt_teardown_msix(ppt); 357 iommu_remove_device(vm_iommu_domain(vm), bus, slot, func); 358 ppt->vm = NULL; 359 return (0); 360 } 361 return (ENOENT); 362 } 363 364 int 365 ppt_unassign_all(struct vm *vm) 366 { 367 int i, bus, slot, func; 368 device_t dev; 369 370 for (i = 0; i < num_pptdevs; i++) { 371 if (pptdevs[i].vm == vm) { 372 dev = pptdevs[i].dev; 373 bus = pci_get_bus(dev); 374 slot = pci_get_slot(dev); 375 func = pci_get_function(dev); 376 vm_unassign_pptdev(vm, bus, slot, func); 377 } 378 } 379 380 return (0); 381 } 382 383 int 384 ppt_map_mmio(struct vm *vm, int bus, int slot, int func, 385 vm_paddr_t gpa, size_t len, vm_paddr_t hpa) 386 { 387 int i, error; 388 struct vm_memory_segment *seg; 389 struct pptdev *ppt; 390 391 ppt = ppt_find(bus, slot, func); 392 if (ppt != NULL) { 393 if (ppt->vm != vm) 394 return (EBUSY); 395 396 for (i = 0; i < MAX_MMIOSEGS; i++) { 397 seg = &ppt->mmio[i]; 398 if (seg->len == 0) { 399 error = vm_map_mmio(vm, gpa, len, hpa); 400 if (error == 0) { 401 seg->gpa = gpa; 402 seg->len = len; 403 } 404 return (error); 405 } 406 } 407 return (ENOSPC); 408 } 409 return (ENOENT); 410 } 411 412 static int 413 pptintr(void *arg) 414 { 415 int vec; 416 struct pptdev *ppt; 417 struct pptintr_arg *pptarg; 418 419 pptarg = arg; 420 ppt = pptarg->pptdev; 421 vec = pptarg->vec; 422 423 if (ppt->vm != NULL) 424 (void) lapic_set_intr(ppt->vm, pptarg->vcpu, vec); 425 else { 426 /* 427 * XXX 428 * This is not expected to happen - panic? 429 */ 430 } 431 432 /* 433 * For legacy interrupts give other filters a chance in case 434 * the interrupt was not generated by the passthrough device. 435 */ 436 if (ppt->msi.startrid == 0) 437 return (FILTER_STRAY); 438 else 439 return (FILTER_HANDLED); 440 } 441 442 int 443 ppt_setup_msi(struct vm *vm, int vcpu, int bus, int slot, int func, 444 int destcpu, int vector, int numvec) 445 { 446 int i, rid, flags; 447 int msi_count, startrid, error, tmp; 448 struct pptdev *ppt; 449 450 if ((destcpu >= VM_MAXCPU || destcpu < 0) || 451 (vector < 0 || vector > 255) || 452 (numvec < 0 || numvec > MAX_MSIMSGS)) 453 return (EINVAL); 454 455 ppt = ppt_find(bus, slot, func); 456 if (ppt == NULL) 457 return (ENOENT); 458 if (ppt->vm != vm) /* Make sure we own this device */ 459 return (EBUSY); 460 461 /* Free any allocated resources */ 462 ppt_teardown_msi(ppt); 463 464 if (numvec == 0) /* nothing more to do */ 465 return (0); 466 467 flags = RF_ACTIVE; 468 msi_count = pci_msi_count(ppt->dev); 469 if (msi_count == 0) { 470 startrid = 0; /* legacy interrupt */ 471 msi_count = 1; 472 flags |= RF_SHAREABLE; 473 } else 474 startrid = 1; /* MSI */ 475 476 /* 477 * The device must be capable of supporting the number of vectors 478 * the guest wants to allocate. 479 */ 480 if (numvec > msi_count) 481 return (EINVAL); 482 483 /* 484 * Make sure that we can allocate all the MSI vectors that are needed 485 * by the guest. 486 */ 487 if (startrid == 1) { 488 tmp = numvec; 489 error = pci_alloc_msi(ppt->dev, &tmp); 490 if (error) 491 return (error); 492 else if (tmp != numvec) { 493 pci_release_msi(ppt->dev); 494 return (ENOSPC); 495 } else { 496 /* success */ 497 } 498 } 499 500 ppt->msi.startrid = startrid; 501 502 /* 503 * Allocate the irq resource and attach it to the interrupt handler. 504 */ 505 for (i = 0; i < numvec; i++) { 506 ppt->msi.num_msgs = i + 1; 507 ppt->msi.cookie[i] = NULL; 508 509 rid = startrid + i; 510 ppt->msi.res[i] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, 511 &rid, flags); 512 if (ppt->msi.res[i] == NULL) 513 break; 514 515 ppt->msi.arg[i].pptdev = ppt; 516 ppt->msi.arg[i].vec = vector + i; 517 ppt->msi.arg[i].vcpu = destcpu; 518 519 error = bus_setup_intr(ppt->dev, ppt->msi.res[i], 520 INTR_TYPE_NET | INTR_MPSAFE, 521 pptintr, NULL, &ppt->msi.arg[i], 522 &ppt->msi.cookie[i]); 523 if (error != 0) 524 break; 525 } 526 527 if (i < numvec) { 528 ppt_teardown_msi(ppt); 529 return (ENXIO); 530 } 531 532 return (0); 533 } 534 535 int 536 ppt_setup_msix(struct vm *vm, int vcpu, int bus, int slot, int func, 537 int idx, uint32_t msg, uint32_t vector_control, uint64_t addr) 538 { 539 struct pptdev *ppt; 540 struct pci_devinfo *dinfo; 541 int numvec, alloced, rid, error; 542 size_t res_size, cookie_size, arg_size; 543 544 ppt = ppt_find(bus, slot, func); 545 if (ppt == NULL) 546 return (ENOENT); 547 if (ppt->vm != vm) /* Make sure we own this device */ 548 return (EBUSY); 549 550 dinfo = device_get_ivars(ppt->dev); 551 if (!dinfo) 552 return (ENXIO); 553 554 /* 555 * First-time configuration: 556 * Allocate the MSI-X table 557 * Allocate the IRQ resources 558 * Set up some variables in ppt->msix 559 */ 560 if (ppt->msix.num_msgs == 0) { 561 numvec = pci_msix_count(ppt->dev); 562 if (numvec <= 0) 563 return (EINVAL); 564 565 ppt->msix.startrid = 1; 566 ppt->msix.num_msgs = numvec; 567 568 res_size = numvec * sizeof(ppt->msix.res[0]); 569 cookie_size = numvec * sizeof(ppt->msix.cookie[0]); 570 arg_size = numvec * sizeof(ppt->msix.arg[0]); 571 572 ppt->msix.res = malloc(res_size, M_PPTMSIX, M_WAITOK | M_ZERO); 573 ppt->msix.cookie = malloc(cookie_size, M_PPTMSIX, 574 M_WAITOK | M_ZERO); 575 ppt->msix.arg = malloc(arg_size, M_PPTMSIX, M_WAITOK | M_ZERO); 576 577 rid = dinfo->cfg.msix.msix_table_bar; 578 ppt->msix.msix_table_res = bus_alloc_resource_any(ppt->dev, 579 SYS_RES_MEMORY, &rid, RF_ACTIVE); 580 581 if (ppt->msix.msix_table_res == NULL) { 582 ppt_teardown_msix(ppt); 583 return (ENOSPC); 584 } 585 ppt->msix.msix_table_rid = rid; 586 587 alloced = numvec; 588 error = pci_alloc_msix(ppt->dev, &alloced); 589 if (error || alloced != numvec) { 590 ppt_teardown_msix(ppt); 591 return (error == 0 ? ENOSPC: error); 592 } 593 } 594 595 if ((vector_control & PCIM_MSIX_VCTRL_MASK) == 0) { 596 /* Tear down the IRQ if it's already set up */ 597 ppt_teardown_msix_intr(ppt, idx); 598 599 /* Allocate the IRQ resource */ 600 ppt->msix.cookie[idx] = NULL; 601 rid = ppt->msix.startrid + idx; 602 ppt->msix.res[idx] = bus_alloc_resource_any(ppt->dev, SYS_RES_IRQ, 603 &rid, RF_ACTIVE); 604 if (ppt->msix.res[idx] == NULL) 605 return (ENXIO); 606 607 ppt->msix.arg[idx].pptdev = ppt; 608 ppt->msix.arg[idx].vec = msg & 0xFF; 609 ppt->msix.arg[idx].vcpu = (addr >> 12) & 0xFF; 610 611 /* Setup the MSI-X interrupt */ 612 error = bus_setup_intr(ppt->dev, ppt->msix.res[idx], 613 INTR_TYPE_NET | INTR_MPSAFE, 614 pptintr, NULL, &ppt->msix.arg[idx], 615 &ppt->msix.cookie[idx]); 616 617 if (error != 0) { 618 bus_teardown_intr(ppt->dev, ppt->msix.res[idx], ppt->msix.cookie[idx]); 619 bus_release_resource(ppt->dev, SYS_RES_IRQ, rid, ppt->msix.res[idx]); 620 ppt->msix.cookie[idx] = NULL; 621 ppt->msix.res[idx] = NULL; 622 return (ENXIO); 623 } 624 } else { 625 /* Masked, tear it down if it's already been set up */ 626 ppt_teardown_msix_intr(ppt, idx); 627 } 628 629 return (0); 630 } 631