/*-
 * Copyright (c) 2013-2015 Sandvine Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bus.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/iov.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pciio.h>
#include <sys/queue.h>
#include <sys/rman.h>
#include <sys/sysctl.h>

#include <machine/bus.h>
#include <machine/stdarg.h>

#include <sys/nv.h>
#include <sys/iov_schema.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_iov.h>
#include <dev/pci/pci_private.h>
#include <dev/pci/pci_iov_private.h>
#include <dev/pci/schema_private.h>

#include "pcib_if.h"

static MALLOC_DEFINE(M_SRIOV, "sr_iov", "PCI SR-IOV allocations");

static d_ioctl_t pci_iov_ioctl;

static struct cdevsw iov_cdevsw = {
	.d_version = D_VERSION,
	.d_name = "iov",
	.d_ioctl = pci_iov_ioctl
};

SYSCTL_DECL(_hw_pci);
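
/*
 * Usage sketch (illustrative, not part of the driver): the limit declared
 * below is exported with CTLFLAG_RWTUN, so it can be raised at runtime,
 *
 *	# sysctl hw.pci.iov_max_config=2097152
 *
 * or preset from loader.conf(5) through the tunable of the same name.
 */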

/*
 * The maximum amount of memory we will allocate for user configuration of an
 * SR-IOV device.  1MB ought to be enough for anyone, but leave this
 * configurable just in case.
 */
static u_long pci_iov_max_config = 1024 * 1024;
SYSCTL_ULONG(_hw_pci, OID_AUTO, iov_max_config, CTLFLAG_RWTUN,
    &pci_iov_max_config, 0, "Maximum allowed size of SR-IOV configuration.");

#define IOV_READ(d, r, w) \
	pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, w)

#define IOV_WRITE(d, r, v, w) \
	pci_write_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, v, w)

static nvlist_t	*pci_iov_build_schema(nvlist_t **pf_schema,
		    nvlist_t **vf_schema);
static void	pci_iov_build_pf_schema(nvlist_t *schema,
		    nvlist_t **driver_schema);
static void	pci_iov_build_vf_schema(nvlist_t *schema,
		    nvlist_t **driver_schema);
static nvlist_t	*pci_iov_get_pf_subsystem_schema(void);
static nvlist_t	*pci_iov_get_vf_subsystem_schema(void);

int
pci_iov_attach_name(device_t dev, struct nvlist *pf_schema,
    struct nvlist *vf_schema, const char *fmt, ...)
{
	char buf[NAME_MAX + 1];
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);
	return (PCI_IOV_ATTACH(device_get_parent(dev), dev, pf_schema,
	    vf_schema, buf));
}

int
pci_iov_attach_method(device_t bus, device_t dev, nvlist_t *pf_schema,
    nvlist_t *vf_schema, const char *name)
{
	device_t pcib;
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	nvlist_t *schema;
	uint32_t version;
	int error;
	int iov_pos;

	dinfo = device_get_ivars(dev);
	pcib = device_get_parent(bus);
	schema = NULL;

	error = pci_find_extcap(dev, PCIZ_SRIOV, &iov_pos);

	if (error != 0)
		return (error);

	version = pci_read_config(dev, iov_pos, 4);
	if (PCI_EXTCAP_VER(version) != 1) {
		if (bootverbose)
			device_printf(dev,
			    "Unsupported version of SR-IOV (%d) detected\n",
			    PCI_EXTCAP_VER(version));

		return (ENXIO);
	}

	iov = malloc(sizeof(*dinfo->cfg.iov), M_SRIOV, M_WAITOK | M_ZERO);

	mtx_lock(&Giant);
	if (dinfo->cfg.iov != NULL) {
		error = EBUSY;
		goto cleanup;
	}
	iov->iov_pos = iov_pos;

	schema = pci_iov_build_schema(&pf_schema, &vf_schema);
	if (schema == NULL) {
		error = ENOMEM;
		goto cleanup;
	}

	error = pci_iov_validate_schema(schema);
	if (error != 0)
		goto cleanup;
	iov->iov_schema = schema;

	iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev),
	    UID_ROOT, GID_WHEEL, 0600, "iov/%s", name);

	if (iov->iov_cdev == NULL) {
		error = ENOMEM;
		goto cleanup;
	}

	dinfo->cfg.iov = iov;
	iov->iov_cdev->si_drv1 = dinfo;
	mtx_unlock(&Giant);

	return (0);

cleanup:
	nvlist_destroy(schema);
	nvlist_destroy(pf_schema);
	nvlist_destroy(vf_schema);
	free(iov, M_SRIOV);
	mtx_unlock(&Giant);
	return (error);
}

int
pci_iov_detach_method(device_t bus, device_t dev)
{
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;

	mtx_lock(&Giant);
	dinfo = device_get_ivars(dev);
	iov = dinfo->cfg.iov;

	if (iov == NULL) {
		mtx_unlock(&Giant);
		return (0);
	}

	if (iov->iov_num_vfs != 0 || iov->iov_flags & IOV_BUSY) {
		mtx_unlock(&Giant);
		return (EBUSY);
	}

	dinfo->cfg.iov = NULL;

	if (iov->iov_cdev) {
		destroy_dev(iov->iov_cdev);
		iov->iov_cdev = NULL;
	}
	nvlist_destroy(iov->iov_schema);

	free(iov, M_SRIOV);
	mtx_unlock(&Giant);

	return (0);
}
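
/*
 * Schema layout sketch: the combined schema assembled below is an nvlist
 * with one child per function type, and each child splits into the
 * IOV-subsystem schema and the driver-supplied schema (the literal key
 * strings are the values of the *_NAME macros, which are defined elsewhere):
 *
 *	schema
 *	+- PF_CONFIG_NAME
 *	|  +- IOV_CONFIG_NAME		(num_vfs, device)
 *	|  +- DRIVER_CONFIG_NAME	(PF driver parameters)
 *	+- VF_SCHEMA_NAME
 *	   +- IOV_CONFIG_NAME		(passthrough)
 *	   +- DRIVER_CONFIG_NAME	(VF driver parameters)
 */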

static nvlist_t *
pci_iov_build_schema(nvlist_t **pf, nvlist_t **vf)
{
	nvlist_t *schema, *pf_driver, *vf_driver;

	/* We always take ownership of the schemas. */
	pf_driver = *pf;
	*pf = NULL;
	vf_driver = *vf;
	*vf = NULL;

	schema = pci_iov_schema_alloc_node();
	if (schema == NULL)
		goto cleanup;

	pci_iov_build_pf_schema(schema, &pf_driver);
	pci_iov_build_vf_schema(schema, &vf_driver);

	if (nvlist_error(schema) != 0)
		goto cleanup;

	return (schema);

cleanup:
	nvlist_destroy(schema);
	nvlist_destroy(pf_driver);
	nvlist_destroy(vf_driver);
	return (NULL);
}

static void
pci_iov_build_pf_schema(nvlist_t *schema, nvlist_t **driver_schema)
{
	nvlist_t *pf_schema, *iov_schema;

	pf_schema = pci_iov_schema_alloc_node();
	if (pf_schema == NULL) {
		nvlist_set_error(schema, ENOMEM);
		return;
	}

	iov_schema = pci_iov_get_pf_subsystem_schema();

	/*
	 * Note that if either *driver_schema or iov_schema is NULL, then
	 * nvlist_move_nvlist will put the schema in the error state and
	 * SR-IOV will fail to initialize later, so we don't have to explicitly
	 * handle that case.
	 */
	nvlist_move_nvlist(pf_schema, DRIVER_CONFIG_NAME, *driver_schema);
	nvlist_move_nvlist(pf_schema, IOV_CONFIG_NAME, iov_schema);
	nvlist_move_nvlist(schema, PF_CONFIG_NAME, pf_schema);
	*driver_schema = NULL;
}

static void
pci_iov_build_vf_schema(nvlist_t *schema, nvlist_t **driver_schema)
{
	nvlist_t *vf_schema, *iov_schema;

	vf_schema = pci_iov_schema_alloc_node();
	if (vf_schema == NULL) {
		nvlist_set_error(schema, ENOMEM);
		return;
	}

	iov_schema = pci_iov_get_vf_subsystem_schema();

	/*
	 * Note that if either *driver_schema or iov_schema is NULL, then
	 * nvlist_move_nvlist will put the schema in the error state and
	 * SR-IOV will fail to initialize later, so we don't have to explicitly
	 * handle that case.
	 */
	nvlist_move_nvlist(vf_schema, DRIVER_CONFIG_NAME, *driver_schema);
	nvlist_move_nvlist(vf_schema, IOV_CONFIG_NAME, iov_schema);
	nvlist_move_nvlist(schema, VF_SCHEMA_NAME, vf_schema);
	*driver_schema = NULL;
}
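
/*
 * Configuration sketch matching the subsystem schemas below (a driver
 * schema may require additional keys, and the per-VF key names are built
 * from VF_PREFIX plus the VF index):
 *
 *	PF:	num_vfs = 4 (required), device = "<PF name>" (required)
 *	VF i:	passthrough = false (optional; defaults to false)
 */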

static nvlist_t *
pci_iov_get_pf_subsystem_schema(void)
{
	nvlist_t *pf;

	pf = pci_iov_schema_alloc_node();
	if (pf == NULL)
		return (NULL);

	pci_iov_schema_add_uint16(pf, "num_vfs", IOV_SCHEMA_REQUIRED, -1);
	pci_iov_schema_add_string(pf, "device", IOV_SCHEMA_REQUIRED, NULL);

	return (pf);
}

static nvlist_t *
pci_iov_get_vf_subsystem_schema(void)
{
	nvlist_t *vf;

	vf = pci_iov_schema_alloc_node();
	if (vf == NULL)
		return (NULL);

	pci_iov_schema_add_bool(vf, "passthrough", IOV_SCHEMA_HASDEFAULT, 0);

	return (vf);
}

static int
pci_iov_alloc_bar(struct pci_devinfo *dinfo, int bar, pci_addr_t bar_shift)
{
	struct resource *res;
	struct pcicfg_iov *iov;
	device_t dev, bus;
	rman_res_t start, end;
	pci_addr_t bar_size;
	int rid;

	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	bus = device_get_parent(dev);
	rid = iov->iov_pos + PCIR_SRIOV_BAR(bar);
	bar_size = 1 << bar_shift;

	res = pci_alloc_multi_resource(bus, dev, SYS_RES_MEMORY, &rid, 0,
	    ~0, 1, iov->iov_num_vfs, RF_ACTIVE);

	if (res == NULL)
		return (ENXIO);

	iov->iov_bar[bar].res = res;
	iov->iov_bar[bar].bar_size = bar_size;
	iov->iov_bar[bar].bar_shift = bar_shift;

	start = rman_get_start(res);
	end = rman_get_end(res);
	return (rman_manage_region(&iov->rman, start, end));
}

static void
pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo)
{
	struct pci_iov_bar *bar;
	uint64_t bar_start;
	int i;

	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
		bar = &iov->iov_bar[i];
		if (bar->res != NULL) {
			bar_start = rman_get_start(bar->res) +
			    dinfo->cfg.vf.index * bar->bar_size;

			pci_add_bar(dinfo->cfg.dev, PCIR_BAR(i), bar_start,
			    bar->bar_shift);
		}
	}
}
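
/*
 * Address arithmetic used by pci_iov_add_bars() above: each SR-IOV BAR of
 * the PF is one contiguous window containing iov_num_vfs equally sized
 * slices, and VF i's BAR simply points at slice i:
 *
 *	VF BAR = rman_get_start(bar->res) + i * bar->bar_size
 *
 * For example, a window at 0xd0000000 with a 16KB per-VF size places VF 2's
 * BAR at 0xd0008000.
 */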

static int
pci_iov_parse_config(struct pcicfg_iov *iov, struct pci_iov_arg *arg,
    nvlist_t **ret)
{
	void *packed_config;
	nvlist_t *config;
	int error;

	config = NULL;
	packed_config = NULL;

	if (arg->len > pci_iov_max_config) {
		error = EMSGSIZE;
		goto out;
	}

	packed_config = malloc(arg->len, M_SRIOV, M_WAITOK);

	error = copyin(arg->config, packed_config, arg->len);
	if (error != 0)
		goto out;

	config = nvlist_unpack(packed_config, arg->len, NV_FLAG_IGNORE_CASE);
	if (config == NULL) {
		error = EINVAL;
		goto out;
	}

	error = pci_iov_schema_validate_config(iov->iov_schema, config);
	if (error != 0)
		goto out;

	error = nvlist_error(config);
	if (error != 0)
		goto out;

	*ret = config;
	config = NULL;

out:
	nvlist_destroy(config);
	free(packed_config, M_SRIOV);
	return (error);
}

/*
 * Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV
 * capability.  This bit is only writeable on the lowest-numbered PF but
 * affects all PFs on the device.
 */
static int
pci_iov_set_ari(device_t bus)
{
	device_t lowest;
	device_t *devlist;
	int i, error, devcount, lowest_func, lowest_pos, iov_pos, dev_func;
	uint16_t iov_ctl;

	/* If ARI is disabled on the downstream port there is nothing to do. */
	if (!PCIB_ARI_ENABLED(device_get_parent(bus)))
		return (0);

	error = device_get_children(bus, &devlist, &devcount);

	if (error != 0)
		return (error);

	lowest = NULL;
	for (i = 0; i < devcount; i++) {
		if (pci_find_extcap(devlist[i], PCIZ_SRIOV, &iov_pos) == 0) {
			dev_func = pci_get_function(devlist[i]);
			if (lowest == NULL || dev_func < lowest_func) {
				lowest = devlist[i];
				lowest_func = dev_func;
				lowest_pos = iov_pos;
			}
		}
	}
	free(devlist, M_TEMP);

	/*
	 * If we called this function some device must have the SR-IOV
	 * capability.
	 */
	KASSERT(lowest != NULL,
	    ("Could not find child of %s with SR-IOV capability",
	    device_get_nameunit(bus)));

	iov_ctl = pci_read_config(lowest, lowest_pos + PCIR_SRIOV_CTL, 2);
	iov_ctl |= PCIM_SRIOV_ARI_EN;
	pci_write_config(lowest, lowest_pos + PCIR_SRIOV_CTL, iov_ctl, 2);
	if ((pci_read_config(lowest, lowest_pos + PCIR_SRIOV_CTL, 2) &
	    PCIM_SRIOV_ARI_EN) == 0) {
		device_printf(lowest, "failed to enable ARI\n");
		return (ENXIO);
	}
	return (0);
}

static int
pci_iov_config_page_size(struct pci_devinfo *dinfo)
{
	uint32_t page_cap, page_size;

	page_cap = IOV_READ(dinfo, PCIR_SRIOV_PAGE_CAP, 4);

	/*
	 * If the system page size is less than the smallest SR-IOV page size
	 * then round up to the smallest SR-IOV page size.
	 */
	if (PAGE_SHIFT < PCI_SRIOV_BASE_PAGE_SHIFT)
		page_size = (1 << 0);
	else
		page_size = (1 << (PAGE_SHIFT - PCI_SRIOV_BASE_PAGE_SHIFT));

	/* Check that the device supports the system page size. */
	if (!(page_size & page_cap))
		return (ENXIO);

	IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, page_size, 4);
	return (0);
}

static int
pci_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *config)
{
	const nvlist_t *device, *driver_config;

	device = nvlist_get_nvlist(config, PF_CONFIG_NAME);
	driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
	return (PCI_IOV_INIT(dev, num_vfs, driver_config));
}

static int
pci_iov_init_rman(device_t pf, struct pcicfg_iov *iov)
{
	int error;

	iov->rman.rm_start = 0;
	iov->rman.rm_end = ~0;
	iov->rman.rm_type = RMAN_ARRAY;
	snprintf(iov->rman_name, sizeof(iov->rman_name), "%s VF I/O memory",
	    device_get_nameunit(pf));
	iov->rman.rm_descr = iov->rman_name;

	error = rman_init(&iov->rman);
	if (error != 0)
		return (error);

	iov->iov_flags |= IOV_RMAN_INITED;
	return (0);
}

static int
pci_iov_alloc_bar_ea(struct pci_devinfo *dinfo, int bar)
{
	struct pcicfg_iov *iov;
	rman_res_t start, end;
	struct resource *res;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	rl = &dinfo->resources;
	iov = dinfo->cfg.iov;

	rle = resource_list_find(rl, SYS_RES_MEMORY,
	    iov->iov_pos + PCIR_SRIOV_BAR(bar));
	if (rle == NULL)
		rle = resource_list_find(rl, SYS_RES_IOPORT,
		    iov->iov_pos + PCIR_SRIOV_BAR(bar));
	if (rle == NULL)
		return (ENXIO);
	res = rle->res;

	iov->iov_bar[bar].res = res;
	iov->iov_bar[bar].bar_size = rman_get_size(res) / iov->iov_num_vfs;
	iov->iov_bar[bar].bar_shift = pci_mapsize(iov->iov_bar[bar].bar_size);

	start = rman_get_start(res);
	end = rman_get_end(res);

	return (rman_manage_region(&iov->rman, start, end));
}
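
/*
 * Sizing sketch for the legacy (non-EA) path in pci_iov_setup_bars() below:
 * pci_read_bar() performs the usual write-ones/read-back probe on the
 * SR-IOV BAR register, and pci_mapsize() converts the returned mask into a
 * power-of-two shift, so a testval of 0xffffc000 yields a shift of 14,
 * i.e. a 16KB window per VF.
 */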

static int
pci_iov_setup_bars(struct pci_devinfo *dinfo)
{
	device_t dev;
	struct pcicfg_iov *iov;
	pci_addr_t bar_value, testval;
	int i, last_64, error;

	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	last_64 = 0;

	pci_add_resources_ea(device_get_parent(dev), dev, 1);

	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
		/* First, try to use BARs allocated with EA */
		error = pci_iov_alloc_bar_ea(dinfo, i);
		if (error == 0)
			continue;

		/* Allocate legacy-BAR only if EA is not enabled */
		if (pci_ea_is_enabled(dev, iov->iov_pos + PCIR_SRIOV_BAR(i)))
			continue;

		/*
		 * If a PCI BAR is a 64-bit wide BAR, then it spans two
		 * consecutive registers.  Therefore if the last BAR that
		 * we looked at was a 64-bit BAR, we need to skip this
		 * register as it's the second half of the last BAR.
		 */
		if (!last_64) {
			pci_read_bar(dev,
			    iov->iov_pos + PCIR_SRIOV_BAR(i),
			    &bar_value, &testval, &last_64);

			if (testval != 0) {
				error = pci_iov_alloc_bar(dinfo, i,
				    pci_mapsize(testval));
				if (error != 0)
					return (error);
			}
		} else
			last_64 = 0;
	}

	return (0);
}
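
/*
 * Routing-ID arithmetic for the enumeration below: VF i is addressed at
 * RID(PF) + VF_OFF + i * VF_STRIDE.  For a PF at RID 0x0300 (bus 3, slot 0,
 * function 0) with VF_OFF 128 and VF_STRIDE 4, VF 0 lands at RID 0x0380 and
 * VF 1 at RID 0x0384, both still on bus 3, which is why pci_iov_config()
 * rejects configurations whose last RID would spill onto another bus.
 */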

static void
pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const nvlist_t *config,
    uint16_t first_rid, uint16_t rid_stride)
{
	char device_name[VF_MAX_NAME];
	const nvlist_t *device, *driver_config, *iov_config;
	device_t bus, dev, vf;
	struct pcicfg_iov *iov;
	struct pci_devinfo *vfinfo;
	int i, error;
	uint16_t vid, did, next_rid;

	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	bus = device_get_parent(dev);
	next_rid = first_rid;
	vid = pci_get_vendor(dev);
	did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2);

	for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) {
		snprintf(device_name, sizeof(device_name), VF_PREFIX"%d", i);
		device = nvlist_get_nvlist(config, device_name);
		iov_config = nvlist_get_nvlist(device, IOV_CONFIG_NAME);
		driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);

		vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did);
		if (vf == NULL)
			break;

		/*
		 * If we are creating passthrough devices then force the ppt
		 * driver to attach to prevent a VF driver from claiming the
		 * VFs.
		 */
		if (nvlist_get_bool(iov_config, "passthrough"))
			device_set_devclass_fixed(vf, "ppt");

		vfinfo = device_get_ivars(vf);

		vfinfo->cfg.iov = iov;
		vfinfo->cfg.vf.index = i;

		pci_iov_add_bars(iov, vfinfo);

		error = PCI_IOV_ADD_VF(dev, i, driver_config);
		if (error != 0) {
			device_printf(dev, "Failed to add VF %d\n", i);
			device_delete_child(bus, vf);
		}
	}

	bus_generic_attach(bus);
}

static int
pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
{
	device_t bus, dev;
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	nvlist_t *config;
	int i, error;
	uint16_t rid_off, rid_stride;
	uint16_t first_rid, last_rid;
	uint16_t iov_ctl;
	uint16_t num_vfs, total_vfs;
	int iov_inited;

	mtx_lock(&Giant);
	dinfo = cdev->si_drv1;
	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	bus = device_get_parent(dev);
	iov_inited = 0;
	config = NULL;

	if ((iov->iov_flags & IOV_BUSY) || iov->iov_num_vfs != 0) {
		mtx_unlock(&Giant);
		return (EBUSY);
	}
	iov->iov_flags |= IOV_BUSY;

	error = pci_iov_parse_config(iov, arg, &config);
	if (error != 0)
		goto out;

	num_vfs = pci_iov_config_get_num_vfs(config);
	total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
	if (num_vfs > total_vfs) {
		error = EINVAL;
		goto out;
	}

	error = pci_iov_config_page_size(dinfo);
	if (error != 0)
		goto out;

	error = pci_iov_set_ari(bus);
	if (error != 0)
		goto out;

	error = pci_iov_init(dev, num_vfs, config);
	if (error != 0)
		goto out;
	iov_inited = 1;

	IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, num_vfs, 2);

	rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2);
	rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2);

	first_rid = pci_get_rid(dev) + rid_off;
	last_rid = first_rid + (num_vfs - 1) * rid_stride;

	/* We don't yet support allocating extra bus numbers for VFs. */
	if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
		error = ENOSPC;
		goto out;
	}

	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
	iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);

	error = pci_iov_init_rman(dev, iov);
	if (error != 0)
		goto out;

	iov->iov_num_vfs = num_vfs;

	error = pci_iov_setup_bars(dinfo);
	if (error != 0)
		goto out;

	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
	iov_ctl |= PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE;
	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
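
	/*
	 * roundup(hz, 10) is at least hz ticks, i.e. roughly a full second,
	 * which comfortably exceeds the 100ms minimum required below.
	 */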

	/* Per specification, we must wait 100ms before accessing VFs. */
	pause("iov", roundup(hz, 10));
	pci_iov_enumerate_vfs(dinfo, config, first_rid, rid_stride);

	nvlist_destroy(config);
	iov->iov_flags &= ~IOV_BUSY;
	mtx_unlock(&Giant);

	return (0);
out:
	if (iov_inited)
		PCI_IOV_UNINIT(dev);

	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
		if (iov->iov_bar[i].res != NULL) {
			pci_release_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i),
			    iov->iov_bar[i].res);
			pci_delete_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i));
			iov->iov_bar[i].res = NULL;
		}
	}

	if (iov->iov_flags & IOV_RMAN_INITED) {
		rman_fini(&iov->rman);
		iov->iov_flags &= ~IOV_RMAN_INITED;
	}

	nvlist_destroy(config);
	iov->iov_num_vfs = 0;
	iov->iov_flags &= ~IOV_BUSY;
	mtx_unlock(&Giant);
	return (error);
}

void
pci_iov_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	struct pcicfg_iov *iov;

	iov = dinfo->cfg.iov;

	IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, iov->iov_page_size, 4);
	IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, iov->iov_num_vfs, 2);
	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov->iov_ctl, 2);
}

void
pci_iov_cfg_save(device_t dev, struct pci_devinfo *dinfo)
{
	struct pcicfg_iov *iov;

	iov = dinfo->cfg.iov;

	iov->iov_page_size = IOV_READ(dinfo, PCIR_SRIOV_PAGE_SIZE, 4);
	iov->iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
}
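
/*
 * The save/restore pair above covers registers that live in the SR-IOV
 * capability rather than the standard config header; the generic PCI
 * config save/restore code is expected to call these (e.g. across
 * suspend/resume or a function reset) so that the SR-IOV control state
 * survives.
 */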

/* Return true if child is a VF of the given PF. */
static int
pci_iov_is_child_vf(struct pcicfg_iov *pf, device_t child)
{
	struct pci_devinfo *vfinfo;

	vfinfo = device_get_ivars(child);

	if (!(vfinfo->cfg.flags & PCICFG_VF))
		return (0);

	return (pf == vfinfo->cfg.iov);
}

static int
pci_iov_delete(struct cdev *cdev)
{
	device_t bus, dev, vf, *devlist;
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	int i, error, devcount;
	uint32_t iov_ctl;

	mtx_lock(&Giant);
	dinfo = cdev->si_drv1;
	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	bus = device_get_parent(dev);
	devlist = NULL;

	if (iov->iov_flags & IOV_BUSY) {
		mtx_unlock(&Giant);
		return (EBUSY);
	}

	if (iov->iov_num_vfs == 0) {
		mtx_unlock(&Giant);
		return (ECHILD);
	}

	iov->iov_flags |= IOV_BUSY;

	error = device_get_children(bus, &devlist, &devcount);

	if (error != 0)
		goto out;

	for (i = 0; i < devcount; i++) {
		vf = devlist[i];

		if (!pci_iov_is_child_vf(iov, vf))
			continue;

		error = device_detach(vf);
		if (error != 0) {
			device_printf(dev,
			    "Could not disable SR-IOV: failed to detach VF %s\n",
			    device_get_nameunit(vf));
			goto out;
		}
	}

	for (i = 0; i < devcount; i++) {
		vf = devlist[i];

		if (pci_iov_is_child_vf(iov, vf))
			device_delete_child(bus, vf);
	}
	PCI_IOV_UNINIT(dev);

	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
	iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
	IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, 0, 2);

	iov->iov_num_vfs = 0;

	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
		if (iov->iov_bar[i].res != NULL) {
			pci_release_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i),
			    iov->iov_bar[i].res);
			pci_delete_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i));
			iov->iov_bar[i].res = NULL;
		}
	}

	if (iov->iov_flags & IOV_RMAN_INITED) {
		rman_fini(&iov->rman);
		iov->iov_flags &= ~IOV_RMAN_INITED;
	}

	error = 0;
out:
	free(devlist, M_TEMP);
	iov->iov_flags &= ~IOV_BUSY;
	mtx_unlock(&Giant);
	return (error);
}

static int
pci_iov_get_schema_ioctl(struct cdev *cdev, struct pci_iov_schema *output)
{
	struct pci_devinfo *dinfo;
	void *packed;
	size_t output_len, size;
	int error;

	packed = NULL;

	mtx_lock(&Giant);
	dinfo = cdev->si_drv1;
	packed = nvlist_pack(dinfo->cfg.iov->iov_schema, &size);
	mtx_unlock(&Giant);

	if (packed == NULL) {
		error = ENOMEM;
		goto fail;
	}

	output_len = output->len;
	output->len = size;
	if (size <= output_len) {
		error = copyout(packed, output->schema, size);

		if (error != 0)
			goto fail;

		output->error = 0;
	} else
		/*
		 * If we return an error then the ioctl code won't copyout
		 * output back to userland, so we flag the error in the struct
		 * instead.
		 */
		output->error = EMSGSIZE;

	error = 0;

fail:
	free(packed, M_NVLIST);

	return (error);
}

static int
pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{

	switch (cmd) {
	case IOV_CONFIG:
		return (pci_iov_config(dev, (struct pci_iov_arg *)data));
	case IOV_DELETE:
		return (pci_iov_delete(dev));
	case IOV_GET_SCHEMA:
		return (pci_iov_get_schema_ioctl(dev,
		    (struct pci_iov_schema *)data));
	default:
		return (EINVAL);
	}
}
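
/*
 * Userland sketch (normally iovctl(8) does this): the per-PF character
 * device created at attach time is driven through the ioctls dispatched
 * above, e.g.
 *
 *	int fd = open("/dev/iov/foo0", O_RDWR);	  // "foo0" is hypothetical
 *	struct pci_iov_arg arg = {
 *		.config = packed,		  // nvlist_pack()ed config
 *		.len = packed_len,
 *	};
 *	if (ioctl(fd, IOV_CONFIG, &arg) == 0)	  // create and attach VFs
 *		...
 *	ioctl(fd, IOV_DELETE);			  // later: destroy the VFs
 */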

struct resource *
pci_vf_alloc_mem_resource(device_t dev, device_t child, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	struct pci_map *map;
	struct resource *res;
	struct resource_list_entry *rle;
	rman_res_t bar_start, bar_end;
	pci_addr_t bar_length;
	int error;

	dinfo = device_get_ivars(child);
	iov = dinfo->cfg.iov;

	map = pci_find_bar(child, *rid);
	if (map == NULL)
		return (NULL);

	bar_length = 1 << map->pm_size;
	bar_start = map->pm_value;
	bar_end = bar_start + bar_length - 1;

	/* Make sure that the resource fits the constraints. */
	if (bar_start >= end || bar_end <= bar_start || count != 1)
		return (NULL);

	/* Clamp the resource to the constraints if necessary. */
	if (bar_start < start)
		bar_start = start;
	if (bar_end > end)
		bar_end = end;
	bar_length = bar_end - bar_start + 1;

	res = rman_reserve_resource(&iov->rman, bar_start, bar_end,
	    bar_length, flags, child);
	if (res == NULL)
		return (NULL);

	rle = resource_list_add(&dinfo->resources, SYS_RES_MEMORY, *rid,
	    bar_start, bar_end, 1);
	if (rle == NULL) {
		rman_release_resource(res);
		return (NULL);
	}

	rman_set_rid(res, *rid);

	if (flags & RF_ACTIVE) {
		error = bus_activate_resource(child, SYS_RES_MEMORY, *rid, res);
		if (error != 0) {
			resource_list_delete(&dinfo->resources, SYS_RES_MEMORY,
			    *rid);
			rman_release_resource(res);
			return (NULL);
		}
	}
	rle->res = res;

	return (res);
}

int
pci_vf_release_mem_resource(device_t dev, device_t child, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list_entry *rle;
	int error;

	dinfo = device_get_ivars(child);

	if (rman_get_flags(r) & RF_ACTIVE) {
		error = bus_deactivate_resource(child, SYS_RES_MEMORY, rid, r);
		if (error != 0)
			return (error);
	}

	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY, rid);
	if (rle != NULL) {
		rle->res = NULL;
		resource_list_delete(&dinfo->resources, SYS_RES_MEMORY,
		    rid);
	}

	return (rman_release_resource(r));
}