/*-
 * Copyright (c) 2013-2015 Sandvine Inc.
 * All rights reserved.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
 * SUCH DAMAGE.
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include "opt_bus.h"

#include <sys/param.h>
#include <sys/conf.h>
#include <sys/kernel.h>
#include <sys/systm.h>
#include <sys/bus.h>
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/iov.h>
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mutex.h>
#include <sys/pciio.h>
#include <sys/queue.h>
#include <sys/rman.h>
#include <sys/sysctl.h>

#include <machine/bus.h>
#include <machine/stdarg.h>

#include <sys/nv.h>
#include <sys/iov_schema.h>

#include <dev/pci/pcireg.h>
#include <dev/pci/pcivar.h>
#include <dev/pci/pci_iov.h>
#include <dev/pci/pci_private.h>
#include <dev/pci/pci_iov_private.h>
#include <dev/pci/schema_private.h>

#include "pcib_if.h"

static MALLOC_DEFINE(M_SRIOV, "sr_iov", "PCI SR-IOV allocations");

static d_ioctl_t pci_iov_ioctl;

static struct cdevsw iov_cdevsw = {
	.d_version = D_VERSION,
	.d_name = "iov",
	.d_ioctl = pci_iov_ioctl
};

SYSCTL_DECL(_hw_pci);

/*
 * The maximum amount of memory we will allocate for user configuration of an
 * SR-IOV device.  1MB ought to be enough for anyone, but leave this
 * configurable just in case.
 */
static u_long pci_iov_max_config = 1024 * 1024;
SYSCTL_ULONG(_hw_pci, OID_AUTO, iov_max_config, CTLFLAG_RWTUN,
    &pci_iov_max_config, 0, "Maximum allowed size of SR-IOV configuration.");
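
/*
 * Accessors for registers in the SR-IOV capability of a PF; register
 * offsets are taken relative to the start of the capability (iov_pos).
 */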
#define IOV_READ(d, r, w) \
	pci_read_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, w)

#define IOV_WRITE(d, r, v, w) \
	pci_write_config((d)->cfg.dev, (d)->cfg.iov->iov_pos + r, v, w)

static nvlist_t	*pci_iov_build_schema(nvlist_t **pf_schema,
		    nvlist_t **vf_schema);
static void	pci_iov_build_pf_schema(nvlist_t *schema,
		    nvlist_t **driver_schema);
static void	pci_iov_build_vf_schema(nvlist_t *schema,
		    nvlist_t **driver_schema);
static int	pci_iov_delete_iov_children(struct pci_devinfo *dinfo);
static nvlist_t	*pci_iov_get_pf_subsystem_schema(void);
static nvlist_t	*pci_iov_get_vf_subsystem_schema(void);

int
pci_iov_attach_name(device_t dev, struct nvlist *pf_schema,
    struct nvlist *vf_schema, const char *fmt, ...)
{
	char buf[NAME_MAX + 1];
	va_list ap;

	va_start(ap, fmt);
	vsnprintf(buf, sizeof(buf), fmt, ap);
	va_end(ap);
	return (PCI_IOV_ATTACH(device_get_parent(dev), dev, pf_schema,
	    vf_schema, buf));
}

int
pci_iov_attach_method(device_t bus, device_t dev, nvlist_t *pf_schema,
    nvlist_t *vf_schema, const char *name)
{
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	nvlist_t *schema;
	uint32_t version;
	int error;
	int iov_pos;

	dinfo = device_get_ivars(dev);
	schema = NULL;

	error = pci_find_extcap(dev, PCIZ_SRIOV, &iov_pos);
	if (error != 0)
		return (error);

	version = pci_read_config(dev, iov_pos, 4);
	if (PCI_EXTCAP_VER(version) != 1) {
		if (bootverbose)
			device_printf(dev,
			    "Unsupported version of SR-IOV (%d) detected\n",
			    PCI_EXTCAP_VER(version));

		return (ENXIO);
	}

	iov = malloc(sizeof(*dinfo->cfg.iov), M_SRIOV, M_WAITOK | M_ZERO);

	mtx_lock(&Giant);
	if (dinfo->cfg.iov != NULL) {
		error = EBUSY;
		goto cleanup;
	}
	iov->iov_pf = dev;
	iov->iov_pos = iov_pos;

	schema = pci_iov_build_schema(&pf_schema, &vf_schema);
	if (schema == NULL) {
		error = ENOMEM;
		goto cleanup;
	}

	error = pci_iov_validate_schema(schema);
	if (error != 0)
		goto cleanup;
	iov->iov_schema = schema;

	iov->iov_cdev = make_dev(&iov_cdevsw, device_get_unit(dev),
	    UID_ROOT, GID_WHEEL, 0600, "iov/%s", name);

	if (iov->iov_cdev == NULL) {
		error = ENOMEM;
		goto cleanup;
	}

	dinfo->cfg.iov = iov;
	iov->iov_cdev->si_drv1 = dinfo;
	mtx_unlock(&Giant);

	return (0);

cleanup:
	nvlist_destroy(schema);
	nvlist_destroy(pf_schema);
	nvlist_destroy(vf_schema);
	free(iov, M_SRIOV);
	mtx_unlock(&Giant);
	return (error);
}

int
pci_iov_detach_method(device_t bus, device_t dev)
{
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	int error;

	mtx_lock(&Giant);
	dinfo = device_get_ivars(dev);
	iov = dinfo->cfg.iov;

	if (iov == NULL) {
		mtx_unlock(&Giant);
		return (0);
	}

	if ((iov->iov_flags & IOV_BUSY) != 0) {
		mtx_unlock(&Giant);
		return (EBUSY);
	}

	error = pci_iov_delete_iov_children(dinfo);
	if (error != 0) {
		mtx_unlock(&Giant);
		return (error);
	}

	dinfo->cfg.iov = NULL;

	if (iov->iov_cdev) {
		destroy_dev(iov->iov_cdev);
		iov->iov_cdev = NULL;
	}
	nvlist_destroy(iov->iov_schema);

	free(iov, M_SRIOV);
	mtx_unlock(&Giant);

	return (0);
}
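
/*
 * Illustrative sketch (not part of this file) of how a PF driver is
 * expected to use the attach interface above; "mydrv" and the VF
 * "mac-addr" parameter are hypothetical:
 *
 *	pf_schema = pci_iov_schema_alloc_node();
 *	vf_schema = pci_iov_schema_alloc_node();
 *	pci_iov_schema_add_unicast_mac(vf_schema, "mac-addr", 0, NULL);
 *	error = pci_iov_attach_name(dev, pf_schema, vf_schema, "mydrv%d",
 *	    device_get_unit(dev));
 *
 * Both schemas are handed over unconditionally: on success they are
 * owned (and eventually freed) by the SR-IOV code, and on failure
 * pci_iov_attach_method() destroys them, so the caller must not free
 * them itself.
 */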

static nvlist_t *
pci_iov_build_schema(nvlist_t **pf, nvlist_t **vf)
{
	nvlist_t *schema, *pf_driver, *vf_driver;

	/* We always take ownership of the schemas. */
	pf_driver = *pf;
	*pf = NULL;
	vf_driver = *vf;
	*vf = NULL;

	schema = pci_iov_schema_alloc_node();
	if (schema == NULL)
		goto cleanup;

	pci_iov_build_pf_schema(schema, &pf_driver);
	pci_iov_build_vf_schema(schema, &vf_driver);

	if (nvlist_error(schema) != 0)
		goto cleanup;

	return (schema);

cleanup:
	nvlist_destroy(schema);
	nvlist_destroy(pf_driver);
	nvlist_destroy(vf_driver);
	return (NULL);
}

static void
pci_iov_build_pf_schema(nvlist_t *schema, nvlist_t **driver_schema)
{
	nvlist_t *pf_schema, *iov_schema;

	pf_schema = pci_iov_schema_alloc_node();
	if (pf_schema == NULL) {
		nvlist_set_error(schema, ENOMEM);
		return;
	}

	iov_schema = pci_iov_get_pf_subsystem_schema();

	/*
	 * Note that if either *driver_schema or iov_schema is NULL, then
	 * nvlist_move_nvlist will put the schema in the error state and
	 * SR-IOV will fail to initialize later, so we don't have to explicitly
	 * handle that case.
	 */
	nvlist_move_nvlist(pf_schema, DRIVER_CONFIG_NAME, *driver_schema);
	nvlist_move_nvlist(pf_schema, IOV_CONFIG_NAME, iov_schema);
	nvlist_move_nvlist(schema, PF_CONFIG_NAME, pf_schema);
	*driver_schema = NULL;
}

static void
pci_iov_build_vf_schema(nvlist_t *schema, nvlist_t **driver_schema)
{
	nvlist_t *vf_schema, *iov_schema;

	vf_schema = pci_iov_schema_alloc_node();
	if (vf_schema == NULL) {
		nvlist_set_error(schema, ENOMEM);
		return;
	}

	iov_schema = pci_iov_get_vf_subsystem_schema();

	/*
	 * Note that if either *driver_schema or iov_schema is NULL, then
	 * nvlist_move_nvlist will put the schema in the error state and
	 * SR-IOV will fail to initialize later, so we don't have to explicitly
	 * handle that case.
	 */
	nvlist_move_nvlist(vf_schema, DRIVER_CONFIG_NAME, *driver_schema);
	nvlist_move_nvlist(vf_schema, IOV_CONFIG_NAME, iov_schema);
	nvlist_move_nvlist(schema, VF_SCHEMA_NAME, vf_schema);
	*driver_schema = NULL;
}
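
/*
 * Laid out by the two builders above, the combined schema has the form
 * (node names are the constants from schema_private.h):
 *
 *	schema
 *	+- PF_CONFIG_NAME
 *	|  +- IOV_CONFIG_NAME     subsystem parameters ("num_vfs", "device")
 *	|  +- DRIVER_CONFIG_NAME  parameters defined by the PF driver
 *	+- VF_SCHEMA_NAME
 *	   +- IOV_CONFIG_NAME     subsystem parameters ("passthrough")
 *	   +- DRIVER_CONFIG_NAME  per-VF parameters defined by the PF driver
 */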

static nvlist_t *
pci_iov_get_pf_subsystem_schema(void)
{
	nvlist_t *pf;

	pf = pci_iov_schema_alloc_node();
	if (pf == NULL)
		return (NULL);

	pci_iov_schema_add_uint16(pf, "num_vfs", IOV_SCHEMA_REQUIRED, -1);
	pci_iov_schema_add_string(pf, "device", IOV_SCHEMA_REQUIRED, NULL);

	return (pf);
}

static nvlist_t *
pci_iov_get_vf_subsystem_schema(void)
{
	nvlist_t *vf;

	vf = pci_iov_schema_alloc_node();
	if (vf == NULL)
		return (NULL);

	pci_iov_schema_add_bool(vf, "passthrough", IOV_SCHEMA_HASDEFAULT, 0);

	return (vf);
}

static int
pci_iov_alloc_bar(struct pci_devinfo *dinfo, int bar, pci_addr_t bar_shift)
{
	struct resource *res;
	struct pcicfg_iov *iov;
	device_t dev, bus;
	rman_res_t start, end;
	pci_addr_t bar_size;
	int rid;

	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	bus = device_get_parent(dev);
	rid = iov->iov_pos + PCIR_SRIOV_BAR(bar);
	bar_size = 1 << bar_shift;

	res = pci_alloc_multi_resource(bus, dev, SYS_RES_MEMORY, &rid, 0,
	    ~0, 1, iov->iov_num_vfs, RF_ACTIVE);
	if (res == NULL)
		return (ENXIO);

	iov->iov_bar[bar].res = res;
	iov->iov_bar[bar].bar_size = bar_size;
	iov->iov_bar[bar].bar_shift = bar_shift;

	start = rman_get_start(res);
	end = rman_get_end(res);
	return (rman_manage_region(&iov->rman, start, end));
}

static void
pci_iov_add_bars(struct pcicfg_iov *iov, struct pci_devinfo *dinfo)
{
	struct pci_iov_bar *bar;
	uint64_t bar_start;
	int i;

	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
		bar = &iov->iov_bar[i];
		if (bar->res != NULL) {
			bar_start = rman_get_start(bar->res) +
			    dinfo->cfg.vf.index * bar->bar_size;

			pci_add_bar(dinfo->cfg.dev, PCIR_BAR(i), bar_start,
			    bar->bar_shift);
		}
	}
}

static int
pci_iov_parse_config(struct pcicfg_iov *iov, struct pci_iov_arg *arg,
    nvlist_t **ret)
{
	void *packed_config;
	nvlist_t *config;
	int error;

	config = NULL;
	packed_config = NULL;

	if (arg->len > pci_iov_max_config) {
		error = EMSGSIZE;
		goto out;
	}

	packed_config = malloc(arg->len, M_SRIOV, M_WAITOK);

	error = copyin(arg->config, packed_config, arg->len);
	if (error != 0)
		goto out;

	config = nvlist_unpack(packed_config, arg->len, NV_FLAG_IGNORE_CASE);
	if (config == NULL) {
		error = EINVAL;
		goto out;
	}

	error = pci_iov_schema_validate_config(iov->iov_schema, config);
	if (error != 0)
		goto out;

	error = nvlist_error(config);
	if (error != 0)
		goto out;

	*ret = config;
	config = NULL;

out:
	nvlist_destroy(config);
	free(packed_config, M_SRIOV);
	return (error);
}
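
/*
 * The packed nvlist accepted above mirrors that schema: a PF_CONFIG_NAME
 * node plus one VF_PREFIX"N" node per VF, each holding IOV_CONFIG_NAME
 * and DRIVER_CONFIG_NAME sub-nvlists.  pci_iov_schema_validate_config()
 * rejects any configuration that does not match iov_schema.
 */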

/*
 * Set the ARI_EN bit in the lowest-numbered PCI function with the SR-IOV
 * capability.  This bit is only writeable on the lowest-numbered PF but
 * affects all PFs on the device.
 */
static int
pci_iov_set_ari(device_t bus)
{
	device_t lowest;
	device_t *devlist;
	int i, error, devcount, lowest_func, lowest_pos, iov_pos, dev_func;
	uint16_t iov_ctl;

	/* If ARI is disabled on the downstream port, there is nothing to do. */
	if (!PCIB_ARI_ENABLED(device_get_parent(bus)))
		return (0);

	error = device_get_children(bus, &devlist, &devcount);
	if (error != 0)
		return (error);

	lowest = NULL;
	for (i = 0; i < devcount; i++) {
		if (pci_find_extcap(devlist[i], PCIZ_SRIOV, &iov_pos) == 0) {
			dev_func = pci_get_function(devlist[i]);
			if (lowest == NULL || dev_func < lowest_func) {
				lowest = devlist[i];
				lowest_func = dev_func;
				lowest_pos = iov_pos;
			}
		}
	}
	free(devlist, M_TEMP);

	/*
	 * If we called this function, some device must have the SR-IOV
	 * capability.
	 */
	KASSERT(lowest != NULL,
	    ("Could not find child of %s with SR-IOV capability",
	    device_get_nameunit(bus)));

	iov_ctl = pci_read_config(lowest, lowest_pos + PCIR_SRIOV_CTL, 2);
	iov_ctl |= PCIM_SRIOV_ARI_EN;
	pci_write_config(lowest, lowest_pos + PCIR_SRIOV_CTL, iov_ctl, 2);
	if ((pci_read_config(lowest, lowest_pos + PCIR_SRIOV_CTL, 2) &
	    PCIM_SRIOV_ARI_EN) == 0) {
		device_printf(lowest, "failed to enable ARI\n");
		return (ENXIO);
	}
	return (0);
}

static int
pci_iov_config_page_size(struct pci_devinfo *dinfo)
{
	uint32_t page_cap, page_size;

	page_cap = IOV_READ(dinfo, PCIR_SRIOV_PAGE_CAP, 4);

	/*
	 * If the system page size is less than the smallest SR-IOV page size
	 * then round up to the smallest SR-IOV page size.
	 */
	if (PAGE_SHIFT < PCI_SRIOV_BASE_PAGE_SHIFT)
		page_size = (1 << 0);
	else
		page_size = (1 << (PAGE_SHIFT - PCI_SRIOV_BASE_PAGE_SHIFT));

	/* Check that the device supports the system page size. */
	if (!(page_size & page_cap))
		return (ENXIO);

	IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, page_size, 4);
	return (0);
}

static int
pci_iov_init(device_t dev, uint16_t num_vfs, const nvlist_t *config)
{
	const nvlist_t *device, *driver_config;

	device = nvlist_get_nvlist(config, PF_CONFIG_NAME);
	driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);
	return (PCI_IOV_INIT(dev, num_vfs, driver_config));
}

static int
pci_iov_init_rman(device_t pf, struct pcicfg_iov *iov)
{
	int error;

	iov->rman.rm_start = 0;
	iov->rman.rm_end = ~0;
	iov->rman.rm_type = RMAN_ARRAY;
	snprintf(iov->rman_name, sizeof(iov->rman_name), "%s VF I/O memory",
	    device_get_nameunit(pf));
	iov->rman.rm_descr = iov->rman_name;

	error = rman_init(&iov->rman);
	if (error != 0)
		return (error);

	iov->iov_flags |= IOV_RMAN_INITED;
	return (0);
}

static int
pci_iov_alloc_bar_ea(struct pci_devinfo *dinfo, int bar)
{
	struct pcicfg_iov *iov;
	rman_res_t start, end;
	struct resource *res;
	struct resource_list *rl;
	struct resource_list_entry *rle;

	rl = &dinfo->resources;
	iov = dinfo->cfg.iov;

	rle = resource_list_find(rl, SYS_RES_MEMORY,
	    iov->iov_pos + PCIR_SRIOV_BAR(bar));
	if (rle == NULL)
		rle = resource_list_find(rl, SYS_RES_IOPORT,
		    iov->iov_pos + PCIR_SRIOV_BAR(bar));
	if (rle == NULL)
		return (ENXIO);
	res = rle->res;

	iov->iov_bar[bar].res = res;
	iov->iov_bar[bar].bar_size = rman_get_size(res) / iov->iov_num_vfs;
	iov->iov_bar[bar].bar_shift = pci_mapsize(iov->iov_bar[bar].bar_size);

	start = rman_get_start(res);
	end = rman_get_end(res);

	return (rman_manage_region(&iov->rman, start, end));
}
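
/*
 * Size and allocate the VF BARs of the PF.  BARs backed by Enhanced
 * Allocation (EA) entries are used as-is; all other BARs are sized the
 * traditional way, skipping the second register of any 64-bit BAR.
 */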
static int
pci_iov_setup_bars(struct pci_devinfo *dinfo)
{
	device_t dev;
	struct pcicfg_iov *iov;
	pci_addr_t bar_value, testval;
	int i, last_64, error;

	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	last_64 = 0;

	pci_add_resources_ea(device_get_parent(dev), dev, 1);

	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
		/* First, try to use BARs allocated with EA */
		error = pci_iov_alloc_bar_ea(dinfo, i);
		if (error == 0)
			continue;

		/* Fall back to a legacy BAR only if EA is not enabled for it */
		if (pci_ea_is_enabled(dev, iov->iov_pos + PCIR_SRIOV_BAR(i)))
			continue;

		/*
		 * If a PCI BAR is a 64-bit wide BAR, then it spans two
		 * consecutive registers.  Therefore if the last BAR that
		 * we looked at was a 64-bit BAR, we need to skip this
		 * register as it's the second half of the last BAR.
		 */
		if (!last_64) {
			pci_read_bar(dev,
			    iov->iov_pos + PCIR_SRIOV_BAR(i),
			    &bar_value, &testval, &last_64);

			if (testval != 0) {
				error = pci_iov_alloc_bar(dinfo, i,
				    pci_mapsize(testval));
				if (error != 0)
					return (error);
			}
		} else
			last_64 = 0;
	}

	return (0);
}

static void
pci_iov_enumerate_vfs(struct pci_devinfo *dinfo, const nvlist_t *config,
    uint16_t first_rid, uint16_t rid_stride)
{
	char device_name[VF_MAX_NAME];
	const nvlist_t *device, *driver_config, *iov_config;
	device_t bus, dev, vf;
	struct pcicfg_iov *iov;
	struct pci_devinfo *vfinfo;
	int i, error;
	uint16_t vid, did, next_rid;

	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	bus = device_get_parent(dev);
	next_rid = first_rid;
	vid = pci_get_vendor(dev);
	did = IOV_READ(dinfo, PCIR_SRIOV_VF_DID, 2);

	for (i = 0; i < iov->iov_num_vfs; i++, next_rid += rid_stride) {
		snprintf(device_name, sizeof(device_name), VF_PREFIX"%d", i);
		device = nvlist_get_nvlist(config, device_name);
		iov_config = nvlist_get_nvlist(device, IOV_CONFIG_NAME);
		driver_config = nvlist_get_nvlist(device, DRIVER_CONFIG_NAME);

		vf = PCI_CREATE_IOV_CHILD(bus, dev, next_rid, vid, did);
		if (vf == NULL)
			break;

		/*
		 * If we are creating passthrough devices then force the ppt
		 * driver to attach to prevent a VF driver from claiming the
		 * VFs.
		 */
		if (nvlist_get_bool(iov_config, "passthrough"))
			device_set_devclass_fixed(vf, "ppt");

		vfinfo = device_get_ivars(vf);

		vfinfo->cfg.iov = iov;
		vfinfo->cfg.vf.index = i;

		pci_iov_add_bars(iov, vfinfo);

		error = PCI_IOV_ADD_VF(dev, i, driver_config);
		if (error != 0) {
			device_printf(dev, "Failed to add VF %d\n", i);
			device_delete_child(bus, vf);
		}
	}

	bus_generic_attach(bus);
}
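
/*
 * Handle an IOV_CONFIG request: parse and validate the user's
 * configuration, then bring up the VFs.  In order: program the SR-IOV
 * page size, enable ARI if the bus supports it, let the PF driver
 * initialize (PCI_IOV_INIT), program the VF count, check that all VF
 * RIDs fall on this bus, set up the VF memory rman and BARs, set the
 * VF Enable and VF MSE bits, wait as the spec requires, and create the
 * VF devices.  Any failure unwinds every step taken so far.
 */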
static int
pci_iov_config(struct cdev *cdev, struct pci_iov_arg *arg)
{
	device_t bus, dev;
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	nvlist_t *config;
	int i, error;
	uint16_t rid_off, rid_stride;
	uint16_t first_rid, last_rid;
	uint16_t iov_ctl;
	uint16_t num_vfs, total_vfs;
	int iov_inited;

	mtx_lock(&Giant);
	dinfo = cdev->si_drv1;
	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	bus = device_get_parent(dev);
	iov_inited = 0;
	config = NULL;

	if ((iov->iov_flags & IOV_BUSY) || iov->iov_num_vfs != 0) {
		mtx_unlock(&Giant);
		return (EBUSY);
	}
	iov->iov_flags |= IOV_BUSY;

	error = pci_iov_parse_config(iov, arg, &config);
	if (error != 0)
		goto out;

	num_vfs = pci_iov_config_get_num_vfs(config);
	total_vfs = IOV_READ(dinfo, PCIR_SRIOV_TOTAL_VFS, 2);
	if (num_vfs > total_vfs) {
		error = EINVAL;
		goto out;
	}

	error = pci_iov_config_page_size(dinfo);
	if (error != 0)
		goto out;

	error = pci_iov_set_ari(bus);
	if (error != 0)
		goto out;

	error = pci_iov_init(dev, num_vfs, config);
	if (error != 0)
		goto out;
	iov_inited = 1;

	IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, num_vfs, 2);

	rid_off = IOV_READ(dinfo, PCIR_SRIOV_VF_OFF, 2);
	rid_stride = IOV_READ(dinfo, PCIR_SRIOV_VF_STRIDE, 2);

	first_rid = pci_get_rid(dev) + rid_off;
	last_rid = first_rid + (num_vfs - 1) * rid_stride;

	/* We don't yet support allocating extra bus numbers for VFs. */
	if (pci_get_bus(dev) != PCI_RID2BUS(last_rid)) {
		error = ENOSPC;
		goto out;
	}

	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
	iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);

	error = pci_iov_init_rman(dev, iov);
	if (error != 0)
		goto out;

	iov->iov_num_vfs = num_vfs;

	error = pci_iov_setup_bars(dinfo);
	if (error != 0)
		goto out;

	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
	iov_ctl |= PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE;
	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);

	/*
	 * Per the specification, we must wait at least 100ms before
	 * accessing VFs.
	 */
	pause("iov", roundup(hz, 10));
	pci_iov_enumerate_vfs(dinfo, config, first_rid, rid_stride);

	nvlist_destroy(config);
	iov->iov_flags &= ~IOV_BUSY;
	mtx_unlock(&Giant);

	return (0);
out:
	if (iov_inited)
		PCI_IOV_UNINIT(dev);

	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
		if (iov->iov_bar[i].res != NULL) {
			pci_release_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i),
			    iov->iov_bar[i].res);
			pci_delete_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i));
			iov->iov_bar[i].res = NULL;
		}
	}

	if (iov->iov_flags & IOV_RMAN_INITED) {
		rman_fini(&iov->rman);
		iov->iov_flags &= ~IOV_RMAN_INITED;
	}

	nvlist_destroy(config);
	iov->iov_num_vfs = 0;
	iov->iov_flags &= ~IOV_BUSY;
	mtx_unlock(&Giant);
	return (error);
}

void
pci_iov_cfg_restore(device_t dev, struct pci_devinfo *dinfo)
{
	struct pcicfg_iov *iov;

	iov = dinfo->cfg.iov;

	IOV_WRITE(dinfo, PCIR_SRIOV_PAGE_SIZE, iov->iov_page_size, 4);
	IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, iov->iov_num_vfs, 2);
	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov->iov_ctl, 2);
}

void
pci_iov_cfg_save(device_t dev, struct pci_devinfo *dinfo)
{
	struct pcicfg_iov *iov;

	iov = dinfo->cfg.iov;

	iov->iov_page_size = IOV_READ(dinfo, PCIR_SRIOV_PAGE_SIZE, 4);
	iov->iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
}

/* Return true if child is a VF of the given PF. */
static int
pci_iov_is_child_vf(struct pcicfg_iov *pf, device_t child)
{
	struct pci_devinfo *vfinfo;

	vfinfo = device_get_ivars(child);

	if (!(vfinfo->cfg.flags & PCICFG_VF))
		return (0);

	return (pf == vfinfo->cfg.iov);
}

static int
pci_iov_delete_iov_children(struct pci_devinfo *dinfo)
{
	device_t bus, dev, vf, *devlist;
	struct pcicfg_iov *iov;
	int i, error, devcount;
	uint32_t iov_ctl;

	mtx_assert(&Giant, MA_OWNED);

	iov = dinfo->cfg.iov;
	dev = dinfo->cfg.dev;
	bus = device_get_parent(dev);
	devlist = NULL;

	iov->iov_flags |= IOV_BUSY;

	error = device_get_children(bus, &devlist, &devcount);
	if (error != 0)
		goto out;

	for (i = 0; i < devcount; i++) {
		vf = devlist[i];

		if (!pci_iov_is_child_vf(iov, vf))
			continue;

		error = device_detach(vf);
		if (error != 0) {
			device_printf(dev,
			    "Could not disable SR-IOV: failed to detach VF %s\n",
			    device_get_nameunit(vf));
			goto out;
		}
	}

	for (i = 0; i < devcount; i++) {
		vf = devlist[i];

		if (pci_iov_is_child_vf(iov, vf))
			device_delete_child(bus, vf);
	}
	PCI_IOV_UNINIT(dev);

	iov_ctl = IOV_READ(dinfo, PCIR_SRIOV_CTL, 2);
	iov_ctl &= ~(PCIM_SRIOV_VF_EN | PCIM_SRIOV_VF_MSE);
	IOV_WRITE(dinfo, PCIR_SRIOV_CTL, iov_ctl, 2);
	IOV_WRITE(dinfo, PCIR_SRIOV_NUM_VFS, 0, 2);

	iov->iov_num_vfs = 0;

	for (i = 0; i <= PCIR_MAX_BAR_0; i++) {
		if (iov->iov_bar[i].res != NULL) {
			pci_release_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i),
			    iov->iov_bar[i].res);
			pci_delete_resource(bus, dev, SYS_RES_MEMORY,
			    iov->iov_pos + PCIR_SRIOV_BAR(i));
			iov->iov_bar[i].res = NULL;
		}
	}

	if (iov->iov_flags & IOV_RMAN_INITED) {
		rman_fini(&iov->rman);
		iov->iov_flags &= ~IOV_RMAN_INITED;
	}

	error = 0;
out:
	free(devlist, M_TEMP);
	iov->iov_flags &= ~IOV_BUSY;

	return (error);
}

static int
pci_iov_delete(struct cdev *cdev)
{
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	int error;

	mtx_lock(&Giant);
	dinfo = cdev->si_drv1;
	iov = dinfo->cfg.iov;

	if ((iov->iov_flags & IOV_BUSY) != 0) {
		error = EBUSY;
		goto out;
	}
	if (iov->iov_num_vfs == 0) {
		error = ECHILD;
		goto out;
	}

	error = pci_iov_delete_iov_children(dinfo);

out:
	mtx_unlock(&Giant);
	return (error);
}

static int
pci_iov_get_schema_ioctl(struct cdev *cdev, struct pci_iov_schema *output)
{
	struct pci_devinfo *dinfo;
	void *packed;
	size_t output_len, size;
	int error;

	packed = NULL;

	mtx_lock(&Giant);
	dinfo = cdev->si_drv1;
	packed = nvlist_pack(dinfo->cfg.iov->iov_schema, &size);
	mtx_unlock(&Giant);

	if (packed == NULL) {
		error = ENOMEM;
		goto fail;
	}

	output_len = output->len;
	output->len = size;
	if (size <= output_len) {
		error = copyout(packed, output->schema, size);
		if (error != 0)
			goto fail;

		output->error = 0;
	} else
		/*
		 * If we return an error then the ioctl code won't copyout
		 * output back to userland, so we flag the error in the struct
		 * instead.
		 */
		output->error = EMSGSIZE;

	error = 0;

fail:
	free(packed, M_NVLIST);

	return (error);
}

static int
pci_iov_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int fflag,
    struct thread *td)
{

	switch (cmd) {
	case IOV_CONFIG:
		return (pci_iov_config(dev, (struct pci_iov_arg *)data));
	case IOV_DELETE:
		return (pci_iov_delete(dev));
	case IOV_GET_SCHEMA:
		return (pci_iov_get_schema_ioctl(dev,
		    (struct pci_iov_schema *)data));
	default:
		return (EINVAL);
	}
}

struct resource *
pci_vf_alloc_mem_resource(device_t dev, device_t child, int *rid,
    rman_res_t start, rman_res_t end, rman_res_t count, u_int flags)
{
	struct pci_devinfo *dinfo;
	struct pcicfg_iov *iov;
	struct pci_map *map;
	struct resource *res;
	struct resource_list_entry *rle;
	rman_res_t bar_start, bar_end;
	pci_addr_t bar_length;
	int error;

	dinfo = device_get_ivars(child);
	iov = dinfo->cfg.iov;

	map = pci_find_bar(child, *rid);
	if (map == NULL)
		return (NULL);

	bar_length = 1 << map->pm_size;
	bar_start = map->pm_value;
	bar_end = bar_start + bar_length - 1;

	/* Make sure that the resource fits the constraints. */
	if (bar_start >= end || bar_end <= bar_start || count != 1)
		return (NULL);

	/* Clamp the resource to the constraints if necessary. */
	if (bar_start < start)
		bar_start = start;
	if (bar_end > end)
		bar_end = end;
	bar_length = bar_end - bar_start + 1;

	res = rman_reserve_resource(&iov->rman, bar_start, bar_end,
	    bar_length, flags, child);
	if (res == NULL)
		return (NULL);

	rle = resource_list_add(&dinfo->resources, SYS_RES_MEMORY, *rid,
	    bar_start, bar_end, 1);
	if (rle == NULL) {
		rman_release_resource(res);
		return (NULL);
	}

	rman_set_rid(res, *rid);

	if (flags & RF_ACTIVE) {
		error = bus_activate_resource(child, SYS_RES_MEMORY, *rid,
		    res);
		if (error != 0) {
			resource_list_delete(&dinfo->resources, SYS_RES_MEMORY,
			    *rid);
			rman_release_resource(res);
			return (NULL);
		}
	}
	rle->res = res;

	return (res);
}

int
pci_vf_release_mem_resource(device_t dev, device_t child, int rid,
    struct resource *r)
{
	struct pci_devinfo *dinfo;
	struct resource_list_entry *rle;
	int error;

	dinfo = device_get_ivars(child);

	if (rman_get_flags(r) & RF_ACTIVE) {
		error = bus_deactivate_resource(child, SYS_RES_MEMORY, rid, r);
		if (error != 0)
			return (error);
	}

	rle = resource_list_find(&dinfo->resources, SYS_RES_MEMORY, rid);
	if (rle != NULL) {
		rle->res = NULL;
		resource_list_delete(&dinfo->resources, SYS_RES_MEMORY, rid);
	}

	return (rman_release_resource(r));
}
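
/*
 * Illustrative example (device name and values hypothetical) of an
 * iovctl.conf(5) fragment driving this interface via /dev/iov/<name>:
 *
 *	PF {
 *		device : "foo0";
 *		num_vfs : 4;
 *	}
 *
 *	VF-0 {
 *		passthrough : true;
 *	}
 *
 * iovctl(8) packs the configuration into an nvlist and issues
 * IOV_CONFIG; IOV_DELETE destroys the VFs again, and IOV_GET_SCHEMA
 * returns the packed schema built at attach time.
 */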