1 /* 2 * drivers/pci/iov.c 3 * 4 * Copyright (C) 2009 Intel Corporation, Yu Zhao <yu.zhao@intel.com> 5 * 6 * PCI Express I/O Virtualization (IOV) support. 7 * Single Root IOV 1.0 8 * Address Translation Service 1.0 9 */ 10 11 #include <linux/pci.h> 12 #include <linux/slab.h> 13 #include <linux/mutex.h> 14 #include <linux/export.h> 15 #include <linux/string.h> 16 #include <linux/delay.h> 17 #include <linux/pci-ats.h> 18 #include "pci.h" 19 20 #define VIRTFN_ID_LEN 16 21 22 static inline u8 virtfn_bus(struct pci_dev *dev, int id) 23 { 24 return dev->bus->number + ((dev->devfn + dev->sriov->offset + 25 dev->sriov->stride * id) >> 8); 26 } 27 28 static inline u8 virtfn_devfn(struct pci_dev *dev, int id) 29 { 30 return (dev->devfn + dev->sriov->offset + 31 dev->sriov->stride * id) & 0xff; 32 } 33 34 static struct pci_bus *virtfn_add_bus(struct pci_bus *bus, int busnr) 35 { 36 struct pci_bus *child; 37 38 if (bus->number == busnr) 39 return bus; 40 41 child = pci_find_bus(pci_domain_nr(bus), busnr); 42 if (child) 43 return child; 44 45 child = pci_add_new_bus(bus, NULL, busnr); 46 if (!child) 47 return NULL; 48 49 pci_bus_insert_busn_res(child, busnr, busnr); 50 51 return child; 52 } 53 54 static void virtfn_remove_bus(struct pci_bus *physbus, struct pci_bus *virtbus) 55 { 56 if (physbus != virtbus && list_empty(&virtbus->devices)) 57 pci_remove_bus(virtbus); 58 } 59 60 static int virtfn_add(struct pci_dev *dev, int id, int reset) 61 { 62 int i; 63 int rc = -ENOMEM; 64 u64 size; 65 char buf[VIRTFN_ID_LEN]; 66 struct pci_dev *virtfn; 67 struct resource *res; 68 struct pci_sriov *iov = dev->sriov; 69 struct pci_bus *bus; 70 71 mutex_lock(&iov->dev->sriov->lock); 72 bus = virtfn_add_bus(dev->bus, virtfn_bus(dev, id)); 73 if (!bus) 74 goto failed; 75 76 virtfn = pci_alloc_dev(bus); 77 if (!virtfn) 78 goto failed0; 79 80 virtfn->devfn = virtfn_devfn(dev, id); 81 virtfn->vendor = dev->vendor; 82 pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_DID, &virtfn->device); 83 pci_setup_device(virtfn); 84 virtfn->dev.parent = dev->dev.parent; 85 virtfn->physfn = pci_dev_get(dev); 86 virtfn->is_virtfn = 1; 87 virtfn->multifunction = 0; 88 89 for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { 90 res = dev->resource + PCI_IOV_RESOURCES + i; 91 if (!res->parent) 92 continue; 93 virtfn->resource[i].name = pci_name(virtfn); 94 virtfn->resource[i].flags = res->flags; 95 size = resource_size(res); 96 do_div(size, iov->total_VFs); 97 virtfn->resource[i].start = res->start + size * id; 98 virtfn->resource[i].end = virtfn->resource[i].start + size - 1; 99 rc = request_resource(res, &virtfn->resource[i]); 100 BUG_ON(rc); 101 } 102 103 if (reset) 104 __pci_reset_function(virtfn); 105 106 pci_device_add(virtfn, virtfn->bus); 107 mutex_unlock(&iov->dev->sriov->lock); 108 109 pci_bus_add_device(virtfn); 110 sprintf(buf, "virtfn%u", id); 111 rc = sysfs_create_link(&dev->dev.kobj, &virtfn->dev.kobj, buf); 112 if (rc) 113 goto failed1; 114 rc = sysfs_create_link(&virtfn->dev.kobj, &dev->dev.kobj, "physfn"); 115 if (rc) 116 goto failed2; 117 118 kobject_uevent(&virtfn->dev.kobj, KOBJ_CHANGE); 119 120 return 0; 121 122 failed2: 123 sysfs_remove_link(&dev->dev.kobj, buf); 124 failed1: 125 pci_dev_put(dev); 126 mutex_lock(&iov->dev->sriov->lock); 127 pci_stop_and_remove_bus_device(virtfn); 128 failed0: 129 virtfn_remove_bus(dev->bus, bus); 130 failed: 131 mutex_unlock(&iov->dev->sriov->lock); 132 133 return rc; 134 } 135 136 static void virtfn_remove(struct pci_dev *dev, int id, int reset) 137 { 138 char buf[VIRTFN_ID_LEN]; 139 struct pci_dev *virtfn; 140 struct pci_sriov *iov = dev->sriov; 141 142 virtfn = pci_get_domain_bus_and_slot(pci_domain_nr(dev->bus), 143 virtfn_bus(dev, id), 144 virtfn_devfn(dev, id)); 145 if (!virtfn) 146 return; 147 148 if (reset) { 149 device_release_driver(&virtfn->dev); 150 __pci_reset_function(virtfn); 151 } 152 153 sprintf(buf, "virtfn%u", id); 154 sysfs_remove_link(&dev->dev.kobj, buf); 155 /* 156 * pci_stop_dev() could have been called for this virtfn already, 157 * so the directory for the virtfn may have been removed before. 158 * Double check to avoid spurious sysfs warnings. 159 */ 160 if (virtfn->dev.kobj.sd) 161 sysfs_remove_link(&virtfn->dev.kobj, "physfn"); 162 163 mutex_lock(&iov->dev->sriov->lock); 164 pci_stop_and_remove_bus_device(virtfn); 165 virtfn_remove_bus(dev->bus, virtfn->bus); 166 mutex_unlock(&iov->dev->sriov->lock); 167 168 /* balance pci_get_domain_bus_and_slot() */ 169 pci_dev_put(virtfn); 170 pci_dev_put(dev); 171 } 172 173 static int sriov_enable(struct pci_dev *dev, int nr_virtfn) 174 { 175 int rc; 176 int i, j; 177 int nres; 178 u16 offset, stride, initial; 179 struct resource *res; 180 struct pci_dev *pdev; 181 struct pci_sriov *iov = dev->sriov; 182 int bars = 0; 183 184 if (!nr_virtfn) 185 return 0; 186 187 if (iov->num_VFs) 188 return -EINVAL; 189 190 pci_read_config_word(dev, iov->pos + PCI_SRIOV_INITIAL_VF, &initial); 191 if (initial > iov->total_VFs || 192 (!(iov->cap & PCI_SRIOV_CAP_VFM) && (initial != iov->total_VFs))) 193 return -EIO; 194 195 if (nr_virtfn < 0 || nr_virtfn > iov->total_VFs || 196 (!(iov->cap & PCI_SRIOV_CAP_VFM) && (nr_virtfn > initial))) 197 return -EINVAL; 198 199 pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_OFFSET, &offset); 200 pci_read_config_word(dev, iov->pos + PCI_SRIOV_VF_STRIDE, &stride); 201 if (!offset || (nr_virtfn > 1 && !stride)) 202 return -EIO; 203 204 nres = 0; 205 for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { 206 bars |= (1 << (i + PCI_IOV_RESOURCES)); 207 res = dev->resource + PCI_IOV_RESOURCES + i; 208 if (res->parent) 209 nres++; 210 } 211 if (nres != iov->nres) { 212 dev_err(&dev->dev, "not enough MMIO resources for SR-IOV\n"); 213 return -ENOMEM; 214 } 215 216 iov->offset = offset; 217 iov->stride = stride; 218 219 if (virtfn_bus(dev, nr_virtfn - 1) > dev->bus->busn_res.end) { 220 dev_err(&dev->dev, "SR-IOV: bus number out of range\n"); 221 return -ENOMEM; 222 } 223 224 if (pci_enable_resources(dev, bars)) { 225 dev_err(&dev->dev, "SR-IOV: IOV BARS not allocated\n"); 226 return -ENOMEM; 227 } 228 229 if (iov->link != dev->devfn) { 230 pdev = pci_get_slot(dev->bus, iov->link); 231 if (!pdev) 232 return -ENODEV; 233 234 if (!pdev->is_physfn) { 235 pci_dev_put(pdev); 236 return -ENOSYS; 237 } 238 239 rc = sysfs_create_link(&dev->dev.kobj, 240 &pdev->dev.kobj, "dep_link"); 241 pci_dev_put(pdev); 242 if (rc) 243 return rc; 244 } 245 246 pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, nr_virtfn); 247 iov->ctrl |= PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE; 248 pci_cfg_access_lock(dev); 249 pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); 250 msleep(100); 251 pci_cfg_access_unlock(dev); 252 253 iov->initial_VFs = initial; 254 if (nr_virtfn < initial) 255 initial = nr_virtfn; 256 257 for (i = 0; i < initial; i++) { 258 rc = virtfn_add(dev, i, 0); 259 if (rc) 260 goto failed; 261 } 262 263 kobject_uevent(&dev->dev.kobj, KOBJ_CHANGE); 264 iov->num_VFs = nr_virtfn; 265 266 return 0; 267 268 failed: 269 for (j = 0; j < i; j++) 270 virtfn_remove(dev, j, 0); 271 272 iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); 273 pci_cfg_access_lock(dev); 274 pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); 275 pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); 276 ssleep(1); 277 pci_cfg_access_unlock(dev); 278 279 if (iov->link != dev->devfn) 280 sysfs_remove_link(&dev->dev.kobj, "dep_link"); 281 282 return rc; 283 } 284 285 static void sriov_disable(struct pci_dev *dev) 286 { 287 int i; 288 struct pci_sriov *iov = dev->sriov; 289 290 if (!iov->num_VFs) 291 return; 292 293 for (i = 0; i < iov->num_VFs; i++) 294 virtfn_remove(dev, i, 0); 295 296 iov->ctrl &= ~(PCI_SRIOV_CTRL_VFE | PCI_SRIOV_CTRL_MSE); 297 pci_cfg_access_lock(dev); 298 pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); 299 ssleep(1); 300 pci_cfg_access_unlock(dev); 301 302 if (iov->link != dev->devfn) 303 sysfs_remove_link(&dev->dev.kobj, "dep_link"); 304 305 iov->num_VFs = 0; 306 pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, 0); 307 } 308 309 static int sriov_init(struct pci_dev *dev, int pos) 310 { 311 int i; 312 int rc; 313 int nres; 314 u32 pgsz; 315 u16 ctrl, total, offset, stride; 316 struct pci_sriov *iov; 317 struct resource *res; 318 struct pci_dev *pdev; 319 320 if (pci_pcie_type(dev) != PCI_EXP_TYPE_RC_END && 321 pci_pcie_type(dev) != PCI_EXP_TYPE_ENDPOINT) 322 return -ENODEV; 323 324 pci_read_config_word(dev, pos + PCI_SRIOV_CTRL, &ctrl); 325 if (ctrl & PCI_SRIOV_CTRL_VFE) { 326 pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, 0); 327 ssleep(1); 328 } 329 330 pci_read_config_word(dev, pos + PCI_SRIOV_TOTAL_VF, &total); 331 if (!total) 332 return 0; 333 334 ctrl = 0; 335 list_for_each_entry(pdev, &dev->bus->devices, bus_list) 336 if (pdev->is_physfn) 337 goto found; 338 339 pdev = NULL; 340 if (pci_ari_enabled(dev->bus)) 341 ctrl |= PCI_SRIOV_CTRL_ARI; 342 343 found: 344 pci_write_config_word(dev, pos + PCI_SRIOV_CTRL, ctrl); 345 pci_write_config_word(dev, pos + PCI_SRIOV_NUM_VF, 0); 346 pci_read_config_word(dev, pos + PCI_SRIOV_VF_OFFSET, &offset); 347 pci_read_config_word(dev, pos + PCI_SRIOV_VF_STRIDE, &stride); 348 if (!offset || (total > 1 && !stride)) 349 return -EIO; 350 351 pci_read_config_dword(dev, pos + PCI_SRIOV_SUP_PGSIZE, &pgsz); 352 i = PAGE_SHIFT > 12 ? PAGE_SHIFT - 12 : 0; 353 pgsz &= ~((1 << i) - 1); 354 if (!pgsz) 355 return -EIO; 356 357 pgsz &= ~(pgsz - 1); 358 pci_write_config_dword(dev, pos + PCI_SRIOV_SYS_PGSIZE, pgsz); 359 360 nres = 0; 361 for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { 362 res = dev->resource + PCI_IOV_RESOURCES + i; 363 i += __pci_read_base(dev, pci_bar_unknown, res, 364 pos + PCI_SRIOV_BAR + i * 4); 365 if (!res->flags) 366 continue; 367 if (resource_size(res) & (PAGE_SIZE - 1)) { 368 rc = -EIO; 369 goto failed; 370 } 371 res->end = res->start + resource_size(res) * total - 1; 372 nres++; 373 } 374 375 iov = kzalloc(sizeof(*iov), GFP_KERNEL); 376 if (!iov) { 377 rc = -ENOMEM; 378 goto failed; 379 } 380 381 iov->pos = pos; 382 iov->nres = nres; 383 iov->ctrl = ctrl; 384 iov->total_VFs = total; 385 iov->offset = offset; 386 iov->stride = stride; 387 iov->pgsz = pgsz; 388 iov->self = dev; 389 pci_read_config_dword(dev, pos + PCI_SRIOV_CAP, &iov->cap); 390 pci_read_config_byte(dev, pos + PCI_SRIOV_FUNC_LINK, &iov->link); 391 if (pci_pcie_type(dev) == PCI_EXP_TYPE_RC_END) 392 iov->link = PCI_DEVFN(PCI_SLOT(dev->devfn), iov->link); 393 394 if (pdev) 395 iov->dev = pci_dev_get(pdev); 396 else 397 iov->dev = dev; 398 399 mutex_init(&iov->lock); 400 401 dev->sriov = iov; 402 dev->is_physfn = 1; 403 404 return 0; 405 406 failed: 407 for (i = 0; i < PCI_SRIOV_NUM_BARS; i++) { 408 res = dev->resource + PCI_IOV_RESOURCES + i; 409 res->flags = 0; 410 } 411 412 return rc; 413 } 414 415 static void sriov_release(struct pci_dev *dev) 416 { 417 BUG_ON(dev->sriov->num_VFs); 418 419 if (dev != dev->sriov->dev) 420 pci_dev_put(dev->sriov->dev); 421 422 mutex_destroy(&dev->sriov->lock); 423 424 kfree(dev->sriov); 425 dev->sriov = NULL; 426 } 427 428 static void sriov_restore_state(struct pci_dev *dev) 429 { 430 int i; 431 u16 ctrl; 432 struct pci_sriov *iov = dev->sriov; 433 434 pci_read_config_word(dev, iov->pos + PCI_SRIOV_CTRL, &ctrl); 435 if (ctrl & PCI_SRIOV_CTRL_VFE) 436 return; 437 438 for (i = PCI_IOV_RESOURCES; i <= PCI_IOV_RESOURCE_END; i++) 439 pci_update_resource(dev, i); 440 441 pci_write_config_dword(dev, iov->pos + PCI_SRIOV_SYS_PGSIZE, iov->pgsz); 442 pci_write_config_word(dev, iov->pos + PCI_SRIOV_NUM_VF, iov->num_VFs); 443 pci_write_config_word(dev, iov->pos + PCI_SRIOV_CTRL, iov->ctrl); 444 if (iov->ctrl & PCI_SRIOV_CTRL_VFE) 445 msleep(100); 446 } 447 448 /** 449 * pci_iov_init - initialize the IOV capability 450 * @dev: the PCI device 451 * 452 * Returns 0 on success, or negative on failure. 453 */ 454 int pci_iov_init(struct pci_dev *dev) 455 { 456 int pos; 457 458 if (!pci_is_pcie(dev)) 459 return -ENODEV; 460 461 pos = pci_find_ext_capability(dev, PCI_EXT_CAP_ID_SRIOV); 462 if (pos) 463 return sriov_init(dev, pos); 464 465 return -ENODEV; 466 } 467 468 /** 469 * pci_iov_release - release resources used by the IOV capability 470 * @dev: the PCI device 471 */ 472 void pci_iov_release(struct pci_dev *dev) 473 { 474 if (dev->is_physfn) 475 sriov_release(dev); 476 } 477 478 /** 479 * pci_iov_resource_bar - get position of the SR-IOV BAR 480 * @dev: the PCI device 481 * @resno: the resource number 482 * 483 * Returns position of the BAR encapsulated in the SR-IOV capability. 484 */ 485 int pci_iov_resource_bar(struct pci_dev *dev, int resno) 486 { 487 if (resno < PCI_IOV_RESOURCES || resno > PCI_IOV_RESOURCE_END) 488 return 0; 489 490 BUG_ON(!dev->is_physfn); 491 492 return dev->sriov->pos + PCI_SRIOV_BAR + 493 4 * (resno - PCI_IOV_RESOURCES); 494 } 495 496 /** 497 * pci_sriov_resource_alignment - get resource alignment for VF BAR 498 * @dev: the PCI device 499 * @resno: the resource number 500 * 501 * Returns the alignment of the VF BAR found in the SR-IOV capability. 502 * This is not the same as the resource size which is defined as 503 * the VF BAR size multiplied by the number of VFs. The alignment 504 * is just the VF BAR size. 505 */ 506 resource_size_t pci_sriov_resource_alignment(struct pci_dev *dev, int resno) 507 { 508 struct resource tmp; 509 int reg = pci_iov_resource_bar(dev, resno); 510 511 if (!reg) 512 return 0; 513 514 __pci_read_base(dev, pci_bar_unknown, &tmp, reg); 515 return resource_alignment(&tmp); 516 } 517 518 /** 519 * pci_restore_iov_state - restore the state of the IOV capability 520 * @dev: the PCI device 521 */ 522 void pci_restore_iov_state(struct pci_dev *dev) 523 { 524 if (dev->is_physfn) 525 sriov_restore_state(dev); 526 } 527 528 /** 529 * pci_iov_bus_range - find bus range used by Virtual Function 530 * @bus: the PCI bus 531 * 532 * Returns max number of buses (exclude current one) used by Virtual 533 * Functions. 534 */ 535 int pci_iov_bus_range(struct pci_bus *bus) 536 { 537 int max = 0; 538 u8 busnr; 539 struct pci_dev *dev; 540 541 list_for_each_entry(dev, &bus->devices, bus_list) { 542 if (!dev->is_physfn) 543 continue; 544 busnr = virtfn_bus(dev, dev->sriov->total_VFs - 1); 545 if (busnr > max) 546 max = busnr; 547 } 548 549 return max ? max - bus->number : 0; 550 } 551 552 /** 553 * pci_enable_sriov - enable the SR-IOV capability 554 * @dev: the PCI device 555 * @nr_virtfn: number of virtual functions to enable 556 * 557 * Returns 0 on success, or negative on failure. 558 */ 559 int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) 560 { 561 might_sleep(); 562 563 if (!dev->is_physfn) 564 return -ENOSYS; 565 566 return sriov_enable(dev, nr_virtfn); 567 } 568 EXPORT_SYMBOL_GPL(pci_enable_sriov); 569 570 /** 571 * pci_disable_sriov - disable the SR-IOV capability 572 * @dev: the PCI device 573 */ 574 void pci_disable_sriov(struct pci_dev *dev) 575 { 576 might_sleep(); 577 578 if (!dev->is_physfn) 579 return; 580 581 sriov_disable(dev); 582 } 583 EXPORT_SYMBOL_GPL(pci_disable_sriov); 584 585 /** 586 * pci_num_vf - return number of VFs associated with a PF device_release_driver 587 * @dev: the PCI device 588 * 589 * Returns number of VFs, or 0 if SR-IOV is not enabled. 590 */ 591 int pci_num_vf(struct pci_dev *dev) 592 { 593 if (!dev->is_physfn) 594 return 0; 595 596 return dev->sriov->num_VFs; 597 } 598 EXPORT_SYMBOL_GPL(pci_num_vf); 599 600 /** 601 * pci_vfs_assigned - returns number of VFs are assigned to a guest 602 * @dev: the PCI device 603 * 604 * Returns number of VFs belonging to this device that are assigned to a guest. 605 * If device is not a physical function returns 0. 606 */ 607 int pci_vfs_assigned(struct pci_dev *dev) 608 { 609 struct pci_dev *vfdev; 610 unsigned int vfs_assigned = 0; 611 unsigned short dev_id; 612 613 /* only search if we are a PF */ 614 if (!dev->is_physfn) 615 return 0; 616 617 /* 618 * determine the device ID for the VFs, the vendor ID will be the 619 * same as the PF so there is no need to check for that one 620 */ 621 pci_read_config_word(dev, dev->sriov->pos + PCI_SRIOV_VF_DID, &dev_id); 622 623 /* loop through all the VFs to see if we own any that are assigned */ 624 vfdev = pci_get_device(dev->vendor, dev_id, NULL); 625 while (vfdev) { 626 /* 627 * It is considered assigned if it is a virtual function with 628 * our dev as the physical function and the assigned bit is set 629 */ 630 if (vfdev->is_virtfn && (vfdev->physfn == dev) && 631 pci_is_dev_assigned(vfdev)) 632 vfs_assigned++; 633 634 vfdev = pci_get_device(dev->vendor, dev_id, vfdev); 635 } 636 637 return vfs_assigned; 638 } 639 EXPORT_SYMBOL_GPL(pci_vfs_assigned); 640 641 /** 642 * pci_sriov_set_totalvfs -- reduce the TotalVFs available 643 * @dev: the PCI PF device 644 * @numvfs: number that should be used for TotalVFs supported 645 * 646 * Should be called from PF driver's probe routine with 647 * device's mutex held. 648 * 649 * Returns 0 if PF is an SRIOV-capable device and 650 * value of numvfs valid. If not a PF return -ENOSYS; 651 * if numvfs is invalid return -EINVAL; 652 * if VFs already enabled, return -EBUSY. 653 */ 654 int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs) 655 { 656 if (!dev->is_physfn) 657 return -ENOSYS; 658 if (numvfs > dev->sriov->total_VFs) 659 return -EINVAL; 660 661 /* Shouldn't change if VFs already enabled */ 662 if (dev->sriov->ctrl & PCI_SRIOV_CTRL_VFE) 663 return -EBUSY; 664 else 665 dev->sriov->driver_max_VFs = numvfs; 666 667 return 0; 668 } 669 EXPORT_SYMBOL_GPL(pci_sriov_set_totalvfs); 670 671 /** 672 * pci_sriov_get_totalvfs -- get total VFs supported on this device 673 * @dev: the PCI PF device 674 * 675 * For a PCIe device with SRIOV support, return the PCIe 676 * SRIOV capability value of TotalVFs or the value of driver_max_VFs 677 * if the driver reduced it. Otherwise 0. 678 */ 679 int pci_sriov_get_totalvfs(struct pci_dev *dev) 680 { 681 if (!dev->is_physfn) 682 return 0; 683 684 if (dev->sriov->driver_max_VFs) 685 return dev->sriov->driver_max_VFs; 686 687 return dev->sriov->total_VFs; 688 } 689 EXPORT_SYMBOL_GPL(pci_sriov_get_totalvfs); 690