// SPDX-License-Identifier: GPL-2.0+
/*
 * PCI Hotplug Driver for PowerPC PowerNV platform.
 *
 * Copyright Gavin Shan, IBM Corporation 2016.
 * Copyright (C) 2025 Raptor Engineering, LLC
 * Copyright (C) 2025 Raptor Computing Systems, LLC
 */

#include <linux/bitfield.h>
#include <linux/libfdt.h>
#include <linux/module.h>
#include <linux/pci.h>
#include <linux/delay.h>
#include <linux/pci_hotplug.h>
#include <linux/of_fdt.h>

#include <asm/opal.h>
#include <asm/pnv-pci.h>
#include <asm/ppc-pci.h>

#define DRIVER_VERSION	"0.1"
#define DRIVER_AUTHOR	"Gavin Shan, IBM Corporation"
#define DRIVER_DESC	"PowerPC PowerNV PCI Hotplug Driver"

/* Warn via the slot's upstream bridge device if it exists, else via the bus */
#define SLOT_WARN(sl, x...) \
	((sl)->pdev ? pci_warn((sl)->pdev, x) : dev_warn(&(sl)->bus->dev, x))

/*
 * A deferred hotplug event, queued from the interrupt handler and
 * processed on the slot's private workqueue.
 */
struct pnv_php_event {
	bool			added;		/* true = hot add, false = hot remove */
	struct pnv_php_slot	*php_slot;
	struct work_struct	work;
};

/* Top-level slot list; the lock also guards each slot's children/link lists */
static LIST_HEAD(pnv_php_slot_list);
static DEFINE_SPINLOCK(pnv_php_lock);

static void pnv_php_register(struct device_node *dn);
static void pnv_php_unregister_one(struct device_node *dn);
static void pnv_php_unregister(struct device_node *dn);

static void pnv_php_enable_irq(struct pnv_php_slot *php_slot);

/*
 * Tear down the slot's hotplug interrupt: mask hotplug notifications in
 * the bridge's Slot Control register, then free the IRQ.  Optionally also
 * disable MSI/MSI-X (@disable_msi) and the bridge device (@disable_device).
 */
static void pnv_php_disable_irq(struct pnv_php_slot *php_slot,
				bool disable_device, bool disable_msi)
{
	struct pci_dev *pdev = php_slot->pdev;
	u16 ctrl;

	if (php_slot->irq > 0) {
		/* Mask before free_irq() so no further events are raised */
		pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &ctrl);
		ctrl &= ~(PCI_EXP_SLTCTL_HPIE |
			  PCI_EXP_SLTCTL_PDCE |
			  PCI_EXP_SLTCTL_DLLSCE);
		pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, ctrl);

		free_irq(php_slot->irq, php_slot);
		php_slot->irq = 0;
	}

	if (disable_device || disable_msi) {
		if (pdev->msix_enabled)
			pci_disable_msix(pdev);
		else if (pdev->msi_enabled)
			pci_disable_msi(pdev);
	}

	if (disable_device)
		pci_disable_device(pdev);
}

/* kref release callback: final teardown once the last reference is dropped */
static void pnv_php_free_slot(struct kref *kref)
{
	struct pnv_php_slot *php_slot = container_of(kref,
					struct pnv_php_slot, kref);

	WARN_ON(!list_empty(&php_slot->children));
	pnv_php_disable_irq(php_slot, false, false);
	destroy_workqueue(php_slot->wq);
	kfree(php_slot->name);
	kfree(php_slot);
}

/* Drop a reference taken by pnv_php_find_slot()/kref_get(); NULL-safe */
static inline void pnv_php_put_slot(struct pnv_php_slot *php_slot)
{

	if (!php_slot)
		return;

	kref_put(&php_slot->kref, pnv_php_free_slot);
}

/*
 * Depth-first search of @php_slot's subtree for the slot backing @dn.
 * On a match, a reference is taken and the slot returned; NULL otherwise.
 */
static struct pnv_php_slot *pnv_php_match(struct device_node *dn,
					  struct pnv_php_slot *php_slot)
{
	struct pnv_php_slot *target, *tmp;

	if (php_slot->dn == dn) {
		kref_get(&php_slot->kref);
		return php_slot;
	}

	list_for_each_entry(tmp, &php_slot->children, link) {
		target = pnv_php_match(dn, tmp);
		if (target)
			return target;
	}

	return NULL;
}

/*
 * Look up the registered slot for @dn across all top-level slots.
 * Returns the slot with an extra reference held (caller must
 * pnv_php_put_slot()), or NULL when none is registered.
 */
struct pnv_php_slot *pnv_php_find_slot(struct device_node *dn)
{
	struct pnv_php_slot *php_slot, *tmp;
	unsigned long flags;

	spin_lock_irqsave(&pnv_php_lock, flags);
	list_for_each_entry(tmp, &pnv_php_slot_list, link) {
		php_slot = pnv_php_match(dn, tmp);
		if (php_slot) {
			spin_unlock_irqrestore(&pnv_php_lock, flags);
			return php_slot;
		}
	}
	spin_unlock_irqrestore(&pnv_php_lock, flags);

	return NULL;
}
EXPORT_SYMBOL_GPL(pnv_php_find_slot);

/*
 * Remove pdn for all children of the indicated device node.
 * The function should remove pdn in a depth-first manner.
 */
static void pnv_php_rmv_pdns(struct device_node *dn)
{
	struct device_node *child;

	for_each_child_of_node(dn, child) {
		pnv_php_rmv_pdns(child);

		pci_remove_device_node_info(child);
	}
}

/*
 * Detach all child nodes of the indicated device nodes. The
 * function should handle device nodes in depth-first manner.
 *
 * We should not invoke of_node_release() as the memory for an
 * individual device node is part of a large memory block. The
 * large block is allocated from memblock (system bootup) or by
 * kmalloc() when unflattening the device tree via OF changeset.
 * We cannot free the large block allocated from memblock. In the
 * latter case, it should be released all at once.
 */
static void pnv_php_detach_device_nodes(struct device_node *parent)
{
	struct device_node *dn;

	for_each_child_of_node(parent, dn) {
		pnv_php_detach_device_nodes(dn);

		of_node_put(dn);
		of_detach_node(dn);
	}
}

/*
 * Tear down the device (sub)tree below the slot: drop pci_dn info,
 * undo the OF changeset (when the tree came from a hot add), detach
 * the nodes and free the FDT blob/unflattened tree memory.
 */
static void pnv_php_rmv_devtree(struct pnv_php_slot *php_slot)
{
	pnv_php_rmv_pdns(php_slot->dn);

	/*
	 * Decrease the refcount if the device nodes were created
	 * through OF changeset before detaching them.
	 */
	if (php_slot->fdt)
		of_changeset_destroy(&php_slot->ocs);
	pnv_php_detach_device_nodes(php_slot->dn);

	if (php_slot->fdt) {
		kfree(php_slot->dt);
		kfree(php_slot->fdt);
		php_slot->dt        = NULL;
		php_slot->dn->child = NULL;
		php_slot->fdt       = NULL;
	}
}

/*
 * As the nodes in OF changeset are applied in reverse order, we
 * need revert the nodes in advance so that we have correct node
 * order after the changeset is applied.
 */
static void pnv_php_reverse_nodes(struct device_node *parent)
{
	struct device_node *child, *next;

	/* In-depth first */
	for_each_child_of_node(parent, child)
		pnv_php_reverse_nodes(child);

	/* Reverse the nodes in the child list */
	child = parent->child;
	parent->child = NULL;
	while (child) {
		next = child->sibling;

		child->sibling = parent->child;
		parent->child = child;
		child = next;
	}
}

/*
 * Recursively attach every node below @dn to the changeset @ocs.
 * Returns 0 on success or the first of_changeset_attach_node() error.
 */
static int pnv_php_populate_changeset(struct of_changeset *ocs,
				      struct device_node *dn)
{
	int ret;

	for_each_child_of_node_scoped(dn, child) {
		ret = of_changeset_attach_node(ocs, child);
		if (ret)
			return ret;

		ret = pnv_php_populate_changeset(ocs, child);
		if (ret)
			return ret;
	}

	return 0;
}

/* pci_traverse_device_nodes() callback: create pci_dn info for one node */
static void *pnv_php_add_one_pdn(struct device_node *dn, void *data)
{
	struct pci_controller *hose = (struct pci_controller *)data;
	struct pci_dn *pdn;

	pdn = pci_add_device_node_info(hose, dn);
	if (!pdn)
		return ERR_PTR(-ENOMEM);

	return NULL;
}

/* Create pci_dn firmware data for the whole subtree under the slot */
static void pnv_php_add_pdns(struct pnv_php_slot *slot)
{
	struct pci_controller *hose = pci_bus_to_host(slot->bus);

	pci_traverse_device_nodes(slot->dn, pnv_php_add_one_pdn, hose);
}

/*
 * Fetch the FDT blob for the slot from firmware, unflatten it under the
 * slot's device node via an OF changeset, and populate pci_dn data.
 * On success php_slot->fdt/dt own the blob and unflattened tree memory,
 * released later by pnv_php_rmv_devtree().
 */
static int pnv_php_add_devtree(struct pnv_php_slot *php_slot)
{
	void *fdt, *fdt1, *dt;
	int ret;

	/* We don't know the FDT blob size. We try to get it through
	 * maximal memory chunk and then copy it to another chunk that
	 * fits the real size.
	 */
	fdt1 = kzalloc(0x10000, GFP_KERNEL);
	if (!fdt1) {
		ret = -ENOMEM;
		goto out;
	}

	ret = pnv_pci_get_device_tree(php_slot->dn->phandle, fdt1, 0x10000);
	if (ret) {
		SLOT_WARN(php_slot, "Error %d getting FDT blob\n", ret);
		goto free_fdt1;
	}

	fdt = kmemdup(fdt1, fdt_totalsize(fdt1), GFP_KERNEL);
	if (!fdt) {
		ret = -ENOMEM;
		goto free_fdt1;
	}

	/* Unflatten device tree blob */
	dt = of_fdt_unflatten_tree(fdt, php_slot->dn, NULL);
	if (!dt) {
		ret = -EINVAL;
		SLOT_WARN(php_slot, "Cannot unflatten FDT\n");
		goto free_fdt;
	}

	/* Initialize and apply the changeset */
	of_changeset_init(&php_slot->ocs);
	pnv_php_reverse_nodes(php_slot->dn);
	ret = pnv_php_populate_changeset(&php_slot->ocs, php_slot->dn);
	if (ret) {
		/* Restore the original child order before bailing out */
		pnv_php_reverse_nodes(php_slot->dn);
		SLOT_WARN(php_slot, "Error %d populating changeset\n",
			  ret);
		goto free_dt;
	}

	php_slot->dn->child = NULL;
	ret = of_changeset_apply(&php_slot->ocs);
	if (ret) {
		SLOT_WARN(php_slot, "Error %d applying changeset\n", ret);
		goto destroy_changeset;
	}

	/* Add device node firmware data */
	pnv_php_add_pdns(php_slot);
	php_slot->fdt = fdt;
	php_slot->dt  = dt;
	kfree(fdt1);
	goto out;

destroy_changeset:
	of_changeset_destroy(&php_slot->ocs);
free_dt:
	kfree(dt);
	php_slot->dn->child = NULL;
free_fdt:
	kfree(fdt);
free_fdt1:
	kfree(fdt1);
out:
	return ret;
}

static inline struct pnv_php_slot *to_pnv_php_slot(struct hotplug_slot *slot)
{
	return container_of(slot, struct pnv_php_slot, slot);
}

/*
 * Ask firmware to change the slot's power state, then add or remove the
 * device (sub)tree accordingly.  When firmware replies with an OPAL
 * message (ret > 0), the message parameters are validated against the
 * slot phandle and requested state before being trusted.
 */
int pnv_php_set_slot_power_state(struct hotplug_slot *slot,
				 uint8_t state)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	struct opal_msg msg;
	int ret;

	ret = pnv_pci_set_power_state(php_slot->id, state, &msg);
	if (ret > 0) {
		if (be64_to_cpu(msg.params[1]) != php_slot->dn->phandle ||
		    be64_to_cpu(msg.params[2]) != state) {
			SLOT_WARN(php_slot, "Wrong msg (%lld, %lld, %lld)\n",
				  be64_to_cpu(msg.params[1]),
				  be64_to_cpu(msg.params[2]),
				  be64_to_cpu(msg.params[3]));
			return -ENOMSG;
		}
		if (be64_to_cpu(msg.params[3]) != OPAL_SUCCESS) {
			ret = -ENODEV;
			goto error;
		}
	} else if (ret < 0) {
		goto error;
	}

	if (state == OPAL_PCI_SLOT_POWER_OFF || state == OPAL_PCI_SLOT_OFFLINE)
		pnv_php_rmv_devtree(php_slot);
	else
		ret = pnv_php_add_devtree(php_slot);

	return ret;

error:
	SLOT_WARN(php_slot, "Error %d powering %s\n",
		  ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off");
	return ret;
}
EXPORT_SYMBOL_GPL(pnv_php_set_slot_power_state);

/*
 * Report the slot's power state.  Always returns 0: a firmware failure
 * is only warned about and *state is left at the power-on fallback.
 */
static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	uint8_t power_state = OPAL_PCI_SLOT_POWER_ON;
	int ret;

	/*
	 * Retrieve power status from firmware. If we fail
	 * getting that, the power status fails back to
	 * be on.
	 */
	ret = pnv_pci_get_power_state(php_slot->id, &power_state);
	if (ret) {
		SLOT_WARN(php_slot, "Error %d getting power status\n",
			  ret);
	} else {
		*state = power_state;
	}

	return 0;
}

/*
 * Probe Data Link Layer Link Active.  Returns 1 if active, 0 if not,
 * -ENODEV when the bridge is gone or the register read all-ones.
 */
static int pcie_check_link_active(struct pci_dev *pdev)
{
	u16 lnk_status;
	int ret;

	ret = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status);
	if (ret == PCIBIOS_DEVICE_NOT_FOUND || PCI_POSSIBLE_ERROR(lnk_status))
		return -ENODEV;

	ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA);

	return ret;
}

/* Report adapter presence in *state (OPAL_PCI_SLOT_EMPTY/PRESENT) */
static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	uint8_t presence = OPAL_PCI_SLOT_EMPTY;
	int ret;

	/*
	 * Retrieve presence status from firmware. If we can't
	 * get that, it will fail back to be empty.
	 */
	ret = pnv_pci_get_presence_state(php_slot->id, &presence);
	if (ret >= 0) {
		if (pci_pcie_type(php_slot->pdev) == PCI_EXP_TYPE_DOWNSTREAM &&
		    presence == OPAL_PCI_SLOT_EMPTY) {
			/*
			 * Similar to pciehp_hpc, check whether the Link Active
			 * bit is set to account for broken downstream bridges
			 * that don't properly assert Presence Detect State, as
			 * was observed on the Microsemi Switchtec PM8533 PFX
			 * [11f8:8533].
			 */
			if (pcie_check_link_active(php_slot->pdev) > 0)
				presence = OPAL_PCI_SLOT_PRESENT;
		}

		*state = presence;
		ret = 0;
	} else {
		SLOT_WARN(php_slot, "Error %d getting presence\n", ret);
	}

	return ret;
}

/*
 * Read the raw Attention + Power Indicator Control fields from Slot
 * Control.  The >> 6 shift drops the low SLTCTL bits so *state holds
 * just the two indicator fields.
 */
static int pnv_php_get_raw_indicator_status(struct hotplug_slot *slot, u8 *state)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	struct pci_dev *bridge = php_slot->pdev;
	u16 status;

	pcie_capability_read_word(bridge, PCI_EXP_SLTCTL, &status);
	*state = (status & (PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC)) >> 6;
	return 0;
}


/* Refresh the cached attention state from hardware and report it */
static int pnv_php_get_attention_state(struct hotplug_slot *slot, u8 *state)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);

	pnv_php_get_raw_indicator_status(slot, &php_slot->attention_state);
	*state = php_slot->attention_state;
	return 0;
}

/*
 * Cache and program the Attention Indicator.  A zero @state maps to
 * the explicit ATTN_IND_OFF encoding; without a bridge only the cached
 * value is updated.
 */
static int pnv_php_set_attention_state(struct hotplug_slot *slot, u8 state)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	struct pci_dev *bridge = php_slot->pdev;
	u16 new, mask;

	php_slot->attention_state = state;
	if (!bridge)
		return 0;

	mask = PCI_EXP_SLTCTL_AIC;

	if (state)
		new = FIELD_PREP(PCI_EXP_SLTCTL_AIC, state);
	else
		new = PCI_EXP_SLTCTL_ATTN_IND_OFF;

	pcie_capability_clear_and_set_word(bridge, PCI_EXP_SLTCTL, mask, new);

	return 0;
}

static int pnv_php_activate_slot(struct pnv_php_slot *php_slot,
				 struct hotplug_slot *slot)
{
	int ret, i;

	/*
	 * Issue initial slot activation command to firmware
	 *
	 * Firmware will power slot on, attempt to train the link, and
	 * discover any downstream devices. If this process fails, firmware
	 * will return an error code and an invalid device tree. Failure
	 * can be caused for multiple reasons, including a faulty
	 * downstream device, poor connection to the downstream device, or
	 * a previously latched PHB fence. On failure, issue fundamental
	 * reset up to three times before aborting.
	 */
	ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_ON);
	if (ret) {
		/* NOTE(review): first message lacks a trailing '\n' */
		SLOT_WARN(
			php_slot,
			"PCI slot activation failed with error code %d, possible frozen PHB",
			ret);
		SLOT_WARN(
			php_slot,
			"Attempting complete PHB reset before retrying slot activation\n");
		for (i = 0; i < 3; i++) {
			/*
			 * Slot activation failed, PHB may be fenced from a
			 * prior device failure.
			 *
			 * Use the OPAL fundamental reset call to both try a
			 * device reset and clear any potentially active PHB
			 * fence / freeze.
			 */
			SLOT_WARN(php_slot, "Try %d...\n", i + 1);
			pci_set_pcie_reset_state(php_slot->pdev,
						 pcie_warm_reset);
			msleep(250);
			pci_set_pcie_reset_state(php_slot->pdev,
						 pcie_deassert_reset);

			ret = pnv_php_set_slot_power_state(
				slot, OPAL_PCI_SLOT_POWER_ON);
			if (!ret)
				break;
		}

		if (i >= 3)
			SLOT_WARN(php_slot,
				  "Failed to bring slot online, aborting!\n");
	}

	return ret;
}

/*
 * Power on (if necessary) and scan a slot in REGISTERED state, then move
 * it to POPULATED.  @rescan controls whether PCI devices behind the slot
 * are (re)probed and child hotpluggable slots registered.
 */
static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan)
{
	struct hotplug_slot *slot = &php_slot->slot;
	uint8_t presence = OPAL_PCI_SLOT_EMPTY;
	uint8_t power_status = OPAL_PCI_SLOT_POWER_ON;
	int ret;

	/* Check if the slot has been configured */
	if (php_slot->state != PNV_PHP_STATE_REGISTERED)
		return 0;

	/* Retrieve slot presence status */
	ret = pnv_php_get_adapter_state(slot, &presence);
	if (ret)
		return ret;

	/*
	 * Proceed if there have nothing behind the slot. However,
	 * we should leave the slot in registered state at the
	 * beginning. Otherwise, the PCI devices inserted afterwards
	 * won't be probed and populated.
	 */
	if (presence == OPAL_PCI_SLOT_EMPTY) {
		if (!php_slot->power_state_check) {
			php_slot->power_state_check = true;

			return 0;
		}

		goto scan;
	}

	/*
	 * If the power supply to the slot is off, we can't detect
	 * adapter presence state. That means we have to turn the
	 * slot on before going to probe slot's presence state.
	 *
	 * On the first time, we don't change the power status to
	 * boost system boot with assumption that the firmware
	 * supplies consistent slot power status: empty slot always
	 * has its power off and non-empty slot has its power on.
	 */
	if (!php_slot->power_state_check) {
		php_slot->power_state_check = true;

		ret = pnv_php_get_power_state(slot, &power_status);
		if (ret)
			return ret;

		if (power_status != OPAL_PCI_SLOT_POWER_ON)
			return 0;
	}

	/* Check the power status. Scan the slot if it is already on */
	ret = pnv_php_get_power_state(slot, &power_status);
	if (ret)
		return ret;

	if (power_status == OPAL_PCI_SLOT_POWER_ON)
		goto scan;

	/* Power is off, turn it on and then scan the slot */
	ret = pnv_php_activate_slot(php_slot, slot);
	if (ret)
		return ret;

scan:
	if (presence == OPAL_PCI_SLOT_PRESENT) {
		if (rescan) {
			pci_lock_rescan_remove();
			pci_hp_add_devices(php_slot->bus);
			pci_unlock_rescan_remove();
		}

		/* Rescan for child hotpluggable slots */
		php_slot->state = PNV_PHP_STATE_POPULATED;
		if (rescan)
			pnv_php_register(php_slot->dn);
	} else {
		php_slot->state = PNV_PHP_STATE_POPULATED;
	}

	return 0;
}

/*
 * Secondary-bus reset with the slot's own hotplug IRQ masked, clearing
 * the PDC/DLLSC status bits the reset itself caused before re-enabling.
 */
static int pnv_php_reset_slot(struct hotplug_slot *slot, bool probe)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	struct pci_dev *bridge = php_slot->pdev;
	uint16_t sts;

	/*
	 * The CAPI folks want pnv_php to drive OpenCAPI slots
	 * which don't have a bridge. Only claim to support
	 * reset_slot() if we have a bridge device (for now...)
	 */
	if (probe)
		return !bridge;

	/* mask our interrupt while resetting the bridge */
	if (php_slot->irq > 0)
		disable_irq(php_slot->irq);

	pci_bridge_secondary_bus_reset(bridge);

	/* clear any state changes that happened due to the reset */
	pcie_capability_read_word(php_slot->pdev, PCI_EXP_SLTSTA, &sts);
	sts &= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
	pcie_capability_write_word(php_slot->pdev, PCI_EXP_SLTSTA, sts);

	if (php_slot->irq > 0)
		enable_irq(php_slot->irq);

	return 0;
}

/* hotplug_slot_ops.enable_slot: enable, then rearm the surprise-plug IRQ */
static int pnv_php_enable_slot(struct hotplug_slot *slot)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	u32 prop32;
	int ret;

	ret = pnv_php_enable(php_slot, true);
	if (ret)
		return ret;

	/* (Re-)enable interrupt if the slot supports surprise hotplug */
	ret = of_property_read_u32(php_slot->dn, "ibm,slot-surprise-pluggable",
				   &prop32);
	if (!ret && prop32)
		pnv_php_enable_irq(php_slot);

	return 0;
}

/*
 * Disable any hotplug interrupts for all slots on the provided bus, as well as
 * all downstream slots in preparation for a hot unplug.
 */
static int pnv_php_disable_all_irqs(struct pci_bus *bus)
{
	struct pci_bus *child_bus;
	struct pci_slot *slot;

	/* First go down child buses */
	list_for_each_entry(child_bus, &bus->children, node)
		pnv_php_disable_all_irqs(child_bus);

	/* Disable IRQs for all pnv_php slots on this bus */
	list_for_each_entry(slot, &bus->slots, list) {
		struct pnv_php_slot *php_slot = to_pnv_php_slot(slot->hotplug);

		pnv_php_disable_irq(php_slot, false, true);
	}

	return 0;
}

/*
 * Disable any hotplug interrupts for all downstream slots on the provided
 * bus in preparation for a hot unplug.
 */
static int pnv_php_disable_all_downstream_irqs(struct pci_bus *bus)
{
	struct pci_bus *child_bus;

	/* Go down child buses, recursively deactivating their IRQs */
	list_for_each_entry(child_bus, &bus->children, node)
		pnv_php_disable_all_irqs(child_bus);

	return 0;
}

/*
 * hotplug_slot_ops.disable_slot: quiesce child IRQs, remove devices and
 * child slots, power the slot off, and return it to REGISTERED state.
 */
static int pnv_php_disable_slot(struct hotplug_slot *slot)
{
	struct pnv_php_slot *php_slot = to_pnv_php_slot(slot);
	int ret;

	/*
	 * Allow to disable a slot already in the registered state to
	 * cover cases where the slot couldn't be enabled and never
	 * reached the populated state
	 */
	if (php_slot->state != PNV_PHP_STATE_POPULATED &&
	    php_slot->state != PNV_PHP_STATE_REGISTERED)
		return 0;

	/*
	 * Free all IRQ resources from all child slots before remove.
	 * Note that we do not disable the root slot IRQ here as that
	 * would also deactivate the slot hot (re)plug interrupt!
	 */
	pnv_php_disable_all_downstream_irqs(php_slot->bus);

	/* Remove all devices behind the slot */
	pci_lock_rescan_remove();
	pci_hp_remove_devices(php_slot->bus);
	pci_unlock_rescan_remove();

	/* Detach the child hotpluggable slots */
	pnv_php_unregister(php_slot->dn);

	/* Notify firmware and remove device nodes */
	ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_OFF);

	php_slot->state = PNV_PHP_STATE_REGISTERED;
	return ret;
}

static const struct hotplug_slot_ops php_slot_ops = {
	.get_power_status	= pnv_php_get_power_state,
	.get_adapter_status	= pnv_php_get_adapter_state,
	.get_attention_status	= pnv_php_get_attention_state,
	.set_attention_status	= pnv_php_set_attention_state,
	.enable_slot		= pnv_php_enable_slot,
	.disable_slot		= pnv_php_disable_slot,
	.reset_slot		= pnv_php_reset_slot,
};

/*
 * Unlink the slot from its list and drop its own reference plus the one
 * implicitly held on the parent (taken in pnv_php_register_slot()).
 */
static void pnv_php_release(struct pnv_php_slot *php_slot)
{
	unsigned long flags;

	/* Remove from global or child list */
	spin_lock_irqsave(&pnv_php_lock, flags);
	list_del(&php_slot->link);
	spin_unlock_irqrestore(&pnv_php_lock, flags);

	/* Detach from parent */
	pnv_php_put_slot(php_slot);
	pnv_php_put_slot(php_slot->parent);
}

/*
 * Allocate and initialize a slot for device node @dn.  Requires the node
 * to carry "ibm,slot-label", a firmware slot ID, and an existing PCI bus.
 * Returns the slot with one reference held, or NULL on any failure.
 */
static struct pnv_php_slot *pnv_php_alloc_slot(struct device_node *dn)
{
	struct pnv_php_slot *php_slot;
	struct pci_bus *bus;
	const char *label;
	uint64_t id;
	int ret;

	ret = of_property_read_string(dn, "ibm,slot-label", &label);
	if (ret)
		return NULL;

	if (pnv_pci_get_slot_id(dn, &id))
		return NULL;

	bus = pci_find_bus_by_node(dn);
	if (!bus)
		return NULL;

	php_slot = kzalloc_obj(*php_slot);
	if (!php_slot)
		return NULL;

	php_slot->name = kstrdup(label, GFP_KERNEL);
	if (!php_slot->name) {
		kfree(php_slot);
		return NULL;
	}

	/* Allocate workqueue for this slot's interrupt handling */
	php_slot->wq = alloc_workqueue("pciehp-%s", WQ_PERCPU, 0, php_slot->name);
	if (!php_slot->wq) {
		SLOT_WARN(php_slot, "Cannot alloc workqueue\n");
		kfree(php_slot->name);
		kfree(php_slot);
		return NULL;
	}

	if (dn->child && PCI_DN(dn->child))
		php_slot->slot_no = PCI_SLOT(PCI_DN(dn->child)->devfn);
	else
		php_slot->slot_no = -1;   /* Placeholder slot */

	kref_init(&php_slot->kref);
	php_slot->state	                = PNV_PHP_STATE_INITIALIZED;
	php_slot->dn	                = dn;
	php_slot->pdev	                = bus->self;
	php_slot->bus	                = bus;
	php_slot->id	                = id;
	php_slot->power_state_check     = false;
	php_slot->slot.ops              = &php_slot_ops;

	INIT_LIST_HEAD(&php_slot->children);
	INIT_LIST_HEAD(&php_slot->link);

	return php_slot;
}

/*
 * Register the slot with the PCI hotplug core and link it under its
 * nearest registered ancestor slot (or the global list if none exists).
 * A successful parent lookup leaves a reference held on the parent,
 * dropped later in pnv_php_release().
 */
static int pnv_php_register_slot(struct pnv_php_slot *php_slot)
{
	struct pnv_php_slot *parent;
	struct device_node *dn = php_slot->dn;
	unsigned long flags;
	int ret;

	/* Check if the slot is registered or not */
	parent = pnv_php_find_slot(php_slot->dn);
	if (parent) {
		pnv_php_put_slot(parent);
		return -EEXIST;
	}

	/* Register PCI slot */
	ret = pci_hp_register(&php_slot->slot, php_slot->bus,
			      php_slot->slot_no, php_slot->name);
	if (ret) {
		SLOT_WARN(php_slot, "Error %d registering slot\n", ret);
		return ret;
	}

	/* Attach to the parent's child list or global list */
	while ((dn = of_get_parent(dn))) {
		if (!PCI_DN(dn)) {
			of_node_put(dn);
			break;
		}

		parent = pnv_php_find_slot(dn);
		if (parent) {
			of_node_put(dn);
			break;
		}

		of_node_put(dn);
	}

	spin_lock_irqsave(&pnv_php_lock, flags);
	php_slot->parent = parent;
	if (parent)
		list_add_tail(&php_slot->link, &parent->children);
	else
		list_add_tail(&php_slot->link, &pnv_php_slot_list);
	spin_unlock_irqrestore(&pnv_php_lock, flags);

	php_slot->state = PNV_PHP_STATE_REGISTERED;
	return 0;
}

/*
 * Enable exactly the MSI-X vector designated for hotplug by the
 * Interrupt Message Number field in PCI Express Flags.  Returns the
 * Linux IRQ number on success or a negative errno.
 */
static int pnv_php_enable_msix(struct pnv_php_slot *php_slot)
{
	struct pci_dev *pdev = php_slot->pdev;
	struct msix_entry entry;
	int nr_entries, ret;
	u16 pcie_flag;

	/* Get total number of MSIx entries */
	nr_entries = pci_msix_vec_count(pdev);
	if (nr_entries < 0)
		return nr_entries;

	/* Check hotplug MSIx entry is in range */
	pcie_capability_read_word(pdev, PCI_EXP_FLAGS, &pcie_flag);
	entry.entry = FIELD_GET(PCI_EXP_FLAGS_IRQ, pcie_flag);
	if (entry.entry >= nr_entries)
		return -ERANGE;

	/* Enable MSIx */
	ret = pci_enable_msix_exact(pdev, &entry, 1);
	if (ret) {
		SLOT_WARN(php_slot, "Error %d enabling MSIx\n", ret);
		return ret;
	}

	return entry.vector;
}

static void
pnv_php_detect_clear_suprise_removal_freeze(struct pnv_php_slot *php_slot)
{
	struct pci_dev *pdev = php_slot->pdev;
	struct eeh_dev *edev;
	struct eeh_pe *pe;
	int i, rc;

	/*
	 * When a device is surprise removed from a downstream bridge slot,
	 * the upstream bridge port can still end up frozen due to related EEH
	 * events, which will in turn block the MSI interrupts for slot hotplug
	 * detection.
	 *
	 * Detect and thaw any frozen upstream PE after slot deactivation.
	 */
	edev = pci_dev_to_eeh_dev(pdev);
	pe = edev ? edev->pe : NULL;
	rc = eeh_pe_get_state(pe);
	if ((rc == -ENODEV) || (rc == -ENOENT)) {
		SLOT_WARN(
			php_slot,
			"Upstream bridge PE state unknown, hotplug detect may fail\n");
	} else {
		if (pe->state & EEH_PE_ISOLATED) {
			SLOT_WARN(
				php_slot,
				"Upstream bridge PE %02x frozen, thawing...\n",
				pe->addr);
			/* Retry the unfreeze up to three times */
			for (i = 0; i < 3; i++)
				if (!eeh_unfreeze_pe(pe))
					break;
			if (i >= 3)
				SLOT_WARN(
					php_slot,
					"Unable to thaw PE %02x, hotplug detect will fail!\n",
					pe->addr);
			else
				SLOT_WARN(php_slot,
					  "PE %02x thawed successfully\n",
					  pe->addr);
		}
	}
}

/* Workqueue handler: perform the queued hot add or hot remove */
static void pnv_php_event_handler(struct work_struct *work)
{
	struct pnv_php_event *event =
		container_of(work, struct pnv_php_event, work);
	struct pnv_php_slot *php_slot = event->php_slot;

	if (event->added) {
		pnv_php_enable_slot(&php_slot->slot);
	} else {
		pnv_php_disable_slot(&php_slot->slot);
		pnv_php_detect_clear_suprise_removal_freeze(php_slot);
	}

	kfree(event);
}

/*
 * Hotplug interrupt handler.  Decides add vs. remove from Link Status
 * (on DLLSC) or firmware presence (on PDC, unless PDC is marked broken),
 * pre-freezes the outgoing PE on removal, and queues the actual work.
 */
static irqreturn_t pnv_php_interrupt(int irq, void *data)
{
	struct pnv_php_slot *php_slot = data;
	struct pci_dev *pchild, *pdev = php_slot->pdev;
	struct eeh_dev *edev;
	struct eeh_pe *pe;
	struct pnv_php_event *event;
	u16 sts, lsts;
	u8 presence;
	bool added;
	unsigned long flags;
	int ret;

	/* Read and acknowledge (write-1-to-clear) the slot status events */
	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &sts);
	sts &= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, sts);

	pci_dbg(pdev, "PCI slot [%s]: HP int! DLAct: %d, PresDet: %d\n",
			php_slot->name,
			!!(sts & PCI_EXP_SLTSTA_DLLSC),
			!!(sts & PCI_EXP_SLTSTA_PDC));

	if (sts & PCI_EXP_SLTSTA_DLLSC) {
		pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lsts);
		added = !!(lsts & PCI_EXP_LNKSTA_DLLLA);
	} else if (!(php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) &&
		   (sts & PCI_EXP_SLTSTA_PDC)) {
		ret = pnv_pci_get_presence_state(php_slot->id, &presence);
		if (ret) {
			SLOT_WARN(php_slot,
				  "PCI slot [%s] error %d getting presence (0x%04x), to retry the operation.\n",
				  php_slot->name, ret, sts);
			return IRQ_HANDLED;
		}

		added = !!(presence == OPAL_PCI_SLOT_PRESENT);
	} else {
		pci_dbg(pdev, "PCI slot [%s]: Spurious IRQ?\n", php_slot->name);
		return IRQ_NONE;
	}

	/* Freeze the removed PE to avoid unexpected error reporting */
	if (!added) {
		pchild = list_first_entry_or_null(&php_slot->bus->devices,
						  struct pci_dev, bus_list);
		edev = pchild ? pci_dev_to_eeh_dev(pchild) : NULL;
		pe = edev ? edev->pe : NULL;
		if (pe) {
			eeh_serialize_lock(&flags);
			eeh_pe_mark_isolated(pe);
			eeh_serialize_unlock(flags);
			eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
		}
	}

	/*
	 * The PE is left in frozen state if the event is missed. It's
	 * fine as the PCI devices (PE) aren't functional any more.
	 */
	event = kzalloc_obj(*event, GFP_ATOMIC);
	if (!event) {
		SLOT_WARN(php_slot,
			  "PCI slot [%s] missed hotplug event 0x%04x\n",
			  php_slot->name, sts);
		return IRQ_HANDLED;
	}

	pci_info(pdev, "PCI slot [%s] %s (IRQ: %d)\n",
		 php_slot->name, added ? "added" : "removed", irq);
	INIT_WORK(&event->work, pnv_php_event_handler);
	event->added = added;
	event->php_slot = php_slot;
	queue_work(php_slot->wq, &event->work);

	return IRQ_HANDLED;
}

/*
 * Hook up @irq as the slot's hotplug interrupt: honour the broken-PDC
 * quirk, clear stale status, request the IRQ and enable notifications
 * in Slot Control.  php_slot->irq is only set once everything succeeded.
 */
static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq)
{
	struct pci_dev *pdev = php_slot->pdev;
	u32 broken_pdc = 0;
	u16 sts, ctrl;
	int ret;

	/* Check PDC (Presence Detection Change) is broken or not */
	ret = of_property_read_u32(php_slot->dn, "ibm,slot-broken-pdc",
				   &broken_pdc);
	if (!ret && broken_pdc)
		php_slot->flags |= PNV_PHP_FLAG_BROKEN_PDC;

	/* Clear pending interrupts */
	pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &sts);
	if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC)
		sts |= PCI_EXP_SLTSTA_DLLSC;
	else
		sts |= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC);
	pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, sts);

	/* Request the interrupt */
	ret = request_irq(irq, pnv_php_interrupt, IRQF_SHARED,
			  php_slot->name, php_slot);
	if (ret) {
		pnv_php_disable_irq(php_slot, true, true);
		SLOT_WARN(php_slot, "Error %d enabling IRQ %d\n", ret, irq);
		return;
	}

	/* Enable the interrupts */
	pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &ctrl);
	if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) {
		ctrl &= ~PCI_EXP_SLTCTL_PDCE;
		ctrl |= (PCI_EXP_SLTCTL_HPIE |
			 PCI_EXP_SLTCTL_DLLSCE);
	} else {
		ctrl |= (PCI_EXP_SLTCTL_HPIE |
			 PCI_EXP_SLTCTL_PDCE |
			 PCI_EXP_SLTCTL_DLLSCE);
	}
	pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, ctrl);

	/* The interrupt is initialized successfully when @irq is valid */
	php_slot->irq = irq;
}

/*
 * Set up surprise-hotplug interrupt delivery for the slot, preferring
 * MSI-X, then MSI.  Silently does nothing when another driver already
 * owns the bridge's MSI configuration.
 */
static void pnv_php_enable_irq(struct pnv_php_slot *php_slot)
{
	struct pci_dev *pdev = php_slot->pdev;
	int irq, ret;

	/*
	 * The MSI/MSIx interrupt might have been occupied by other
	 * drivers. Don't populate the surprise hotplug capability
	 * in that case.
	 */
	if (pci_dev_msi_enabled(pdev))
		return;

	ret = pci_enable_device(pdev);
	if (ret) {
		SLOT_WARN(php_slot, "Error %d enabling device\n", ret);
		return;
	}

	pci_set_master(pdev);

	/* Enable MSIx interrupt */
	irq = pnv_php_enable_msix(php_slot);
	if (irq > 0) {
		pnv_php_init_irq(php_slot, irq);
		return;
	}

	/*
	 * Use MSI if MSIx doesn't work. Fail back to legacy INTx
	 * if MSI doesn't work either
	 */
	ret = pci_enable_msi(pdev);
	if (!ret || pdev->irq) {
		irq = pdev->irq;
		pnv_php_init_irq(php_slot, irq);
	}
}

/*
 * Register one hotpluggable slot for @dn.  The node must be marked
 * pluggable and firmware-resettable; otherwise -ENXIO is returned.
 */
static int pnv_php_register_one(struct device_node *dn)
{
	struct pnv_php_slot *php_slot;
	u32 prop32;
	int ret;

	/* Check if it's hotpluggable slot */
	ret = of_property_read_u32(dn, "ibm,slot-pluggable", &prop32);
	if (ret || !prop32)
		return -ENXIO;

	ret = of_property_read_u32(dn, "ibm,reset-by-firmware", &prop32);
	if (ret || !prop32)
		return -ENXIO;

	php_slot = pnv_php_alloc_slot(dn);
	if (!php_slot)
		return -ENODEV;

	ret = pnv_php_register_slot(php_slot);
	if (ret)
		goto free_slot;

	ret = pnv_php_enable(php_slot, false);
	if (ret)
		goto unregister_slot;

	/* Enable interrupt if the slot supports surprise hotplug */
	ret = of_property_read_u32(dn, "ibm,slot-surprise-pluggable", &prop32);
	if (!ret && prop32)
		pnv_php_enable_irq(php_slot);

	return 0;

unregister_slot:
	pnv_php_unregister_one(php_slot->dn);
free_slot:
	pnv_php_put_slot(php_slot);
	return ret;
}

static void pnv_php_register(struct device_node *dn)
{
	struct device_node *child;

	/*
	 * The parent slots should be registered before their
	 * child slots.
	 */
	for_each_child_of_node(dn, child) {
		pnv_php_register_one(child);
		pnv_php_register(child);
	}
}

/* Deregister and release the slot for @dn, if one is registered */
static void pnv_php_unregister_one(struct device_node *dn)
{
	struct pnv_php_slot *php_slot;

	php_slot = pnv_php_find_slot(dn);
	if (!php_slot)
		return;

	php_slot->state = PNV_PHP_STATE_OFFLINE;
	pci_hp_deregister(&php_slot->slot);
	pnv_php_release(php_slot);
	pnv_php_put_slot(php_slot);
}

static void pnv_php_unregister(struct device_node *dn)
{
	struct device_node *child;

	/* The child slots should go before their parent slots */
	for_each_child_of_node(dn, child) {
		pnv_php_unregister(child);
		pnv_php_unregister_one(child);
	}
}

/* Register slots under every supported PHB flavour */
static int __init pnv_php_init(void)
{
	struct device_node *dn;

	pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n");
	for_each_compatible_node(dn, NULL, "ibm,ioda2-phb")
		pnv_php_register(dn);

	for_each_compatible_node(dn, NULL, "ibm,ioda3-phb")
		pnv_php_register(dn);

	for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb")
		pnv_php_register_one(dn); /* slot directly under the PHB */
	return 0;
}

/* Mirror of pnv_php_init(): unregister everything we registered */
static void __exit pnv_php_exit(void)
{
	struct device_node *dn;

	for_each_compatible_node(dn, NULL, "ibm,ioda2-phb")
		pnv_php_unregister(dn);

	for_each_compatible_node(dn, NULL, "ibm,ioda3-phb")
		pnv_php_unregister(dn);

	for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb")
		pnv_php_unregister_one(dn); /* slot directly under the PHB */
}

module_init(pnv_php_init);
module_exit(pnv_php_exit);

MODULE_VERSION(DRIVER_VERSION);
MODULE_LICENSE("GPL v2");
MODULE_AUTHOR(DRIVER_AUTHOR);
MODULE_DESCRIPTION(DRIVER_DESC);