1 // SPDX-License-Identifier: GPL-2.0+ 2 /* 3 * PCI Hotplug Driver for PowerPC PowerNV platform. 4 * 5 * Copyright Gavin Shan, IBM Corporation 2016. 6 * Copyright (C) 2025 Raptor Engineering, LLC 7 * Copyright (C) 2025 Raptor Computing Systems, LLC 8 */ 9 10 #include <linux/bitfield.h> 11 #include <linux/libfdt.h> 12 #include <linux/module.h> 13 #include <linux/pci.h> 14 #include <linux/delay.h> 15 #include <linux/pci_hotplug.h> 16 #include <linux/of_fdt.h> 17 18 #include <asm/opal.h> 19 #include <asm/pnv-pci.h> 20 #include <asm/ppc-pci.h> 21 22 #define DRIVER_VERSION "0.1" 23 #define DRIVER_AUTHOR "Gavin Shan, IBM Corporation" 24 #define DRIVER_DESC "PowerPC PowerNV PCI Hotplug Driver" 25 26 #define SLOT_WARN(sl, x...) \ 27 ((sl)->pdev ? pci_warn((sl)->pdev, x) : dev_warn(&(sl)->bus->dev, x)) 28 29 struct pnv_php_event { 30 bool added; 31 struct pnv_php_slot *php_slot; 32 struct work_struct work; 33 }; 34 35 static LIST_HEAD(pnv_php_slot_list); 36 static DEFINE_SPINLOCK(pnv_php_lock); 37 38 static void pnv_php_register(struct device_node *dn); 39 static void pnv_php_unregister_one(struct device_node *dn); 40 static void pnv_php_unregister(struct device_node *dn); 41 42 static void pnv_php_enable_irq(struct pnv_php_slot *php_slot); 43 44 static void pnv_php_disable_irq(struct pnv_php_slot *php_slot, 45 bool disable_device, bool disable_msi) 46 { 47 struct pci_dev *pdev = php_slot->pdev; 48 u16 ctrl; 49 50 if (php_slot->irq > 0) { 51 pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &ctrl); 52 ctrl &= ~(PCI_EXP_SLTCTL_HPIE | 53 PCI_EXP_SLTCTL_PDCE | 54 PCI_EXP_SLTCTL_DLLSCE); 55 pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, ctrl); 56 57 free_irq(php_slot->irq, php_slot); 58 php_slot->irq = 0; 59 } 60 61 if (disable_device || disable_msi) { 62 if (pdev->msix_enabled) 63 pci_disable_msix(pdev); 64 else if (pdev->msi_enabled) 65 pci_disable_msi(pdev); 66 } 67 68 if (disable_device) 69 pci_disable_device(pdev); 70 } 71 72 static void pnv_php_free_slot(struct kref *kref) 73 { 74 struct pnv_php_slot *php_slot = container_of(kref, 75 struct pnv_php_slot, kref); 76 77 WARN_ON(!list_empty(&php_slot->children)); 78 pnv_php_disable_irq(php_slot, false, false); 79 destroy_workqueue(php_slot->wq); 80 kfree(php_slot->name); 81 kfree(php_slot); 82 } 83 84 static inline void pnv_php_put_slot(struct pnv_php_slot *php_slot) 85 { 86 87 if (!php_slot) 88 return; 89 90 kref_put(&php_slot->kref, pnv_php_free_slot); 91 } 92 93 static struct pnv_php_slot *pnv_php_match(struct device_node *dn, 94 struct pnv_php_slot *php_slot) 95 { 96 struct pnv_php_slot *target, *tmp; 97 98 if (php_slot->dn == dn) { 99 kref_get(&php_slot->kref); 100 return php_slot; 101 } 102 103 list_for_each_entry(tmp, &php_slot->children, link) { 104 target = pnv_php_match(dn, tmp); 105 if (target) 106 return target; 107 } 108 109 return NULL; 110 } 111 112 struct pnv_php_slot *pnv_php_find_slot(struct device_node *dn) 113 { 114 struct pnv_php_slot *php_slot, *tmp; 115 unsigned long flags; 116 117 spin_lock_irqsave(&pnv_php_lock, flags); 118 list_for_each_entry(tmp, &pnv_php_slot_list, link) { 119 php_slot = pnv_php_match(dn, tmp); 120 if (php_slot) { 121 spin_unlock_irqrestore(&pnv_php_lock, flags); 122 return php_slot; 123 } 124 } 125 spin_unlock_irqrestore(&pnv_php_lock, flags); 126 127 return NULL; 128 } 129 EXPORT_SYMBOL_GPL(pnv_php_find_slot); 130 131 /* 132 * Remove pdn for all children of the indicated device node. 133 * The function should remove pdn in a depth-first manner. 134 */ 135 static void pnv_php_rmv_pdns(struct device_node *dn) 136 { 137 struct device_node *child; 138 139 for_each_child_of_node(dn, child) { 140 pnv_php_rmv_pdns(child); 141 142 pci_remove_device_node_info(child); 143 } 144 } 145 146 /* 147 * Detach all child nodes of the indicated device nodes. The 148 * function should handle device nodes in depth-first manner. 149 * 150 * We should not invoke of_node_release() as the memory for 151 * individual device node is part of large memory block. The 152 * large block is allocated from memblock (system bootup) or 153 * kmalloc() when unflattening the device tree by OF changeset. 154 * We can not free the large block allocated from memblock. For 155 * later case, it should be released at once. 156 */ 157 static void pnv_php_detach_device_nodes(struct device_node *parent) 158 { 159 struct device_node *dn; 160 161 for_each_child_of_node(parent, dn) { 162 pnv_php_detach_device_nodes(dn); 163 164 of_node_put(dn); 165 of_detach_node(dn); 166 } 167 } 168 169 static void pnv_php_rmv_devtree(struct pnv_php_slot *php_slot) 170 { 171 pnv_php_rmv_pdns(php_slot->dn); 172 173 /* 174 * Decrease the refcount if the device nodes were created 175 * through OF changeset before detaching them. 176 */ 177 if (php_slot->fdt) 178 of_changeset_destroy(&php_slot->ocs); 179 pnv_php_detach_device_nodes(php_slot->dn); 180 181 if (php_slot->fdt) { 182 kfree(php_slot->dt); 183 kfree(php_slot->fdt); 184 php_slot->dt = NULL; 185 php_slot->dn->child = NULL; 186 php_slot->fdt = NULL; 187 } 188 } 189 190 /* 191 * As the nodes in OF changeset are applied in reverse order, we 192 * need revert the nodes in advance so that we have correct node 193 * order after the changeset is applied. 194 */ 195 static void pnv_php_reverse_nodes(struct device_node *parent) 196 { 197 struct device_node *child, *next; 198 199 /* In-depth first */ 200 for_each_child_of_node(parent, child) 201 pnv_php_reverse_nodes(child); 202 203 /* Reverse the nodes in the child list */ 204 child = parent->child; 205 parent->child = NULL; 206 while (child) { 207 next = child->sibling; 208 209 child->sibling = parent->child; 210 parent->child = child; 211 child = next; 212 } 213 } 214 215 static int pnv_php_populate_changeset(struct of_changeset *ocs, 216 struct device_node *dn) 217 { 218 struct device_node *child; 219 int ret = 0; 220 221 for_each_child_of_node(dn, child) { 222 ret = of_changeset_attach_node(ocs, child); 223 if (ret) { 224 of_node_put(child); 225 break; 226 } 227 228 ret = pnv_php_populate_changeset(ocs, child); 229 if (ret) { 230 of_node_put(child); 231 break; 232 } 233 } 234 235 return ret; 236 } 237 238 static void *pnv_php_add_one_pdn(struct device_node *dn, void *data) 239 { 240 struct pci_controller *hose = (struct pci_controller *)data; 241 struct pci_dn *pdn; 242 243 pdn = pci_add_device_node_info(hose, dn); 244 if (!pdn) 245 return ERR_PTR(-ENOMEM); 246 247 return NULL; 248 } 249 250 static void pnv_php_add_pdns(struct pnv_php_slot *slot) 251 { 252 struct pci_controller *hose = pci_bus_to_host(slot->bus); 253 254 pci_traverse_device_nodes(slot->dn, pnv_php_add_one_pdn, hose); 255 } 256 257 static int pnv_php_add_devtree(struct pnv_php_slot *php_slot) 258 { 259 void *fdt, *fdt1, *dt; 260 int ret; 261 262 /* We don't know the FDT blob size. We try to get it through 263 * maximal memory chunk and then copy it to another chunk that 264 * fits the real size. 265 */ 266 fdt1 = kzalloc(0x10000, GFP_KERNEL); 267 if (!fdt1) { 268 ret = -ENOMEM; 269 goto out; 270 } 271 272 ret = pnv_pci_get_device_tree(php_slot->dn->phandle, fdt1, 0x10000); 273 if (ret) { 274 SLOT_WARN(php_slot, "Error %d getting FDT blob\n", ret); 275 goto free_fdt1; 276 } 277 278 fdt = kmemdup(fdt1, fdt_totalsize(fdt1), GFP_KERNEL); 279 if (!fdt) { 280 ret = -ENOMEM; 281 goto free_fdt1; 282 } 283 284 /* Unflatten device tree blob */ 285 dt = of_fdt_unflatten_tree(fdt, php_slot->dn, NULL); 286 if (!dt) { 287 ret = -EINVAL; 288 SLOT_WARN(php_slot, "Cannot unflatten FDT\n"); 289 goto free_fdt; 290 } 291 292 /* Initialize and apply the changeset */ 293 of_changeset_init(&php_slot->ocs); 294 pnv_php_reverse_nodes(php_slot->dn); 295 ret = pnv_php_populate_changeset(&php_slot->ocs, php_slot->dn); 296 if (ret) { 297 pnv_php_reverse_nodes(php_slot->dn); 298 SLOT_WARN(php_slot, "Error %d populating changeset\n", 299 ret); 300 goto free_dt; 301 } 302 303 php_slot->dn->child = NULL; 304 ret = of_changeset_apply(&php_slot->ocs); 305 if (ret) { 306 SLOT_WARN(php_slot, "Error %d applying changeset\n", ret); 307 goto destroy_changeset; 308 } 309 310 /* Add device node firmware data */ 311 pnv_php_add_pdns(php_slot); 312 php_slot->fdt = fdt; 313 php_slot->dt = dt; 314 kfree(fdt1); 315 goto out; 316 317 destroy_changeset: 318 of_changeset_destroy(&php_slot->ocs); 319 free_dt: 320 kfree(dt); 321 php_slot->dn->child = NULL; 322 free_fdt: 323 kfree(fdt); 324 free_fdt1: 325 kfree(fdt1); 326 out: 327 return ret; 328 } 329 330 static inline struct pnv_php_slot *to_pnv_php_slot(struct hotplug_slot *slot) 331 { 332 return container_of(slot, struct pnv_php_slot, slot); 333 } 334 335 int pnv_php_set_slot_power_state(struct hotplug_slot *slot, 336 uint8_t state) 337 { 338 struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); 339 struct opal_msg msg; 340 int ret; 341 342 ret = pnv_pci_set_power_state(php_slot->id, state, &msg); 343 if (ret > 0) { 344 if (be64_to_cpu(msg.params[1]) != php_slot->dn->phandle || 345 be64_to_cpu(msg.params[2]) != state) { 346 SLOT_WARN(php_slot, "Wrong msg (%lld, %lld, %lld)\n", 347 be64_to_cpu(msg.params[1]), 348 be64_to_cpu(msg.params[2]), 349 be64_to_cpu(msg.params[3])); 350 return -ENOMSG; 351 } 352 if (be64_to_cpu(msg.params[3]) != OPAL_SUCCESS) { 353 ret = -ENODEV; 354 goto error; 355 } 356 } else if (ret < 0) { 357 goto error; 358 } 359 360 if (state == OPAL_PCI_SLOT_POWER_OFF || state == OPAL_PCI_SLOT_OFFLINE) 361 pnv_php_rmv_devtree(php_slot); 362 else 363 ret = pnv_php_add_devtree(php_slot); 364 365 return ret; 366 367 error: 368 SLOT_WARN(php_slot, "Error %d powering %s\n", 369 ret, (state == OPAL_PCI_SLOT_POWER_ON) ? "on" : "off"); 370 return ret; 371 } 372 EXPORT_SYMBOL_GPL(pnv_php_set_slot_power_state); 373 374 static int pnv_php_get_power_state(struct hotplug_slot *slot, u8 *state) 375 { 376 struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); 377 uint8_t power_state = OPAL_PCI_SLOT_POWER_ON; 378 int ret; 379 380 /* 381 * Retrieve power status from firmware. If we fail 382 * getting that, the power status fails back to 383 * be on. 384 */ 385 ret = pnv_pci_get_power_state(php_slot->id, &power_state); 386 if (ret) { 387 SLOT_WARN(php_slot, "Error %d getting power status\n", 388 ret); 389 } else { 390 *state = power_state; 391 } 392 393 return 0; 394 } 395 396 static int pcie_check_link_active(struct pci_dev *pdev) 397 { 398 u16 lnk_status; 399 int ret; 400 401 ret = pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnk_status); 402 if (ret == PCIBIOS_DEVICE_NOT_FOUND || PCI_POSSIBLE_ERROR(lnk_status)) 403 return -ENODEV; 404 405 ret = !!(lnk_status & PCI_EXP_LNKSTA_DLLLA); 406 407 return ret; 408 } 409 410 static int pnv_php_get_adapter_state(struct hotplug_slot *slot, u8 *state) 411 { 412 struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); 413 uint8_t presence = OPAL_PCI_SLOT_EMPTY; 414 int ret; 415 416 /* 417 * Retrieve presence status from firmware. If we can't 418 * get that, it will fail back to be empty. 419 */ 420 ret = pnv_pci_get_presence_state(php_slot->id, &presence); 421 if (ret >= 0) { 422 if (pci_pcie_type(php_slot->pdev) == PCI_EXP_TYPE_DOWNSTREAM && 423 presence == OPAL_PCI_SLOT_EMPTY) { 424 /* 425 * Similar to pciehp_hpc, check whether the Link Active 426 * bit is set to account for broken downstream bridges 427 * that don't properly assert Presence Detect State, as 428 * was observed on the Microsemi Switchtec PM8533 PFX 429 * [11f8:8533]. 430 */ 431 if (pcie_check_link_active(php_slot->pdev) > 0) 432 presence = OPAL_PCI_SLOT_PRESENT; 433 } 434 435 *state = presence; 436 ret = 0; 437 } else { 438 SLOT_WARN(php_slot, "Error %d getting presence\n", ret); 439 } 440 441 return ret; 442 } 443 444 static int pnv_php_get_raw_indicator_status(struct hotplug_slot *slot, u8 *state) 445 { 446 struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); 447 struct pci_dev *bridge = php_slot->pdev; 448 u16 status; 449 450 pcie_capability_read_word(bridge, PCI_EXP_SLTCTL, &status); 451 *state = (status & (PCI_EXP_SLTCTL_AIC | PCI_EXP_SLTCTL_PIC)) >> 6; 452 return 0; 453 } 454 455 456 static int pnv_php_get_attention_state(struct hotplug_slot *slot, u8 *state) 457 { 458 struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); 459 460 pnv_php_get_raw_indicator_status(slot, &php_slot->attention_state); 461 *state = php_slot->attention_state; 462 return 0; 463 } 464 465 static int pnv_php_set_attention_state(struct hotplug_slot *slot, u8 state) 466 { 467 struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); 468 struct pci_dev *bridge = php_slot->pdev; 469 u16 new, mask; 470 471 php_slot->attention_state = state; 472 if (!bridge) 473 return 0; 474 475 mask = PCI_EXP_SLTCTL_AIC; 476 477 if (state) 478 new = FIELD_PREP(PCI_EXP_SLTCTL_AIC, state); 479 else 480 new = PCI_EXP_SLTCTL_ATTN_IND_OFF; 481 482 pcie_capability_clear_and_set_word(bridge, PCI_EXP_SLTCTL, mask, new); 483 484 return 0; 485 } 486 487 static int pnv_php_activate_slot(struct pnv_php_slot *php_slot, 488 struct hotplug_slot *slot) 489 { 490 int ret, i; 491 492 /* 493 * Issue initial slot activation command to firmware 494 * 495 * Firmware will power slot on, attempt to train the link, and 496 * discover any downstream devices. If this process fails, firmware 497 * will return an error code and an invalid device tree. Failure 498 * can be caused for multiple reasons, including a faulty 499 * downstream device, poor connection to the downstream device, or 500 * a previously latched PHB fence. On failure, issue fundamental 501 * reset up to three times before aborting. 502 */ 503 ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_ON); 504 if (ret) { 505 SLOT_WARN( 506 php_slot, 507 "PCI slot activation failed with error code %d, possible frozen PHB", 508 ret); 509 SLOT_WARN( 510 php_slot, 511 "Attempting complete PHB reset before retrying slot activation\n"); 512 for (i = 0; i < 3; i++) { 513 /* 514 * Slot activation failed, PHB may be fenced from a 515 * prior device failure. 516 * 517 * Use the OPAL fundamental reset call to both try a 518 * device reset and clear any potentially active PHB 519 * fence / freeze. 520 */ 521 SLOT_WARN(php_slot, "Try %d...\n", i + 1); 522 pci_set_pcie_reset_state(php_slot->pdev, 523 pcie_warm_reset); 524 msleep(250); 525 pci_set_pcie_reset_state(php_slot->pdev, 526 pcie_deassert_reset); 527 528 ret = pnv_php_set_slot_power_state( 529 slot, OPAL_PCI_SLOT_POWER_ON); 530 if (!ret) 531 break; 532 } 533 534 if (i >= 3) 535 SLOT_WARN(php_slot, 536 "Failed to bring slot online, aborting!\n"); 537 } 538 539 return ret; 540 } 541 542 static int pnv_php_enable(struct pnv_php_slot *php_slot, bool rescan) 543 { 544 struct hotplug_slot *slot = &php_slot->slot; 545 uint8_t presence = OPAL_PCI_SLOT_EMPTY; 546 uint8_t power_status = OPAL_PCI_SLOT_POWER_ON; 547 int ret; 548 549 /* Check if the slot has been configured */ 550 if (php_slot->state != PNV_PHP_STATE_REGISTERED) 551 return 0; 552 553 /* Retrieve slot presence status */ 554 ret = pnv_php_get_adapter_state(slot, &presence); 555 if (ret) 556 return ret; 557 558 /* 559 * Proceed if there have nothing behind the slot. However, 560 * we should leave the slot in registered state at the 561 * beginning. Otherwise, the PCI devices inserted afterwards 562 * won't be probed and populated. 563 */ 564 if (presence == OPAL_PCI_SLOT_EMPTY) { 565 if (!php_slot->power_state_check) { 566 php_slot->power_state_check = true; 567 568 return 0; 569 } 570 571 goto scan; 572 } 573 574 /* 575 * If the power supply to the slot is off, we can't detect 576 * adapter presence state. That means we have to turn the 577 * slot on before going to probe slot's presence state. 578 * 579 * On the first time, we don't change the power status to 580 * boost system boot with assumption that the firmware 581 * supplies consistent slot power status: empty slot always 582 * has its power off and non-empty slot has its power on. 583 */ 584 if (!php_slot->power_state_check) { 585 php_slot->power_state_check = true; 586 587 ret = pnv_php_get_power_state(slot, &power_status); 588 if (ret) 589 return ret; 590 591 if (power_status != OPAL_PCI_SLOT_POWER_ON) 592 return 0; 593 } 594 595 /* Check the power status. Scan the slot if it is already on */ 596 ret = pnv_php_get_power_state(slot, &power_status); 597 if (ret) 598 return ret; 599 600 if (power_status == OPAL_PCI_SLOT_POWER_ON) 601 goto scan; 602 603 /* Power is off, turn it on and then scan the slot */ 604 ret = pnv_php_activate_slot(php_slot, slot); 605 if (ret) 606 return ret; 607 608 scan: 609 if (presence == OPAL_PCI_SLOT_PRESENT) { 610 if (rescan) { 611 pci_lock_rescan_remove(); 612 pci_hp_add_devices(php_slot->bus); 613 pci_unlock_rescan_remove(); 614 } 615 616 /* Rescan for child hotpluggable slots */ 617 php_slot->state = PNV_PHP_STATE_POPULATED; 618 if (rescan) 619 pnv_php_register(php_slot->dn); 620 } else { 621 php_slot->state = PNV_PHP_STATE_POPULATED; 622 } 623 624 return 0; 625 } 626 627 static int pnv_php_reset_slot(struct hotplug_slot *slot, bool probe) 628 { 629 struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); 630 struct pci_dev *bridge = php_slot->pdev; 631 uint16_t sts; 632 633 /* 634 * The CAPI folks want pnv_php to drive OpenCAPI slots 635 * which don't have a bridge. Only claim to support 636 * reset_slot() if we have a bridge device (for now...) 637 */ 638 if (probe) 639 return !bridge; 640 641 /* mask our interrupt while resetting the bridge */ 642 if (php_slot->irq > 0) 643 disable_irq(php_slot->irq); 644 645 pci_bridge_secondary_bus_reset(bridge); 646 647 /* clear any state changes that happened due to the reset */ 648 pcie_capability_read_word(php_slot->pdev, PCI_EXP_SLTSTA, &sts); 649 sts &= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC); 650 pcie_capability_write_word(php_slot->pdev, PCI_EXP_SLTSTA, sts); 651 652 if (php_slot->irq > 0) 653 enable_irq(php_slot->irq); 654 655 return 0; 656 } 657 658 static int pnv_php_enable_slot(struct hotplug_slot *slot) 659 { 660 struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); 661 u32 prop32; 662 int ret; 663 664 ret = pnv_php_enable(php_slot, true); 665 if (ret) 666 return ret; 667 668 /* (Re-)enable interrupt if the slot supports surprise hotplug */ 669 ret = of_property_read_u32(php_slot->dn, "ibm,slot-surprise-pluggable", 670 &prop32); 671 if (!ret && prop32) 672 pnv_php_enable_irq(php_slot); 673 674 return 0; 675 } 676 677 /* 678 * Disable any hotplug interrupts for all slots on the provided bus, as well as 679 * all downstream slots in preparation for a hot unplug. 680 */ 681 static int pnv_php_disable_all_irqs(struct pci_bus *bus) 682 { 683 struct pci_bus *child_bus; 684 struct pci_slot *slot; 685 686 /* First go down child buses */ 687 list_for_each_entry(child_bus, &bus->children, node) 688 pnv_php_disable_all_irqs(child_bus); 689 690 /* Disable IRQs for all pnv_php slots on this bus */ 691 list_for_each_entry(slot, &bus->slots, list) { 692 struct pnv_php_slot *php_slot = to_pnv_php_slot(slot->hotplug); 693 694 pnv_php_disable_irq(php_slot, false, true); 695 } 696 697 return 0; 698 } 699 700 /* 701 * Disable any hotplug interrupts for all downstream slots on the provided 702 * bus in preparation for a hot unplug. 703 */ 704 static int pnv_php_disable_all_downstream_irqs(struct pci_bus *bus) 705 { 706 struct pci_bus *child_bus; 707 708 /* Go down child buses, recursively deactivating their IRQs */ 709 list_for_each_entry(child_bus, &bus->children, node) 710 pnv_php_disable_all_irqs(child_bus); 711 712 return 0; 713 } 714 715 static int pnv_php_disable_slot(struct hotplug_slot *slot) 716 { 717 struct pnv_php_slot *php_slot = to_pnv_php_slot(slot); 718 int ret; 719 720 /* 721 * Allow to disable a slot already in the registered state to 722 * cover cases where the slot couldn't be enabled and never 723 * reached the populated state 724 */ 725 if (php_slot->state != PNV_PHP_STATE_POPULATED && 726 php_slot->state != PNV_PHP_STATE_REGISTERED) 727 return 0; 728 729 /* 730 * Free all IRQ resources from all child slots before remove. 731 * Note that we do not disable the root slot IRQ here as that 732 * would also deactivate the slot hot (re)plug interrupt! 733 */ 734 pnv_php_disable_all_downstream_irqs(php_slot->bus); 735 736 /* Remove all devices behind the slot */ 737 pci_lock_rescan_remove(); 738 pci_hp_remove_devices(php_slot->bus); 739 pci_unlock_rescan_remove(); 740 741 /* Detach the child hotpluggable slots */ 742 pnv_php_unregister(php_slot->dn); 743 744 /* Notify firmware and remove device nodes */ 745 ret = pnv_php_set_slot_power_state(slot, OPAL_PCI_SLOT_POWER_OFF); 746 747 php_slot->state = PNV_PHP_STATE_REGISTERED; 748 return ret; 749 } 750 751 static const struct hotplug_slot_ops php_slot_ops = { 752 .get_power_status = pnv_php_get_power_state, 753 .get_adapter_status = pnv_php_get_adapter_state, 754 .get_attention_status = pnv_php_get_attention_state, 755 .set_attention_status = pnv_php_set_attention_state, 756 .enable_slot = pnv_php_enable_slot, 757 .disable_slot = pnv_php_disable_slot, 758 .reset_slot = pnv_php_reset_slot, 759 }; 760 761 static void pnv_php_release(struct pnv_php_slot *php_slot) 762 { 763 unsigned long flags; 764 765 /* Remove from global or child list */ 766 spin_lock_irqsave(&pnv_php_lock, flags); 767 list_del(&php_slot->link); 768 spin_unlock_irqrestore(&pnv_php_lock, flags); 769 770 /* Detach from parent */ 771 pnv_php_put_slot(php_slot); 772 pnv_php_put_slot(php_slot->parent); 773 } 774 775 static struct pnv_php_slot *pnv_php_alloc_slot(struct device_node *dn) 776 { 777 struct pnv_php_slot *php_slot; 778 struct pci_bus *bus; 779 const char *label; 780 uint64_t id; 781 int ret; 782 783 ret = of_property_read_string(dn, "ibm,slot-label", &label); 784 if (ret) 785 return NULL; 786 787 if (pnv_pci_get_slot_id(dn, &id)) 788 return NULL; 789 790 bus = pci_find_bus_by_node(dn); 791 if (!bus) 792 return NULL; 793 794 php_slot = kzalloc(sizeof(*php_slot), GFP_KERNEL); 795 if (!php_slot) 796 return NULL; 797 798 php_slot->name = kstrdup(label, GFP_KERNEL); 799 if (!php_slot->name) { 800 kfree(php_slot); 801 return NULL; 802 } 803 804 /* Allocate workqueue for this slot's interrupt handling */ 805 php_slot->wq = alloc_workqueue("pciehp-%s", 0, 0, php_slot->name); 806 if (!php_slot->wq) { 807 SLOT_WARN(php_slot, "Cannot alloc workqueue\n"); 808 kfree(php_slot->name); 809 kfree(php_slot); 810 return NULL; 811 } 812 813 if (dn->child && PCI_DN(dn->child)) 814 php_slot->slot_no = PCI_SLOT(PCI_DN(dn->child)->devfn); 815 else 816 php_slot->slot_no = -1; /* Placeholder slot */ 817 818 kref_init(&php_slot->kref); 819 php_slot->state = PNV_PHP_STATE_INITIALIZED; 820 php_slot->dn = dn; 821 php_slot->pdev = bus->self; 822 php_slot->bus = bus; 823 php_slot->id = id; 824 php_slot->power_state_check = false; 825 php_slot->slot.ops = &php_slot_ops; 826 827 INIT_LIST_HEAD(&php_slot->children); 828 INIT_LIST_HEAD(&php_slot->link); 829 830 return php_slot; 831 } 832 833 static int pnv_php_register_slot(struct pnv_php_slot *php_slot) 834 { 835 struct pnv_php_slot *parent; 836 struct device_node *dn = php_slot->dn; 837 unsigned long flags; 838 int ret; 839 840 /* Check if the slot is registered or not */ 841 parent = pnv_php_find_slot(php_slot->dn); 842 if (parent) { 843 pnv_php_put_slot(parent); 844 return -EEXIST; 845 } 846 847 /* Register PCI slot */ 848 ret = pci_hp_register(&php_slot->slot, php_slot->bus, 849 php_slot->slot_no, php_slot->name); 850 if (ret) { 851 SLOT_WARN(php_slot, "Error %d registering slot\n", ret); 852 return ret; 853 } 854 855 /* Attach to the parent's child list or global list */ 856 while ((dn = of_get_parent(dn))) { 857 if (!PCI_DN(dn)) { 858 of_node_put(dn); 859 break; 860 } 861 862 parent = pnv_php_find_slot(dn); 863 if (parent) { 864 of_node_put(dn); 865 break; 866 } 867 868 of_node_put(dn); 869 } 870 871 spin_lock_irqsave(&pnv_php_lock, flags); 872 php_slot->parent = parent; 873 if (parent) 874 list_add_tail(&php_slot->link, &parent->children); 875 else 876 list_add_tail(&php_slot->link, &pnv_php_slot_list); 877 spin_unlock_irqrestore(&pnv_php_lock, flags); 878 879 php_slot->state = PNV_PHP_STATE_REGISTERED; 880 return 0; 881 } 882 883 static int pnv_php_enable_msix(struct pnv_php_slot *php_slot) 884 { 885 struct pci_dev *pdev = php_slot->pdev; 886 struct msix_entry entry; 887 int nr_entries, ret; 888 u16 pcie_flag; 889 890 /* Get total number of MSIx entries */ 891 nr_entries = pci_msix_vec_count(pdev); 892 if (nr_entries < 0) 893 return nr_entries; 894 895 /* Check hotplug MSIx entry is in range */ 896 pcie_capability_read_word(pdev, PCI_EXP_FLAGS, &pcie_flag); 897 entry.entry = FIELD_GET(PCI_EXP_FLAGS_IRQ, pcie_flag); 898 if (entry.entry >= nr_entries) 899 return -ERANGE; 900 901 /* Enable MSIx */ 902 ret = pci_enable_msix_exact(pdev, &entry, 1); 903 if (ret) { 904 SLOT_WARN(php_slot, "Error %d enabling MSIx\n", ret); 905 return ret; 906 } 907 908 return entry.vector; 909 } 910 911 static void 912 pnv_php_detect_clear_suprise_removal_freeze(struct pnv_php_slot *php_slot) 913 { 914 struct pci_dev *pdev = php_slot->pdev; 915 struct eeh_dev *edev; 916 struct eeh_pe *pe; 917 int i, rc; 918 919 /* 920 * When a device is surprise removed from a downstream bridge slot, 921 * the upstream bridge port can still end up frozen due to related EEH 922 * events, which will in turn block the MSI interrupts for slot hotplug 923 * detection. 924 * 925 * Detect and thaw any frozen upstream PE after slot deactivation. 926 */ 927 edev = pci_dev_to_eeh_dev(pdev); 928 pe = edev ? edev->pe : NULL; 929 rc = eeh_pe_get_state(pe); 930 if ((rc == -ENODEV) || (rc == -ENOENT)) { 931 SLOT_WARN( 932 php_slot, 933 "Upstream bridge PE state unknown, hotplug detect may fail\n"); 934 } else { 935 if (pe->state & EEH_PE_ISOLATED) { 936 SLOT_WARN( 937 php_slot, 938 "Upstream bridge PE %02x frozen, thawing...\n", 939 pe->addr); 940 for (i = 0; i < 3; i++) 941 if (!eeh_unfreeze_pe(pe)) 942 break; 943 if (i >= 3) 944 SLOT_WARN( 945 php_slot, 946 "Unable to thaw PE %02x, hotplug detect will fail!\n", 947 pe->addr); 948 else 949 SLOT_WARN(php_slot, 950 "PE %02x thawed successfully\n", 951 pe->addr); 952 } 953 } 954 } 955 956 static void pnv_php_event_handler(struct work_struct *work) 957 { 958 struct pnv_php_event *event = 959 container_of(work, struct pnv_php_event, work); 960 struct pnv_php_slot *php_slot = event->php_slot; 961 962 if (event->added) { 963 pnv_php_enable_slot(&php_slot->slot); 964 } else { 965 pnv_php_disable_slot(&php_slot->slot); 966 pnv_php_detect_clear_suprise_removal_freeze(php_slot); 967 } 968 969 kfree(event); 970 } 971 972 static irqreturn_t pnv_php_interrupt(int irq, void *data) 973 { 974 struct pnv_php_slot *php_slot = data; 975 struct pci_dev *pchild, *pdev = php_slot->pdev; 976 struct eeh_dev *edev; 977 struct eeh_pe *pe; 978 struct pnv_php_event *event; 979 u16 sts, lsts; 980 u8 presence; 981 bool added; 982 unsigned long flags; 983 int ret; 984 985 pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &sts); 986 sts &= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC); 987 pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, sts); 988 989 pci_dbg(pdev, "PCI slot [%s]: HP int! DLAct: %d, PresDet: %d\n", 990 php_slot->name, 991 !!(sts & PCI_EXP_SLTSTA_DLLSC), 992 !!(sts & PCI_EXP_SLTSTA_PDC)); 993 994 if (sts & PCI_EXP_SLTSTA_DLLSC) { 995 pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lsts); 996 added = !!(lsts & PCI_EXP_LNKSTA_DLLLA); 997 } else if (!(php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) && 998 (sts & PCI_EXP_SLTSTA_PDC)) { 999 ret = pnv_pci_get_presence_state(php_slot->id, &presence); 1000 if (ret) { 1001 SLOT_WARN(php_slot, 1002 "PCI slot [%s] error %d getting presence (0x%04x), to retry the operation.\n", 1003 php_slot->name, ret, sts); 1004 return IRQ_HANDLED; 1005 } 1006 1007 added = !!(presence == OPAL_PCI_SLOT_PRESENT); 1008 } else { 1009 pci_dbg(pdev, "PCI slot [%s]: Spurious IRQ?\n", php_slot->name); 1010 return IRQ_NONE; 1011 } 1012 1013 /* Freeze the removed PE to avoid unexpected error reporting */ 1014 if (!added) { 1015 pchild = list_first_entry_or_null(&php_slot->bus->devices, 1016 struct pci_dev, bus_list); 1017 edev = pchild ? pci_dev_to_eeh_dev(pchild) : NULL; 1018 pe = edev ? edev->pe : NULL; 1019 if (pe) { 1020 eeh_serialize_lock(&flags); 1021 eeh_pe_mark_isolated(pe); 1022 eeh_serialize_unlock(flags); 1023 eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE); 1024 } 1025 } 1026 1027 /* 1028 * The PE is left in frozen state if the event is missed. It's 1029 * fine as the PCI devices (PE) aren't functional any more. 1030 */ 1031 event = kzalloc(sizeof(*event), GFP_ATOMIC); 1032 if (!event) { 1033 SLOT_WARN(php_slot, 1034 "PCI slot [%s] missed hotplug event 0x%04x\n", 1035 php_slot->name, sts); 1036 return IRQ_HANDLED; 1037 } 1038 1039 pci_info(pdev, "PCI slot [%s] %s (IRQ: %d)\n", 1040 php_slot->name, added ? "added" : "removed", irq); 1041 INIT_WORK(&event->work, pnv_php_event_handler); 1042 event->added = added; 1043 event->php_slot = php_slot; 1044 queue_work(php_slot->wq, &event->work); 1045 1046 return IRQ_HANDLED; 1047 } 1048 1049 static void pnv_php_init_irq(struct pnv_php_slot *php_slot, int irq) 1050 { 1051 struct pci_dev *pdev = php_slot->pdev; 1052 u32 broken_pdc = 0; 1053 u16 sts, ctrl; 1054 int ret; 1055 1056 /* Check PDC (Presence Detection Change) is broken or not */ 1057 ret = of_property_read_u32(php_slot->dn, "ibm,slot-broken-pdc", 1058 &broken_pdc); 1059 if (!ret && broken_pdc) 1060 php_slot->flags |= PNV_PHP_FLAG_BROKEN_PDC; 1061 1062 /* Clear pending interrupts */ 1063 pcie_capability_read_word(pdev, PCI_EXP_SLTSTA, &sts); 1064 if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) 1065 sts |= PCI_EXP_SLTSTA_DLLSC; 1066 else 1067 sts |= (PCI_EXP_SLTSTA_PDC | PCI_EXP_SLTSTA_DLLSC); 1068 pcie_capability_write_word(pdev, PCI_EXP_SLTSTA, sts); 1069 1070 /* Request the interrupt */ 1071 ret = request_irq(irq, pnv_php_interrupt, IRQF_SHARED, 1072 php_slot->name, php_slot); 1073 if (ret) { 1074 pnv_php_disable_irq(php_slot, true, true); 1075 SLOT_WARN(php_slot, "Error %d enabling IRQ %d\n", ret, irq); 1076 return; 1077 } 1078 1079 /* Enable the interrupts */ 1080 pcie_capability_read_word(pdev, PCI_EXP_SLTCTL, &ctrl); 1081 if (php_slot->flags & PNV_PHP_FLAG_BROKEN_PDC) { 1082 ctrl &= ~PCI_EXP_SLTCTL_PDCE; 1083 ctrl |= (PCI_EXP_SLTCTL_HPIE | 1084 PCI_EXP_SLTCTL_DLLSCE); 1085 } else { 1086 ctrl |= (PCI_EXP_SLTCTL_HPIE | 1087 PCI_EXP_SLTCTL_PDCE | 1088 PCI_EXP_SLTCTL_DLLSCE); 1089 } 1090 pcie_capability_write_word(pdev, PCI_EXP_SLTCTL, ctrl); 1091 1092 /* The interrupt is initialized successfully when @irq is valid */ 1093 php_slot->irq = irq; 1094 } 1095 1096 static void pnv_php_enable_irq(struct pnv_php_slot *php_slot) 1097 { 1098 struct pci_dev *pdev = php_slot->pdev; 1099 int irq, ret; 1100 1101 /* 1102 * The MSI/MSIx interrupt might have been occupied by other 1103 * drivers. Don't populate the surprise hotplug capability 1104 * in that case. 1105 */ 1106 if (pci_dev_msi_enabled(pdev)) 1107 return; 1108 1109 ret = pci_enable_device(pdev); 1110 if (ret) { 1111 SLOT_WARN(php_slot, "Error %d enabling device\n", ret); 1112 return; 1113 } 1114 1115 pci_set_master(pdev); 1116 1117 /* Enable MSIx interrupt */ 1118 irq = pnv_php_enable_msix(php_slot); 1119 if (irq > 0) { 1120 pnv_php_init_irq(php_slot, irq); 1121 return; 1122 } 1123 1124 /* 1125 * Use MSI if MSIx doesn't work. Fail back to legacy INTx 1126 * if MSI doesn't work either 1127 */ 1128 ret = pci_enable_msi(pdev); 1129 if (!ret || pdev->irq) { 1130 irq = pdev->irq; 1131 pnv_php_init_irq(php_slot, irq); 1132 } 1133 } 1134 1135 static int pnv_php_register_one(struct device_node *dn) 1136 { 1137 struct pnv_php_slot *php_slot; 1138 u32 prop32; 1139 int ret; 1140 1141 /* Check if it's hotpluggable slot */ 1142 ret = of_property_read_u32(dn, "ibm,slot-pluggable", &prop32); 1143 if (ret || !prop32) 1144 return -ENXIO; 1145 1146 ret = of_property_read_u32(dn, "ibm,reset-by-firmware", &prop32); 1147 if (ret || !prop32) 1148 return -ENXIO; 1149 1150 php_slot = pnv_php_alloc_slot(dn); 1151 if (!php_slot) 1152 return -ENODEV; 1153 1154 ret = pnv_php_register_slot(php_slot); 1155 if (ret) 1156 goto free_slot; 1157 1158 ret = pnv_php_enable(php_slot, false); 1159 if (ret) 1160 goto unregister_slot; 1161 1162 /* Enable interrupt if the slot supports surprise hotplug */ 1163 ret = of_property_read_u32(dn, "ibm,slot-surprise-pluggable", &prop32); 1164 if (!ret && prop32) 1165 pnv_php_enable_irq(php_slot); 1166 1167 return 0; 1168 1169 unregister_slot: 1170 pnv_php_unregister_one(php_slot->dn); 1171 free_slot: 1172 pnv_php_put_slot(php_slot); 1173 return ret; 1174 } 1175 1176 static void pnv_php_register(struct device_node *dn) 1177 { 1178 struct device_node *child; 1179 1180 /* 1181 * The parent slots should be registered before their 1182 * child slots. 1183 */ 1184 for_each_child_of_node(dn, child) { 1185 pnv_php_register_one(child); 1186 pnv_php_register(child); 1187 } 1188 } 1189 1190 static void pnv_php_unregister_one(struct device_node *dn) 1191 { 1192 struct pnv_php_slot *php_slot; 1193 1194 php_slot = pnv_php_find_slot(dn); 1195 if (!php_slot) 1196 return; 1197 1198 php_slot->state = PNV_PHP_STATE_OFFLINE; 1199 pci_hp_deregister(&php_slot->slot); 1200 pnv_php_release(php_slot); 1201 pnv_php_put_slot(php_slot); 1202 } 1203 1204 static void pnv_php_unregister(struct device_node *dn) 1205 { 1206 struct device_node *child; 1207 1208 /* The child slots should go before their parent slots */ 1209 for_each_child_of_node(dn, child) { 1210 pnv_php_unregister(child); 1211 pnv_php_unregister_one(child); 1212 } 1213 } 1214 1215 static int __init pnv_php_init(void) 1216 { 1217 struct device_node *dn; 1218 1219 pr_info(DRIVER_DESC " version: " DRIVER_VERSION "\n"); 1220 for_each_compatible_node(dn, NULL, "ibm,ioda2-phb") 1221 pnv_php_register(dn); 1222 1223 for_each_compatible_node(dn, NULL, "ibm,ioda3-phb") 1224 pnv_php_register(dn); 1225 1226 for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb") 1227 pnv_php_register_one(dn); /* slot directly under the PHB */ 1228 return 0; 1229 } 1230 1231 static void __exit pnv_php_exit(void) 1232 { 1233 struct device_node *dn; 1234 1235 for_each_compatible_node(dn, NULL, "ibm,ioda2-phb") 1236 pnv_php_unregister(dn); 1237 1238 for_each_compatible_node(dn, NULL, "ibm,ioda3-phb") 1239 pnv_php_unregister(dn); 1240 1241 for_each_compatible_node(dn, NULL, "ibm,ioda2-npu2-opencapi-phb") 1242 pnv_php_unregister_one(dn); /* slot directly under the PHB */ 1243 } 1244 1245 module_init(pnv_php_init); 1246 module_exit(pnv_php_exit); 1247 1248 MODULE_VERSION(DRIVER_VERSION); 1249 MODULE_LICENSE("GPL v2"); 1250 MODULE_AUTHOR(DRIVER_AUTHOR); 1251 MODULE_DESCRIPTION(DRIVER_DESC); 1252