1 /* 2 * The file intends to implement the platform dependent EEH operations on 3 * powernv platform. Actually, the powernv was created in order to fully 4 * hypervisor support. 5 * 6 * Copyright Benjamin Herrenschmidt & Gavin Shan, IBM Corporation 2013. 7 * 8 * This program is free software; you can redistribute it and/or modify 9 * it under the terms of the GNU General Public License as published by 10 * the Free Software Foundation; either version 2 of the License, or 11 * (at your option) any later version. 12 */ 13 14 #include <linux/atomic.h> 15 #include <linux/debugfs.h> 16 #include <linux/delay.h> 17 #include <linux/export.h> 18 #include <linux/init.h> 19 #include <linux/interrupt.h> 20 #include <linux/list.h> 21 #include <linux/msi.h> 22 #include <linux/of.h> 23 #include <linux/pci.h> 24 #include <linux/proc_fs.h> 25 #include <linux/rbtree.h> 26 #include <linux/sched.h> 27 #include <linux/seq_file.h> 28 #include <linux/spinlock.h> 29 30 #include <asm/eeh.h> 31 #include <asm/eeh_event.h> 32 #include <asm/firmware.h> 33 #include <asm/io.h> 34 #include <asm/iommu.h> 35 #include <asm/machdep.h> 36 #include <asm/msi_bitmap.h> 37 #include <asm/opal.h> 38 #include <asm/ppc-pci.h> 39 40 #include "powernv.h" 41 #include "pci.h" 42 43 static bool pnv_eeh_nb_init = false; 44 static int eeh_event_irq = -EINVAL; 45 46 /** 47 * pnv_eeh_init - EEH platform dependent initialization 48 * 49 * EEH platform dependent initialization on powernv 50 */ 51 static int pnv_eeh_init(void) 52 { 53 struct pci_controller *hose; 54 struct pnv_phb *phb; 55 56 /* We require OPALv3 */ 57 if (!firmware_has_feature(FW_FEATURE_OPALv3)) { 58 pr_warn("%s: OPALv3 is required !\n", 59 __func__); 60 return -EINVAL; 61 } 62 63 /* Set probe mode */ 64 eeh_add_flag(EEH_PROBE_MODE_DEV); 65 66 /* 67 * P7IOC blocks PCI config access to frozen PE, but PHB3 68 * doesn't do that. So we have to selectively enable I/O 69 * prior to collecting error log. 70 */ 71 list_for_each_entry(hose, &hose_list, list_node) { 72 phb = hose->private_data; 73 74 if (phb->model == PNV_PHB_MODEL_P7IOC) 75 eeh_add_flag(EEH_ENABLE_IO_FOR_LOG); 76 77 /* 78 * PE#0 should be regarded as valid by EEH core 79 * if it's not the reserved one. Currently, we 80 * have the reserved PE#0 and PE#127 for PHB3 81 * and P7IOC separately. So we should regard 82 * PE#0 as valid for P7IOC. 83 */ 84 if (phb->ioda.reserved_pe != 0) 85 eeh_add_flag(EEH_VALID_PE_ZERO); 86 87 break; 88 } 89 90 return 0; 91 } 92 93 static irqreturn_t pnv_eeh_event(int irq, void *data) 94 { 95 /* 96 * We simply send a special EEH event if EEH has been 97 * enabled. We don't care about EEH events until we've 98 * finished processing the outstanding ones. Event processing 99 * gets unmasked in next_error() if EEH is enabled. 100 */ 101 disable_irq_nosync(irq); 102 103 if (eeh_enabled()) 104 eeh_send_failure_event(NULL); 105 106 return IRQ_HANDLED; 107 } 108 109 #ifdef CONFIG_DEBUG_FS 110 static ssize_t pnv_eeh_ei_write(struct file *filp, 111 const char __user *user_buf, 112 size_t count, loff_t *ppos) 113 { 114 struct pci_controller *hose = filp->private_data; 115 struct eeh_dev *edev; 116 struct eeh_pe *pe; 117 int pe_no, type, func; 118 unsigned long addr, mask; 119 char buf[50]; 120 int ret; 121 122 if (!eeh_ops || !eeh_ops->err_inject) 123 return -ENXIO; 124 125 /* Copy over argument buffer */ 126 ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); 127 if (!ret) 128 return -EFAULT; 129 130 /* Retrieve parameters */ 131 ret = sscanf(buf, "%x:%x:%x:%lx:%lx", 132 &pe_no, &type, &func, &addr, &mask); 133 if (ret != 5) 134 return -EINVAL; 135 136 /* Retrieve PE */ 137 edev = kzalloc(sizeof(*edev), GFP_KERNEL); 138 if (!edev) 139 return -ENOMEM; 140 edev->phb = hose; 141 edev->pe_config_addr = pe_no; 142 pe = eeh_pe_get(edev); 143 kfree(edev); 144 if (!pe) 145 return -ENODEV; 146 147 /* Do error injection */ 148 ret = eeh_ops->err_inject(pe, type, func, addr, mask); 149 return ret < 0 ? ret : count; 150 } 151 152 static const struct file_operations pnv_eeh_ei_fops = { 153 .open = simple_open, 154 .llseek = no_llseek, 155 .write = pnv_eeh_ei_write, 156 }; 157 158 static int pnv_eeh_dbgfs_set(void *data, int offset, u64 val) 159 { 160 struct pci_controller *hose = data; 161 struct pnv_phb *phb = hose->private_data; 162 163 out_be64(phb->regs + offset, val); 164 return 0; 165 } 166 167 static int pnv_eeh_dbgfs_get(void *data, int offset, u64 *val) 168 { 169 struct pci_controller *hose = data; 170 struct pnv_phb *phb = hose->private_data; 171 172 *val = in_be64(phb->regs + offset); 173 return 0; 174 } 175 176 static int pnv_eeh_outb_dbgfs_set(void *data, u64 val) 177 { 178 return pnv_eeh_dbgfs_set(data, 0xD10, val); 179 } 180 181 static int pnv_eeh_outb_dbgfs_get(void *data, u64 *val) 182 { 183 return pnv_eeh_dbgfs_get(data, 0xD10, val); 184 } 185 186 static int pnv_eeh_inbA_dbgfs_set(void *data, u64 val) 187 { 188 return pnv_eeh_dbgfs_set(data, 0xD90, val); 189 } 190 191 static int pnv_eeh_inbA_dbgfs_get(void *data, u64 *val) 192 { 193 return pnv_eeh_dbgfs_get(data, 0xD90, val); 194 } 195 196 static int pnv_eeh_inbB_dbgfs_set(void *data, u64 val) 197 { 198 return pnv_eeh_dbgfs_set(data, 0xE10, val); 199 } 200 201 static int pnv_eeh_inbB_dbgfs_get(void *data, u64 *val) 202 { 203 return pnv_eeh_dbgfs_get(data, 0xE10, val); 204 } 205 206 DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_outb_dbgfs_ops, pnv_eeh_outb_dbgfs_get, 207 pnv_eeh_outb_dbgfs_set, "0x%llx\n"); 208 DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbA_dbgfs_ops, pnv_eeh_inbA_dbgfs_get, 209 pnv_eeh_inbA_dbgfs_set, "0x%llx\n"); 210 DEFINE_SIMPLE_ATTRIBUTE(pnv_eeh_inbB_dbgfs_ops, pnv_eeh_inbB_dbgfs_get, 211 pnv_eeh_inbB_dbgfs_set, "0x%llx\n"); 212 #endif /* CONFIG_DEBUG_FS */ 213 214 /** 215 * pnv_eeh_post_init - EEH platform dependent post initialization 216 * 217 * EEH platform dependent post initialization on powernv. When 218 * the function is called, the EEH PEs and devices should have 219 * been built. If the I/O cache staff has been built, EEH is 220 * ready to supply service. 221 */ 222 static int pnv_eeh_post_init(void) 223 { 224 struct pci_controller *hose; 225 struct pnv_phb *phb; 226 int ret = 0; 227 228 /* Register OPAL event notifier */ 229 if (!pnv_eeh_nb_init) { 230 eeh_event_irq = opal_event_request(ilog2(OPAL_EVENT_PCI_ERROR)); 231 if (eeh_event_irq < 0) { 232 pr_err("%s: Can't register OPAL event interrupt (%d)\n", 233 __func__, eeh_event_irq); 234 return eeh_event_irq; 235 } 236 237 ret = request_irq(eeh_event_irq, pnv_eeh_event, 238 IRQ_TYPE_LEVEL_HIGH, "opal-eeh", NULL); 239 if (ret < 0) { 240 irq_dispose_mapping(eeh_event_irq); 241 pr_err("%s: Can't request OPAL event interrupt (%d)\n", 242 __func__, eeh_event_irq); 243 return ret; 244 } 245 246 pnv_eeh_nb_init = true; 247 } 248 249 if (!eeh_enabled()) 250 disable_irq(eeh_event_irq); 251 252 list_for_each_entry(hose, &hose_list, list_node) { 253 phb = hose->private_data; 254 255 /* 256 * If EEH is enabled, we're going to rely on that. 257 * Otherwise, we restore to conventional mechanism 258 * to clear frozen PE during PCI config access. 259 */ 260 if (eeh_enabled()) 261 phb->flags |= PNV_PHB_FLAG_EEH; 262 else 263 phb->flags &= ~PNV_PHB_FLAG_EEH; 264 265 /* Create debugfs entries */ 266 #ifdef CONFIG_DEBUG_FS 267 if (phb->has_dbgfs || !phb->dbgfs) 268 continue; 269 270 phb->has_dbgfs = 1; 271 debugfs_create_file("err_injct", 0200, 272 phb->dbgfs, hose, 273 &pnv_eeh_ei_fops); 274 275 debugfs_create_file("err_injct_outbound", 0600, 276 phb->dbgfs, hose, 277 &pnv_eeh_outb_dbgfs_ops); 278 debugfs_create_file("err_injct_inboundA", 0600, 279 phb->dbgfs, hose, 280 &pnv_eeh_inbA_dbgfs_ops); 281 debugfs_create_file("err_injct_inboundB", 0600, 282 phb->dbgfs, hose, 283 &pnv_eeh_inbB_dbgfs_ops); 284 #endif /* CONFIG_DEBUG_FS */ 285 } 286 287 288 return ret; 289 } 290 291 static int pnv_eeh_cap_start(struct pci_dn *pdn) 292 { 293 u32 status; 294 295 if (!pdn) 296 return 0; 297 298 pnv_pci_cfg_read(pdn, PCI_STATUS, 2, &status); 299 if (!(status & PCI_STATUS_CAP_LIST)) 300 return 0; 301 302 return PCI_CAPABILITY_LIST; 303 } 304 305 static int pnv_eeh_find_cap(struct pci_dn *pdn, int cap) 306 { 307 int pos = pnv_eeh_cap_start(pdn); 308 int cnt = 48; /* Maximal number of capabilities */ 309 u32 id; 310 311 if (!pos) 312 return 0; 313 314 while (cnt--) { 315 pnv_pci_cfg_read(pdn, pos, 1, &pos); 316 if (pos < 0x40) 317 break; 318 319 pos &= ~3; 320 pnv_pci_cfg_read(pdn, pos + PCI_CAP_LIST_ID, 1, &id); 321 if (id == 0xff) 322 break; 323 324 /* Found */ 325 if (id == cap) 326 return pos; 327 328 /* Next one */ 329 pos += PCI_CAP_LIST_NEXT; 330 } 331 332 return 0; 333 } 334 335 static int pnv_eeh_find_ecap(struct pci_dn *pdn, int cap) 336 { 337 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 338 u32 header; 339 int pos = 256, ttl = (4096 - 256) / 8; 340 341 if (!edev || !edev->pcie_cap) 342 return 0; 343 if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) 344 return 0; 345 else if (!header) 346 return 0; 347 348 while (ttl-- > 0) { 349 if (PCI_EXT_CAP_ID(header) == cap && pos) 350 return pos; 351 352 pos = PCI_EXT_CAP_NEXT(header); 353 if (pos < 256) 354 break; 355 356 if (pnv_pci_cfg_read(pdn, pos, 4, &header) != PCIBIOS_SUCCESSFUL) 357 break; 358 } 359 360 return 0; 361 } 362 363 /** 364 * pnv_eeh_probe - Do probe on PCI device 365 * @pdn: PCI device node 366 * @data: unused 367 * 368 * When EEH module is installed during system boot, all PCI devices 369 * are checked one by one to see if it supports EEH. The function 370 * is introduced for the purpose. By default, EEH has been enabled 371 * on all PCI devices. That's to say, we only need do necessary 372 * initialization on the corresponding eeh device and create PE 373 * accordingly. 374 * 375 * It's notable that's unsafe to retrieve the EEH device through 376 * the corresponding PCI device. During the PCI device hotplug, which 377 * was possiblly triggered by EEH core, the binding between EEH device 378 * and the PCI device isn't built yet. 379 */ 380 static void *pnv_eeh_probe(struct pci_dn *pdn, void *data) 381 { 382 struct pci_controller *hose = pdn->phb; 383 struct pnv_phb *phb = hose->private_data; 384 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 385 uint32_t pcie_flags; 386 int ret; 387 388 /* 389 * When probing the root bridge, which doesn't have any 390 * subordinate PCI devices. We don't have OF node for 391 * the root bridge. So it's not reasonable to continue 392 * the probing. 393 */ 394 if (!edev || edev->pe) 395 return NULL; 396 397 /* Skip for PCI-ISA bridge */ 398 if ((pdn->class_code >> 8) == PCI_CLASS_BRIDGE_ISA) 399 return NULL; 400 401 /* Initialize eeh device */ 402 edev->class_code = pdn->class_code; 403 edev->mode &= 0xFFFFFF00; 404 edev->pcix_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_PCIX); 405 edev->pcie_cap = pnv_eeh_find_cap(pdn, PCI_CAP_ID_EXP); 406 edev->aer_cap = pnv_eeh_find_ecap(pdn, PCI_EXT_CAP_ID_ERR); 407 if ((edev->class_code >> 8) == PCI_CLASS_BRIDGE_PCI) { 408 edev->mode |= EEH_DEV_BRIDGE; 409 if (edev->pcie_cap) { 410 pnv_pci_cfg_read(pdn, edev->pcie_cap + PCI_EXP_FLAGS, 411 2, &pcie_flags); 412 pcie_flags = (pcie_flags & PCI_EXP_FLAGS_TYPE) >> 4; 413 if (pcie_flags == PCI_EXP_TYPE_ROOT_PORT) 414 edev->mode |= EEH_DEV_ROOT_PORT; 415 else if (pcie_flags == PCI_EXP_TYPE_DOWNSTREAM) 416 edev->mode |= EEH_DEV_DS_PORT; 417 } 418 } 419 420 edev->config_addr = (pdn->busno << 8) | (pdn->devfn); 421 edev->pe_config_addr = phb->ioda.pe_rmap[edev->config_addr]; 422 423 /* Create PE */ 424 ret = eeh_add_to_parent_pe(edev); 425 if (ret) { 426 pr_warn("%s: Can't add PCI dev %04x:%02x:%02x.%01x to parent PE (%d)\n", 427 __func__, hose->global_number, pdn->busno, 428 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn), ret); 429 return NULL; 430 } 431 432 /* 433 * If the PE contains any one of following adapters, the 434 * PCI config space can't be accessed when dumping EEH log. 435 * Otherwise, we will run into fenced PHB caused by shortage 436 * of outbound credits in the adapter. The PCI config access 437 * should be blocked until PE reset. MMIO access is dropped 438 * by hardware certainly. In order to drop PCI config requests, 439 * one more flag (EEH_PE_CFG_RESTRICTED) is introduced, which 440 * will be checked in the backend for PE state retrival. If 441 * the PE becomes frozen for the first time and the flag has 442 * been set for the PE, we will set EEH_PE_CFG_BLOCKED for 443 * that PE to block its config space. 444 * 445 * Broadcom Austin 4-ports NICs (14e4:1657) 446 * Broadcom Shiner 2-ports 10G NICs (14e4:168e) 447 */ 448 if ((pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && 449 pdn->device_id == 0x1657) || 450 (pdn->vendor_id == PCI_VENDOR_ID_BROADCOM && 451 pdn->device_id == 0x168e)) 452 edev->pe->state |= EEH_PE_CFG_RESTRICTED; 453 454 /* 455 * Cache the PE primary bus, which can't be fetched when 456 * full hotplug is in progress. In that case, all child 457 * PCI devices of the PE are expected to be removed prior 458 * to PE reset. 459 */ 460 if (!edev->pe->bus) 461 edev->pe->bus = pci_find_bus(hose->global_number, 462 pdn->busno); 463 464 /* 465 * Enable EEH explicitly so that we will do EEH check 466 * while accessing I/O stuff 467 */ 468 eeh_add_flag(EEH_ENABLED); 469 470 /* Save memory bars */ 471 eeh_save_bars(edev); 472 473 return NULL; 474 } 475 476 /** 477 * pnv_eeh_set_option - Initialize EEH or MMIO/DMA reenable 478 * @pe: EEH PE 479 * @option: operation to be issued 480 * 481 * The function is used to control the EEH functionality globally. 482 * Currently, following options are support according to PAPR: 483 * Enable EEH, Disable EEH, Enable MMIO and Enable DMA 484 */ 485 static int pnv_eeh_set_option(struct eeh_pe *pe, int option) 486 { 487 struct pci_controller *hose = pe->phb; 488 struct pnv_phb *phb = hose->private_data; 489 bool freeze_pe = false; 490 int opt, ret = 0; 491 s64 rc; 492 493 /* Sanity check on option */ 494 switch (option) { 495 case EEH_OPT_DISABLE: 496 return -EPERM; 497 case EEH_OPT_ENABLE: 498 return 0; 499 case EEH_OPT_THAW_MMIO: 500 opt = OPAL_EEH_ACTION_CLEAR_FREEZE_MMIO; 501 break; 502 case EEH_OPT_THAW_DMA: 503 opt = OPAL_EEH_ACTION_CLEAR_FREEZE_DMA; 504 break; 505 case EEH_OPT_FREEZE_PE: 506 freeze_pe = true; 507 opt = OPAL_EEH_ACTION_SET_FREEZE_ALL; 508 break; 509 default: 510 pr_warn("%s: Invalid option %d\n", __func__, option); 511 return -EINVAL; 512 } 513 514 /* If PHB supports compound PE, to handle it */ 515 if (freeze_pe) { 516 if (phb->freeze_pe) { 517 phb->freeze_pe(phb, pe->addr); 518 } else { 519 rc = opal_pci_eeh_freeze_set(phb->opal_id, 520 pe->addr, opt); 521 if (rc != OPAL_SUCCESS) { 522 pr_warn("%s: Failure %lld freezing " 523 "PHB#%x-PE#%x\n", 524 __func__, rc, 525 phb->hose->global_number, pe->addr); 526 ret = -EIO; 527 } 528 } 529 } else { 530 if (phb->unfreeze_pe) { 531 ret = phb->unfreeze_pe(phb, pe->addr, opt); 532 } else { 533 rc = opal_pci_eeh_freeze_clear(phb->opal_id, 534 pe->addr, opt); 535 if (rc != OPAL_SUCCESS) { 536 pr_warn("%s: Failure %lld enable %d " 537 "for PHB#%x-PE#%x\n", 538 __func__, rc, option, 539 phb->hose->global_number, pe->addr); 540 ret = -EIO; 541 } 542 } 543 } 544 545 return ret; 546 } 547 548 /** 549 * pnv_eeh_get_pe_addr - Retrieve PE address 550 * @pe: EEH PE 551 * 552 * Retrieve the PE address according to the given tranditional 553 * PCI BDF (Bus/Device/Function) address. 554 */ 555 static int pnv_eeh_get_pe_addr(struct eeh_pe *pe) 556 { 557 return pe->addr; 558 } 559 560 static void pnv_eeh_get_phb_diag(struct eeh_pe *pe) 561 { 562 struct pnv_phb *phb = pe->phb->private_data; 563 s64 rc; 564 565 rc = opal_pci_get_phb_diag_data2(phb->opal_id, pe->data, 566 PNV_PCI_DIAG_BUF_SIZE); 567 if (rc != OPAL_SUCCESS) 568 pr_warn("%s: Failure %lld getting PHB#%x diag-data\n", 569 __func__, rc, pe->phb->global_number); 570 } 571 572 static int pnv_eeh_get_phb_state(struct eeh_pe *pe) 573 { 574 struct pnv_phb *phb = pe->phb->private_data; 575 u8 fstate; 576 __be16 pcierr; 577 s64 rc; 578 int result = 0; 579 580 rc = opal_pci_eeh_freeze_status(phb->opal_id, 581 pe->addr, 582 &fstate, 583 &pcierr, 584 NULL); 585 if (rc != OPAL_SUCCESS) { 586 pr_warn("%s: Failure %lld getting PHB#%x state\n", 587 __func__, rc, phb->hose->global_number); 588 return EEH_STATE_NOT_SUPPORT; 589 } 590 591 /* 592 * Check PHB state. If the PHB is frozen for the 593 * first time, to dump the PHB diag-data. 594 */ 595 if (be16_to_cpu(pcierr) != OPAL_EEH_PHB_ERROR) { 596 result = (EEH_STATE_MMIO_ACTIVE | 597 EEH_STATE_DMA_ACTIVE | 598 EEH_STATE_MMIO_ENABLED | 599 EEH_STATE_DMA_ENABLED); 600 } else if (!(pe->state & EEH_PE_ISOLATED)) { 601 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 602 pnv_eeh_get_phb_diag(pe); 603 604 if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) 605 pnv_pci_dump_phb_diag_data(pe->phb, pe->data); 606 } 607 608 return result; 609 } 610 611 static int pnv_eeh_get_pe_state(struct eeh_pe *pe) 612 { 613 struct pnv_phb *phb = pe->phb->private_data; 614 u8 fstate; 615 __be16 pcierr; 616 s64 rc; 617 int result; 618 619 /* 620 * We don't clobber hardware frozen state until PE 621 * reset is completed. In order to keep EEH core 622 * moving forward, we have to return operational 623 * state during PE reset. 624 */ 625 if (pe->state & EEH_PE_RESET) { 626 result = (EEH_STATE_MMIO_ACTIVE | 627 EEH_STATE_DMA_ACTIVE | 628 EEH_STATE_MMIO_ENABLED | 629 EEH_STATE_DMA_ENABLED); 630 return result; 631 } 632 633 /* 634 * Fetch PE state from hardware. If the PHB 635 * supports compound PE, let it handle that. 636 */ 637 if (phb->get_pe_state) { 638 fstate = phb->get_pe_state(phb, pe->addr); 639 } else { 640 rc = opal_pci_eeh_freeze_status(phb->opal_id, 641 pe->addr, 642 &fstate, 643 &pcierr, 644 NULL); 645 if (rc != OPAL_SUCCESS) { 646 pr_warn("%s: Failure %lld getting PHB#%x-PE%x state\n", 647 __func__, rc, phb->hose->global_number, 648 pe->addr); 649 return EEH_STATE_NOT_SUPPORT; 650 } 651 } 652 653 /* Figure out state */ 654 switch (fstate) { 655 case OPAL_EEH_STOPPED_NOT_FROZEN: 656 result = (EEH_STATE_MMIO_ACTIVE | 657 EEH_STATE_DMA_ACTIVE | 658 EEH_STATE_MMIO_ENABLED | 659 EEH_STATE_DMA_ENABLED); 660 break; 661 case OPAL_EEH_STOPPED_MMIO_FREEZE: 662 result = (EEH_STATE_DMA_ACTIVE | 663 EEH_STATE_DMA_ENABLED); 664 break; 665 case OPAL_EEH_STOPPED_DMA_FREEZE: 666 result = (EEH_STATE_MMIO_ACTIVE | 667 EEH_STATE_MMIO_ENABLED); 668 break; 669 case OPAL_EEH_STOPPED_MMIO_DMA_FREEZE: 670 result = 0; 671 break; 672 case OPAL_EEH_STOPPED_RESET: 673 result = EEH_STATE_RESET_ACTIVE; 674 break; 675 case OPAL_EEH_STOPPED_TEMP_UNAVAIL: 676 result = EEH_STATE_UNAVAILABLE; 677 break; 678 case OPAL_EEH_STOPPED_PERM_UNAVAIL: 679 result = EEH_STATE_NOT_SUPPORT; 680 break; 681 default: 682 result = EEH_STATE_NOT_SUPPORT; 683 pr_warn("%s: Invalid PHB#%x-PE#%x state %x\n", 684 __func__, phb->hose->global_number, 685 pe->addr, fstate); 686 } 687 688 /* 689 * If PHB supports compound PE, to freeze all 690 * slave PEs for consistency. 691 * 692 * If the PE is switching to frozen state for the 693 * first time, to dump the PHB diag-data. 694 */ 695 if (!(result & EEH_STATE_NOT_SUPPORT) && 696 !(result & EEH_STATE_UNAVAILABLE) && 697 !(result & EEH_STATE_MMIO_ACTIVE) && 698 !(result & EEH_STATE_DMA_ACTIVE) && 699 !(pe->state & EEH_PE_ISOLATED)) { 700 if (phb->freeze_pe) 701 phb->freeze_pe(phb, pe->addr); 702 703 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 704 pnv_eeh_get_phb_diag(pe); 705 706 if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) 707 pnv_pci_dump_phb_diag_data(pe->phb, pe->data); 708 } 709 710 return result; 711 } 712 713 /** 714 * pnv_eeh_get_state - Retrieve PE state 715 * @pe: EEH PE 716 * @delay: delay while PE state is temporarily unavailable 717 * 718 * Retrieve the state of the specified PE. For IODA-compitable 719 * platform, it should be retrieved from IODA table. Therefore, 720 * we prefer passing down to hardware implementation to handle 721 * it. 722 */ 723 static int pnv_eeh_get_state(struct eeh_pe *pe, int *delay) 724 { 725 int ret; 726 727 if (pe->type & EEH_PE_PHB) 728 ret = pnv_eeh_get_phb_state(pe); 729 else 730 ret = pnv_eeh_get_pe_state(pe); 731 732 if (!delay) 733 return ret; 734 735 /* 736 * If the PE state is temporarily unavailable, 737 * to inform the EEH core delay for default 738 * period (1 second) 739 */ 740 *delay = 0; 741 if (ret & EEH_STATE_UNAVAILABLE) 742 *delay = 1000; 743 744 return ret; 745 } 746 747 static s64 pnv_eeh_phb_poll(struct pnv_phb *phb) 748 { 749 s64 rc = OPAL_HARDWARE; 750 751 while (1) { 752 rc = opal_pci_poll(phb->opal_id); 753 if (rc <= 0) 754 break; 755 756 if (system_state < SYSTEM_RUNNING) 757 udelay(1000 * rc); 758 else 759 msleep(rc); 760 } 761 762 return rc; 763 } 764 765 int pnv_eeh_phb_reset(struct pci_controller *hose, int option) 766 { 767 struct pnv_phb *phb = hose->private_data; 768 s64 rc = OPAL_HARDWARE; 769 770 pr_debug("%s: Reset PHB#%x, option=%d\n", 771 __func__, hose->global_number, option); 772 773 /* Issue PHB complete reset request */ 774 if (option == EEH_RESET_FUNDAMENTAL || 775 option == EEH_RESET_HOT) 776 rc = opal_pci_reset(phb->opal_id, 777 OPAL_RESET_PHB_COMPLETE, 778 OPAL_ASSERT_RESET); 779 else if (option == EEH_RESET_DEACTIVATE) 780 rc = opal_pci_reset(phb->opal_id, 781 OPAL_RESET_PHB_COMPLETE, 782 OPAL_DEASSERT_RESET); 783 if (rc < 0) 784 goto out; 785 786 /* 787 * Poll state of the PHB until the request is done 788 * successfully. The PHB reset is usually PHB complete 789 * reset followed by hot reset on root bus. So we also 790 * need the PCI bus settlement delay. 791 */ 792 rc = pnv_eeh_phb_poll(phb); 793 if (option == EEH_RESET_DEACTIVATE) { 794 if (system_state < SYSTEM_RUNNING) 795 udelay(1000 * EEH_PE_RST_SETTLE_TIME); 796 else 797 msleep(EEH_PE_RST_SETTLE_TIME); 798 } 799 out: 800 if (rc != OPAL_SUCCESS) 801 return -EIO; 802 803 return 0; 804 } 805 806 static int pnv_eeh_root_reset(struct pci_controller *hose, int option) 807 { 808 struct pnv_phb *phb = hose->private_data; 809 s64 rc = OPAL_HARDWARE; 810 811 pr_debug("%s: Reset PHB#%x, option=%d\n", 812 __func__, hose->global_number, option); 813 814 /* 815 * During the reset deassert time, we needn't care 816 * the reset scope because the firmware does nothing 817 * for fundamental or hot reset during deassert phase. 818 */ 819 if (option == EEH_RESET_FUNDAMENTAL) 820 rc = opal_pci_reset(phb->opal_id, 821 OPAL_RESET_PCI_FUNDAMENTAL, 822 OPAL_ASSERT_RESET); 823 else if (option == EEH_RESET_HOT) 824 rc = opal_pci_reset(phb->opal_id, 825 OPAL_RESET_PCI_HOT, 826 OPAL_ASSERT_RESET); 827 else if (option == EEH_RESET_DEACTIVATE) 828 rc = opal_pci_reset(phb->opal_id, 829 OPAL_RESET_PCI_HOT, 830 OPAL_DEASSERT_RESET); 831 if (rc < 0) 832 goto out; 833 834 /* Poll state of the PHB until the request is done */ 835 rc = pnv_eeh_phb_poll(phb); 836 if (option == EEH_RESET_DEACTIVATE) 837 msleep(EEH_PE_RST_SETTLE_TIME); 838 out: 839 if (rc != OPAL_SUCCESS) 840 return -EIO; 841 842 return 0; 843 } 844 845 static int pnv_eeh_bridge_reset(struct pci_dev *dev, int option) 846 { 847 struct pci_dn *pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); 848 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 849 int aer = edev ? edev->aer_cap : 0; 850 u32 ctrl; 851 852 pr_debug("%s: Reset PCI bus %04x:%02x with option %d\n", 853 __func__, pci_domain_nr(dev->bus), 854 dev->bus->number, option); 855 856 switch (option) { 857 case EEH_RESET_FUNDAMENTAL: 858 case EEH_RESET_HOT: 859 /* Don't report linkDown event */ 860 if (aer) { 861 eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, 862 4, &ctrl); 863 ctrl |= PCI_ERR_UNC_SURPDN; 864 eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, 865 4, ctrl); 866 } 867 868 eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); 869 ctrl |= PCI_BRIDGE_CTL_BUS_RESET; 870 eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); 871 872 msleep(EEH_PE_RST_HOLD_TIME); 873 break; 874 case EEH_RESET_DEACTIVATE: 875 eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &ctrl); 876 ctrl &= ~PCI_BRIDGE_CTL_BUS_RESET; 877 eeh_ops->write_config(pdn, PCI_BRIDGE_CONTROL, 2, ctrl); 878 879 msleep(EEH_PE_RST_SETTLE_TIME); 880 881 /* Continue reporting linkDown event */ 882 if (aer) { 883 eeh_ops->read_config(pdn, aer + PCI_ERR_UNCOR_MASK, 884 4, &ctrl); 885 ctrl &= ~PCI_ERR_UNC_SURPDN; 886 eeh_ops->write_config(pdn, aer + PCI_ERR_UNCOR_MASK, 887 4, ctrl); 888 } 889 890 break; 891 } 892 893 return 0; 894 } 895 896 void pnv_pci_reset_secondary_bus(struct pci_dev *dev) 897 { 898 struct pci_controller *hose; 899 900 if (pci_is_root_bus(dev->bus)) { 901 hose = pci_bus_to_host(dev->bus); 902 pnv_eeh_root_reset(hose, EEH_RESET_HOT); 903 pnv_eeh_root_reset(hose, EEH_RESET_DEACTIVATE); 904 } else { 905 pnv_eeh_bridge_reset(dev, EEH_RESET_HOT); 906 pnv_eeh_bridge_reset(dev, EEH_RESET_DEACTIVATE); 907 } 908 } 909 910 /** 911 * pnv_eeh_reset - Reset the specified PE 912 * @pe: EEH PE 913 * @option: reset option 914 * 915 * Do reset on the indicated PE. For PCI bus sensitive PE, 916 * we need to reset the parent p2p bridge. The PHB has to 917 * be reinitialized if the p2p bridge is root bridge. For 918 * PCI device sensitive PE, we will try to reset the device 919 * through FLR. For now, we don't have OPAL APIs to do HARD 920 * reset yet, so all reset would be SOFT (HOT) reset. 921 */ 922 static int pnv_eeh_reset(struct eeh_pe *pe, int option) 923 { 924 struct pci_controller *hose = pe->phb; 925 struct pci_bus *bus; 926 int ret; 927 928 /* 929 * For PHB reset, we always have complete reset. For those PEs whose 930 * primary bus derived from root complex (root bus) or root port 931 * (usually bus#1), we apply hot or fundamental reset on the root port. 932 * For other PEs, we always have hot reset on the PE primary bus. 933 * 934 * Here, we have different design to pHyp, which always clear the 935 * frozen state during PE reset. However, the good idea here from 936 * benh is to keep frozen state before we get PE reset done completely 937 * (until BAR restore). With the frozen state, HW drops illegal IO 938 * or MMIO access, which can incur recrusive frozen PE during PE 939 * reset. The side effect is that EEH core has to clear the frozen 940 * state explicitly after BAR restore. 941 */ 942 if (pe->type & EEH_PE_PHB) { 943 ret = pnv_eeh_phb_reset(hose, option); 944 } else { 945 struct pnv_phb *phb; 946 s64 rc; 947 948 /* 949 * The frozen PE might be caused by PAPR error injection 950 * registers, which are expected to be cleared after hitting 951 * frozen PE as stated in the hardware spec. Unfortunately, 952 * that's not true on P7IOC. So we have to clear it manually 953 * to avoid recursive EEH errors during recovery. 954 */ 955 phb = hose->private_data; 956 if (phb->model == PNV_PHB_MODEL_P7IOC && 957 (option == EEH_RESET_HOT || 958 option == EEH_RESET_FUNDAMENTAL)) { 959 rc = opal_pci_reset(phb->opal_id, 960 OPAL_RESET_PHB_ERROR, 961 OPAL_ASSERT_RESET); 962 if (rc != OPAL_SUCCESS) { 963 pr_warn("%s: Failure %lld clearing " 964 "error injection registers\n", 965 __func__, rc); 966 return -EIO; 967 } 968 } 969 970 bus = eeh_pe_bus_get(pe); 971 if (pci_is_root_bus(bus) || 972 pci_is_root_bus(bus->parent)) 973 ret = pnv_eeh_root_reset(hose, option); 974 else 975 ret = pnv_eeh_bridge_reset(bus->self, option); 976 } 977 978 return ret; 979 } 980 981 /** 982 * pnv_eeh_wait_state - Wait for PE state 983 * @pe: EEH PE 984 * @max_wait: maximal period in millisecond 985 * 986 * Wait for the state of associated PE. It might take some time 987 * to retrieve the PE's state. 988 */ 989 static int pnv_eeh_wait_state(struct eeh_pe *pe, int max_wait) 990 { 991 int ret; 992 int mwait; 993 994 while (1) { 995 ret = pnv_eeh_get_state(pe, &mwait); 996 997 /* 998 * If the PE's state is temporarily unavailable, 999 * we have to wait for the specified time. Otherwise, 1000 * the PE's state will be returned immediately. 1001 */ 1002 if (ret != EEH_STATE_UNAVAILABLE) 1003 return ret; 1004 1005 if (max_wait <= 0) { 1006 pr_warn("%s: Timeout getting PE#%x's state (%d)\n", 1007 __func__, pe->addr, max_wait); 1008 return EEH_STATE_NOT_SUPPORT; 1009 } 1010 1011 max_wait -= mwait; 1012 msleep(mwait); 1013 } 1014 1015 return EEH_STATE_NOT_SUPPORT; 1016 } 1017 1018 /** 1019 * pnv_eeh_get_log - Retrieve error log 1020 * @pe: EEH PE 1021 * @severity: temporary or permanent error log 1022 * @drv_log: driver log to be combined with retrieved error log 1023 * @len: length of driver log 1024 * 1025 * Retrieve the temporary or permanent error from the PE. 1026 */ 1027 static int pnv_eeh_get_log(struct eeh_pe *pe, int severity, 1028 char *drv_log, unsigned long len) 1029 { 1030 if (!eeh_has_flag(EEH_EARLY_DUMP_LOG)) 1031 pnv_pci_dump_phb_diag_data(pe->phb, pe->data); 1032 1033 return 0; 1034 } 1035 1036 /** 1037 * pnv_eeh_configure_bridge - Configure PCI bridges in the indicated PE 1038 * @pe: EEH PE 1039 * 1040 * The function will be called to reconfigure the bridges included 1041 * in the specified PE so that the mulfunctional PE would be recovered 1042 * again. 1043 */ 1044 static int pnv_eeh_configure_bridge(struct eeh_pe *pe) 1045 { 1046 return 0; 1047 } 1048 1049 /** 1050 * pnv_pe_err_inject - Inject specified error to the indicated PE 1051 * @pe: the indicated PE 1052 * @type: error type 1053 * @func: specific error type 1054 * @addr: address 1055 * @mask: address mask 1056 * 1057 * The routine is called to inject specified error, which is 1058 * determined by @type and @func, to the indicated PE for 1059 * testing purpose. 1060 */ 1061 static int pnv_eeh_err_inject(struct eeh_pe *pe, int type, int func, 1062 unsigned long addr, unsigned long mask) 1063 { 1064 struct pci_controller *hose = pe->phb; 1065 struct pnv_phb *phb = hose->private_data; 1066 s64 rc; 1067 1068 /* Sanity check on error type */ 1069 if (type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR && 1070 type != OPAL_ERR_INJECT_TYPE_IOA_BUS_ERR64) { 1071 pr_warn("%s: Invalid error type %d\n", 1072 __func__, type); 1073 return -ERANGE; 1074 } 1075 1076 if (func < OPAL_ERR_INJECT_FUNC_IOA_LD_MEM_ADDR || 1077 func > OPAL_ERR_INJECT_FUNC_IOA_DMA_WR_TARGET) { 1078 pr_warn("%s: Invalid error function %d\n", 1079 __func__, func); 1080 return -ERANGE; 1081 } 1082 1083 /* Firmware supports error injection ? */ 1084 if (!opal_check_token(OPAL_PCI_ERR_INJECT)) { 1085 pr_warn("%s: Firmware doesn't support error injection\n", 1086 __func__); 1087 return -ENXIO; 1088 } 1089 1090 /* Do error injection */ 1091 rc = opal_pci_err_inject(phb->opal_id, pe->addr, 1092 type, func, addr, mask); 1093 if (rc != OPAL_SUCCESS) { 1094 pr_warn("%s: Failure %lld injecting error " 1095 "%d-%d to PHB#%x-PE#%x\n", 1096 __func__, rc, type, func, 1097 hose->global_number, pe->addr); 1098 return -EIO; 1099 } 1100 1101 return 0; 1102 } 1103 1104 static inline bool pnv_eeh_cfg_blocked(struct pci_dn *pdn) 1105 { 1106 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 1107 1108 if (!edev || !edev->pe) 1109 return false; 1110 1111 if (edev->pe->state & EEH_PE_CFG_BLOCKED) 1112 return true; 1113 1114 return false; 1115 } 1116 1117 static int pnv_eeh_read_config(struct pci_dn *pdn, 1118 int where, int size, u32 *val) 1119 { 1120 if (!pdn) 1121 return PCIBIOS_DEVICE_NOT_FOUND; 1122 1123 if (pnv_eeh_cfg_blocked(pdn)) { 1124 *val = 0xFFFFFFFF; 1125 return PCIBIOS_SET_FAILED; 1126 } 1127 1128 return pnv_pci_cfg_read(pdn, where, size, val); 1129 } 1130 1131 static int pnv_eeh_write_config(struct pci_dn *pdn, 1132 int where, int size, u32 val) 1133 { 1134 if (!pdn) 1135 return PCIBIOS_DEVICE_NOT_FOUND; 1136 1137 if (pnv_eeh_cfg_blocked(pdn)) 1138 return PCIBIOS_SET_FAILED; 1139 1140 return pnv_pci_cfg_write(pdn, where, size, val); 1141 } 1142 1143 static void pnv_eeh_dump_hub_diag_common(struct OpalIoP7IOCErrorData *data) 1144 { 1145 /* GEM */ 1146 if (data->gemXfir || data->gemRfir || 1147 data->gemRirqfir || data->gemMask || data->gemRwof) 1148 pr_info(" GEM: %016llx %016llx %016llx %016llx %016llx\n", 1149 be64_to_cpu(data->gemXfir), 1150 be64_to_cpu(data->gemRfir), 1151 be64_to_cpu(data->gemRirqfir), 1152 be64_to_cpu(data->gemMask), 1153 be64_to_cpu(data->gemRwof)); 1154 1155 /* LEM */ 1156 if (data->lemFir || data->lemErrMask || 1157 data->lemAction0 || data->lemAction1 || data->lemWof) 1158 pr_info(" LEM: %016llx %016llx %016llx %016llx %016llx\n", 1159 be64_to_cpu(data->lemFir), 1160 be64_to_cpu(data->lemErrMask), 1161 be64_to_cpu(data->lemAction0), 1162 be64_to_cpu(data->lemAction1), 1163 be64_to_cpu(data->lemWof)); 1164 } 1165 1166 static void pnv_eeh_get_and_dump_hub_diag(struct pci_controller *hose) 1167 { 1168 struct pnv_phb *phb = hose->private_data; 1169 struct OpalIoP7IOCErrorData *data = &phb->diag.hub_diag; 1170 long rc; 1171 1172 rc = opal_pci_get_hub_diag_data(phb->hub_id, data, sizeof(*data)); 1173 if (rc != OPAL_SUCCESS) { 1174 pr_warn("%s: Failed to get HUB#%llx diag-data (%ld)\n", 1175 __func__, phb->hub_id, rc); 1176 return; 1177 } 1178 1179 switch (data->type) { 1180 case OPAL_P7IOC_DIAG_TYPE_RGC: 1181 pr_info("P7IOC diag-data for RGC\n\n"); 1182 pnv_eeh_dump_hub_diag_common(data); 1183 if (data->rgc.rgcStatus || data->rgc.rgcLdcp) 1184 pr_info(" RGC: %016llx %016llx\n", 1185 be64_to_cpu(data->rgc.rgcStatus), 1186 be64_to_cpu(data->rgc.rgcLdcp)); 1187 break; 1188 case OPAL_P7IOC_DIAG_TYPE_BI: 1189 pr_info("P7IOC diag-data for BI %s\n\n", 1190 data->bi.biDownbound ? "Downbound" : "Upbound"); 1191 pnv_eeh_dump_hub_diag_common(data); 1192 if (data->bi.biLdcp0 || data->bi.biLdcp1 || 1193 data->bi.biLdcp2 || data->bi.biFenceStatus) 1194 pr_info(" BI: %016llx %016llx %016llx %016llx\n", 1195 be64_to_cpu(data->bi.biLdcp0), 1196 be64_to_cpu(data->bi.biLdcp1), 1197 be64_to_cpu(data->bi.biLdcp2), 1198 be64_to_cpu(data->bi.biFenceStatus)); 1199 break; 1200 case OPAL_P7IOC_DIAG_TYPE_CI: 1201 pr_info("P7IOC diag-data for CI Port %d\n\n", 1202 data->ci.ciPort); 1203 pnv_eeh_dump_hub_diag_common(data); 1204 if (data->ci.ciPortStatus || data->ci.ciPortLdcp) 1205 pr_info(" CI: %016llx %016llx\n", 1206 be64_to_cpu(data->ci.ciPortStatus), 1207 be64_to_cpu(data->ci.ciPortLdcp)); 1208 break; 1209 case OPAL_P7IOC_DIAG_TYPE_MISC: 1210 pr_info("P7IOC diag-data for MISC\n\n"); 1211 pnv_eeh_dump_hub_diag_common(data); 1212 break; 1213 case OPAL_P7IOC_DIAG_TYPE_I2C: 1214 pr_info("P7IOC diag-data for I2C\n\n"); 1215 pnv_eeh_dump_hub_diag_common(data); 1216 break; 1217 default: 1218 pr_warn("%s: Invalid type of HUB#%llx diag-data (%d)\n", 1219 __func__, phb->hub_id, data->type); 1220 } 1221 } 1222 1223 static int pnv_eeh_get_pe(struct pci_controller *hose, 1224 u16 pe_no, struct eeh_pe **pe) 1225 { 1226 struct pnv_phb *phb = hose->private_data; 1227 struct pnv_ioda_pe *pnv_pe; 1228 struct eeh_pe *dev_pe; 1229 struct eeh_dev edev; 1230 1231 /* 1232 * If PHB supports compound PE, to fetch 1233 * the master PE because slave PE is invisible 1234 * to EEH core. 1235 */ 1236 pnv_pe = &phb->ioda.pe_array[pe_no]; 1237 if (pnv_pe->flags & PNV_IODA_PE_SLAVE) { 1238 pnv_pe = pnv_pe->master; 1239 WARN_ON(!pnv_pe || 1240 !(pnv_pe->flags & PNV_IODA_PE_MASTER)); 1241 pe_no = pnv_pe->pe_number; 1242 } 1243 1244 /* Find the PE according to PE# */ 1245 memset(&edev, 0, sizeof(struct eeh_dev)); 1246 edev.phb = hose; 1247 edev.pe_config_addr = pe_no; 1248 dev_pe = eeh_pe_get(&edev); 1249 if (!dev_pe) 1250 return -EEXIST; 1251 1252 /* Freeze the (compound) PE */ 1253 *pe = dev_pe; 1254 if (!(dev_pe->state & EEH_PE_ISOLATED)) 1255 phb->freeze_pe(phb, pe_no); 1256 1257 /* 1258 * At this point, we're sure the (compound) PE should 1259 * have been frozen. However, we still need poke until 1260 * hitting the frozen PE on top level. 1261 */ 1262 dev_pe = dev_pe->parent; 1263 while (dev_pe && !(dev_pe->type & EEH_PE_PHB)) { 1264 int ret; 1265 int active_flags = (EEH_STATE_MMIO_ACTIVE | 1266 EEH_STATE_DMA_ACTIVE); 1267 1268 ret = eeh_ops->get_state(dev_pe, NULL); 1269 if (ret <= 0 || (ret & active_flags) == active_flags) { 1270 dev_pe = dev_pe->parent; 1271 continue; 1272 } 1273 1274 /* Frozen parent PE */ 1275 *pe = dev_pe; 1276 if (!(dev_pe->state & EEH_PE_ISOLATED)) 1277 phb->freeze_pe(phb, dev_pe->addr); 1278 1279 /* Next one */ 1280 dev_pe = dev_pe->parent; 1281 } 1282 1283 return 0; 1284 } 1285 1286 /** 1287 * pnv_eeh_next_error - Retrieve next EEH error to handle 1288 * @pe: Affected PE 1289 * 1290 * The function is expected to be called by EEH core while it gets 1291 * special EEH event (without binding PE). The function calls to 1292 * OPAL APIs for next error to handle. The informational error is 1293 * handled internally by platform. However, the dead IOC, dead PHB, 1294 * fenced PHB and frozen PE should be handled by EEH core eventually. 1295 */ 1296 static int pnv_eeh_next_error(struct eeh_pe **pe) 1297 { 1298 struct pci_controller *hose; 1299 struct pnv_phb *phb; 1300 struct eeh_pe *phb_pe, *parent_pe; 1301 __be64 frozen_pe_no; 1302 __be16 err_type, severity; 1303 int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 1304 long rc; 1305 int state, ret = EEH_NEXT_ERR_NONE; 1306 1307 /* 1308 * While running here, it's safe to purge the event queue. The 1309 * event should still be masked. 1310 */ 1311 eeh_remove_event(NULL, false); 1312 1313 list_for_each_entry(hose, &hose_list, list_node) { 1314 /* 1315 * If the subordinate PCI buses of the PHB has been 1316 * removed or is exactly under error recovery, we 1317 * needn't take care of it any more. 1318 */ 1319 phb = hose->private_data; 1320 phb_pe = eeh_phb_pe_get(hose); 1321 if (!phb_pe || (phb_pe->state & EEH_PE_ISOLATED)) 1322 continue; 1323 1324 rc = opal_pci_next_error(phb->opal_id, 1325 &frozen_pe_no, &err_type, &severity); 1326 if (rc != OPAL_SUCCESS) { 1327 pr_devel("%s: Invalid return value on " 1328 "PHB#%x (0x%lx) from opal_pci_next_error", 1329 __func__, hose->global_number, rc); 1330 continue; 1331 } 1332 1333 /* If the PHB doesn't have error, stop processing */ 1334 if (be16_to_cpu(err_type) == OPAL_EEH_NO_ERROR || 1335 be16_to_cpu(severity) == OPAL_EEH_SEV_NO_ERROR) { 1336 pr_devel("%s: No error found on PHB#%x\n", 1337 __func__, hose->global_number); 1338 continue; 1339 } 1340 1341 /* 1342 * Processing the error. We're expecting the error with 1343 * highest priority reported upon multiple errors on the 1344 * specific PHB. 1345 */ 1346 pr_devel("%s: Error (%d, %d, %llu) on PHB#%x\n", 1347 __func__, be16_to_cpu(err_type), 1348 be16_to_cpu(severity), be64_to_cpu(frozen_pe_no), 1349 hose->global_number); 1350 switch (be16_to_cpu(err_type)) { 1351 case OPAL_EEH_IOC_ERROR: 1352 if (be16_to_cpu(severity) == OPAL_EEH_SEV_IOC_DEAD) { 1353 pr_err("EEH: dead IOC detected\n"); 1354 ret = EEH_NEXT_ERR_DEAD_IOC; 1355 } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { 1356 pr_info("EEH: IOC informative error " 1357 "detected\n"); 1358 pnv_eeh_get_and_dump_hub_diag(hose); 1359 ret = EEH_NEXT_ERR_NONE; 1360 } 1361 1362 break; 1363 case OPAL_EEH_PHB_ERROR: 1364 if (be16_to_cpu(severity) == OPAL_EEH_SEV_PHB_DEAD) { 1365 *pe = phb_pe; 1366 pr_err("EEH: dead PHB#%x detected, " 1367 "location: %s\n", 1368 hose->global_number, 1369 eeh_pe_loc_get(phb_pe)); 1370 ret = EEH_NEXT_ERR_DEAD_PHB; 1371 } else if (be16_to_cpu(severity) == 1372 OPAL_EEH_SEV_PHB_FENCED) { 1373 *pe = phb_pe; 1374 pr_err("EEH: Fenced PHB#%x detected, " 1375 "location: %s\n", 1376 hose->global_number, 1377 eeh_pe_loc_get(phb_pe)); 1378 ret = EEH_NEXT_ERR_FENCED_PHB; 1379 } else if (be16_to_cpu(severity) == OPAL_EEH_SEV_INF) { 1380 pr_info("EEH: PHB#%x informative error " 1381 "detected, location: %s\n", 1382 hose->global_number, 1383 eeh_pe_loc_get(phb_pe)); 1384 pnv_eeh_get_phb_diag(phb_pe); 1385 pnv_pci_dump_phb_diag_data(hose, phb_pe->data); 1386 ret = EEH_NEXT_ERR_NONE; 1387 } 1388 1389 break; 1390 case OPAL_EEH_PE_ERROR: 1391 /* 1392 * If we can't find the corresponding PE, we 1393 * just try to unfreeze. 1394 */ 1395 if (pnv_eeh_get_pe(hose, 1396 be64_to_cpu(frozen_pe_no), pe)) { 1397 pr_info("EEH: Clear non-existing PHB#%x-PE#%llx\n", 1398 hose->global_number, be64_to_cpu(frozen_pe_no)); 1399 pr_info("EEH: PHB location: %s\n", 1400 eeh_pe_loc_get(phb_pe)); 1401 1402 /* Dump PHB diag-data */ 1403 rc = opal_pci_get_phb_diag_data2(phb->opal_id, 1404 phb->diag.blob, PNV_PCI_DIAG_BUF_SIZE); 1405 if (rc == OPAL_SUCCESS) 1406 pnv_pci_dump_phb_diag_data(hose, 1407 phb->diag.blob); 1408 1409 /* Try best to clear it */ 1410 opal_pci_eeh_freeze_clear(phb->opal_id, 1411 frozen_pe_no, 1412 OPAL_EEH_ACTION_CLEAR_FREEZE_ALL); 1413 ret = EEH_NEXT_ERR_NONE; 1414 } else if ((*pe)->state & EEH_PE_ISOLATED || 1415 eeh_pe_passed(*pe)) { 1416 ret = EEH_NEXT_ERR_NONE; 1417 } else { 1418 pr_err("EEH: Frozen PE#%x " 1419 "on PHB#%x detected\n", 1420 (*pe)->addr, 1421 (*pe)->phb->global_number); 1422 pr_err("EEH: PE location: %s, " 1423 "PHB location: %s\n", 1424 eeh_pe_loc_get(*pe), 1425 eeh_pe_loc_get(phb_pe)); 1426 ret = EEH_NEXT_ERR_FROZEN_PE; 1427 } 1428 1429 break; 1430 default: 1431 pr_warn("%s: Unexpected error type %d\n", 1432 __func__, be16_to_cpu(err_type)); 1433 } 1434 1435 /* 1436 * EEH core will try recover from fenced PHB or 1437 * frozen PE. In the time for frozen PE, EEH core 1438 * enable IO path for that before collecting logs, 1439 * but it ruins the site. So we have to dump the 1440 * log in advance here. 1441 */ 1442 if ((ret == EEH_NEXT_ERR_FROZEN_PE || 1443 ret == EEH_NEXT_ERR_FENCED_PHB) && 1444 !((*pe)->state & EEH_PE_ISOLATED)) { 1445 eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); 1446 pnv_eeh_get_phb_diag(*pe); 1447 1448 if (eeh_has_flag(EEH_EARLY_DUMP_LOG)) 1449 pnv_pci_dump_phb_diag_data((*pe)->phb, 1450 (*pe)->data); 1451 } 1452 1453 /* 1454 * We probably have the frozen parent PE out there and 1455 * we need have to handle frozen parent PE firstly. 1456 */ 1457 if (ret == EEH_NEXT_ERR_FROZEN_PE) { 1458 parent_pe = (*pe)->parent; 1459 while (parent_pe) { 1460 /* Hit the ceiling ? */ 1461 if (parent_pe->type & EEH_PE_PHB) 1462 break; 1463 1464 /* Frozen parent PE ? */ 1465 state = eeh_ops->get_state(parent_pe, NULL); 1466 if (state > 0 && 1467 (state & active_flags) != active_flags) 1468 *pe = parent_pe; 1469 1470 /* Next parent level */ 1471 parent_pe = parent_pe->parent; 1472 } 1473 1474 /* We possibly migrate to another PE */ 1475 eeh_pe_state_mark(*pe, EEH_PE_ISOLATED); 1476 } 1477 1478 /* 1479 * If we have no errors on the specific PHB or only 1480 * informative error there, we continue poking it. 1481 * Otherwise, we need actions to be taken by upper 1482 * layer. 1483 */ 1484 if (ret > EEH_NEXT_ERR_INF) 1485 break; 1486 } 1487 1488 /* Unmask the event */ 1489 if (ret == EEH_NEXT_ERR_NONE && eeh_enabled()) 1490 enable_irq(eeh_event_irq); 1491 1492 return ret; 1493 } 1494 1495 static int pnv_eeh_restore_config(struct pci_dn *pdn) 1496 { 1497 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 1498 struct pnv_phb *phb; 1499 s64 ret; 1500 1501 if (!edev) 1502 return -EEXIST; 1503 1504 phb = edev->phb->private_data; 1505 ret = opal_pci_reinit(phb->opal_id, 1506 OPAL_REINIT_PCI_DEV, edev->config_addr); 1507 if (ret) { 1508 pr_warn("%s: Can't reinit PCI dev 0x%x (%lld)\n", 1509 __func__, edev->config_addr, ret); 1510 return -EIO; 1511 } 1512 1513 return 0; 1514 } 1515 1516 static struct eeh_ops pnv_eeh_ops = { 1517 .name = "powernv", 1518 .init = pnv_eeh_init, 1519 .post_init = pnv_eeh_post_init, 1520 .probe = pnv_eeh_probe, 1521 .set_option = pnv_eeh_set_option, 1522 .get_pe_addr = pnv_eeh_get_pe_addr, 1523 .get_state = pnv_eeh_get_state, 1524 .reset = pnv_eeh_reset, 1525 .wait_state = pnv_eeh_wait_state, 1526 .get_log = pnv_eeh_get_log, 1527 .configure_bridge = pnv_eeh_configure_bridge, 1528 .err_inject = pnv_eeh_err_inject, 1529 .read_config = pnv_eeh_read_config, 1530 .write_config = pnv_eeh_write_config, 1531 .next_error = pnv_eeh_next_error, 1532 .restore_config = pnv_eeh_restore_config 1533 }; 1534 1535 /** 1536 * eeh_powernv_init - Register platform dependent EEH operations 1537 * 1538 * EEH initialization on powernv platform. This function should be 1539 * called before any EEH related functions. 1540 */ 1541 static int __init eeh_powernv_init(void) 1542 { 1543 int ret = -EINVAL; 1544 1545 eeh_set_pe_aux_size(PNV_PCI_DIAG_BUF_SIZE); 1546 ret = eeh_ops_register(&pnv_eeh_ops); 1547 if (!ret) 1548 pr_info("EEH: PowerNV platform initialized\n"); 1549 else 1550 pr_info("EEH: Failed to initialize PowerNV platform (%d)\n", ret); 1551 1552 return ret; 1553 } 1554 machine_early_initcall(powernv, eeh_powernv_init); 1555