1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright IBM Corporation 2001, 2005, 2006 4 * Copyright Dave Engebretsen & Todd Inglett 2001 5 * Copyright Linas Vepstas 2005, 2006 6 * Copyright 2001-2012 IBM Corporation. 7 * 8 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> 9 */ 10 11 #include <linux/delay.h> 12 #include <linux/sched.h> 13 #include <linux/init.h> 14 #include <linux/list.h> 15 #include <linux/pci.h> 16 #include <linux/iommu.h> 17 #include <linux/proc_fs.h> 18 #include <linux/rbtree.h> 19 #include <linux/reboot.h> 20 #include <linux/seq_file.h> 21 #include <linux/spinlock.h> 22 #include <linux/export.h> 23 #include <linux/of.h> 24 25 #include <linux/atomic.h> 26 #include <asm/debugfs.h> 27 #include <asm/eeh.h> 28 #include <asm/eeh_event.h> 29 #include <asm/io.h> 30 #include <asm/iommu.h> 31 #include <asm/machdep.h> 32 #include <asm/ppc-pci.h> 33 #include <asm/rtas.h> 34 #include <asm/pte-walk.h> 35 36 37 /** Overview: 38 * EEH, or "Enhanced Error Handling" is a PCI bridge technology for 39 * dealing with PCI bus errors that can't be dealt with within the 40 * usual PCI framework, except by check-stopping the CPU. Systems 41 * that are designed for high-availability/reliability cannot afford 42 * to crash due to a "mere" PCI error, thus the need for EEH. 43 * An EEH-capable bridge operates by converting a detected error 44 * into a "slot freeze", taking the PCI adapter off-line, making 45 * the slot behave, from the OS'es point of view, as if the slot 46 * were "empty": all reads return 0xff's and all writes are silently 47 * ignored. EEH slot isolation events can be triggered by parity 48 * errors on the address or data busses (e.g. during posted writes), 49 * which in turn might be caused by low voltage on the bus, dust, 50 * vibration, humidity, radioactivity or plain-old failed hardware. 
*
 * Note, however, that one of the leading causes of EEH slot
 * freeze events are buggy device drivers, buggy device microcode,
 * or buggy device hardware.  This is because any attempt by the
 * device to bus-master data to a memory address that is not
 * assigned to the device will trigger a slot freeze.  (The idea
 * is to prevent devices-gone-wild from corrupting system memory).
 * Buggy hardware/drivers will have a miserable time co-existing
 * with EEH.
 *
 * Ideally, a PCI device driver, when suspecting that an isolation
 * event has occurred (e.g. by reading 0xff's), will then ask EEH
 * whether this is the case, and then take appropriate steps to
 * reset the PCI slot, the PCI device, and then resume operations.
 * However, until that day, the checking is done here, with the
 * eeh_check_failure() routine embedded in the MMIO macros.  If
 * the slot is found to be isolated, an "EEH Event" is synthesized
 * and sent out for processing.
 */

/*
 * If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message.
 */
#define EEH_MAX_FAILS	2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC	(5*60*1000)

/*
 * EEH probe mode support, which is part of the flags,
 * is to support multiple platforms for EEH. Some platforms
 * like pSeries do PCI emulation based on device tree.
 * However, other platforms like powernv probe PCI devices
 * from hardware. The flag is used to distinguish that.
 * In addition, struct eeh_ops::probe would be invoked for
 * a particular OF node or PCI device so that the corresponding
 * PE would be created there.
 */
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);

/*
 * EEH allowed maximal frozen times. If one particular PE's
 * frozen count in the last hour exceeds this limit, the PE will
 * be forced to be offline permanently.
 */
u32 eeh_max_freezes = 5;

/*
 * Controls whether a recovery event should be scheduled when an
 * isolated device is discovered. This is only really useful for
 * debugging problems with the EEH core.
 */
bool eeh_debugfs_no_recover;

/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;

/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);
EXPORT_SYMBOL_GPL(confirm_error_lock);

/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);

/*
 * Buffer for reporting pci register dumps. It's here in BSS, and
 * not dynamically allocated, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN	8192
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/*
 * The struct is used to maintain the EEH global statistic
 * information. Besides, the EEH global statistics will be
 * exported to user space through procfs.
 */
struct eeh_stats {
	u64 no_device;		/* PCI device not found		*/
	u64 no_dn;		/* OF node not found		*/
	u64 no_cfg_addr;	/* Config address not found	*/
	u64 ignored_check;	/* EEH check skipped		*/
	u64 total_mmio_ffs;	/* Total EEH checks		*/
	u64 false_positives;	/* Unnecessary EEH checks	*/
	u64 slot_resets;	/* PE reset			*/
};

static struct eeh_stats eeh_stats;

/*
 * Parse the "eeh=" kernel command line option.
 * "eeh=off" disables EEH recovery entirely; "eeh=early_log"
 * requests an early dump of the EEH log.
 */
static int __init eeh_setup(char *str)
{
	if (!strcmp(str, "off"))
		eeh_add_flag(EEH_FORCE_DISABLED);
	else if (!strcmp(str, "early_log"))
		eeh_add_flag(EEH_EARLY_DUMP_LOG);

	return 1;
}
__setup("eeh=", eeh_setup);

/* Print a one-line summary of whether EEH recovery is enabled and why */
void eeh_show_enabled(void)
{
	if (eeh_has_flag(EEH_FORCE_DISABLED))
		pr_info("EEH: Recovery disabled by kernel parameter.\n");
	else if (eeh_has_flag(EEH_ENABLED))
		pr_info("EEH: Capable adapter found: recovery enabled.\n");
	else
		pr_info("EEH: No capable adapters found: recovery disabled.\n");
}

/*
 * This routine captures assorted PCI configuration space data
 * for the indicated PCI device, and puts them into a buffer
 * for RTAS error logging.
*/
static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
{
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	u32 cfg;
	int cap, i;
	int n = 0, l = 0;
	char buffer[128];

	if (!pdn) {
		pr_warn("EEH: Note: No error log for absent device.\n");
		return 0;
	}

	/* Device address: domain:bus:slot.function */
	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
		       pdn->phb->global_number, pdn->busno,
		       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
	pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
		pdn->phb->global_number, pdn->busno,
		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));

	eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);

	eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);

	/* Gather bridge-specific registers */
	if (edev->mode & EEH_DEV_BRIDGE) {
		eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);

		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
		pr_warn("EEH: Bridge control: %04x\n", cfg);
	}

	/* Dump out the PCI-X command and status regs */
	cap = edev->pcix_cap;
	if (cap) {
		eeh_ops->read_config(pdn, cap, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);

		eeh_ops->read_config(pdn, cap+4, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
		pr_warn("EEH: PCI-X status: %08x\n", cfg);
	}

	/* If PCI-E capable, dump PCI-E cap 10 */
	cap = edev->pcie_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
		pr_warn("EEH: PCI-E capabilities and status follow:\n");

		for (i=0; i<=8; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			/* Accumulate four dwords per printed line */
			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}

		}

		/* Flush the final (possibly partial) line */
		pr_warn("%s\n", buffer);
	}

	/* If AER capable, dump it */
	cap = edev->aer_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
		pr_warn("EEH: PCI-E AER capability register set follows:\n");

		for (i=0; i<=13; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			/* Accumulate four dwords per printed line */
			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E AER %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		/* Flush the final (possibly partial) line */
		pr_warn("%s\n", buffer);
	}

	return n;
}

/*
 * Helper for eeh_pe_traverse(): append one register dump per device in
 * the PE to the shared pci_regs_buf. @flag points at the running length
 * already used in the buffer.
 */
static void *eeh_dump_pe_log(struct eeh_pe *pe, void *flag)
{
	struct eeh_dev *edev, *tmp;
	size_t *plen = flag;

	eeh_pe_for_each_dev(pe, edev, tmp)
		*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
					  EEH_PCI_REGS_LOG_LEN - *plen);

	return NULL;
}

/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 *
 * This routine should be called to generate the combined log, which
 * is comprised of driver log and error log. The driver log is figured
 * out from the config space of the corresponding PCI device, while
 * the error log is fetched through platform dependent function call.
*/
void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
	size_t loglen = 0;

	/*
	 * When the PHB is fenced or dead, it's pointless to collect
	 * the data from PCI config space because it should return
	 * 0xFF's. For ER, we still retrieve the data from the PCI
	 * config space.
	 *
	 * For pHyp, we have to enable IO for log retrieval. Otherwise,
	 * 0xFF's is always returned from PCI config space.
	 *
	 * When the @severity is EEH_LOG_PERM, the PE is going to be
	 * removed. Prior to that, the drivers for devices included in
	 * the PE will be closed. The drivers rely on working IO path
	 * to bring the devices to quiet state. Otherwise, PCI traffic
	 * from those devices after they are removed is likely to cause
	 * another unexpected EEH error.
	 */
	if (!(pe->type & EEH_PE_PHB)) {
		if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG) ||
		    severity == EEH_LOG_PERM)
			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);

		/*
		 * The config space of some PCI devices can't be accessed
		 * when their PEs are in frozen state. Otherwise, fenced
		 * PHB might be seen. Those PEs are identified with flag
		 * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED
		 * is set automatically when the PE is put to EEH_PE_ISOLATED.
		 *
		 * Restoring BARs possibly triggers PCI config access in
		 * (OPAL) firmware and then causes fenced PHB. If the
		 * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's
		 * pointless to restore BARs and dump config space.
		 */
		eeh_ops->configure_bridge(pe);
		if (!(pe->state & EEH_PE_CFG_BLOCKED)) {
			eeh_pe_restore_bars(pe);

			/* Reset the dump buffer, then walk every device */
			pci_regs_buf[0] = 0;
			eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
		}
	}

	/* Hand the (possibly empty) driver log to the platform */
	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
}

/**
 * eeh_token_to_phys - Convert EEH address token to phys address
 * @token: I/O token, should be address in the form 0xA....
*
 * This routine should be called to convert virtual I/O address
 * to physical one.
 */
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
	pte_t *ptep;
	unsigned long pa;
	int hugepage_shift;

	/*
	 * We won't find hugepages here(this is iomem). Hence we are not
	 * worried about _PAGE_SPLITTING/collapse. Also we will not hit
	 * page table free, because of init_mm.
	 */
	ptep = find_init_mm_pte(token, &hugepage_shift);
	if (!ptep)
		return token;		/* no translation; pass through */

	pa = pte_pfn(*ptep);

	/*
	 * On radix we can do hugepage mappings for io, so handle that.
	 * NOTE(review): the huge branch shifts the pfn by hugepage_shift,
	 * i.e. it assumes pte_pfn() of a huge mapping yields
	 * phys >> hugepage_shift rather than phys >> PAGE_SHIFT —
	 * confirm against find_init_mm_pte() for huge PTEs.
	 */
	if (hugepage_shift) {
		pa <<= hugepage_shift;
		pa |= token & ((1ul << hugepage_shift) - 1);
	} else {
		pa <<= PAGE_SHIFT;
		pa |= token & (PAGE_SIZE - 1);
	}

	return pa;
}

/*
 * On PowerNV platform, we might already have fenced PHB there.
 * For that case, it's meaningless to recover frozen PE. Instead,
 * we have to handle fenced PHB first.
*/
static int eeh_phb_check_failure(struct eeh_pe *pe)
{
	struct eeh_pe *phb_pe;
	unsigned long flags;
	int ret;

	/* PHB PEs only exist when the platform probes PEs from PCI devices */
	if (!eeh_has_flag(EEH_PROBE_MODE_DEV))
		return -EPERM;

	/* Find the PHB PE */
	phb_pe = eeh_phb_pe_get(pe->phb);
	if (!phb_pe) {
		pr_warn("%s Can't find PE for PHB#%x\n",
			__func__, pe->phb->global_number);
		return -EEXIST;
	}

	/* If the PHB has been in problematic state */
	eeh_serialize_lock(&flags);
	if (phb_pe->state & EEH_PE_ISOLATED) {
		ret = 0;
		goto out;
	}

	/* Check PHB state; bail out if it is healthy or unqueryable */
	ret = eeh_ops->get_state(phb_pe, NULL);
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
		ret = 0;
		goto out;
	}

	/* Isolate the PHB and send event; unlock before reporting */
	eeh_pe_mark_isolated(phb_pe);
	eeh_serialize_unlock(flags);

	pr_err("EEH: PHB#%x failure detected, location: %s\n",
	       phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
	dump_stack();
	eeh_send_failure_event(phb_pe);

	return 1;
out:
	eeh_serialize_unlock(flags);
	return ret;
}

/**
 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
 * @edev: eeh device
 *
 * Check for an EEH failure for the given device node.  Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze.  This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error; otherwise returns
 * a non-zero value and queues up a slot isolation event notification.
 *
 * It is safe to call this routine in an interrupt context.
*/
int eeh_dev_check_failure(struct eeh_dev *edev)
{
	int ret;
	unsigned long flags;
	struct device_node *dn;
	struct pci_dev *dev;
	struct eeh_pe *pe, *parent_pe, *phb_pe;
	int rc = 0;
	const char *location = NULL;

	eeh_stats.total_mmio_ffs++;

	if (!eeh_enabled())
		return 0;

	if (!edev) {
		eeh_stats.no_dn++;
		return 0;
	}
	dev = eeh_dev_to_pci_dev(edev);
	pe = eeh_dev_to_pe(edev);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!pe) {
		eeh_stats.ignored_check++;
		eeh_edev_dbg(edev, "Ignored check\n");
		return 0;
	}

	if (!pe->addr && !pe->config_addr) {
		eeh_stats.no_cfg_addr++;
		return 0;
	}

	/*
	 * On PowerNV platform, we might already have fenced PHB
	 * there and we need take care of that firstly.
	 */
	ret = eeh_phb_check_failure(pe);
	if (ret > 0)
		return ret;

	/*
	 * If the PE isn't owned by us, we shouldn't check the
	 * state. Instead, let the owner handle it if the PE has
	 * been frozen.
	 */
	if (eeh_pe_passed(pe))
		return 0;

	/* If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock; as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	eeh_serialize_lock(&flags);
	rc = 1;
	if (pe->state & EEH_PE_ISOLATED) {
		pe->check_count++;
		/* Complain every EEH_MAX_FAILS reads on a frozen slot */
		if (pe->check_count % EEH_MAX_FAILS == 0) {
			dn = pci_device_to_OF_node(dev);
			if (dn)
				location = of_get_property(dn, "ibm,loc-code",
						NULL);
			eeh_edev_err(edev, "%d reads ignored for recovering device at location=%s driver=%s\n",
				pe->check_count,
				location ? location : "unknown",
				eeh_driver_name(dev));
			eeh_edev_err(edev, "Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
			dump_stack();
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure.  This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = eeh_ops->get_state(pe, NULL);

	/* Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 * We will punt with the following conditions: Failure to get
	 * PE's state, EEH not support and Permanently unavailable
	 * state, PE is in good state.
	 */
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
		eeh_stats.false_positives++;
		pe->false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/*
	 * It should be corner case that the parent PE has been
	 * put into frozen state as well. We should take care
	 * of that at first.
	 */
	parent_pe = pe->parent;
	while (parent_pe) {
		/* Hit the ceiling ? */
		if (parent_pe->type & EEH_PE_PHB)
			break;

		/* Frozen parent PE ? Recover at the highest frozen level */
		ret = eeh_ops->get_state(parent_pe, NULL);
		if (ret > 0 && !eeh_state_active(ret)) {
			pe = parent_pe;
			pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n",
			       pe->phb->global_number, pe->addr,
			       pe->phb->global_number, parent_pe->addr);
		}

		/* Next parent level */
		parent_pe = parent_pe->parent;
	}

	eeh_stats.slot_resets++;

	/* Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges.
	 */
	eeh_pe_mark_isolated(pe);
	eeh_serialize_unlock(flags);

	/* Most EEH events are due to device driver bugs.  Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened.  So print that out.
	 */
	phb_pe = eeh_phb_pe_get(pe->phb);
	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
	       pe->phb->global_number, pe->addr);
	pr_err("EEH: PE location: %s, PHB location: %s\n",
	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
	dump_stack();

	eeh_send_failure_event(pe);

	return 1;

dn_unlock:
	eeh_serialize_unlock(flags);
	return rc;
}

EXPORT_SYMBOL_GPL(eeh_dev_check_failure);

/**
 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
 * @token: I/O address
 *
 * Check for an EEH failure at the given I/O address.  Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze event.  This routine
 * will query firmware for the EEH status.
 *
 * Note this routine is safe to call in an interrupt context.
 */
int eeh_check_failure(const volatile void __iomem *token)
{
	unsigned long addr;
	struct eeh_dev *edev;

	/* Finding the phys addr + pci device; this is pretty quick. */
	addr = eeh_token_to_phys((unsigned long __force) token);
	edev = eeh_addr_cache_get_dev(addr);
	if (!edev) {
		eeh_stats.no_device++;
		return 0;
	}

	return eeh_dev_check_failure(edev);
}
EXPORT_SYMBOL(eeh_check_failure);


/**
 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
 * @pe: EEH PE
 * @function: requested EEH option, e.g. EEH_OPT_THAW_MMIO or
 *            EEH_OPT_THAW_DMA
 *
 * This routine should be called to reenable frozen MMIO or DMA
 * so that it would work correctly again. It's useful while doing
 * recovery or log collection on the indicated device.
 */
int eeh_pci_enable(struct eeh_pe *pe, int function)
{
	int active_flag, rc;

	/*
	 * pHyp doesn't allow to enable IO or DMA on unfrozen PE.
	 * Also, it's pointless to enable them on unfrozen PE.
So
	 * we have to check before enabling IO or DMA.
	 */
	switch (function) {
	case EEH_OPT_THAW_MMIO:
		active_flag = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED;
		break;
	case EEH_OPT_THAW_DMA:
		active_flag = EEH_STATE_DMA_ACTIVE;
		break;
	case EEH_OPT_DISABLE:
	case EEH_OPT_ENABLE:
	case EEH_OPT_FREEZE_PE:
		active_flag = 0;	/* no pre/post state check needed */
		break;
	default:
		pr_warn("%s: Invalid function %d\n",
			__func__, function);
		return -EINVAL;
	}

	/*
	 * Check if IO or DMA has been enabled before
	 * enabling them.
	 */
	if (active_flag) {
		rc = eeh_ops->get_state(pe, NULL);
		if (rc < 0)
			return rc;

		/* Needn't enable it at all */
		if (rc == EEH_STATE_NOT_SUPPORT)
			return 0;

		/* It's already enabled */
		if (rc & active_flag)
			return 0;
	}


	/* Issue the request */
	rc = eeh_ops->set_option(pe, function);
	if (rc)
		pr_warn("%s: Unexpected state change %d on "
			"PHB#%x-PE#%x, err=%d\n",
			__func__, function, pe->phb->global_number,
			pe->addr, rc);

	/* Check if the request is finished successfully */
	if (active_flag) {
		rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if (rc < 0)
			return rc;

		if (rc & active_flag)
			return 0;

		return -EIO;
	}

	return rc;
}

/*
 * Helper for eeh_pe_dev_traverse(): bring every device in the PE except
 * @userdata to D0, save its config state and mask INTx, quiescing it
 * ahead of a reset. The caller handles @userdata itself.
 */
static void eeh_disable_and_save_dev_state(struct eeh_dev *edev,
					   void *userdata)
{
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	/*
	 * The caller should have disabled and saved the
	 * state for the specified device
	 */
	if (!pdev || pdev == dev)
		return;

	/* Ensure we have D0 power state */
	pci_set_power_state(pdev, PCI_D0);

	/* Save device state */
	pci_save_state(pdev);

	/*
	 * Disable device to avoid any DMA traffic and
	 * interrupt from the device
	 */
	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);
}
/*
 * Helper for eeh_pe_dev_traverse(): re-apply firmware customization and
 * restore the previously saved config state of every device in the PE
 * except @userdata (the caller restores that one itself).
 */
static void eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
{
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	if (!pdev)
		return;

	/* Apply customization from firmware */
	if (pdn && eeh_ops->restore_config)
		eeh_ops->restore_config(pdn);

	/* The caller should restore state for the specified device */
	if (pdev != dev)
		pci_restore_state(pdev);
}

/*
 * Restore the PCIe configuration of a VF: MaxPayloadSize, completion
 * timeout, SERR/parity checking, error reporting and ECRC, using the
 * capability offsets cached in the eeh_dev.
 */
int eeh_restore_vf_config(struct pci_dn *pdn)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	u32 devctl, cmd, cap2, aer_capctl;
	int old_mps;

	if (edev->pcie_cap) {
		/* Restore MaxPayloadSize from the cached pdn->mps */
		old_mps = (ffs(pdn->mps) - 8) << 5;
		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				     2, &devctl);
		devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
		devctl |= old_mps;
		eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				      2, devctl);

		/* Disable Completion Timeout if possible */
		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
				     4, &cap2);
		if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) {
			eeh_ops->read_config(pdn,
					     edev->pcie_cap + PCI_EXP_DEVCTL2,
					     4, &cap2);
			cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS;
			eeh_ops->write_config(pdn,
					      edev->pcie_cap + PCI_EXP_DEVCTL2,
					      4, cap2);
		}
	}

	/* Enable SERR and parity checking */
	eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
	cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
	eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);

	/* Enable reporting of various errors (but not correctable ones) */
	if (edev->pcie_cap) {
		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				     2, &devctl);
		devctl &= ~PCI_EXP_DEVCTL_CERE;
		devctl |= (PCI_EXP_DEVCTL_NFERE |
			   PCI_EXP_DEVCTL_FERE |
			   PCI_EXP_DEVCTL_URRE);
		eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				      2, devctl);
	}

	/* Enable ECRC generation and check */
	if (edev->pcie_cap && edev->aer_cap) {
		eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
				     4, &aer_capctl);
		aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
		eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
				      4, aer_capctl);
	}

	return 0;
}

/**
 * pcibios_set_pcie_reset_state - Set PCI-E reset state
 * @dev: pci device struct
 * @state: reset state to enter
 *
 * Return value:
 *	0 if success
 */
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
	struct eeh_pe *pe = eeh_dev_to_pe(edev);

	if (!pe) {
		pr_err("%s: No PE found on PCI device %s\n",
		       __func__, pci_name(dev));
		return -EINVAL;
	}

	switch (state) {
	case pcie_deassert_reset:
		/* Leaving reset: deactivate, thaw, restore device state */
		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
		eeh_unfreeze_pe(pe);
		if (!(pe->type & EEH_PE_VF))
			eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
		eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
		break;
	case pcie_hot_reset:
		/* Isolate, freeze and quiesce devices before a hot reset */
		eeh_pe_mark_isolated(pe);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
		if (!(pe->type & EEH_PE_VF))
			eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		eeh_ops->reset(pe, EEH_RESET_HOT);
		break;
	case pcie_warm_reset:
		/* Same as hot reset, but perform a fundamental reset */
		eeh_pe_mark_isolated(pe);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
		if (!(pe->type & EEH_PE_VF))
			eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
		break;
	default:
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true);
		return -EINVAL;
	};

	return 0;
}

/**
 *
eeh_set_pe_freset - Check the required reset for the indicated device 867 * @data: EEH device 868 * @flag: return value 869 * 870 * Each device might have its preferred reset type: fundamental or 871 * hot reset. The routine is used to collected the information for 872 * the indicated device and its children so that the bunch of the 873 * devices could be reset properly. 874 */ 875 static void eeh_set_dev_freset(struct eeh_dev *edev, void *flag) 876 { 877 struct pci_dev *dev; 878 unsigned int *freset = (unsigned int *)flag; 879 880 dev = eeh_dev_to_pci_dev(edev); 881 if (dev) 882 *freset |= dev->needs_freset; 883 } 884 885 static void eeh_pe_refreeze_passed(struct eeh_pe *root) 886 { 887 struct eeh_pe *pe; 888 int state; 889 890 eeh_for_each_pe(root, pe) { 891 if (eeh_pe_passed(pe)) { 892 state = eeh_ops->get_state(pe, NULL); 893 if (state & 894 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED)) { 895 pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n", 896 pe->phb->global_number, pe->addr); 897 eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE); 898 } 899 } 900 } 901 } 902 903 /** 904 * eeh_pe_reset_full - Complete a full reset process on the indicated PE 905 * @pe: EEH PE 906 * 907 * This function executes a full reset procedure on a PE, including setting 908 * the appropriate flags, performing a fundamental or hot reset, and then 909 * deactivating the reset status. It is designed to be used within the EEH 910 * subsystem, as opposed to eeh_pe_reset which is exported to drivers and 911 * only performs a single operation at a time. 912 * 913 * This function will attempt to reset a PE three times before failing. 914 */ 915 int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed) 916 { 917 int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); 918 int type = EEH_RESET_HOT; 919 unsigned int freset = 0; 920 int i, state = 0, ret; 921 922 /* 923 * Determine the type of reset to perform - hot or fundamental. 
924 * Hot reset is the default operation, unless any device under the 925 * PE requires a fundamental reset. 926 */ 927 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); 928 929 if (freset) 930 type = EEH_RESET_FUNDAMENTAL; 931 932 /* Mark the PE as in reset state and block config space accesses */ 933 eeh_pe_state_mark(pe, reset_state); 934 935 /* Make three attempts at resetting the bus */ 936 for (i = 0; i < 3; i++) { 937 ret = eeh_pe_reset(pe, type, include_passed); 938 if (!ret) 939 ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE, 940 include_passed); 941 if (ret) { 942 ret = -EIO; 943 pr_warn("EEH: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n\n", 944 state, pe->phb->global_number, pe->addr, i + 1); 945 continue; 946 } 947 if (i) 948 pr_warn("EEH: PHB#%x-PE#%x: Successful reset (attempt %d)\n", 949 pe->phb->global_number, pe->addr, i + 1); 950 951 /* Wait until the PE is in a functioning state */ 952 state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 953 if (state < 0) { 954 pr_warn("EEH: Unrecoverable slot failure on PHB#%x-PE#%x", 955 pe->phb->global_number, pe->addr); 956 ret = -ENOTRECOVERABLE; 957 break; 958 } 959 if (eeh_state_active(state)) 960 break; 961 else 962 pr_warn("EEH: PHB#%x-PE#%x: Slot inactive after reset: 0x%x (attempt %d)\n", 963 pe->phb->global_number, pe->addr, state, i + 1); 964 } 965 966 /* Resetting the PE may have unfrozen child PEs. If those PEs have been 967 * (potentially) passed through to a guest, re-freeze them: 968 */ 969 if (!include_passed) 970 eeh_pe_refreeze_passed(pe); 971 972 eeh_pe_state_clear(pe, reset_state, true); 973 return ret; 974 } 975 976 /** 977 * eeh_save_bars - Save device bars 978 * @edev: PCI device associated EEH device 979 * 980 * Save the values of the device bars. Unlike the restore 981 * routine, this routine is *not* recursive. This is because 982 * PCI devices are added individually; but, for the restore, 983 * an entire slot is reset at a time. 
*/
void eeh_save_bars(struct eeh_dev *edev)
{
	struct pci_dn *pdn;
	int i;

	pdn = eeh_dev_to_pdn(edev);
	if (!pdn)
		return;

	/* Cache the first 64 bytes of config space (header incl. BARs) */
	for (i = 0; i < 16; i++)
		eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]);

	/*
	 * For PCI bridges including root port, we need enable bus
	 * master explicitly. Otherwise, it can't fetch IODA table
	 * entries correctly. So we cache the bit in advance so that
	 * we can restore it after reset, either PHB range or PE range.
	 */
	if (edev->mode & EEH_DEV_BRIDGE)
		edev->config_space[1] |= PCI_COMMAND_MASTER;
}

/**
 * eeh_ops_register - Register platform dependent EEH operations
 * @ops: platform dependent EEH operations
 *
 * Register the platform dependent EEH operation callback
 * functions. The platform should call this function before
 * any other EEH operations.
 */
int __init eeh_ops_register(struct eeh_ops *ops)
{
	if (!ops->name) {
		pr_warn("%s: Invalid EEH ops name for %p\n",
			__func__, ops);
		return -EINVAL;
	}

	/* Only one platform's ops may be registered */
	if (eeh_ops && eeh_ops != ops) {
		pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
			__func__, eeh_ops->name, ops->name);
		return -EEXIST;
	}

	eeh_ops = ops;

	return 0;
}

/**
 * eeh_ops_unregister - Unregister platform dependent EEH operations
 * @name: name of EEH platform operations
 *
 * Unregister the platform dependent EEH operation callback
 * functions.
*/
int __exit eeh_ops_unregister(const char *name)
{
	if (!name || !strlen(name)) {
		pr_warn("%s: Invalid EEH ops name\n",
			__func__);
		return -EINVAL;
	}

	/* Only unregister if @name matches the currently registered ops */
	if (eeh_ops && !strcmp(eeh_ops->name, name)) {
		eeh_ops = NULL;
		return 0;
	}

	return -EEXIST;
}

/*
 * Reboot notifier: clear EEH_ENABLED so that no further recovery is
 * attempted while the system is going down.
 */
static int eeh_reboot_notifier(struct notifier_block *nb,
			       unsigned long action, void *unused)
{
	eeh_clear_flag(EEH_ENABLED);
	return NOTIFY_DONE;
}

static struct notifier_block eeh_reboot_nb = {
	.notifier_call = eeh_reboot_notifier,
};

/**
 * eeh_init - EEH initialization
 *
 * Initialize EEH by trying to enable it for all of the adapters in the system.
 * As a side effect we can determine here if eeh is supported at all.
 * Note that we leave EEH on so failed config cycles won't cause a machine
 * check.  If a user turns off EEH for a particular adapter they are really
 * telling Linux to ignore errors.  Some hardware (e.g. POWER5) won't
 * grant access to a slot if EEH isn't enabled, and so we always enable
 * EEH for all slots/all devices.
 *
 * The eeh-force-off option disables EEH checking globally, for all slots.
 * Even if force-off is set, the EEH hardware is still enabled, so that
 * newer systems can boot.
1082 */ 1083 static int eeh_init(void) 1084 { 1085 struct pci_controller *hose, *tmp; 1086 int ret = 0; 1087 1088 /* Register reboot notifier */ 1089 ret = register_reboot_notifier(&eeh_reboot_nb); 1090 if (ret) { 1091 pr_warn("%s: Failed to register notifier (%d)\n", 1092 __func__, ret); 1093 return ret; 1094 } 1095 1096 /* call platform initialization function */ 1097 if (!eeh_ops) { 1098 pr_warn("%s: Platform EEH operation not found\n", 1099 __func__); 1100 return -EEXIST; 1101 } else if ((ret = eeh_ops->init())) 1102 return ret; 1103 1104 /* Initialize PHB PEs */ 1105 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) 1106 eeh_dev_phb_init_dynamic(hose); 1107 1108 eeh_addr_cache_init(); 1109 1110 /* Initialize EEH event */ 1111 return eeh_event_init(); 1112 } 1113 1114 core_initcall_sync(eeh_init); 1115 1116 /** 1117 * eeh_add_device_early - Enable EEH for the indicated device node 1118 * @pdn: PCI device node for which to set up EEH 1119 * 1120 * This routine must be used to perform EEH initialization for PCI 1121 * devices that were added after system boot (e.g. hotplug, dlpar). 1122 * This routine must be called before any i/o is performed to the 1123 * adapter (inluding any config-space i/o). 1124 * Whether this actually enables EEH or not for this device depends 1125 * on the CEC architecture, type of the device, on earlier boot 1126 * command-line arguments & etc. 1127 */ 1128 void eeh_add_device_early(struct pci_dn *pdn) 1129 { 1130 struct pci_controller *phb = pdn ? 
pdn->phb : NULL; 1131 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 1132 1133 if (!edev) 1134 return; 1135 1136 if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) 1137 return; 1138 1139 /* USB Bus children of PCI devices will not have BUID's */ 1140 if (NULL == phb || 1141 (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid)) 1142 return; 1143 1144 eeh_ops->probe(pdn, NULL); 1145 } 1146 1147 /** 1148 * eeh_add_device_tree_early - Enable EEH for the indicated device 1149 * @pdn: PCI device node 1150 * 1151 * This routine must be used to perform EEH initialization for the 1152 * indicated PCI device that was added after system boot (e.g. 1153 * hotplug, dlpar). 1154 */ 1155 void eeh_add_device_tree_early(struct pci_dn *pdn) 1156 { 1157 struct pci_dn *n; 1158 1159 if (!pdn) 1160 return; 1161 1162 list_for_each_entry(n, &pdn->child_list, list) 1163 eeh_add_device_tree_early(n); 1164 eeh_add_device_early(pdn); 1165 } 1166 EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); 1167 1168 /** 1169 * eeh_add_device_late - Perform EEH initialization for the indicated pci device 1170 * @dev: pci device for which to set up EEH 1171 * 1172 * This routine must be used to complete EEH initialization for PCI 1173 * devices that were added after system boot (e.g. hotplug, dlpar). 1174 */ 1175 void eeh_add_device_late(struct pci_dev *dev) 1176 { 1177 struct pci_dn *pdn; 1178 struct eeh_dev *edev; 1179 1180 if (!dev) 1181 return; 1182 1183 pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); 1184 edev = pdn_to_eeh_dev(pdn); 1185 eeh_edev_dbg(edev, "Adding device\n"); 1186 if (edev->pdev == dev) { 1187 eeh_edev_dbg(edev, "Device already referenced!\n"); 1188 return; 1189 } 1190 1191 /* 1192 * The EEH cache might not be removed correctly because of 1193 * unbalanced kref to the device during unplug time, which 1194 * relies on pcibios_release_device(). So we have to remove 1195 * that here explicitly. 
1196 */ 1197 if (edev->pdev) { 1198 eeh_rmv_from_parent_pe(edev); 1199 eeh_addr_cache_rmv_dev(edev->pdev); 1200 eeh_sysfs_remove_device(edev->pdev); 1201 edev->mode &= ~EEH_DEV_SYSFS; 1202 1203 /* 1204 * We definitely should have the PCI device removed 1205 * though it wasn't correctly. So we needn't call 1206 * into error handler afterwards. 1207 */ 1208 edev->mode |= EEH_DEV_NO_HANDLER; 1209 1210 edev->pdev = NULL; 1211 dev->dev.archdata.edev = NULL; 1212 } 1213 1214 if (eeh_has_flag(EEH_PROBE_MODE_DEV)) 1215 eeh_ops->probe(pdn, NULL); 1216 1217 edev->pdev = dev; 1218 dev->dev.archdata.edev = edev; 1219 1220 eeh_addr_cache_insert_dev(dev); 1221 } 1222 1223 /** 1224 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus 1225 * @bus: PCI bus 1226 * 1227 * This routine must be used to perform EEH initialization for PCI 1228 * devices which are attached to the indicated PCI bus. The PCI bus 1229 * is added after system boot through hotplug or dlpar. 1230 */ 1231 void eeh_add_device_tree_late(struct pci_bus *bus) 1232 { 1233 struct pci_dev *dev; 1234 1235 if (eeh_has_flag(EEH_FORCE_DISABLED)) 1236 return; 1237 list_for_each_entry(dev, &bus->devices, bus_list) { 1238 eeh_add_device_late(dev); 1239 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1240 struct pci_bus *subbus = dev->subordinate; 1241 if (subbus) 1242 eeh_add_device_tree_late(subbus); 1243 } 1244 } 1245 } 1246 EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); 1247 1248 /** 1249 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus 1250 * @bus: PCI bus 1251 * 1252 * This routine must be used to add EEH sysfs files for PCI 1253 * devices which are attached to the indicated PCI bus. The PCI bus 1254 * is added after system boot through hotplug or dlpar. 
 */
void eeh_add_sysfs_files(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_sysfs_add_device(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_sysfs_files(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);

/**
 * eeh_remove_device - Undo EEH setup for the indicated pci device
 * @dev: pci device to be removed
 *
 * This routine should be called when a device is removed from
 * a running system (e.g. by hotplug or dlpar). It unregisters
 * the PCI device from the EEH subsystem. I/O errors affecting
 * this device will no longer be detected after this call; thus,
 * i/o errors affecting this slot may leave this device unusable.
 */
void eeh_remove_device(struct pci_dev *dev)
{
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;
	edev = pci_dev_to_eeh_dev(dev);

	/* Unregister the device with the EEH/PCI address search system */
	dev_dbg(&dev->dev, "EEH: Removing device\n");

	if (!edev || !edev->pdev || !edev->pe) {
		dev_dbg(&dev->dev, "EEH: Device not referenced!\n");
		return;
	}

	/*
	 * During the hotplug for EEH error recovery, we need the EEH
	 * device attached to the parent PE in order for BAR restore
	 * a bit later. So we keep it for BAR restore and remove it
	 * from the parent PE during the BAR restore.
	 */
	edev->pdev = NULL;

	/*
	 * The flag "in_error" is used to trace EEH devices for VFs
	 * in error state or not. It's set in eeh_report_error(). If
	 * it's not set, eeh_report_{reset,resume}() won't be called
	 * for the VF EEH device.
	 */
	edev->in_error = false;
	dev->dev.archdata.edev = NULL;
	if (!(edev->pe->state & EEH_PE_KEEP))
		eeh_rmv_from_parent_pe(edev);
	else
		edev->mode |= EEH_DEV_DISCONNECTED;

	/*
	 * We're removing from the PCI subsystem, that means
	 * the PCI device driver can't support EEH or not
	 * well. So we rely on hotplug completely to do recovery
	 * for the specific PCI device.
	 */
	edev->mode |= EEH_DEV_NO_HANDLER;

	eeh_addr_cache_rmv_dev(dev);
	eeh_sysfs_remove_device(dev);
	edev->mode &= ~EEH_DEV_SYSFS;
}

/*
 * eeh_unfreeze_pe - Re-enable MMIO and then DMA on a frozen PE.
 * Returns 0 on success or the failing eeh_pci_enable() error code.
 */
int eeh_unfreeze_pe(struct eeh_pe *pe)
{
	int ret;

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
	if (ret) {
		pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
	if (ret) {
		pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	return ret;
}


/*
 * Devices that require a full PE reset (rather than a simple thaw)
 * when a pass-through owner takes over a frozen PE.
 */
static struct pci_device_id eeh_reset_ids[] = {
	{ PCI_DEVICE(0x19a2, 0x0710) },	/* Emulex, BE */
	{ PCI_DEVICE(0x10df, 0xe220) },	/* Emulex, Lancer */
	{ PCI_DEVICE(0x14e4, 0x1657) },	/* Broadcom BCM5719 */
	{ 0 }
};

/*
 * eeh_pe_change_owner - Prepare a PE for a new owner (e.g. pass-through).
 * If the PE is frozen and contains a device on the eeh_reset_ids list,
 * a full reset-and-recover is issued; otherwise the PE is just thawed.
 * Returns 0 when nothing needs doing or on success.
 */
static int eeh_pe_change_owner(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	struct pci_device_id *id;
	int ret;

	/* Check PE state */
	ret = eeh_ops->get_state(pe, NULL);
	if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
		return 0;

	/* Unfrozen PE, nothing to do */
	if (eeh_state_active(ret))
		return 0;

	/* Frozen PE, check if it needs PE level reset */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		/* Wildcard match against the quirk table above */
		for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
			if (id->vendor != PCI_ANY_ID &&
			    id->vendor != pdev->vendor)
				continue;
			if (id->device != PCI_ANY_ID &&
			    id->device != pdev->device)
				continue;
			if (id->subvendor != PCI_ANY_ID &&
			    id->subvendor != pdev->subsystem_vendor)
				continue;
			if (id->subdevice != PCI_ANY_ID &&
			    id->subdevice != pdev->subsystem_device)
				continue;

			return eeh_pe_reset_and_recover(pe);
		}
	}

	ret = eeh_unfreeze_pe(pe);
	if (!ret)
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
	return ret;
}

/**
 * eeh_dev_open - Increase count of pass through devices for PE
 * @pdev: PCI device
 *
 * Increase count of passed through devices for the indicated
 * PE. In the result, the EEH errors detected on the PE won't be
 * reported. The PE owner will be responsible for detection
 * and recovery.
 */
int eeh_dev_open(struct pci_dev *pdev)
{
	struct eeh_dev *edev;
	int ret = -ENODEV;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		goto out;

	/*
	 * The PE might have been put into frozen state, but we
	 * didn't detect that yet. The passed through PCI devices
	 * in frozen PE won't work properly. Clear the frozen state
	 * in advance.
	 */
	ret = eeh_pe_change_owner(edev->pe);
	if (ret)
		goto out;

	/* Increase PE's pass through count */
	atomic_inc(&edev->pe->pass_dev_cnt);
	mutex_unlock(&eeh_dev_mutex);

	return 0;
out:
	mutex_unlock(&eeh_dev_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(eeh_dev_open);

/**
 * eeh_dev_release - Decrease count of pass through devices for PE
 * @pdev: PCI device
 *
 * Decrease count of pass through devices for the indicated PE.
If 1457 * there is no passed through device in PE, the EEH errors detected 1458 * on the PE will be reported and handled as usual. 1459 */ 1460 void eeh_dev_release(struct pci_dev *pdev) 1461 { 1462 struct eeh_dev *edev; 1463 1464 mutex_lock(&eeh_dev_mutex); 1465 1466 /* No PCI device ? */ 1467 if (!pdev) 1468 goto out; 1469 1470 /* No EEH device ? */ 1471 edev = pci_dev_to_eeh_dev(pdev); 1472 if (!edev || !edev->pe || !eeh_pe_passed(edev->pe)) 1473 goto out; 1474 1475 /* Decrease PE's pass through count */ 1476 WARN_ON(atomic_dec_if_positive(&edev->pe->pass_dev_cnt) < 0); 1477 eeh_pe_change_owner(edev->pe); 1478 out: 1479 mutex_unlock(&eeh_dev_mutex); 1480 } 1481 EXPORT_SYMBOL(eeh_dev_release); 1482 1483 #ifdef CONFIG_IOMMU_API 1484 1485 static int dev_has_iommu_table(struct device *dev, void *data) 1486 { 1487 struct pci_dev *pdev = to_pci_dev(dev); 1488 struct pci_dev **ppdev = data; 1489 1490 if (!dev) 1491 return 0; 1492 1493 if (device_iommu_mapped(dev)) { 1494 *ppdev = pdev; 1495 return 1; 1496 } 1497 1498 return 0; 1499 } 1500 1501 /** 1502 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE 1503 * @group: IOMMU group 1504 * 1505 * The routine is called to convert IOMMU group to EEH PE. 1506 */ 1507 struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group) 1508 { 1509 struct pci_dev *pdev = NULL; 1510 struct eeh_dev *edev; 1511 int ret; 1512 1513 /* No IOMMU group ? */ 1514 if (!group) 1515 return NULL; 1516 1517 ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table); 1518 if (!ret || !pdev) 1519 return NULL; 1520 1521 /* No EEH device or PE ? 
*/ 1522 edev = pci_dev_to_eeh_dev(pdev); 1523 if (!edev || !edev->pe) 1524 return NULL; 1525 1526 return edev->pe; 1527 } 1528 EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe); 1529 1530 #endif /* CONFIG_IOMMU_API */ 1531 1532 /** 1533 * eeh_pe_set_option - Set options for the indicated PE 1534 * @pe: EEH PE 1535 * @option: requested option 1536 * 1537 * The routine is called to enable or disable EEH functionality 1538 * on the indicated PE, to enable IO or DMA for the frozen PE. 1539 */ 1540 int eeh_pe_set_option(struct eeh_pe *pe, int option) 1541 { 1542 int ret = 0; 1543 1544 /* Invalid PE ? */ 1545 if (!pe) 1546 return -ENODEV; 1547 1548 /* 1549 * EEH functionality could possibly be disabled, just 1550 * return error for the case. And the EEH functinality 1551 * isn't expected to be disabled on one specific PE. 1552 */ 1553 switch (option) { 1554 case EEH_OPT_ENABLE: 1555 if (eeh_enabled()) { 1556 ret = eeh_pe_change_owner(pe); 1557 break; 1558 } 1559 ret = -EIO; 1560 break; 1561 case EEH_OPT_DISABLE: 1562 break; 1563 case EEH_OPT_THAW_MMIO: 1564 case EEH_OPT_THAW_DMA: 1565 case EEH_OPT_FREEZE_PE: 1566 if (!eeh_ops || !eeh_ops->set_option) { 1567 ret = -ENOENT; 1568 break; 1569 } 1570 1571 ret = eeh_pci_enable(pe, option); 1572 break; 1573 default: 1574 pr_debug("%s: Option %d out of range (%d, %d)\n", 1575 __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA); 1576 ret = -EINVAL; 1577 } 1578 1579 return ret; 1580 } 1581 EXPORT_SYMBOL_GPL(eeh_pe_set_option); 1582 1583 /** 1584 * eeh_pe_get_state - Retrieve PE's state 1585 * @pe: EEH PE 1586 * 1587 * Retrieve the PE's state, which includes 3 aspects: enabled 1588 * DMA, enabled IO and asserted reset. 1589 */ 1590 int eeh_pe_get_state(struct eeh_pe *pe) 1591 { 1592 int result, ret = 0; 1593 bool rst_active, dma_en, mmio_en; 1594 1595 /* Existing PE ? 
*/ 1596 if (!pe) 1597 return -ENODEV; 1598 1599 if (!eeh_ops || !eeh_ops->get_state) 1600 return -ENOENT; 1601 1602 /* 1603 * If the parent PE is owned by the host kernel and is undergoing 1604 * error recovery, we should return the PE state as temporarily 1605 * unavailable so that the error recovery on the guest is suspended 1606 * until the recovery completes on the host. 1607 */ 1608 if (pe->parent && 1609 !(pe->state & EEH_PE_REMOVED) && 1610 (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING))) 1611 return EEH_PE_STATE_UNAVAIL; 1612 1613 result = eeh_ops->get_state(pe, NULL); 1614 rst_active = !!(result & EEH_STATE_RESET_ACTIVE); 1615 dma_en = !!(result & EEH_STATE_DMA_ENABLED); 1616 mmio_en = !!(result & EEH_STATE_MMIO_ENABLED); 1617 1618 if (rst_active) 1619 ret = EEH_PE_STATE_RESET; 1620 else if (dma_en && mmio_en) 1621 ret = EEH_PE_STATE_NORMAL; 1622 else if (!dma_en && !mmio_en) 1623 ret = EEH_PE_STATE_STOPPED_IO_DMA; 1624 else if (!dma_en && mmio_en) 1625 ret = EEH_PE_STATE_STOPPED_DMA; 1626 else 1627 ret = EEH_PE_STATE_UNAVAIL; 1628 1629 return ret; 1630 } 1631 EXPORT_SYMBOL_GPL(eeh_pe_get_state); 1632 1633 static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed) 1634 { 1635 struct eeh_dev *edev, *tmp; 1636 struct pci_dev *pdev; 1637 int ret = 0; 1638 1639 eeh_pe_restore_bars(pe); 1640 1641 /* 1642 * Reenable PCI devices as the devices passed 1643 * through are always enabled before the reset. 
1644 */ 1645 eeh_pe_for_each_dev(pe, edev, tmp) { 1646 pdev = eeh_dev_to_pci_dev(edev); 1647 if (!pdev) 1648 continue; 1649 1650 ret = pci_reenable_device(pdev); 1651 if (ret) { 1652 pr_warn("%s: Failure %d reenabling %s\n", 1653 __func__, ret, pci_name(pdev)); 1654 return ret; 1655 } 1656 } 1657 1658 /* The PE is still in frozen state */ 1659 if (include_passed || !eeh_pe_passed(pe)) { 1660 ret = eeh_unfreeze_pe(pe); 1661 } else 1662 pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n", 1663 pe->phb->global_number, pe->addr); 1664 if (!ret) 1665 eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed); 1666 return ret; 1667 } 1668 1669 1670 /** 1671 * eeh_pe_reset - Issue PE reset according to specified type 1672 * @pe: EEH PE 1673 * @option: reset type 1674 * 1675 * The routine is called to reset the specified PE with the 1676 * indicated type, either fundamental reset or hot reset. 1677 * PE reset is the most important part for error recovery. 1678 */ 1679 int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed) 1680 { 1681 int ret = 0; 1682 1683 /* Invalid PE ? */ 1684 if (!pe) 1685 return -ENODEV; 1686 1687 if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset) 1688 return -ENOENT; 1689 1690 switch (option) { 1691 case EEH_RESET_DEACTIVATE: 1692 ret = eeh_ops->reset(pe, option); 1693 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed); 1694 if (ret) 1695 break; 1696 1697 ret = eeh_pe_reenable_devices(pe, include_passed); 1698 break; 1699 case EEH_RESET_HOT: 1700 case EEH_RESET_FUNDAMENTAL: 1701 /* 1702 * Proactively freeze the PE to drop all MMIO access 1703 * during reset, which should be banned as it's always 1704 * cause recursive EEH error. 
1705 */ 1706 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 1707 1708 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 1709 ret = eeh_ops->reset(pe, option); 1710 break; 1711 default: 1712 pr_debug("%s: Unsupported option %d\n", 1713 __func__, option); 1714 ret = -EINVAL; 1715 } 1716 1717 return ret; 1718 } 1719 EXPORT_SYMBOL_GPL(eeh_pe_reset); 1720 1721 /** 1722 * eeh_pe_configure - Configure PCI bridges after PE reset 1723 * @pe: EEH PE 1724 * 1725 * The routine is called to restore the PCI config space for 1726 * those PCI devices, especially PCI bridges affected by PE 1727 * reset issued previously. 1728 */ 1729 int eeh_pe_configure(struct eeh_pe *pe) 1730 { 1731 int ret = 0; 1732 1733 /* Invalid PE ? */ 1734 if (!pe) 1735 return -ENODEV; 1736 1737 return ret; 1738 } 1739 EXPORT_SYMBOL_GPL(eeh_pe_configure); 1740 1741 /** 1742 * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE 1743 * @pe: the indicated PE 1744 * @type: error type 1745 * @function: error function 1746 * @addr: address 1747 * @mask: address mask 1748 * 1749 * The routine is called to inject the specified PCI error, which 1750 * is determined by @type and @function, to the indicated PE for 1751 * testing purpose. 1752 */ 1753 int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, 1754 unsigned long addr, unsigned long mask) 1755 { 1756 /* Invalid PE ? */ 1757 if (!pe) 1758 return -ENODEV; 1759 1760 /* Unsupported operation ? 
*/ 1761 if (!eeh_ops || !eeh_ops->err_inject) 1762 return -ENOENT; 1763 1764 /* Check on PCI error type */ 1765 if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64) 1766 return -EINVAL; 1767 1768 /* Check on PCI error function */ 1769 if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX) 1770 return -EINVAL; 1771 1772 return eeh_ops->err_inject(pe, type, func, addr, mask); 1773 } 1774 EXPORT_SYMBOL_GPL(eeh_pe_inject_err); 1775 1776 static int proc_eeh_show(struct seq_file *m, void *v) 1777 { 1778 if (!eeh_enabled()) { 1779 seq_printf(m, "EEH Subsystem is globally disabled\n"); 1780 seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs); 1781 } else { 1782 seq_printf(m, "EEH Subsystem is enabled\n"); 1783 seq_printf(m, 1784 "no device=%llu\n" 1785 "no device node=%llu\n" 1786 "no config address=%llu\n" 1787 "check not wanted=%llu\n" 1788 "eeh_total_mmio_ffs=%llu\n" 1789 "eeh_false_positives=%llu\n" 1790 "eeh_slot_resets=%llu\n", 1791 eeh_stats.no_device, 1792 eeh_stats.no_dn, 1793 eeh_stats.no_cfg_addr, 1794 eeh_stats.ignored_check, 1795 eeh_stats.total_mmio_ffs, 1796 eeh_stats.false_positives, 1797 eeh_stats.slot_resets); 1798 } 1799 1800 return 0; 1801 } 1802 1803 #ifdef CONFIG_DEBUG_FS 1804 static int eeh_enable_dbgfs_set(void *data, u64 val) 1805 { 1806 if (val) 1807 eeh_clear_flag(EEH_FORCE_DISABLED); 1808 else 1809 eeh_add_flag(EEH_FORCE_DISABLED); 1810 1811 return 0; 1812 } 1813 1814 static int eeh_enable_dbgfs_get(void *data, u64 *val) 1815 { 1816 if (eeh_enabled()) 1817 *val = 0x1ul; 1818 else 1819 *val = 0x0ul; 1820 return 0; 1821 } 1822 1823 DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, 1824 eeh_enable_dbgfs_set, "0x%llx\n"); 1825 1826 static ssize_t eeh_force_recover_write(struct file *filp, 1827 const char __user *user_buf, 1828 size_t count, loff_t *ppos) 1829 { 1830 struct pci_controller *hose; 1831 uint32_t phbid, pe_no; 1832 struct eeh_pe *pe; 1833 char buf[20]; 1834 int ret; 1835 1836 ret = 
simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count); 1837 if (!ret) 1838 return -EFAULT; 1839 1840 /* 1841 * When PE is NULL the event is a "special" event. Rather than 1842 * recovering a specific PE it forces the EEH core to scan for failed 1843 * PHBs and recovers each. This needs to be done before any device 1844 * recoveries can occur. 1845 */ 1846 if (!strncmp(buf, "hwcheck", 7)) { 1847 __eeh_send_failure_event(NULL); 1848 return count; 1849 } 1850 1851 ret = sscanf(buf, "%x:%x", &phbid, &pe_no); 1852 if (ret != 2) 1853 return -EINVAL; 1854 1855 hose = pci_find_controller_for_domain(phbid); 1856 if (!hose) 1857 return -ENODEV; 1858 1859 /* Retrieve PE */ 1860 pe = eeh_pe_get(hose, pe_no, 0); 1861 if (!pe) 1862 return -ENODEV; 1863 1864 /* 1865 * We don't do any state checking here since the detection 1866 * process is async to the recovery process. The recovery 1867 * thread *should* not break even if we schedule a recovery 1868 * from an odd state (e.g. PE removed, or recovery of a 1869 * non-isolated PE) 1870 */ 1871 __eeh_send_failure_event(pe); 1872 1873 return ret < 0 ? 
ret : count; 1874 } 1875 1876 static const struct file_operations eeh_force_recover_fops = { 1877 .open = simple_open, 1878 .llseek = no_llseek, 1879 .write = eeh_force_recover_write, 1880 }; 1881 #endif 1882 1883 static int __init eeh_init_proc(void) 1884 { 1885 if (machine_is(pseries) || machine_is(powernv)) { 1886 proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show); 1887 #ifdef CONFIG_DEBUG_FS 1888 debugfs_create_file_unsafe("eeh_enable", 0600, 1889 powerpc_debugfs_root, NULL, 1890 &eeh_enable_dbgfs_ops); 1891 debugfs_create_u32("eeh_max_freezes", 0600, 1892 powerpc_debugfs_root, &eeh_max_freezes); 1893 debugfs_create_bool("eeh_disable_recovery", 0600, 1894 powerpc_debugfs_root, 1895 &eeh_debugfs_no_recover); 1896 debugfs_create_file_unsafe("eeh_force_recover", 0600, 1897 powerpc_debugfs_root, NULL, 1898 &eeh_force_recover_fops); 1899 eeh_cache_debugfs_init(); 1900 #endif 1901 } 1902 1903 return 0; 1904 } 1905 __initcall(eeh_init_proc); 1906