// SPDX-License-Identifier: GPL-2.0-or-later
/*
 * Copyright IBM Corporation 2001, 2005, 2006
 * Copyright Dave Engebretsen & Todd Inglett 2001
 * Copyright Linas Vepstas 2005, 2006
 * Copyright 2001-2012 IBM Corporation.
 *
 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
 */

#include <linux/delay.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/reboot.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>

#include <linux/atomic.h>
#include <asm/debugfs.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>
#include <asm/pte-walk.h>


/** Overview:
 * EEH, or "Enhanced Error Handling" is a PCI bridge technology for
 * dealing with PCI bus errors that can't be dealt with within the
 * usual PCI framework, except by check-stopping the CPU. Systems
 * that are designed for high-availability/reliability cannot afford
 * to crash due to a "mere" PCI error, thus the need for EEH.
 * An EEH-capable bridge operates by converting a detected error
 * into a "slot freeze", taking the PCI adapter off-line, making
 * the slot behave, from the OS'es point of view, as if the slot
 * were "empty": all reads return 0xff's and all writes are silently
 * ignored. EEH slot isolation events can be triggered by parity
 * errors on the address or data busses (e.g. during posted writes),
 * which in turn might be caused by low voltage on the bus, dust,
 * vibration, humidity, radioactivity or plain-old failed hardware.
 *
 * Note, however, that one of the leading causes of EEH slot
 * freeze events is buggy device drivers, buggy device microcode,
 * or buggy device hardware. This is because any attempt by the
 * device to bus-master data to a memory address that is not
 * assigned to the device will trigger a slot freeze. (The idea
 * is to prevent devices-gone-wild from corrupting system memory).
 * Buggy hardware/drivers will have a miserable time co-existing
 * with EEH.
 *
 * Ideally, a PCI device driver, when suspecting that an isolation
 * event has occurred (e.g. by reading 0xff's), will then ask EEH
 * whether this is the case, and then take appropriate steps to
 * reset the PCI slot, the PCI device, and then resume operations.
 * However, until that day, the checking is done here, with the
 * eeh_check_failure() routine embedded in the MMIO macros. If
 * the slot is found to be isolated, an "EEH Event" is synthesized
 * and sent out for processing.
 */

/* If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message.
 */
#define EEH_MAX_FAILS	2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)

/*
 * EEH probe mode support, which is part of the flags,
 * is to support multiple platforms for EEH. Some platforms
 * like pSeries do PCI device enumeration based on the device
 * tree. However, other platforms like powernv probe PCI devices
 * from hardware.
 * The flag is used to distinguish that. In addition,
 * struct eeh_ops::probe would be invoked for a particular OF node
 * or PCI device so that the corresponding PE would be created there.
 */
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);

/*
 * The maximum number of frozen events allowed. If one particular
 * PE's frozen count in the last hour exceeds this limit, the PE
 * will be forced offline permanently.
 */
u32 eeh_max_freezes = 5;

/*
 * Controls whether a recovery event should be scheduled when an
 * isolated device is discovered. This is only really useful for
 * debugging problems with the EEH core.
 */
bool eeh_debugfs_no_recover;

/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;

/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);
EXPORT_SYMBOL_GPL(confirm_error_lock);

/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);

/* Buffer for reporting pci register dumps. It's here in BSS, and
 * not dynamically allocated, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN 8192
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/*
 * This struct maintains the EEH global statistics, which are
 * exported to user space through procfs.
 */
struct eeh_stats {
	u64 no_device;		/* PCI device not found		*/
	u64 no_dn;		/* OF node not found		*/
	u64 no_cfg_addr;	/* Config address not found	*/
	u64 ignored_check;	/* EEH check skipped		*/
	u64 total_mmio_ffs;	/* Total EEH checks		*/
	u64 false_positives;	/* Unnecessary EEH checks	*/
	u64 slot_resets;	/* PE reset			*/
};

static struct eeh_stats eeh_stats;

static int __init eeh_setup(char *str)
{
	if (!strcmp(str, "off"))
		eeh_add_flag(EEH_FORCE_DISABLED);
	else if (!strcmp(str, "early_log"))
		eeh_add_flag(EEH_EARLY_DUMP_LOG);

	return 1;
}
__setup("eeh=", eeh_setup);

/*
 * This routine captures assorted PCI configuration space data
 * for the indicated PCI device, and puts it into a buffer
 * for RTAS error logging.
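 *
 * The buffer is filled with short "name:value" lines (the device
 * address, vendor/device ID, command/status, then any PCI-X, PCIe
 * and AER capability registers), and the same information is
 * mirrored to the kernel log via pr_warn(). The return value is
 * the number of bytes written to @buf.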
 */
static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
{
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	u32 cfg;
	int cap, i;
	int n = 0, l = 0;
	char buffer[128];

	if (!pdn) {
		pr_warn("EEH: Note: No error log for absent device.\n");
		return 0;
	}

	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n",
		       pdn->phb->global_number, pdn->busno,
		       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
	pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n",
		pdn->phb->global_number, pdn->busno,
		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));

	eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);

	eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);

	/* Gather bridge-specific registers */
	if (edev->mode & EEH_DEV_BRIDGE) {
		eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);

		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
		pr_warn("EEH: Bridge control: %04x\n", cfg);
	}

	/* Dump out the PCI-X command and status regs */
	cap = edev->pcix_cap;
	if (cap) {
		eeh_ops->read_config(pdn, cap, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);

		eeh_ops->read_config(pdn, cap+4, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
		pr_warn("EEH: PCI-X status: %08x\n", cfg);
	}

	/* If PCI-E capable, dump PCI-E cap 10 */
	cap = edev->pcie_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
		pr_warn("EEH: PCI-E capabilities and status follow:\n");

		for (i = 0; i <= 8; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}

		}

		pr_warn("%s\n", buffer);
	}

	/* If AER capable, dump it */
	cap = edev->aer_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
		pr_warn("EEH: PCI-E AER capability register set follows:\n");

		for (i = 0; i <= 13; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E AER %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
	}

	return n;
}

static void *eeh_dump_pe_log(struct eeh_pe *pe, void *flag)
{
	struct eeh_dev *edev, *tmp;
	size_t *plen = flag;

	eeh_pe_for_each_dev(pe, edev, tmp)
		*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
					  EEH_PCI_REGS_LOG_LEN - *plen);

	return NULL;
}

/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 *
 * This routine should be called to generate the combined log, which
 * comprises the driver log and the error log. The driver log is
 * figured out from the config space of the corresponding PCI device,
 * while the error log is fetched through a platform dependent
 * function call.
 */
void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
	size_t loglen = 0;

	/*
	 * When the PHB is fenced or dead, it's pointless to collect
	 * the data from PCI config space because it should return
	 * 0xFF's. For ER, we still retrieve the data from the PCI
	 * config space.
	 *
	 * For pHyp, we have to enable IO for log retrieval. Otherwise,
	 * 0xFF's is always returned from PCI config space.
	 *
	 * When the @severity is EEH_LOG_PERM, the PE is going to be
	 * removed. Prior to that, the drivers for devices included in
	 * the PE will be closed. The drivers rely on a working IO path
	 * to bring the devices to a quiet state. Otherwise, PCI traffic
	 * from those devices after they are removed is likely to cause
	 * another unexpected EEH error.
	 */
	if (!(pe->type & EEH_PE_PHB)) {
		if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG) ||
		    severity == EEH_LOG_PERM)
			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);

		/*
		 * The config space of some PCI devices can't be accessed
		 * when their PEs are in frozen state. Otherwise, fenced
		 * PHB might be seen. Those PEs are identified with flag
		 * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED
		 * is set automatically when the PE is put to EEH_PE_ISOLATED.
		 *
		 * Restoring BARs possibly triggers PCI config access in
		 * (OPAL) firmware and then causes fenced PHB. If the
		 * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's
		 * pointless to restore BARs and dump config space.
		 */
		eeh_ops->configure_bridge(pe);
		if (!(pe->state & EEH_PE_CFG_BLOCKED)) {
			eeh_pe_restore_bars(pe);

			pci_regs_buf[0] = 0;
			eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
		}
	}

	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
}

/**
 * eeh_token_to_phys - Convert EEH address token to phys address
 * @token: I/O token, should be address in the form 0xA....
 *
 * This routine should be called to convert a virtual I/O address
 * to a physical one.
 */
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
	pte_t *ptep;
	unsigned long pa;
	int hugepage_shift;

	/*
	 * We won't find hugepages here (this is iomem). Hence we are not
	 * worried about _PAGE_SPLITTING/collapse. Also we will not hit
	 * page table free, because of init_mm.
	 */
	ptep = find_init_mm_pte(token, &hugepage_shift);
	if (!ptep)
		return token;

	pa = pte_pfn(*ptep);

	/* On radix we can do hugepage mappings for io, so handle that */
	if (hugepage_shift) {
		pa <<= hugepage_shift;
		pa |= token & ((1ul << hugepage_shift) - 1);
	} else {
		pa <<= PAGE_SHIFT;
		pa |= token & (PAGE_SIZE - 1);
	}

	return pa;
}

/*
 * On the PowerNV platform, we might already have a fenced PHB.
 * In that case, it's pointless to recover the frozen PE. Instead,
 * we have to handle the fenced PHB first.
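 *
 * Return value (as used by the caller below): a negative errno if
 * the check could not be carried out, 0 if the PHB is healthy or
 * already isolated, and 1 if a new PHB failure was detected and a
 * recovery event was queued.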
 */
static int eeh_phb_check_failure(struct eeh_pe *pe)
{
	struct eeh_pe *phb_pe;
	unsigned long flags;
	int ret;

	if (!eeh_has_flag(EEH_PROBE_MODE_DEV))
		return -EPERM;

	/* Find the PHB PE */
	phb_pe = eeh_phb_pe_get(pe->phb);
	if (!phb_pe) {
		pr_warn("%s Can't find PE for PHB#%x\n",
			__func__, pe->phb->global_number);
		return -EEXIST;
	}

	/* If the PHB has been in problematic state */
	eeh_serialize_lock(&flags);
	if (phb_pe->state & EEH_PE_ISOLATED) {
		ret = 0;
		goto out;
	}

	/* Check PHB state */
	ret = eeh_ops->get_state(phb_pe, NULL);
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
		ret = 0;
		goto out;
	}

	/* Isolate the PHB and send event */
	eeh_pe_mark_isolated(phb_pe);
	eeh_serialize_unlock(flags);

	pr_err("EEH: PHB#%x failure detected, location: %s\n",
	       phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
	dump_stack();
	eeh_send_failure_event(phb_pe);

	return 1;
out:
	eeh_serialize_unlock(flags);
	return ret;
}

/**
 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
 * @edev: eeh device
 *
 * Check for an EEH failure for the given device node. Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze. This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error; otherwise returns
 * a non-zero value and queues up a slot isolation event notification.
 *
 * It is safe to call this routine in an interrupt context.
 */
int eeh_dev_check_failure(struct eeh_dev *edev)
{
	int ret;
	unsigned long flags;
	struct device_node *dn;
	struct pci_dev *dev;
	struct eeh_pe *pe, *parent_pe, *phb_pe;
	int rc = 0;
	const char *location = NULL;

	eeh_stats.total_mmio_ffs++;

	if (!eeh_enabled())
		return 0;

	if (!edev) {
		eeh_stats.no_dn++;
		return 0;
	}
	dev = eeh_dev_to_pci_dev(edev);
	pe = eeh_dev_to_pe(edev);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!pe) {
		eeh_stats.ignored_check++;
		pr_debug("EEH: Ignored check for %s\n",
			 eeh_pci_name(dev));
		return 0;
	}

	if (!pe->addr && !pe->config_addr) {
		eeh_stats.no_cfg_addr++;
		return 0;
	}

	/*
	 * On the PowerNV platform, we might already have a fenced
	 * PHB, and we need to take care of that first.
	 */
	ret = eeh_phb_check_failure(pe);
	if (ret > 0)
		return ret;

	/*
	 * If the PE isn't owned by us, we shouldn't check the
	 * state. Instead, let the owner handle it if the PE has
	 * been frozen.
	 */
	if (eeh_pe_passed(pe))
		return 0;

	/* If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock; as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	eeh_serialize_lock(&flags);
	rc = 1;
	if (pe->state & EEH_PE_ISOLATED) {
		pe->check_count++;
		if (pe->check_count % EEH_MAX_FAILS == 0) {
			dn = pci_device_to_OF_node(dev);
			if (dn)
				location = of_get_property(dn, "ibm,loc-code",
							   NULL);
			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
				"location=%s driver=%s pci addr=%s\n",
				pe->check_count,
				location ? location : "unknown",
				eeh_driver_name(dev), eeh_pci_name(dev));
			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
			dump_stack();
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure. This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = eeh_ops->get_state(pe, NULL);

	/* Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 * We will punt under any of the following conditions: failure
	 * to get the PE's state, EEH not supported, permanently
	 * unavailable state, or PE in a good state.
	 */
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) {
		eeh_stats.false_positives++;
		pe->false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/*
	 * It's a corner case that the parent PE has been put into
	 * frozen state as well. We should take care of that first.
	 */
	parent_pe = pe->parent;
	while (parent_pe) {
		/* Hit the ceiling ? */
		if (parent_pe->type & EEH_PE_PHB)
			break;

		/* Frozen parent PE ? */
		ret = eeh_ops->get_state(parent_pe, NULL);
		if (ret > 0 && !eeh_state_active(ret)) {
			pe = parent_pe;
			pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n",
			       pe->phb->global_number, pe->addr,
			       pe->phb->global_number, parent_pe->addr);
		}

		/* Next parent level */
		parent_pe = parent_pe->parent;
	}

	eeh_stats.slot_resets++;

	/* Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges.
	 */
	eeh_pe_mark_isolated(pe);
	eeh_serialize_unlock(flags);

	/* Most EEH events are due to device driver bugs. Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened. So print that out.
	 */
	phb_pe = eeh_phb_pe_get(pe->phb);
	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
	       pe->phb->global_number, pe->addr);
	pr_err("EEH: PE location: %s, PHB location: %s\n",
	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
	dump_stack();

	eeh_send_failure_event(pe);

	return 1;

dn_unlock:
	eeh_serialize_unlock(flags);
	return rc;
}

EXPORT_SYMBOL_GPL(eeh_dev_check_failure);

/**
 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
 * @token: I/O address
 *
 * Check for an EEH failure at the given I/O address. Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze event. This routine
 * will query firmware for the EEH status.
 *
 * Note this routine is safe to call in an interrupt context.
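 *
 * A minimal sketch of the intended call pattern from a driver,
 * assuming a hypothetical ioremap'ed @base and register offset
 * REG_STATUS:
 *
 *	u32 val = readl(base + REG_STATUS);
 *
 *	if (val == ~0U && eeh_check_failure(base + REG_STATUS))
 *		return -EIO;	// slot is frozen, recovery queued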
 */
int eeh_check_failure(const volatile void __iomem *token)
{
	unsigned long addr;
	struct eeh_dev *edev;

	/* Finding the phys addr + pci device; this is pretty quick. */
	addr = eeh_token_to_phys((unsigned long __force) token);
	edev = eeh_addr_cache_get_dev(addr);
	if (!edev) {
		eeh_stats.no_device++;
		return 0;
	}

	return eeh_dev_check_failure(edev);
}
EXPORT_SYMBOL(eeh_check_failure);


/**
 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
 * @pe: EEH PE
 * @function: EEH option
 *
 * This routine should be called to reenable frozen MMIO or DMA
 * so that it would work correctly again. It's useful while doing
 * recovery or log collection on the indicated device.
 */
int eeh_pci_enable(struct eeh_pe *pe, int function)
{
	int active_flag, rc;

	/*
	 * pHyp doesn't allow enabling IO or DMA on an unfrozen PE.
	 * Also, it's pointless to enable them on an unfrozen PE. So
	 * we have to check before enabling IO or DMA.
	 */
	switch (function) {
	case EEH_OPT_THAW_MMIO:
		active_flag = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED;
		break;
	case EEH_OPT_THAW_DMA:
		active_flag = EEH_STATE_DMA_ACTIVE;
		break;
	case EEH_OPT_DISABLE:
	case EEH_OPT_ENABLE:
	case EEH_OPT_FREEZE_PE:
		active_flag = 0;
		break;
	default:
		pr_warn("%s: Invalid function %d\n",
			__func__, function);
		return -EINVAL;
	}

	/*
	 * Check if IO or DMA has been enabled before
	 * enabling them.
	 */
	if (active_flag) {
		rc = eeh_ops->get_state(pe, NULL);
		if (rc < 0)
			return rc;

		/* Needn't enable it at all */
		if (rc == EEH_STATE_NOT_SUPPORT)
			return 0;

		/* It's already enabled */
		if (rc & active_flag)
			return 0;
	}


	/* Issue the request */
	rc = eeh_ops->set_option(pe, function);
	if (rc)
		pr_warn("%s: Unexpected state change %d on "
			"PHB#%x-PE#%x, err=%d\n",
			__func__, function, pe->phb->global_number,
			pe->addr, rc);

	/* Check if the request is finished successfully */
	if (active_flag) {
		rc = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if (rc < 0)
			return rc;

		if (rc & active_flag)
			return 0;

		return -EIO;
	}

	return rc;
}

static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev,
					    void *userdata)
{
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	/*
	 * The caller should have disabled and saved the
	 * state for the specified device
	 */
	if (!pdev || pdev == dev)
		return NULL;

	/* Ensure we have D0 power state */
	pci_set_power_state(pdev, PCI_D0);

	/* Save device state */
	pci_save_state(pdev);

	/*
	 * Disable device to avoid any DMA traffic and
	 * interrupt from the device
	 */
	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);

	return NULL;
}

static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata)
{
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	if (!pdev)
		return NULL;

	/* Apply customization from firmware */
	if (pdn && eeh_ops->restore_config)
		eeh_ops->restore_config(pdn);

	/* The caller should restore state for the specified device */
	if (pdev != dev)
		pci_restore_state(pdev);

	return NULL;
}

int eeh_restore_vf_config(struct pci_dn *pdn)
{
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);
	u32 devctl, cmd, cap2, aer_capctl;
	int old_mps;

	if (edev->pcie_cap) {
		/* Restore MPS */
		old_mps = (ffs(pdn->mps) - 8) << 5;
		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				     2, &devctl);
		devctl &= ~PCI_EXP_DEVCTL_PAYLOAD;
		devctl |= old_mps;
		eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				      2, devctl);

		/* Disable Completion Timeout if possible */
		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2,
				     4, &cap2);
		if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) {
			eeh_ops->read_config(pdn,
					     edev->pcie_cap + PCI_EXP_DEVCTL2,
					     4, &cap2);
			cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS;
			eeh_ops->write_config(pdn,
					      edev->pcie_cap + PCI_EXP_DEVCTL2,
					      4, cap2);
		}
	}

	/* Enable SERR and parity checking */
	eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd);
	cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR);
	eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd);

	/* Enable reporting of various errors */
	if (edev->pcie_cap) {
		eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				     2, &devctl);
		devctl &= ~PCI_EXP_DEVCTL_CERE;
		devctl |= (PCI_EXP_DEVCTL_NFERE |
			   PCI_EXP_DEVCTL_FERE |
			   PCI_EXP_DEVCTL_URRE);
		eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL,
				      2, devctl);
	}

	/* Enable ECRC generation and checking */
	if (edev->pcie_cap && edev->aer_cap) {
		eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP,
				     4, &aer_capctl);
		aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE);
		eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP,
				      4, aer_capctl);
	}

	return 0;
}

/**
 * pcibios_set_pcie_reset_state - Set PCI-E reset state
 * @dev: pci device struct
 * @state: reset state to enter
 *
 * Return value:
 *	0 if success
 */
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
	struct eeh_pe *pe = eeh_dev_to_pe(edev);

	if (!pe) {
		pr_err("%s: No PE found on PCI device %s\n",
		       __func__, pci_name(dev));
		return -EINVAL;
	}

	switch (state) {
	case pcie_deassert_reset:
		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
		eeh_unfreeze_pe(pe);
		if (!(pe->type & EEH_PE_VF))
			eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
		eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
		break;
	case pcie_hot_reset:
		eeh_pe_mark_isolated(pe);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
		if (!(pe->type & EEH_PE_VF))
			eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		eeh_ops->reset(pe, EEH_RESET_HOT);
		break;
	case pcie_warm_reset:
		eeh_pe_mark_isolated(pe);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, true);
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
		if (!(pe->type & EEH_PE_VF))
			eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
		break;
	default:
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED, true);
		return -EINVAL;
	}

	return 0;
}

/**
 * eeh_set_dev_freset - Check the required reset for the indicated device
 * @edev: EEH device
 * @flag: return value
 *
 * Each device might have its preferred reset type: fundamental or
 * hot reset. The routine is used to collect the information for
 * the indicated device and its children so that the group of
 * devices can be reset properly.
 */
static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag)
{
	struct pci_dev *dev;
	unsigned int *freset = (unsigned int *)flag;

	dev = eeh_dev_to_pci_dev(edev);
	if (dev)
		*freset |= dev->needs_freset;

	return NULL;
}

static void eeh_pe_refreeze_passed(struct eeh_pe *root)
{
	struct eeh_pe *pe;
	int state;

	eeh_for_each_pe(root, pe) {
		if (eeh_pe_passed(pe)) {
			state = eeh_ops->get_state(pe, NULL);
			if (state &
			    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED)) {
				pr_info("EEH: Passed-through PE PHB#%x-PE#%x was thawed by reset, re-freezing for safety.\n",
					pe->phb->global_number, pe->addr);
				eeh_pe_set_option(pe, EEH_OPT_FREEZE_PE);
			}
		}
	}
}

/**
 * eeh_pe_reset_full - Complete a full reset process on the indicated PE
 * @pe: EEH PE
 * @include_passed: also reset PEs that have been passed through to a guest
 *
 * This function executes a full reset procedure on a PE, including setting
 * the appropriate flags, performing a fundamental or hot reset, and then
 * deactivating the reset status. It is designed to be used within the EEH
 * subsystem, as opposed to eeh_pe_reset which is exported to drivers and
 * only performs a single operation at a time.
 *
 * This function will attempt to reset a PE three times before failing.
 */
int eeh_pe_reset_full(struct eeh_pe *pe, bool include_passed)
{
	int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
	int type = EEH_RESET_HOT;
	unsigned int freset = 0;
	int i, state = 0, ret;

	/*
	 * Determine the type of reset to perform - hot or fundamental.
	 * Hot reset is the default operation, unless any device under the
	 * PE requires a fundamental reset.
	 */
	eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);

	if (freset)
		type = EEH_RESET_FUNDAMENTAL;

	/* Mark the PE as in reset state and block config space accesses */
	eeh_pe_state_mark(pe, reset_state);

	/* Make three attempts at resetting the bus */
	for (i = 0; i < 3; i++) {
		ret = eeh_pe_reset(pe, type, include_passed);
		if (!ret)
			ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE,
					   include_passed);
		if (ret) {
			pr_warn("EEH: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n",
				ret, pe->phb->global_number, pe->addr, i + 1);
			ret = -EIO;
			continue;
		}
		if (i)
			pr_warn("EEH: PHB#%x-PE#%x: Successful reset (attempt %d)\n",
				pe->phb->global_number, pe->addr, i + 1);

		/* Wait until the PE is in a functioning state */
		state = eeh_wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if (state < 0) {
			pr_warn("EEH: Unrecoverable slot failure on PHB#%x-PE#%x\n",
				pe->phb->global_number, pe->addr);
			ret = -ENOTRECOVERABLE;
			break;
		}
		if (eeh_state_active(state))
			break;
		else
			pr_warn("EEH: PHB#%x-PE#%x: Slot inactive after reset: 0x%x (attempt %d)\n",
				pe->phb->global_number, pe->addr, state, i + 1);
	}

	/* Resetting the PE may have unfrozen child PEs. If those PEs have been
	 * (potentially) passed through to a guest, re-freeze them:
	 */
	if (!include_passed)
		eeh_pe_refreeze_passed(pe);

	eeh_pe_state_clear(pe, reset_state, true);
	return ret;
}

/**
 * eeh_save_bars - Save device bars
 * @edev: PCI device associated EEH device
 *
 * Save the values of the device bars. Unlike the restore
 * routine, this routine is *not* recursive. This is because
 * PCI devices are added individually; but, for the restore,
 * an entire slot is reset at a time.
 */
void eeh_save_bars(struct eeh_dev *edev)
{
	struct pci_dn *pdn;
	int i;

	pdn = eeh_dev_to_pdn(edev);
	if (!pdn)
		return;

	for (i = 0; i < 16; i++)
		eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]);

	/*
	 * For PCI bridges, including the root port, we need to enable
	 * bus mastering explicitly. Otherwise, the bridge can't fetch
	 * IODA table entries correctly. So we cache the bit in advance
	 * so that we can restore it after a reset, whether PHB-scoped
	 * or PE-scoped.
	 */
	if (edev->mode & EEH_DEV_BRIDGE)
		edev->config_space[1] |= PCI_COMMAND_MASTER;
}

/**
 * eeh_ops_register - Register platform dependent EEH operations
 * @ops: platform dependent EEH operations
 *
 * Register the platform dependent EEH operation callback
 * functions. The platform should call this function before
 * any other EEH operations.
 */
int __init eeh_ops_register(struct eeh_ops *ops)
{
	if (!ops->name) {
		pr_warn("%s: Invalid EEH ops name for %p\n",
			__func__, ops);
		return -EINVAL;
	}

	if (eeh_ops && eeh_ops != ops) {
		pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
			__func__, eeh_ops->name, ops->name);
		return -EEXIST;
	}

	eeh_ops = ops;

	return 0;
}

/**
 * eeh_ops_unregister - Unregister platform dependent EEH operations
 * @name: name of EEH platform operations
 *
 * Unregister the platform dependent EEH operation callback
 * functions.
 */
int __exit eeh_ops_unregister(const char *name)
{
	if (!name || !strlen(name)) {
		pr_warn("%s: Invalid EEH ops name\n",
			__func__);
		return -EINVAL;
	}

	if (eeh_ops && !strcmp(eeh_ops->name, name)) {
		eeh_ops = NULL;
		return 0;
	}

	return -EEXIST;
}

static int eeh_reboot_notifier(struct notifier_block *nb,
			       unsigned long action, void *unused)
{
	eeh_clear_flag(EEH_ENABLED);
	return NOTIFY_DONE;
}

static struct notifier_block eeh_reboot_nb = {
	.notifier_call = eeh_reboot_notifier,
};

void eeh_probe_devices(void)
{
	struct pci_controller *hose, *tmp;
	struct pci_dn *pdn;

	/* Enable EEH for all adapters */
	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pdn = hose->pci_data;
		traverse_pci_dn(pdn, eeh_ops->probe, NULL);
	}
	if (eeh_enabled())
		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
	else
		pr_info("EEH: No capable adapters found\n");
}

/**
 * eeh_init - EEH initialization
 *
 * Initialize EEH by trying to enable it for all of the adapters in the system.
 * As a side effect we can determine here if eeh is supported at all.
 * Note that we leave EEH on so failed config cycles won't cause a machine
 * check.
 * If a user turns off EEH for a particular adapter they are really
 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't
 * grant access to a slot if EEH isn't enabled, and so we always enable
 * EEH for all slots/all devices.
 *
 * The eeh-force-off option disables EEH checking globally, for all slots.
 * Even if force-off is set, the EEH hardware is still enabled, so that
 * newer systems can boot.
 */
static int eeh_init(void)
{
	struct pci_controller *hose, *tmp;
	int ret = 0;

	/* Register reboot notifier */
	ret = register_reboot_notifier(&eeh_reboot_nb);
	if (ret) {
		pr_warn("%s: Failed to register notifier (%d)\n",
			__func__, ret);
		return ret;
	}

	/* Call the platform initialization function */
	if (!eeh_ops) {
		pr_warn("%s: Platform EEH operation not found\n",
			__func__);
		return -EEXIST;
	} else if ((ret = eeh_ops->init()))
		return ret;

	/* Initialize PHB PEs */
	list_for_each_entry_safe(hose, tmp, &hose_list, list_node)
		eeh_dev_phb_init_dynamic(hose);

	/* Initialize EEH event */
	return eeh_event_init();
}

core_initcall_sync(eeh_init);

/**
 * eeh_add_device_early - Enable EEH for the indicated device node
 * @pdn: PCI device node for which to set up EEH
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 * This routine must be called before any i/o is performed to the
 * adapter (including any config-space i/o).
 * Whether this actually enables EEH or not for this device depends
 * on the CEC architecture, the type of the device, and earlier boot
 * command-line arguments.
 */
void eeh_add_device_early(struct pci_dn *pdn)
{
	struct pci_controller *phb = pdn ? pdn->phb : NULL;
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);

	if (!edev)
		return;

	if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
		return;

	/* USB Bus children of PCI devices will not have BUID's */
	if (NULL == phb ||
	    (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
		return;

	eeh_ops->probe(pdn, NULL);
}

/**
 * eeh_add_device_tree_early - Enable EEH for the indicated device
 * @pdn: PCI device node
 *
 * This routine must be used to perform EEH initialization for the
 * indicated PCI device that was added after system boot (e.g.
 * hotplug, dlpar).
 */
void eeh_add_device_tree_early(struct pci_dn *pdn)
{
	struct pci_dn *n;

	if (!pdn)
		return;

	list_for_each_entry(n, &pdn->child_list, list)
		eeh_add_device_tree_early(n);
	eeh_add_device_early(pdn);
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);

/**
 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
 * @dev: pci device for which to set up EEH
 *
 * This routine must be used to complete EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
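 *
 * A sketch of the expected ordering for a hot-added device
 * (simplified; the surrounding hotplug code is assumed):
 *
 *	eeh_add_device_tree_early(pdn);	// before any config-space i/o
 *	// ... PCI core scans the bus and creates struct pci_dev ...
 *	eeh_add_device_tree_late(bus);	// binds edev to the new pci_dev
 *	eeh_add_sysfs_files(bus);	// exposes the EEH sysfs files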
 */
void eeh_add_device_late(struct pci_dev *dev)
{
	struct pci_dn *pdn;
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;

	pr_debug("EEH: Adding device %s\n", pci_name(dev));

	pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
	edev = pdn_to_eeh_dev(pdn);
	if (edev->pdev == dev) {
		pr_debug("EEH: Already referenced !\n");
		return;
	}

	/*
	 * The EEH cache might not be removed correctly because of
	 * unbalanced kref to the device during unplug time, which
	 * relies on pcibios_release_device(). So we have to remove
	 * that here explicitly.
	 */
	if (edev->pdev) {
		eeh_rmv_from_parent_pe(edev);
		eeh_addr_cache_rmv_dev(edev->pdev);
		eeh_sysfs_remove_device(edev->pdev);
		edev->mode &= ~EEH_DEV_SYSFS;

		/*
		 * The PCI device should have been removed, though it
		 * wasn't done correctly. So we needn't call into the
		 * error handlers afterwards.
		 */
		edev->mode |= EEH_DEV_NO_HANDLER;

		edev->pdev = NULL;
		dev->dev.archdata.edev = NULL;
	}

	if (eeh_has_flag(EEH_PROBE_MODE_DEV))
		eeh_ops->probe(pdn, NULL);

	edev->pdev = dev;
	dev->dev.archdata.edev = edev;

	eeh_addr_cache_insert_dev(dev);
}

/**
 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_device_tree_late(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_add_device_late(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_device_tree_late(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);

/**
 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to add EEH sysfs files for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_sysfs_files(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_sysfs_add_device(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_sysfs_files(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);

/**
 * eeh_remove_device - Undo EEH setup for the indicated pci device
 * @dev: pci device to be removed
 *
 * This routine should be called when a device is removed from
 * a running system (e.g. by hotplug or dlpar). It unregisters
 * the PCI device from the EEH subsystem. I/O errors affecting
 * this device will no longer be detected after this call; thus,
 * i/o errors affecting this slot may leave this device unusable.
 */
void eeh_remove_device(struct pci_dev *dev)
{
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;
	edev = pci_dev_to_eeh_dev(dev);

	/* Unregister the device with the EEH/PCI address search system */
	pr_debug("EEH: Removing device %s\n", pci_name(dev));

	if (!edev || !edev->pdev || !edev->pe) {
		pr_debug("EEH: Not referenced !\n");
		return;
	}

	/*
	 * During the hotplug for EEH error recovery, we need the EEH
	 * device attached to the parent PE in order to restore the
	 * BARs a bit later. So we keep it for the BAR restore and
	 * remove it from the parent PE during the BAR restore.
	 */
	edev->pdev = NULL;

	/*
	 * The flag "in_error" is used to trace EEH devices for VFs
	 * in error state or not. It's set in eeh_report_error(). If
	 * it's not set, eeh_report_{reset,resume}() won't be called
	 * for the VF EEH device.
	 */
	edev->in_error = false;
	dev->dev.archdata.edev = NULL;
	if (!(edev->pe->state & EEH_PE_KEEP))
		eeh_rmv_from_parent_pe(edev);
	else
		edev->mode |= EEH_DEV_DISCONNECTED;

	/*
	 * We're removing the device from the PCI subsystem, which
	 * means the PCI device driver either doesn't support EEH or
	 * doesn't support it well. So we rely entirely on hotplug to
	 * do the recovery for the specific PCI device.
	 */
	edev->mode |= EEH_DEV_NO_HANDLER;

	eeh_addr_cache_rmv_dev(dev);
	eeh_sysfs_remove_device(dev);
	edev->mode &= ~EEH_DEV_SYSFS;
}

int eeh_unfreeze_pe(struct eeh_pe *pe)
{
	int ret;

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
	if (ret) {
		pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
	if (ret) {
		pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	return ret;
}


static struct pci_device_id eeh_reset_ids[] = {
	{ PCI_DEVICE(0x19a2, 0x0710) },	/* Emulex, BE */
	{ PCI_DEVICE(0x10df, 0xe220) },	/* Emulex, Lancer */
	{ PCI_DEVICE(0x14e4, 0x1657) },	/* Broadcom BCM5719 */
	{ 0 }
};

static int eeh_pe_change_owner(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	struct pci_device_id *id;
	int ret;

	/* Check PE state */
	ret = eeh_ops->get_state(pe, NULL);
	if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
		return 0;

	/* Unfrozen PE, nothing to do */
	if (eeh_state_active(ret))
		return 0;

	/* Frozen PE, check if it needs PE level reset */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
			if (id->vendor != PCI_ANY_ID &&
			    id->vendor != pdev->vendor)
				continue;
			if (id->device != PCI_ANY_ID &&
			    id->device != pdev->device)
				continue;
			if (id->subvendor != PCI_ANY_ID &&
			    id->subvendor != pdev->subsystem_vendor)
				continue;
			if (id->subdevice != PCI_ANY_ID &&
			    id->subdevice != pdev->subsystem_device)
				continue;

			return eeh_pe_reset_and_recover(pe);
		}
	}

	ret = eeh_unfreeze_pe(pe);
	if (!ret)
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
	return ret;
}

/**
 * eeh_dev_open - Increase count of pass through devices for PE
 * @pdev: PCI device
 *
 * Increase the count of passed-through devices for the indicated
 * PE. As a result, the EEH errors detected on the PE won't be
 * reported. The PE owner will be responsible for detection
 * and recovery.
 */
int eeh_dev_open(struct pci_dev *pdev)
{
	struct eeh_dev *edev;
	int ret = -ENODEV;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		goto out;

	/*
	 * The PE might have been put into frozen state, but we
	 * didn't detect that yet. The passed through PCI devices
	 * in frozen PE won't work properly. Clear the frozen state
	 * in advance.
	 */
	ret = eeh_pe_change_owner(edev->pe);
	if (ret)
		goto out;

	/* Increase PE's pass through count */
	atomic_inc(&edev->pe->pass_dev_cnt);
	mutex_unlock(&eeh_dev_mutex);

	return 0;
out:
	mutex_unlock(&eeh_dev_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(eeh_dev_open);

/**
 * eeh_dev_release - Decrease count of pass through devices for PE
 * @pdev: PCI device
 *
 * Decrease the count of passed-through devices for the indicated
 * PE. If there are no passed-through devices left in the PE, the
 * EEH errors detected on the PE will be reported and handled as
 * usual.
 */
void eeh_dev_release(struct pci_dev *pdev)
{
	struct eeh_dev *edev;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
		goto out;

	/* Decrease PE's pass through count */
	WARN_ON(atomic_dec_if_positive(&edev->pe->pass_dev_cnt) < 0);
	eeh_pe_change_owner(edev->pe);
out:
	mutex_unlock(&eeh_dev_mutex);
}
EXPORT_SYMBOL(eeh_dev_release);

#ifdef CONFIG_IOMMU_API

static int dev_has_iommu_table(struct device *dev, void *data)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev **ppdev = data;

	if (!dev)
		return 0;

	if (device_iommu_mapped(dev)) {
		*ppdev = pdev;
		return 1;
	}

	return 0;
}

/**
 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
 * @group: IOMMU group
 *
 * The routine is called to convert an IOMMU group to an EEH PE.
 */
struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
{
	struct pci_dev *pdev = NULL;
	struct eeh_dev *edev;
	int ret;

	/* No IOMMU group ? */
	if (!group)
		return NULL;

	ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
	if (!ret || !pdev)
		return NULL;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		return NULL;

	return edev->pe;
}
EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);

#endif /* CONFIG_IOMMU_API */

/**
 * eeh_pe_set_option - Set options for the indicated PE
 * @pe: EEH PE
 * @option: requested option
 *
 * The routine is called to enable or disable EEH functionality
 * on the indicated PE, or to enable IO or DMA for a frozen PE.
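 *
 * A sketch of a typical caller that thaws MMIO on a frozen PE it
 * owns (error handling trimmed, @pe assumed valid):
 *
 *	ret = eeh_pe_set_option(pe, EEH_OPT_THAW_MMIO);
 *	if (ret)
 *		pr_err("EEH: failed to thaw MMIO: %d\n", ret);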
 */
int eeh_pe_set_option(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/*
	 * EEH functionality could possibly be disabled; just return
	 * an error for that case. Note that EEH functionality isn't
	 * expected to be disabled on one specific PE.
	 */
	switch (option) {
	case EEH_OPT_ENABLE:
		if (eeh_enabled()) {
			ret = eeh_pe_change_owner(pe);
			break;
		}
		ret = -EIO;
		break;
	case EEH_OPT_DISABLE:
		break;
	case EEH_OPT_THAW_MMIO:
	case EEH_OPT_THAW_DMA:
	case EEH_OPT_FREEZE_PE:
		if (!eeh_ops || !eeh_ops->set_option) {
			ret = -ENOENT;
			break;
		}

		ret = eeh_pci_enable(pe, option);
		break;
	default:
		pr_debug("%s: Option %d out of range (%d, %d)\n",
			 __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_set_option);

/**
 * eeh_pe_get_state - Retrieve PE's state
 * @pe: EEH PE
 *
 * Retrieve the PE's state, which covers 3 aspects: enabled
 * DMA, enabled IO and asserted reset.
 */
int eeh_pe_get_state(struct eeh_pe *pe)
{
	int result, ret = 0;
	bool rst_active, dma_en, mmio_en;

	/* Existing PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->get_state)
		return -ENOENT;

	/*
	 * If the parent PE is owned by the host kernel and is undergoing
	 * error recovery, we should return the PE state as temporarily
	 * unavailable so that the error recovery on the guest is suspended
	 * until the recovery completes on the host.
	 */
	if (pe->parent &&
	    !(pe->state & EEH_PE_REMOVED) &&
	    (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
		return EEH_PE_STATE_UNAVAIL;

	result = eeh_ops->get_state(pe, NULL);
	rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
	dma_en = !!(result & EEH_STATE_DMA_ENABLED);
	mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);

	if (rst_active)
		ret = EEH_PE_STATE_RESET;
	else if (dma_en && mmio_en)
		ret = EEH_PE_STATE_NORMAL;
	else if (!dma_en && !mmio_en)
		ret = EEH_PE_STATE_STOPPED_IO_DMA;
	else if (!dma_en && mmio_en)
		ret = EEH_PE_STATE_STOPPED_DMA;
	else
		ret = EEH_PE_STATE_UNAVAIL;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);

static int eeh_pe_reenable_devices(struct eeh_pe *pe, bool include_passed)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	int ret = 0;

	eeh_pe_restore_bars(pe);

	/*
	 * Reenable PCI devices as the devices passed
	 * through are always enabled before the reset.
	 */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		ret = pci_reenable_device(pdev);
		if (ret) {
			pr_warn("%s: Failure %d reenabling %s\n",
				__func__, ret, pci_name(pdev));
			return ret;
		}
	}

	/* The PE is still in frozen state */
	if (include_passed || !eeh_pe_passed(pe)) {
		ret = eeh_unfreeze_pe(pe);
	} else
		pr_info("EEH: Note: Leaving passthrough PHB#%x-PE#%x frozen.\n",
			pe->phb->global_number, pe->addr);
	if (!ret)
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED, include_passed);
	return ret;
}


/**
 * eeh_pe_reset - Issue PE reset according to specified type
 * @pe: EEH PE
 * @option: reset type
 * @include_passed: include passed-through PE or not
 *
 * The routine is called to reset the specified PE with the
 * indicated type, either fundamental reset or hot reset.
 * PE reset is the most important part of error recovery.
 */
int eeh_pe_reset(struct eeh_pe *pe, int option, bool include_passed)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
		return -ENOENT;

	switch (option) {
	case EEH_RESET_DEACTIVATE:
		ret = eeh_ops->reset(pe, option);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED, include_passed);
		if (ret)
			break;

		ret = eeh_pe_reenable_devices(pe, include_passed);
		break;
	case EEH_RESET_HOT:
	case EEH_RESET_FUNDAMENTAL:
		/*
		 * Proactively freeze the PE to drop all MMIO access
		 * during reset, which must be blocked since it would
		 * otherwise cause a recursive EEH error.
		 */
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);

		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		ret = eeh_ops->reset(pe, option);
		break;
	default:
		pr_debug("%s: Unsupported option %d\n",
			 __func__, option);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_reset);

/**
 * eeh_pe_configure - Configure PCI bridges after PE reset
 * @pe: EEH PE
 *
 * The routine is called to restore the PCI config space for
 * those PCI devices, especially PCI bridges affected by PE
 * reset issued previously.
 */
int eeh_pe_configure(struct eeh_pe *pe)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_configure);

/**
 * eeh_pe_inject_err - Inject the specified PCI error into the indicated PE
 * @pe: the indicated PE
 * @type: error type
 * @func: error function
 * @addr: address
 * @mask: address mask
 *
 * The routine is called to inject the specified PCI error, which
 * is determined by @type and @func, into the indicated PE for
 * testing purposes.
 */
int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
		      unsigned long addr, unsigned long mask)
{
	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/* Unsupported operation ? */
	if (!eeh_ops || !eeh_ops->err_inject)
		return -ENOENT;

	/* Check on PCI error type */
	if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
		return -EINVAL;

	/* Check on PCI error function */
	if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
		return -EINVAL;

	return eeh_ops->err_inject(pe, type, func, addr, mask);
}
EXPORT_SYMBOL_GPL(eeh_pe_inject_err);

static int proc_eeh_show(struct seq_file *m, void *v)
{
	if (!eeh_enabled()) {
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
			   "no device=%llu\n"
			   "no device node=%llu\n"
			   "no config address=%llu\n"
			   "check not wanted=%llu\n"
			   "eeh_total_mmio_ffs=%llu\n"
			   "eeh_false_positives=%llu\n"
			   "eeh_slot_resets=%llu\n",
			   eeh_stats.no_device,
			   eeh_stats.no_dn,
			   eeh_stats.no_cfg_addr,
			   eeh_stats.ignored_check,
			   eeh_stats.total_mmio_ffs,
			   eeh_stats.false_positives,
			   eeh_stats.slot_resets);
	}

	return 0;
}

#ifdef CONFIG_DEBUG_FS
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
	if (val)
		eeh_clear_flag(EEH_FORCE_DISABLED);
	else
		eeh_add_flag(EEH_FORCE_DISABLED);

	return 0;
}

static int eeh_enable_dbgfs_get(void *data, u64 *val)
{
	if (eeh_enabled())
		*val = 0x1ul;
	else
		*val = 0x0ul;
	return 0;
}

DEFINE_DEBUGFS_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
			 eeh_enable_dbgfs_set, "0x%llx\n");

static ssize_t eeh_force_recover_write(struct file *filp,
				       const char __user *user_buf,
				       size_t count, loff_t *ppos)
{
	struct pci_controller *hose;
	uint32_t phbid, pe_no;
	struct eeh_pe *pe;
	char buf[20];
	int ret;

	ret = simple_write_to_buffer(buf, sizeof(buf), ppos, user_buf, count);
	if (!ret)
		return -EFAULT;

	/*
	 * When PE is NULL the event is a "special" event. Rather than
	 * recovering a specific PE it forces the EEH core to scan for failed
	 * PHBs and recovers each. This needs to be done before any device
	 * recoveries can occur.
	 */
	if (!strncmp(buf, "hwcheck", 7)) {
		__eeh_send_failure_event(NULL);
		return count;
	}

	ret = sscanf(buf, "%x:%x", &phbid, &pe_no);
	if (ret != 2)
		return -EINVAL;

	hose = pci_find_controller_for_domain(phbid);
	if (!hose)
		return -ENODEV;

	/* Retrieve PE */
	pe = eeh_pe_get(hose, pe_no, 0);
	if (!pe)
		return -ENODEV;

	/*
	 * We don't do any state checking here since the detection
	 * process is async to the recovery process. The recovery
	 * thread *should* not break even if we schedule a recovery
	 * from an odd state (e.g. PE removed, or recovery of a
	 * non-isolated PE)
	 */
	__eeh_send_failure_event(pe);

	return ret < 0 ? ret : count;
}

static const struct file_operations eeh_force_recover_fops = {
	.open	= simple_open,
	.llseek	= no_llseek,
	.write	= eeh_force_recover_write,
};
#endif

static int __init eeh_init_proc(void)
{
	if (machine_is(pseries) || machine_is(powernv)) {
		proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show);
#ifdef CONFIG_DEBUG_FS
		debugfs_create_file_unsafe("eeh_enable", 0600,
					   powerpc_debugfs_root, NULL,
					   &eeh_enable_dbgfs_ops);
		debugfs_create_u32("eeh_max_freezes", 0600,
				   powerpc_debugfs_root, &eeh_max_freezes);
		debugfs_create_bool("eeh_disable_recovery", 0600,
				    powerpc_debugfs_root,
				    &eeh_debugfs_no_recover);
		debugfs_create_file_unsafe("eeh_force_recover", 0600,
					   powerpc_debugfs_root, NULL,
					   &eeh_force_recover_fops);
		eeh_cache_debugfs_init();
#endif
	}

	return 0;
}
__initcall(eeh_init_proc);