1 /* 2 * Copyright IBM Corporation 2001, 2005, 2006 3 * Copyright Dave Engebretsen & Todd Inglett 2001 4 * Copyright Linas Vepstas 2005, 2006 5 * Copyright 2001-2012 IBM Corporation. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * 21 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> 22 */ 23 24 #include <linux/delay.h> 25 #include <linux/debugfs.h> 26 #include <linux/sched.h> 27 #include <linux/init.h> 28 #include <linux/list.h> 29 #include <linux/pci.h> 30 #include <linux/iommu.h> 31 #include <linux/proc_fs.h> 32 #include <linux/rbtree.h> 33 #include <linux/reboot.h> 34 #include <linux/seq_file.h> 35 #include <linux/spinlock.h> 36 #include <linux/export.h> 37 #include <linux/of.h> 38 39 #include <linux/atomic.h> 40 #include <asm/debug.h> 41 #include <asm/eeh.h> 42 #include <asm/eeh_event.h> 43 #include <asm/io.h> 44 #include <asm/iommu.h> 45 #include <asm/machdep.h> 46 #include <asm/ppc-pci.h> 47 #include <asm/rtas.h> 48 49 50 /** Overview: 51 * EEH, or "Extended Error Handling" is a PCI bridge technology for 52 * dealing with PCI bus errors that can't be dealt with within the 53 * usual PCI framework, except by check-stopping the CPU. Systems 54 * that are designed for high-availability/reliability cannot afford 55 * to crash due to a "mere" PCI error, thus the need for EEH. 56 * An EEH-capable bridge operates by converting a detected error 57 * into a "slot freeze", taking the PCI adapter off-line, making 58 * the slot behave, from the OS'es point of view, as if the slot 59 * were "empty": all reads return 0xff's and all writes are silently 60 * ignored. EEH slot isolation events can be triggered by parity 61 * errors on the address or data busses (e.g. during posted writes), 62 * which in turn might be caused by low voltage on the bus, dust, 63 * vibration, humidity, radioactivity or plain-old failed hardware. 64 * 65 * Note, however, that one of the leading causes of EEH slot 66 * freeze events are buggy device drivers, buggy device microcode, 67 * or buggy device hardware. This is because any attempt by the 68 * device to bus-master data to a memory address that is not 69 * assigned to the device will trigger a slot freeze. (The idea 70 * is to prevent devices-gone-wild from corrupting system memory). 71 * Buggy hardware/drivers will have a miserable time co-existing 72 * with EEH. 73 * 74 * Ideally, a PCI device driver, when suspecting that an isolation 75 * event has occurred (e.g. by reading 0xff's), will then ask EEH 76 * whether this is the case, and then take appropriate steps to 77 * reset the PCI slot, the PCI device, and then resume operations. 78 * However, until that day, the checking is done here, with the 79 * eeh_check_failure() routine embedded in the MMIO macros. If 80 * the slot is found to be isolated, an "EEH Event" is synthesized 81 * and sent out for processing. 82 */ 83 84 /* If a device driver keeps reading an MMIO register in an interrupt 85 * handler after a slot isolation event, it might be broken. 86 * This sets the threshold for how many read attempts we allow 87 * before printing an error message. 88 */ 89 #define EEH_MAX_FAILS 2100000 90 91 /* Time to wait for a PCI slot to report status, in milliseconds */ 92 #define PCI_BUS_RESET_WAIT_MSEC (5*60*1000) 93 94 /* 95 * EEH probe mode support, which is part of the flags, 96 * is to support multiple platforms for EEH. Some platforms 97 * like pSeries do PCI emunation based on device tree. 98 * However, other platforms like powernv probe PCI devices 99 * from hardware. The flag is used to distinguish that. 100 * In addition, struct eeh_ops::probe would be invoked for 101 * particular OF node or PCI device so that the corresponding 102 * PE would be created there. 103 */ 104 int eeh_subsystem_flags; 105 EXPORT_SYMBOL(eeh_subsystem_flags); 106 107 /* 108 * EEH allowed maximal frozen times. If one particular PE's 109 * frozen count in last hour exceeds this limit, the PE will 110 * be forced to be offline permanently. 111 */ 112 int eeh_max_freezes = 5; 113 114 /* Platform dependent EEH operations */ 115 struct eeh_ops *eeh_ops = NULL; 116 117 /* Lock to avoid races due to multiple reports of an error */ 118 DEFINE_RAW_SPINLOCK(confirm_error_lock); 119 120 /* Lock to protect passed flags */ 121 static DEFINE_MUTEX(eeh_dev_mutex); 122 123 /* Buffer for reporting pci register dumps. Its here in BSS, and 124 * not dynamically alloced, so that it ends up in RMO where RTAS 125 * can access it. 126 */ 127 #define EEH_PCI_REGS_LOG_LEN 8192 128 static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN]; 129 130 /* 131 * The struct is used to maintain the EEH global statistic 132 * information. Besides, the EEH global statistics will be 133 * exported to user space through procfs 134 */ 135 struct eeh_stats { 136 u64 no_device; /* PCI device not found */ 137 u64 no_dn; /* OF node not found */ 138 u64 no_cfg_addr; /* Config address not found */ 139 u64 ignored_check; /* EEH check skipped */ 140 u64 total_mmio_ffs; /* Total EEH checks */ 141 u64 false_positives; /* Unnecessary EEH checks */ 142 u64 slot_resets; /* PE reset */ 143 }; 144 145 static struct eeh_stats eeh_stats; 146 147 static int __init eeh_setup(char *str) 148 { 149 if (!strcmp(str, "off")) 150 eeh_add_flag(EEH_FORCE_DISABLED); 151 else if (!strcmp(str, "early_log")) 152 eeh_add_flag(EEH_EARLY_DUMP_LOG); 153 154 return 1; 155 } 156 __setup("eeh=", eeh_setup); 157 158 /* 159 * This routine captures assorted PCI configuration space data 160 * for the indicated PCI device, and puts them into a buffer 161 * for RTAS error logging. 162 */ 163 static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) 164 { 165 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 166 u32 cfg; 167 int cap, i; 168 int n = 0, l = 0; 169 char buffer[128]; 170 171 n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n", 172 edev->phb->global_number, pdn->busno, 173 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); 174 pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n", 175 edev->phb->global_number, pdn->busno, 176 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); 177 178 eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg); 179 n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); 180 pr_warn("EEH: PCI device/vendor: %08x\n", cfg); 181 182 eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg); 183 n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); 184 pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); 185 186 /* Gather bridge-specific registers */ 187 if (edev->mode & EEH_DEV_BRIDGE) { 188 eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg); 189 n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); 190 pr_warn("EEH: Bridge secondary status: %04x\n", cfg); 191 192 eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg); 193 n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); 194 pr_warn("EEH: Bridge control: %04x\n", cfg); 195 } 196 197 /* Dump out the PCI-X command and status regs */ 198 cap = edev->pcix_cap; 199 if (cap) { 200 eeh_ops->read_config(pdn, cap, 4, &cfg); 201 n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); 202 pr_warn("EEH: PCI-X cmd: %08x\n", cfg); 203 204 eeh_ops->read_config(pdn, cap+4, 4, &cfg); 205 n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); 206 pr_warn("EEH: PCI-X status: %08x\n", cfg); 207 } 208 209 /* If PCI-E capable, dump PCI-E cap 10 */ 210 cap = edev->pcie_cap; 211 if (cap) { 212 n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); 213 pr_warn("EEH: PCI-E capabilities and status follow:\n"); 214 215 for (i=0; i<=8; i++) { 216 eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); 217 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 218 219 if ((i % 4) == 0) { 220 if (i != 0) 221 pr_warn("%s\n", buffer); 222 223 l = scnprintf(buffer, sizeof(buffer), 224 "EEH: PCI-E %02x: %08x ", 225 4*i, cfg); 226 } else { 227 l += scnprintf(buffer+l, sizeof(buffer)-l, 228 "%08x ", cfg); 229 } 230 231 } 232 233 pr_warn("%s\n", buffer); 234 } 235 236 /* If AER capable, dump it */ 237 cap = edev->aer_cap; 238 if (cap) { 239 n += scnprintf(buf+n, len-n, "pci-e AER:\n"); 240 pr_warn("EEH: PCI-E AER capability register set follows:\n"); 241 242 for (i=0; i<=13; i++) { 243 eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); 244 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 245 246 if ((i % 4) == 0) { 247 if (i != 0) 248 pr_warn("%s\n", buffer); 249 250 l = scnprintf(buffer, sizeof(buffer), 251 "EEH: PCI-E AER %02x: %08x ", 252 4*i, cfg); 253 } else { 254 l += scnprintf(buffer+l, sizeof(buffer)-l, 255 "%08x ", cfg); 256 } 257 } 258 259 pr_warn("%s\n", buffer); 260 } 261 262 return n; 263 } 264 265 static void *eeh_dump_pe_log(void *data, void *flag) 266 { 267 struct eeh_pe *pe = data; 268 struct eeh_dev *edev, *tmp; 269 size_t *plen = flag; 270 271 /* If the PE's config space is blocked, 0xFF's will be 272 * returned. It's pointless to collect the log in this 273 * case. 274 */ 275 if (pe->state & EEH_PE_CFG_BLOCKED) 276 return NULL; 277 278 eeh_pe_for_each_dev(pe, edev, tmp) 279 *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, 280 EEH_PCI_REGS_LOG_LEN - *plen); 281 282 return NULL; 283 } 284 285 /** 286 * eeh_slot_error_detail - Generate combined log including driver log and error log 287 * @pe: EEH PE 288 * @severity: temporary or permanent error log 289 * 290 * This routine should be called to generate the combined log, which 291 * is comprised of driver log and error log. The driver log is figured 292 * out from the config space of the corresponding PCI device, while 293 * the error log is fetched through platform dependent function call. 294 */ 295 void eeh_slot_error_detail(struct eeh_pe *pe, int severity) 296 { 297 size_t loglen = 0; 298 299 /* 300 * When the PHB is fenced or dead, it's pointless to collect 301 * the data from PCI config space because it should return 302 * 0xFF's. For ER, we still retrieve the data from the PCI 303 * config space. 304 * 305 * For pHyp, we have to enable IO for log retrieval. Otherwise, 306 * 0xFF's is always returned from PCI config space. 307 */ 308 if (!(pe->type & EEH_PE_PHB)) { 309 if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG)) 310 eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 311 eeh_ops->configure_bridge(pe); 312 eeh_pe_restore_bars(pe); 313 314 pci_regs_buf[0] = 0; 315 eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); 316 } 317 318 eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); 319 } 320 321 /** 322 * eeh_token_to_phys - Convert EEH address token to phys address 323 * @token: I/O token, should be address in the form 0xA.... 324 * 325 * This routine should be called to convert virtual I/O address 326 * to physical one. 327 */ 328 static inline unsigned long eeh_token_to_phys(unsigned long token) 329 { 330 pte_t *ptep; 331 unsigned long pa; 332 int hugepage_shift; 333 334 /* 335 * We won't find hugepages here(this is iomem). Hence we are not 336 * worried about _PAGE_SPLITTING/collapse. Also we will not hit 337 * page table free, because of init_mm. 338 */ 339 ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift); 340 if (!ptep) 341 return token; 342 WARN_ON(hugepage_shift); 343 pa = pte_pfn(*ptep) << PAGE_SHIFT; 344 345 return pa | (token & (PAGE_SIZE-1)); 346 } 347 348 /* 349 * On PowerNV platform, we might already have fenced PHB there. 350 * For that case, it's meaningless to recover frozen PE. Intead, 351 * We have to handle fenced PHB firstly. 352 */ 353 static int eeh_phb_check_failure(struct eeh_pe *pe) 354 { 355 struct eeh_pe *phb_pe; 356 unsigned long flags; 357 int ret; 358 359 if (!eeh_has_flag(EEH_PROBE_MODE_DEV)) 360 return -EPERM; 361 362 /* Find the PHB PE */ 363 phb_pe = eeh_phb_pe_get(pe->phb); 364 if (!phb_pe) { 365 pr_warn("%s Can't find PE for PHB#%d\n", 366 __func__, pe->phb->global_number); 367 return -EEXIST; 368 } 369 370 /* If the PHB has been in problematic state */ 371 eeh_serialize_lock(&flags); 372 if (phb_pe->state & EEH_PE_ISOLATED) { 373 ret = 0; 374 goto out; 375 } 376 377 /* Check PHB state */ 378 ret = eeh_ops->get_state(phb_pe, NULL); 379 if ((ret < 0) || 380 (ret == EEH_STATE_NOT_SUPPORT) || 381 (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) == 382 (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) { 383 ret = 0; 384 goto out; 385 } 386 387 /* Isolate the PHB and send event */ 388 eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); 389 eeh_serialize_unlock(flags); 390 391 pr_err("EEH: PHB#%x failure detected, location: %s\n", 392 phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); 393 dump_stack(); 394 eeh_send_failure_event(phb_pe); 395 396 return 1; 397 out: 398 eeh_serialize_unlock(flags); 399 return ret; 400 } 401 402 /** 403 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze 404 * @edev: eeh device 405 * 406 * Check for an EEH failure for the given device node. Call this 407 * routine if the result of a read was all 0xff's and you want to 408 * find out if this is due to an EEH slot freeze. This routine 409 * will query firmware for the EEH status. 410 * 411 * Returns 0 if there has not been an EEH error; otherwise returns 412 * a non-zero value and queues up a slot isolation event notification. 413 * 414 * It is safe to call this routine in an interrupt context. 415 */ 416 int eeh_dev_check_failure(struct eeh_dev *edev) 417 { 418 int ret; 419 int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 420 unsigned long flags; 421 struct pci_dn *pdn; 422 struct pci_dev *dev; 423 struct eeh_pe *pe, *parent_pe, *phb_pe; 424 int rc = 0; 425 const char *location = NULL; 426 427 eeh_stats.total_mmio_ffs++; 428 429 if (!eeh_enabled()) 430 return 0; 431 432 if (!edev) { 433 eeh_stats.no_dn++; 434 return 0; 435 } 436 dev = eeh_dev_to_pci_dev(edev); 437 pe = eeh_dev_to_pe(edev); 438 439 /* Access to IO BARs might get this far and still not want checking. */ 440 if (!pe) { 441 eeh_stats.ignored_check++; 442 pr_debug("EEH: Ignored check for %s\n", 443 eeh_pci_name(dev)); 444 return 0; 445 } 446 447 if (!pe->addr && !pe->config_addr) { 448 eeh_stats.no_cfg_addr++; 449 return 0; 450 } 451 452 /* 453 * On PowerNV platform, we might already have fenced PHB 454 * there and we need take care of that firstly. 455 */ 456 ret = eeh_phb_check_failure(pe); 457 if (ret > 0) 458 return ret; 459 460 /* 461 * If the PE isn't owned by us, we shouldn't check the 462 * state. Instead, let the owner handle it if the PE has 463 * been frozen. 464 */ 465 if (eeh_pe_passed(pe)) 466 return 0; 467 468 /* If we already have a pending isolation event for this 469 * slot, we know it's bad already, we don't need to check. 470 * Do this checking under a lock; as multiple PCI devices 471 * in one slot might report errors simultaneously, and we 472 * only want one error recovery routine running. 473 */ 474 eeh_serialize_lock(&flags); 475 rc = 1; 476 if (pe->state & EEH_PE_ISOLATED) { 477 pe->check_count++; 478 if (pe->check_count % EEH_MAX_FAILS == 0) { 479 pdn = eeh_dev_to_pdn(edev); 480 if (pdn->node) 481 location = of_get_property(pdn->node, "ibm,loc-code", NULL); 482 printk(KERN_ERR "EEH: %d reads ignored for recovering device at " 483 "location=%s driver=%s pci addr=%s\n", 484 pe->check_count, 485 location ? location : "unknown", 486 eeh_driver_name(dev), eeh_pci_name(dev)); 487 printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", 488 eeh_driver_name(dev)); 489 dump_stack(); 490 } 491 goto dn_unlock; 492 } 493 494 /* 495 * Now test for an EEH failure. This is VERY expensive. 496 * Note that the eeh_config_addr may be a parent device 497 * in the case of a device behind a bridge, or it may be 498 * function zero of a multi-function device. 499 * In any case they must share a common PHB. 500 */ 501 ret = eeh_ops->get_state(pe, NULL); 502 503 /* Note that config-io to empty slots may fail; 504 * they are empty when they don't have children. 505 * We will punt with the following conditions: Failure to get 506 * PE's state, EEH not support and Permanently unavailable 507 * state, PE is in good state. 508 */ 509 if ((ret < 0) || 510 (ret == EEH_STATE_NOT_SUPPORT) || 511 ((ret & active_flags) == active_flags)) { 512 eeh_stats.false_positives++; 513 pe->false_positives++; 514 rc = 0; 515 goto dn_unlock; 516 } 517 518 /* 519 * It should be corner case that the parent PE has been 520 * put into frozen state as well. We should take care 521 * that at first. 522 */ 523 parent_pe = pe->parent; 524 while (parent_pe) { 525 /* Hit the ceiling ? */ 526 if (parent_pe->type & EEH_PE_PHB) 527 break; 528 529 /* Frozen parent PE ? */ 530 ret = eeh_ops->get_state(parent_pe, NULL); 531 if (ret > 0 && 532 (ret & active_flags) != active_flags) 533 pe = parent_pe; 534 535 /* Next parent level */ 536 parent_pe = parent_pe->parent; 537 } 538 539 eeh_stats.slot_resets++; 540 541 /* Avoid repeated reports of this failure, including problems 542 * with other functions on this device, and functions under 543 * bridges. 544 */ 545 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 546 eeh_serialize_unlock(flags); 547 548 /* Most EEH events are due to device driver bugs. Having 549 * a stack trace will help the device-driver authors figure 550 * out what happened. So print that out. 551 */ 552 phb_pe = eeh_phb_pe_get(pe->phb); 553 pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", 554 pe->phb->global_number, pe->addr); 555 pr_err("EEH: PE location: %s, PHB location: %s\n", 556 eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); 557 dump_stack(); 558 559 eeh_send_failure_event(pe); 560 561 return 1; 562 563 dn_unlock: 564 eeh_serialize_unlock(flags); 565 return rc; 566 } 567 568 EXPORT_SYMBOL_GPL(eeh_dev_check_failure); 569 570 /** 571 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze 572 * @token: I/O address 573 * 574 * Check for an EEH failure at the given I/O address. Call this 575 * routine if the result of a read was all 0xff's and you want to 576 * find out if this is due to an EEH slot freeze event. This routine 577 * will query firmware for the EEH status. 578 * 579 * Note this routine is safe to call in an interrupt context. 580 */ 581 int eeh_check_failure(const volatile void __iomem *token) 582 { 583 unsigned long addr; 584 struct eeh_dev *edev; 585 586 /* Finding the phys addr + pci device; this is pretty quick. */ 587 addr = eeh_token_to_phys((unsigned long __force) token); 588 edev = eeh_addr_cache_get_dev(addr); 589 if (!edev) { 590 eeh_stats.no_device++; 591 return 0; 592 } 593 594 return eeh_dev_check_failure(edev); 595 } 596 EXPORT_SYMBOL(eeh_check_failure); 597 598 599 /** 600 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot 601 * @pe: EEH PE 602 * 603 * This routine should be called to reenable frozen MMIO or DMA 604 * so that it would work correctly again. It's useful while doing 605 * recovery or log collection on the indicated device. 606 */ 607 int eeh_pci_enable(struct eeh_pe *pe, int function) 608 { 609 int active_flag, rc; 610 611 /* 612 * pHyp doesn't allow to enable IO or DMA on unfrozen PE. 613 * Also, it's pointless to enable them on unfrozen PE. So 614 * we have to check before enabling IO or DMA. 615 */ 616 switch (function) { 617 case EEH_OPT_THAW_MMIO: 618 active_flag = EEH_STATE_MMIO_ACTIVE; 619 break; 620 case EEH_OPT_THAW_DMA: 621 active_flag = EEH_STATE_DMA_ACTIVE; 622 break; 623 case EEH_OPT_DISABLE: 624 case EEH_OPT_ENABLE: 625 case EEH_OPT_FREEZE_PE: 626 active_flag = 0; 627 break; 628 default: 629 pr_warn("%s: Invalid function %d\n", 630 __func__, function); 631 return -EINVAL; 632 } 633 634 /* 635 * Check if IO or DMA has been enabled before 636 * enabling them. 637 */ 638 if (active_flag) { 639 rc = eeh_ops->get_state(pe, NULL); 640 if (rc < 0) 641 return rc; 642 643 /* Needn't enable it at all */ 644 if (rc == EEH_STATE_NOT_SUPPORT) 645 return 0; 646 647 /* It's already enabled */ 648 if (rc & active_flag) 649 return 0; 650 } 651 652 653 /* Issue the request */ 654 rc = eeh_ops->set_option(pe, function); 655 if (rc) 656 pr_warn("%s: Unexpected state change %d on " 657 "PHB#%d-PE#%x, err=%d\n", 658 __func__, function, pe->phb->global_number, 659 pe->addr, rc); 660 661 /* Check if the request is finished successfully */ 662 if (active_flag) { 663 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 664 if (rc <= 0) 665 return rc; 666 667 if (rc & active_flag) 668 return 0; 669 670 return -EIO; 671 } 672 673 return rc; 674 } 675 676 static void *eeh_disable_and_save_dev_state(void *data, void *userdata) 677 { 678 struct eeh_dev *edev = data; 679 struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); 680 struct pci_dev *dev = userdata; 681 682 /* 683 * The caller should have disabled and saved the 684 * state for the specified device 685 */ 686 if (!pdev || pdev == dev) 687 return NULL; 688 689 /* Ensure we have D0 power state */ 690 pci_set_power_state(pdev, PCI_D0); 691 692 /* Save device state */ 693 pci_save_state(pdev); 694 695 /* 696 * Disable device to avoid any DMA traffic and 697 * interrupt from the device 698 */ 699 pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); 700 701 return NULL; 702 } 703 704 static void *eeh_restore_dev_state(void *data, void *userdata) 705 { 706 struct eeh_dev *edev = data; 707 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 708 struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); 709 struct pci_dev *dev = userdata; 710 711 if (!pdev) 712 return NULL; 713 714 /* Apply customization from firmware */ 715 if (pdn && eeh_ops->restore_config) 716 eeh_ops->restore_config(pdn); 717 718 /* The caller should restore state for the specified device */ 719 if (pdev != dev) 720 pci_restore_state(pdev); 721 722 return NULL; 723 } 724 725 /** 726 * pcibios_set_pcie_slot_reset - Set PCI-E reset state 727 * @dev: pci device struct 728 * @state: reset state to enter 729 * 730 * Return value: 731 * 0 if success 732 */ 733 int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) 734 { 735 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); 736 struct eeh_pe *pe = eeh_dev_to_pe(edev); 737 738 if (!pe) { 739 pr_err("%s: No PE found on PCI device %s\n", 740 __func__, pci_name(dev)); 741 return -EINVAL; 742 } 743 744 switch (state) { 745 case pcie_deassert_reset: 746 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 747 eeh_unfreeze_pe(pe, false); 748 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); 749 eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); 750 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 751 break; 752 case pcie_hot_reset: 753 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 754 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 755 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 756 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 757 eeh_ops->reset(pe, EEH_RESET_HOT); 758 break; 759 case pcie_warm_reset: 760 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 761 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 762 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 763 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 764 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 765 break; 766 default: 767 eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED); 768 return -EINVAL; 769 }; 770 771 return 0; 772 } 773 774 /** 775 * eeh_set_pe_freset - Check the required reset for the indicated device 776 * @data: EEH device 777 * @flag: return value 778 * 779 * Each device might have its preferred reset type: fundamental or 780 * hot reset. The routine is used to collected the information for 781 * the indicated device and its children so that the bunch of the 782 * devices could be reset properly. 783 */ 784 static void *eeh_set_dev_freset(void *data, void *flag) 785 { 786 struct pci_dev *dev; 787 unsigned int *freset = (unsigned int *)flag; 788 struct eeh_dev *edev = (struct eeh_dev *)data; 789 790 dev = eeh_dev_to_pci_dev(edev); 791 if (dev) 792 *freset |= dev->needs_freset; 793 794 return NULL; 795 } 796 797 /** 798 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second 799 * @pe: EEH PE 800 * 801 * Assert the PCI #RST line for 1/4 second. 802 */ 803 static void eeh_reset_pe_once(struct eeh_pe *pe) 804 { 805 unsigned int freset = 0; 806 807 /* Determine type of EEH reset required for 808 * Partitionable Endpoint, a hot-reset (1) 809 * or a fundamental reset (3). 810 * A fundamental reset required by any device under 811 * Partitionable Endpoint trumps hot-reset. 812 */ 813 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); 814 815 if (freset) 816 eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 817 else 818 eeh_ops->reset(pe, EEH_RESET_HOT); 819 820 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 821 } 822 823 /** 824 * eeh_reset_pe - Reset the indicated PE 825 * @pe: EEH PE 826 * 827 * This routine should be called to reset indicated device, including 828 * PE. A PE might include multiple PCI devices and sometimes PCI bridges 829 * might be involved as well. 830 */ 831 int eeh_reset_pe(struct eeh_pe *pe) 832 { 833 int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 834 int i, state, ret; 835 836 /* Mark as reset and block config space */ 837 eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); 838 839 /* Take three shots at resetting the bus */ 840 for (i = 0; i < 3; i++) { 841 eeh_reset_pe_once(pe); 842 843 /* 844 * EEH_PE_ISOLATED is expected to be removed after 845 * BAR restore. 846 */ 847 state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 848 if ((state & flags) == flags) { 849 ret = 0; 850 goto out; 851 } 852 853 if (state < 0) { 854 pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x", 855 __func__, pe->phb->global_number, pe->addr); 856 ret = -ENOTRECOVERABLE; 857 goto out; 858 } 859 860 /* We might run out of credits */ 861 ret = -EIO; 862 pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n", 863 __func__, state, pe->phb->global_number, pe->addr, (i + 1)); 864 } 865 866 out: 867 eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED); 868 return ret; 869 } 870 871 /** 872 * eeh_save_bars - Save device bars 873 * @edev: PCI device associated EEH device 874 * 875 * Save the values of the device bars. Unlike the restore 876 * routine, this routine is *not* recursive. This is because 877 * PCI devices are added individually; but, for the restore, 878 * an entire slot is reset at a time. 879 */ 880 void eeh_save_bars(struct eeh_dev *edev) 881 { 882 struct pci_dn *pdn; 883 int i; 884 885 pdn = eeh_dev_to_pdn(edev); 886 if (!pdn) 887 return; 888 889 for (i = 0; i < 16; i++) 890 eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]); 891 892 /* 893 * For PCI bridges including root port, we need enable bus 894 * master explicitly. Otherwise, it can't fetch IODA table 895 * entries correctly. So we cache the bit in advance so that 896 * we can restore it after reset, either PHB range or PE range. 897 */ 898 if (edev->mode & EEH_DEV_BRIDGE) 899 edev->config_space[1] |= PCI_COMMAND_MASTER; 900 } 901 902 /** 903 * eeh_ops_register - Register platform dependent EEH operations 904 * @ops: platform dependent EEH operations 905 * 906 * Register the platform dependent EEH operation callback 907 * functions. The platform should call this function before 908 * any other EEH operations. 909 */ 910 int __init eeh_ops_register(struct eeh_ops *ops) 911 { 912 if (!ops->name) { 913 pr_warn("%s: Invalid EEH ops name for %p\n", 914 __func__, ops); 915 return -EINVAL; 916 } 917 918 if (eeh_ops && eeh_ops != ops) { 919 pr_warn("%s: EEH ops of platform %s already existing (%s)\n", 920 __func__, eeh_ops->name, ops->name); 921 return -EEXIST; 922 } 923 924 eeh_ops = ops; 925 926 return 0; 927 } 928 929 /** 930 * eeh_ops_unregister - Unreigster platform dependent EEH operations 931 * @name: name of EEH platform operations 932 * 933 * Unregister the platform dependent EEH operation callback 934 * functions. 935 */ 936 int __exit eeh_ops_unregister(const char *name) 937 { 938 if (!name || !strlen(name)) { 939 pr_warn("%s: Invalid EEH ops name\n", 940 __func__); 941 return -EINVAL; 942 } 943 944 if (eeh_ops && !strcmp(eeh_ops->name, name)) { 945 eeh_ops = NULL; 946 return 0; 947 } 948 949 return -EEXIST; 950 } 951 952 static int eeh_reboot_notifier(struct notifier_block *nb, 953 unsigned long action, void *unused) 954 { 955 eeh_clear_flag(EEH_ENABLED); 956 return NOTIFY_DONE; 957 } 958 959 static struct notifier_block eeh_reboot_nb = { 960 .notifier_call = eeh_reboot_notifier, 961 }; 962 963 /** 964 * eeh_init - EEH initialization 965 * 966 * Initialize EEH by trying to enable it for all of the adapters in the system. 967 * As a side effect we can determine here if eeh is supported at all. 968 * Note that we leave EEH on so failed config cycles won't cause a machine 969 * check. If a user turns off EEH for a particular adapter they are really 970 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't 971 * grant access to a slot if EEH isn't enabled, and so we always enable 972 * EEH for all slots/all devices. 973 * 974 * The eeh-force-off option disables EEH checking globally, for all slots. 975 * Even if force-off is set, the EEH hardware is still enabled, so that 976 * newer systems can boot. 977 */ 978 int eeh_init(void) 979 { 980 struct pci_controller *hose, *tmp; 981 struct pci_dn *pdn; 982 static int cnt = 0; 983 int ret = 0; 984 985 /* 986 * We have to delay the initialization on PowerNV after 987 * the PCI hierarchy tree has been built because the PEs 988 * are figured out based on PCI devices instead of device 989 * tree nodes 990 */ 991 if (machine_is(powernv) && cnt++ <= 0) 992 return ret; 993 994 /* Register reboot notifier */ 995 ret = register_reboot_notifier(&eeh_reboot_nb); 996 if (ret) { 997 pr_warn("%s: Failed to register notifier (%d)\n", 998 __func__, ret); 999 return ret; 1000 } 1001 1002 /* call platform initialization function */ 1003 if (!eeh_ops) { 1004 pr_warn("%s: Platform EEH operation not found\n", 1005 __func__); 1006 return -EEXIST; 1007 } else if ((ret = eeh_ops->init())) 1008 return ret; 1009 1010 /* Initialize EEH event */ 1011 ret = eeh_event_init(); 1012 if (ret) 1013 return ret; 1014 1015 /* Enable EEH for all adapters */ 1016 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1017 pdn = hose->pci_data; 1018 traverse_pci_dn(pdn, eeh_ops->probe, NULL); 1019 } 1020 1021 /* 1022 * Call platform post-initialization. Actually, It's good chance 1023 * to inform platform that EEH is ready to supply service if the 1024 * I/O cache stuff has been built up. 1025 */ 1026 if (eeh_ops->post_init) { 1027 ret = eeh_ops->post_init(); 1028 if (ret) 1029 return ret; 1030 } 1031 1032 if (eeh_enabled()) 1033 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); 1034 else 1035 pr_warn("EEH: No capable adapters found\n"); 1036 1037 return ret; 1038 } 1039 1040 core_initcall_sync(eeh_init); 1041 1042 /** 1043 * eeh_add_device_early - Enable EEH for the indicated device node 1044 * @pdn: PCI device node for which to set up EEH 1045 * 1046 * This routine must be used to perform EEH initialization for PCI 1047 * devices that were added after system boot (e.g. hotplug, dlpar). 1048 * This routine must be called before any i/o is performed to the 1049 * adapter (inluding any config-space i/o). 1050 * Whether this actually enables EEH or not for this device depends 1051 * on the CEC architecture, type of the device, on earlier boot 1052 * command-line arguments & etc. 1053 */ 1054 void eeh_add_device_early(struct pci_dn *pdn) 1055 { 1056 struct pci_controller *phb; 1057 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 1058 1059 if (!edev || !eeh_enabled()) 1060 return; 1061 1062 if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) 1063 return; 1064 1065 /* USB Bus children of PCI devices will not have BUID's */ 1066 phb = edev->phb; 1067 if (NULL == phb || 1068 (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid)) 1069 return; 1070 1071 eeh_ops->probe(pdn, NULL); 1072 } 1073 1074 /** 1075 * eeh_add_device_tree_early - Enable EEH for the indicated device 1076 * @pdn: PCI device node 1077 * 1078 * This routine must be used to perform EEH initialization for the 1079 * indicated PCI device that was added after system boot (e.g. 1080 * hotplug, dlpar). 1081 */ 1082 void eeh_add_device_tree_early(struct pci_dn *pdn) 1083 { 1084 struct pci_dn *n; 1085 1086 if (!pdn) 1087 return; 1088 1089 list_for_each_entry(n, &pdn->child_list, list) 1090 eeh_add_device_tree_early(n); 1091 eeh_add_device_early(pdn); 1092 } 1093 EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); 1094 1095 /** 1096 * eeh_add_device_late - Perform EEH initialization for the indicated pci device 1097 * @dev: pci device for which to set up EEH 1098 * 1099 * This routine must be used to complete EEH initialization for PCI 1100 * devices that were added after system boot (e.g. hotplug, dlpar). 1101 */ 1102 void eeh_add_device_late(struct pci_dev *dev) 1103 { 1104 struct pci_dn *pdn; 1105 struct eeh_dev *edev; 1106 1107 if (!dev || !eeh_enabled()) 1108 return; 1109 1110 pr_debug("EEH: Adding device %s\n", pci_name(dev)); 1111 1112 pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); 1113 edev = pdn_to_eeh_dev(pdn); 1114 if (edev->pdev == dev) { 1115 pr_debug("EEH: Already referenced !\n"); 1116 return; 1117 } 1118 1119 if (eeh_has_flag(EEH_PROBE_MODE_DEV)) 1120 eeh_ops->probe(pdn, NULL); 1121 1122 /* 1123 * The EEH cache might not be removed correctly because of 1124 * unbalanced kref to the device during unplug time, which 1125 * relies on pcibios_release_device(). So we have to remove 1126 * that here explicitly. 1127 */ 1128 if (edev->pdev) { 1129 eeh_rmv_from_parent_pe(edev); 1130 eeh_addr_cache_rmv_dev(edev->pdev); 1131 eeh_sysfs_remove_device(edev->pdev); 1132 edev->mode &= ~EEH_DEV_SYSFS; 1133 1134 /* 1135 * We definitely should have the PCI device removed 1136 * though it wasn't correctly. So we needn't call 1137 * into error handler afterwards. 1138 */ 1139 edev->mode |= EEH_DEV_NO_HANDLER; 1140 1141 edev->pdev = NULL; 1142 dev->dev.archdata.edev = NULL; 1143 } 1144 1145 edev->pdev = dev; 1146 dev->dev.archdata.edev = edev; 1147 1148 eeh_addr_cache_insert_dev(dev); 1149 } 1150 1151 /** 1152 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus 1153 * @bus: PCI bus 1154 * 1155 * This routine must be used to perform EEH initialization for PCI 1156 * devices which are attached to the indicated PCI bus. The PCI bus 1157 * is added after system boot through hotplug or dlpar. 1158 */ 1159 void eeh_add_device_tree_late(struct pci_bus *bus) 1160 { 1161 struct pci_dev *dev; 1162 1163 list_for_each_entry(dev, &bus->devices, bus_list) { 1164 eeh_add_device_late(dev); 1165 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1166 struct pci_bus *subbus = dev->subordinate; 1167 if (subbus) 1168 eeh_add_device_tree_late(subbus); 1169 } 1170 } 1171 } 1172 EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); 1173 1174 /** 1175 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus 1176 * @bus: PCI bus 1177 * 1178 * This routine must be used to add EEH sysfs files for PCI 1179 * devices which are attached to the indicated PCI bus. The PCI bus 1180 * is added after system boot through hotplug or dlpar. 1181 */ 1182 void eeh_add_sysfs_files(struct pci_bus *bus) 1183 { 1184 struct pci_dev *dev; 1185 1186 list_for_each_entry(dev, &bus->devices, bus_list) { 1187 eeh_sysfs_add_device(dev); 1188 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1189 struct pci_bus *subbus = dev->subordinate; 1190 if (subbus) 1191 eeh_add_sysfs_files(subbus); 1192 } 1193 } 1194 } 1195 EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); 1196 1197 /** 1198 * eeh_remove_device - Undo EEH setup for the indicated pci device 1199 * @dev: pci device to be removed 1200 * 1201 * This routine should be called when a device is removed from 1202 * a running system (e.g. by hotplug or dlpar). It unregisters 1203 * the PCI device from the EEH subsystem. I/O errors affecting 1204 * this device will no longer be detected after this call; thus, 1205 * i/o errors affecting this slot may leave this device unusable. 1206 */ 1207 void eeh_remove_device(struct pci_dev *dev) 1208 { 1209 struct eeh_dev *edev; 1210 1211 if (!dev || !eeh_enabled()) 1212 return; 1213 edev = pci_dev_to_eeh_dev(dev); 1214 1215 /* Unregister the device with the EEH/PCI address search system */ 1216 pr_debug("EEH: Removing device %s\n", pci_name(dev)); 1217 1218 if (!edev || !edev->pdev || !edev->pe) { 1219 pr_debug("EEH: Not referenced !\n"); 1220 return; 1221 } 1222 1223 /* 1224 * During the hotplug for EEH error recovery, we need the EEH 1225 * device attached to the parent PE in order for BAR restore 1226 * a bit later. So we keep it for BAR restore and remove it 1227 * from the parent PE during the BAR resotre. 1228 */ 1229 edev->pdev = NULL; 1230 dev->dev.archdata.edev = NULL; 1231 if (!(edev->pe->state & EEH_PE_KEEP)) 1232 eeh_rmv_from_parent_pe(edev); 1233 else 1234 edev->mode |= EEH_DEV_DISCONNECTED; 1235 1236 /* 1237 * We're removing from the PCI subsystem, that means 1238 * the PCI device driver can't support EEH or not 1239 * well. So we rely on hotplug completely to do recovery 1240 * for the specific PCI device. 1241 */ 1242 edev->mode |= EEH_DEV_NO_HANDLER; 1243 1244 eeh_addr_cache_rmv_dev(dev); 1245 eeh_sysfs_remove_device(dev); 1246 edev->mode &= ~EEH_DEV_SYSFS; 1247 } 1248 1249 int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) 1250 { 1251 int ret; 1252 1253 ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 1254 if (ret) { 1255 pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n", 1256 __func__, ret, pe->phb->global_number, pe->addr); 1257 return ret; 1258 } 1259 1260 ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); 1261 if (ret) { 1262 pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n", 1263 __func__, ret, pe->phb->global_number, pe->addr); 1264 return ret; 1265 } 1266 1267 /* Clear software isolated state */ 1268 if (sw_state && (pe->state & EEH_PE_ISOLATED)) 1269 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 1270 1271 return ret; 1272 } 1273 1274 1275 static struct pci_device_id eeh_reset_ids[] = { 1276 { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */ 1277 { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */ 1278 { PCI_DEVICE(0x14e4, 0x1657) }, /* Broadcom BCM5719 */ 1279 { 0 } 1280 }; 1281 1282 static int eeh_pe_change_owner(struct eeh_pe *pe) 1283 { 1284 struct eeh_dev *edev, *tmp; 1285 struct pci_dev *pdev; 1286 struct pci_device_id *id; 1287 int flags, ret; 1288 1289 /* Check PE state */ 1290 flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE); 1291 ret = eeh_ops->get_state(pe, NULL); 1292 if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT) 1293 return 0; 1294 1295 /* Unfrozen PE, nothing to do */ 1296 if ((ret & flags) == flags) 1297 return 0; 1298 1299 /* Frozen PE, check if it needs PE level reset */ 1300 eeh_pe_for_each_dev(pe, edev, tmp) { 1301 pdev = eeh_dev_to_pci_dev(edev); 1302 if (!pdev) 1303 continue; 1304 1305 for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) { 1306 if (id->vendor != PCI_ANY_ID && 1307 id->vendor != pdev->vendor) 1308 continue; 1309 if (id->device != PCI_ANY_ID && 1310 id->device != pdev->device) 1311 continue; 1312 if (id->subvendor != PCI_ANY_ID && 1313 id->subvendor != pdev->subsystem_vendor) 1314 continue; 1315 if (id->subdevice != PCI_ANY_ID && 1316 id->subdevice != pdev->subsystem_device) 1317 continue; 1318 1319 goto reset; 1320 } 1321 } 1322 1323 return eeh_unfreeze_pe(pe, true); 1324 1325 reset: 1326 return eeh_pe_reset_and_recover(pe); 1327 } 1328 1329 /** 1330 * eeh_dev_open - Increase count of pass through devices for PE 1331 * @pdev: PCI device 1332 * 1333 * Increase count of passed through devices for the indicated 1334 * PE. In the result, the EEH errors detected on the PE won't be 1335 * reported. The PE owner will be responsible for detection 1336 * and recovery. 1337 */ 1338 int eeh_dev_open(struct pci_dev *pdev) 1339 { 1340 struct eeh_dev *edev; 1341 int ret = -ENODEV; 1342 1343 mutex_lock(&eeh_dev_mutex); 1344 1345 /* No PCI device ? */ 1346 if (!pdev) 1347 goto out; 1348 1349 /* No EEH device or PE ? */ 1350 edev = pci_dev_to_eeh_dev(pdev); 1351 if (!edev || !edev->pe) 1352 goto out; 1353 1354 /* 1355 * The PE might have been put into frozen state, but we 1356 * didn't detect that yet. The passed through PCI devices 1357 * in frozen PE won't work properly. Clear the frozen state 1358 * in advance. 1359 */ 1360 ret = eeh_pe_change_owner(edev->pe); 1361 if (ret) 1362 goto out; 1363 1364 /* Increase PE's pass through count */ 1365 atomic_inc(&edev->pe->pass_dev_cnt); 1366 mutex_unlock(&eeh_dev_mutex); 1367 1368 return 0; 1369 out: 1370 mutex_unlock(&eeh_dev_mutex); 1371 return ret; 1372 } 1373 EXPORT_SYMBOL_GPL(eeh_dev_open); 1374 1375 /** 1376 * eeh_dev_release - Decrease count of pass through devices for PE 1377 * @pdev: PCI device 1378 * 1379 * Decrease count of pass through devices for the indicated PE. If 1380 * there is no passed through device in PE, the EEH errors detected 1381 * on the PE will be reported and handled as usual. 1382 */ 1383 void eeh_dev_release(struct pci_dev *pdev) 1384 { 1385 struct eeh_dev *edev; 1386 1387 mutex_lock(&eeh_dev_mutex); 1388 1389 /* No PCI device ? */ 1390 if (!pdev) 1391 goto out; 1392 1393 /* No EEH device ? */ 1394 edev = pci_dev_to_eeh_dev(pdev); 1395 if (!edev || !edev->pe || !eeh_pe_passed(edev->pe)) 1396 goto out; 1397 1398 /* Decrease PE's pass through count */ 1399 atomic_dec(&edev->pe->pass_dev_cnt); 1400 WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0); 1401 eeh_pe_change_owner(edev->pe); 1402 out: 1403 mutex_unlock(&eeh_dev_mutex); 1404 } 1405 EXPORT_SYMBOL(eeh_dev_release); 1406 1407 #ifdef CONFIG_IOMMU_API 1408 1409 static int dev_has_iommu_table(struct device *dev, void *data) 1410 { 1411 struct pci_dev *pdev = to_pci_dev(dev); 1412 struct pci_dev **ppdev = data; 1413 1414 if (!dev) 1415 return 0; 1416 1417 if (dev->iommu_group) { 1418 *ppdev = pdev; 1419 return 1; 1420 } 1421 1422 return 0; 1423 } 1424 1425 /** 1426 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE 1427 * @group: IOMMU group 1428 * 1429 * The routine is called to convert IOMMU group to EEH PE. 1430 */ 1431 struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group) 1432 { 1433 struct pci_dev *pdev = NULL; 1434 struct eeh_dev *edev; 1435 int ret; 1436 1437 /* No IOMMU group ? */ 1438 if (!group) 1439 return NULL; 1440 1441 ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table); 1442 if (!ret || !pdev) 1443 return NULL; 1444 1445 /* No EEH device or PE ? */ 1446 edev = pci_dev_to_eeh_dev(pdev); 1447 if (!edev || !edev->pe) 1448 return NULL; 1449 1450 return edev->pe; 1451 } 1452 EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe); 1453 1454 #endif /* CONFIG_IOMMU_API */ 1455 1456 /** 1457 * eeh_pe_set_option - Set options for the indicated PE 1458 * @pe: EEH PE 1459 * @option: requested option 1460 * 1461 * The routine is called to enable or disable EEH functionality 1462 * on the indicated PE, to enable IO or DMA for the frozen PE. 1463 */ 1464 int eeh_pe_set_option(struct eeh_pe *pe, int option) 1465 { 1466 int ret = 0; 1467 1468 /* Invalid PE ? */ 1469 if (!pe) 1470 return -ENODEV; 1471 1472 /* 1473 * EEH functionality could possibly be disabled, just 1474 * return error for the case. And the EEH functinality 1475 * isn't expected to be disabled on one specific PE. 1476 */ 1477 switch (option) { 1478 case EEH_OPT_ENABLE: 1479 if (eeh_enabled()) { 1480 ret = eeh_pe_change_owner(pe); 1481 break; 1482 } 1483 ret = -EIO; 1484 break; 1485 case EEH_OPT_DISABLE: 1486 break; 1487 case EEH_OPT_THAW_MMIO: 1488 case EEH_OPT_THAW_DMA: 1489 if (!eeh_ops || !eeh_ops->set_option) { 1490 ret = -ENOENT; 1491 break; 1492 } 1493 1494 ret = eeh_pci_enable(pe, option); 1495 break; 1496 default: 1497 pr_debug("%s: Option %d out of range (%d, %d)\n", 1498 __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA); 1499 ret = -EINVAL; 1500 } 1501 1502 return ret; 1503 } 1504 EXPORT_SYMBOL_GPL(eeh_pe_set_option); 1505 1506 /** 1507 * eeh_pe_get_state - Retrieve PE's state 1508 * @pe: EEH PE 1509 * 1510 * Retrieve the PE's state, which includes 3 aspects: enabled 1511 * DMA, enabled IO and asserted reset. 1512 */ 1513 int eeh_pe_get_state(struct eeh_pe *pe) 1514 { 1515 int result, ret = 0; 1516 bool rst_active, dma_en, mmio_en; 1517 1518 /* Existing PE ? */ 1519 if (!pe) 1520 return -ENODEV; 1521 1522 if (!eeh_ops || !eeh_ops->get_state) 1523 return -ENOENT; 1524 1525 result = eeh_ops->get_state(pe, NULL); 1526 rst_active = !!(result & EEH_STATE_RESET_ACTIVE); 1527 dma_en = !!(result & EEH_STATE_DMA_ENABLED); 1528 mmio_en = !!(result & EEH_STATE_MMIO_ENABLED); 1529 1530 if (rst_active) 1531 ret = EEH_PE_STATE_RESET; 1532 else if (dma_en && mmio_en) 1533 ret = EEH_PE_STATE_NORMAL; 1534 else if (!dma_en && !mmio_en) 1535 ret = EEH_PE_STATE_STOPPED_IO_DMA; 1536 else if (!dma_en && mmio_en) 1537 ret = EEH_PE_STATE_STOPPED_DMA; 1538 else 1539 ret = EEH_PE_STATE_UNAVAIL; 1540 1541 return ret; 1542 } 1543 EXPORT_SYMBOL_GPL(eeh_pe_get_state); 1544 1545 static int eeh_pe_reenable_devices(struct eeh_pe *pe) 1546 { 1547 struct eeh_dev *edev, *tmp; 1548 struct pci_dev *pdev; 1549 int ret = 0; 1550 1551 /* Restore config space */ 1552 eeh_pe_restore_bars(pe); 1553 1554 /* 1555 * Reenable PCI devices as the devices passed 1556 * through are always enabled before the reset. 1557 */ 1558 eeh_pe_for_each_dev(pe, edev, tmp) { 1559 pdev = eeh_dev_to_pci_dev(edev); 1560 if (!pdev) 1561 continue; 1562 1563 ret = pci_reenable_device(pdev); 1564 if (ret) { 1565 pr_warn("%s: Failure %d reenabling %s\n", 1566 __func__, ret, pci_name(pdev)); 1567 return ret; 1568 } 1569 } 1570 1571 /* The PE is still in frozen state */ 1572 return eeh_unfreeze_pe(pe, true); 1573 } 1574 1575 /** 1576 * eeh_pe_reset - Issue PE reset according to specified type 1577 * @pe: EEH PE 1578 * @option: reset type 1579 * 1580 * The routine is called to reset the specified PE with the 1581 * indicated type, either fundamental reset or hot reset. 1582 * PE reset is the most important part for error recovery. 1583 */ 1584 int eeh_pe_reset(struct eeh_pe *pe, int option) 1585 { 1586 int ret = 0; 1587 1588 /* Invalid PE ? */ 1589 if (!pe) 1590 return -ENODEV; 1591 1592 if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset) 1593 return -ENOENT; 1594 1595 switch (option) { 1596 case EEH_RESET_DEACTIVATE: 1597 ret = eeh_ops->reset(pe, option); 1598 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); 1599 if (ret) 1600 break; 1601 1602 ret = eeh_pe_reenable_devices(pe); 1603 break; 1604 case EEH_RESET_HOT: 1605 case EEH_RESET_FUNDAMENTAL: 1606 /* 1607 * Proactively freeze the PE to drop all MMIO access 1608 * during reset, which should be banned as it's always 1609 * cause recursive EEH error. 1610 */ 1611 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 1612 1613 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 1614 ret = eeh_ops->reset(pe, option); 1615 break; 1616 default: 1617 pr_debug("%s: Unsupported option %d\n", 1618 __func__, option); 1619 ret = -EINVAL; 1620 } 1621 1622 return ret; 1623 } 1624 EXPORT_SYMBOL_GPL(eeh_pe_reset); 1625 1626 /** 1627 * eeh_pe_configure - Configure PCI bridges after PE reset 1628 * @pe: EEH PE 1629 * 1630 * The routine is called to restore the PCI config space for 1631 * those PCI devices, especially PCI bridges affected by PE 1632 * reset issued previously. 1633 */ 1634 int eeh_pe_configure(struct eeh_pe *pe) 1635 { 1636 int ret = 0; 1637 1638 /* Invalid PE ? */ 1639 if (!pe) 1640 return -ENODEV; 1641 1642 return ret; 1643 } 1644 EXPORT_SYMBOL_GPL(eeh_pe_configure); 1645 1646 /** 1647 * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE 1648 * @pe: the indicated PE 1649 * @type: error type 1650 * @function: error function 1651 * @addr: address 1652 * @mask: address mask 1653 * 1654 * The routine is called to inject the specified PCI error, which 1655 * is determined by @type and @function, to the indicated PE for 1656 * testing purpose. 1657 */ 1658 int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, 1659 unsigned long addr, unsigned long mask) 1660 { 1661 /* Invalid PE ? */ 1662 if (!pe) 1663 return -ENODEV; 1664 1665 /* Unsupported operation ? */ 1666 if (!eeh_ops || !eeh_ops->err_inject) 1667 return -ENOENT; 1668 1669 /* Check on PCI error type */ 1670 if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64) 1671 return -EINVAL; 1672 1673 /* Check on PCI error function */ 1674 if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX) 1675 return -EINVAL; 1676 1677 return eeh_ops->err_inject(pe, type, func, addr, mask); 1678 } 1679 EXPORT_SYMBOL_GPL(eeh_pe_inject_err); 1680 1681 static int proc_eeh_show(struct seq_file *m, void *v) 1682 { 1683 if (!eeh_enabled()) { 1684 seq_printf(m, "EEH Subsystem is globally disabled\n"); 1685 seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs); 1686 } else { 1687 seq_printf(m, "EEH Subsystem is enabled\n"); 1688 seq_printf(m, 1689 "no device=%llu\n" 1690 "no device node=%llu\n" 1691 "no config address=%llu\n" 1692 "check not wanted=%llu\n" 1693 "eeh_total_mmio_ffs=%llu\n" 1694 "eeh_false_positives=%llu\n" 1695 "eeh_slot_resets=%llu\n", 1696 eeh_stats.no_device, 1697 eeh_stats.no_dn, 1698 eeh_stats.no_cfg_addr, 1699 eeh_stats.ignored_check, 1700 eeh_stats.total_mmio_ffs, 1701 eeh_stats.false_positives, 1702 eeh_stats.slot_resets); 1703 } 1704 1705 return 0; 1706 } 1707 1708 static int proc_eeh_open(struct inode *inode, struct file *file) 1709 { 1710 return single_open(file, proc_eeh_show, NULL); 1711 } 1712 1713 static const struct file_operations proc_eeh_operations = { 1714 .open = proc_eeh_open, 1715 .read = seq_read, 1716 .llseek = seq_lseek, 1717 .release = single_release, 1718 }; 1719 1720 #ifdef CONFIG_DEBUG_FS 1721 static int eeh_enable_dbgfs_set(void *data, u64 val) 1722 { 1723 if (val) 1724 eeh_clear_flag(EEH_FORCE_DISABLED); 1725 else 1726 eeh_add_flag(EEH_FORCE_DISABLED); 1727 1728 /* Notify the backend */ 1729 if (eeh_ops->post_init) 1730 eeh_ops->post_init(); 1731 1732 return 0; 1733 } 1734 1735 static int eeh_enable_dbgfs_get(void *data, u64 *val) 1736 { 1737 if (eeh_enabled()) 1738 *val = 0x1ul; 1739 else 1740 *val = 0x0ul; 1741 return 0; 1742 } 1743 1744 static int eeh_freeze_dbgfs_set(void *data, u64 val) 1745 { 1746 eeh_max_freezes = val; 1747 return 0; 1748 } 1749 1750 static int eeh_freeze_dbgfs_get(void *data, u64 *val) 1751 { 1752 *val = eeh_max_freezes; 1753 return 0; 1754 } 1755 1756 DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, 1757 eeh_enable_dbgfs_set, "0x%llx\n"); 1758 DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get, 1759 eeh_freeze_dbgfs_set, "0x%llx\n"); 1760 #endif 1761 1762 static int __init eeh_init_proc(void) 1763 { 1764 if (machine_is(pseries) || machine_is(powernv)) { 1765 proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations); 1766 #ifdef CONFIG_DEBUG_FS 1767 debugfs_create_file("eeh_enable", 0600, 1768 powerpc_debugfs_root, NULL, 1769 &eeh_enable_dbgfs_ops); 1770 debugfs_create_file("eeh_max_freezes", 0600, 1771 powerpc_debugfs_root, NULL, 1772 &eeh_freeze_dbgfs_ops); 1773 #endif 1774 } 1775 1776 return 0; 1777 } 1778 __initcall(eeh_init_proc); 1779