/*
 * Copyright IBM Corporation 2001, 2005, 2006
 * Copyright Dave Engebretsen & Todd Inglett 2001
 * Copyright Linas Vepstas 2005, 2006
 * Copyright 2001-2012 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
 */

#include <linux/delay.h>
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/reboot.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>

#include <linux/atomic.h>
#include <asm/debug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>


/** Overview:
 *  EEH, or "Extended Error Handling" is a PCI bridge technology for
 *  dealing with PCI bus errors that can't be dealt with within the
 *  usual PCI framework, except by check-stopping the CPU.  Systems
 *  that are designed for high-availability/reliability cannot afford
 *  to crash due to a "mere" PCI error, thus the need for EEH.
 *  An EEH-capable bridge operates by converting a detected error
 *  into a "slot freeze", taking the PCI adapter off-line, making
 *  the slot behave, from the OS'es point of view, as if the slot
 *  were "empty": all reads return 0xff's and all writes are silently
 *  ignored.  EEH slot isolation events can be triggered by parity
 *  errors on the address or data busses (e.g. during posted writes),
 *  which in turn might be caused by low voltage on the bus, dust,
 *  vibration, humidity, radioactivity or plain-old failed hardware.
 *
 *  Note, however, that one of the leading causes of EEH slot
 *  freeze events are buggy device drivers, buggy device microcode,
 *  or buggy device hardware.  This is because any attempt by the
 *  device to bus-master data to a memory address that is not
 *  assigned to the device will trigger a slot freeze.  (The idea
 *  is to prevent devices-gone-wild from corrupting system memory).
 *  Buggy hardware/drivers will have a miserable time co-existing
 *  with EEH.
 *
 *  Ideally, a PCI device driver, when suspecting that an isolation
 *  event has occurred (e.g. by reading 0xff's), will then ask EEH
 *  whether this is the case, and then take appropriate steps to
 *  reset the PCI slot, the PCI device, and then resume operations.
 *  However, until that day, the checking is done here, with the
 *  eeh_check_failure() routine embedded in the MMIO macros.
 *  If the slot is found to be isolated, an "EEH Event" is synthesized
 *  and sent out for processing.
 */

/* If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message.
 */
#define EEH_MAX_FAILS	2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC	(5*60*1000)

/*
 * EEH probe mode support, which is part of the flags,
 * is to support multiple platforms for EEH. Some platforms
 * like pSeries do PCI emulation based on the device tree.
 * However, other platforms like powernv probe PCI devices
 * from hardware. The flag is used to distinguish that.
 * In addition, struct eeh_ops::probe would be invoked for
 * a particular OF node or PCI device so that the corresponding
 * PE would be created there.
 */
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);

/*
 * Maximum number of times a PE is allowed to be frozen. If one
 * particular PE's frozen count in the last hour exceeds this limit,
 * the PE will be forced offline permanently.
 */
int eeh_max_freezes = 5;

/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;

/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);

/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);

/* Buffer for reporting pci register dumps. It's here in BSS, and
 * not dynamically allocated, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN	8192
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/*
 * The struct is used to maintain the EEH global statistic
 * information. Besides, the EEH global statistics will be
 * exported to user space through procfs.
 */
struct eeh_stats {
	u64 no_device;		/* PCI device not found		*/
	u64 no_dn;		/* OF node not found		*/
	u64 no_cfg_addr;	/* Config address not found	*/
	u64 ignored_check;	/* EEH check skipped		*/
	u64 total_mmio_ffs;	/* Total EEH checks		*/
	u64 false_positives;	/* Unnecessary EEH checks	*/
	u64 slot_resets;	/* PE reset			*/
};

static struct eeh_stats eeh_stats;

static int __init eeh_setup(char *str)
{
	if (!strcmp(str, "off"))
		eeh_add_flag(EEH_FORCE_DISABLED);
	else if (!strcmp(str, "early_log"))
		eeh_add_flag(EEH_EARLY_DUMP_LOG);

	return 1;
}
__setup("eeh=", eeh_setup);
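/*
 * As the overview notes, a driver can also ask EEH directly instead of
 * relying on the checks embedded in the MMIO accessors. A minimal
 * sketch of that pattern follows; the device structure, register
 * offset and handler below are hypothetical, not part of this file.
 * If eeh_check_failure() returns non-zero, a recovery event has
 * already been queued and the hardware should not be touched further:
 *
 *	static irqreturn_t my_isr(int irq, void *data)
 *	{
 *		struct my_dev *md = data;
 *		u32 status = readl(md->mmio_base + MY_STATUS_REG);
 *
 *		if (status == 0xffffffff &&
 *		    eeh_check_failure(md->mmio_base + MY_STATUS_REG))
 *			return IRQ_NONE;
 *		...
 *	}
 */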
/*
 * This routine captures assorted PCI configuration space data
 * for the indicated PCI device, and puts them into a buffer
 * for RTAS error logging.
 */
static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
{
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	u32 cfg;
	int cap, i;
	int n = 0, l = 0;
	char buffer[128];

	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n",
		       edev->phb->global_number, pdn->busno,
		       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
	pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n",
		edev->phb->global_number, pdn->busno,
		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));

	eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);

	eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);

	/* Gather bridge-specific registers */
	if (edev->mode & EEH_DEV_BRIDGE) {
		eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);

		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
		pr_warn("EEH: Bridge control: %04x\n", cfg);
	}

	/* Dump out the PCI-X command and status regs */
	cap = edev->pcix_cap;
	if (cap) {
		eeh_ops->read_config(pdn, cap, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);

		eeh_ops->read_config(pdn, cap+4, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
		pr_warn("EEH: PCI-X status: %08x\n", cfg);
	}

	/* If PCI-E capable, dump PCI-E cap 10 */
	cap = edev->pcie_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
		pr_warn("EEH: PCI-E capabilities and status follow:\n");

		for (i = 0; i <= 8; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
	}

	/* If AER capable, dump it */
	cap = edev->aer_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
		pr_warn("EEH: PCI-E AER capability register set follows:\n");

		for (i = 0; i <= 13; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E AER %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
	}

	return n;
}
static void *eeh_dump_pe_log(void *data, void *flag)
{
	struct eeh_pe *pe = data;
	struct eeh_dev *edev, *tmp;
	size_t *plen = flag;

	/* If the PE's config space is blocked, 0xFF's will be
	 * returned. It's pointless to collect the log in this
	 * case.
	 */
	if (pe->state & EEH_PE_CFG_BLOCKED)
		return NULL;

	eeh_pe_for_each_dev(pe, edev, tmp)
		*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
					  EEH_PCI_REGS_LOG_LEN - *plen);

	return NULL;
}

/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 *
 * This routine should be called to generate the combined log, which
 * is comprised of driver log and error log. The driver log is figured
 * out from the config space of the corresponding PCI device, while
 * the error log is fetched through a platform dependent function call.
 */
void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
	size_t loglen = 0;

	/*
	 * When the PHB is fenced or dead, it's pointless to collect
	 * the data from PCI config space because it should return
	 * 0xFF's. For ER, we still retrieve the data from the PCI
	 * config space.
	 *
	 * For pHyp, we have to enable IO for log retrieval. Otherwise,
	 * 0xFF's is always returned from PCI config space.
	 */
	if (!(pe->type & EEH_PE_PHB)) {
		if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG))
			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);

		/*
		 * The config space of some PCI devices can't be accessed
		 * when their PEs are in frozen state. Otherwise, fenced
		 * PHB might be seen. Those PEs are identified with flag
		 * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED
		 * is set automatically when the PE is put to EEH_PE_ISOLATED.
		 *
		 * Restoring BARs possibly triggers PCI config access in
		 * (OPAL) firmware and then causes fenced PHB. If the
		 * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's
		 * pointless to restore BARs and dump config space.
		 */
		eeh_ops->configure_bridge(pe);
		if (!(pe->state & EEH_PE_CFG_BLOCKED)) {
			eeh_pe_restore_bars(pe);

			pci_regs_buf[0] = 0;
			eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
		}
	}

	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
}

/**
 * eeh_token_to_phys - Convert EEH address token to phys address
 * @token: I/O token, should be address in the form 0xA....
 *
 * This routine should be called to convert a virtual I/O address
 * to a physical one.
 */
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
	pte_t *ptep;
	unsigned long pa;
	int hugepage_shift;

	/*
	 * We won't find hugepages here (this is iomem). Hence we are not
	 * worried about _PAGE_SPLITTING/collapse. Also we will not hit
	 * page table free, because of init_mm.
	 */
	ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token, &hugepage_shift);
	if (!ptep)
		return token;
	WARN_ON(hugepage_shift);
	pa = pte_pfn(*ptep) << PAGE_SHIFT;

	return pa | (token & (PAGE_SIZE-1));
}
/*
 * On the PowerNV platform, we might already have a fenced PHB.
 * In that case, it's meaningless to recover the frozen PE.
 * Instead, we have to handle the fenced PHB first.
 */
static int eeh_phb_check_failure(struct eeh_pe *pe)
{
	struct eeh_pe *phb_pe;
	unsigned long flags;
	int ret;

	if (!eeh_has_flag(EEH_PROBE_MODE_DEV))
		return -EPERM;

	/* Find the PHB PE */
	phb_pe = eeh_phb_pe_get(pe->phb);
	if (!phb_pe) {
		pr_warn("%s: Can't find PE for PHB#%d\n",
			__func__, pe->phb->global_number);
		return -EEXIST;
	}

	/* If the PHB has been in problematic state */
	eeh_serialize_lock(&flags);
	if (phb_pe->state & EEH_PE_ISOLATED) {
		ret = 0;
		goto out;
	}

	/* Check PHB state */
	ret = eeh_ops->get_state(phb_pe, NULL);
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
		ret = 0;
		goto out;
	}

	/* Isolate the PHB and send event */
	eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	pr_err("EEH: PHB#%x failure detected, location: %s\n",
	       phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
	dump_stack();
	eeh_send_failure_event(phb_pe);

	return 1;
out:
	eeh_serialize_unlock(flags);
	return ret;
}
/**
 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
 * @edev: eeh device
 *
 * Check for an EEH failure for the given device node.  Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze.  This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error; otherwise returns
 * a non-zero value and queues up a slot isolation event notification.
 *
 * It is safe to call this routine in an interrupt context.
 */
int eeh_dev_check_failure(struct eeh_dev *edev)
{
	int ret;
	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	unsigned long flags;
	struct pci_dn *pdn;
	struct pci_dev *dev;
	struct eeh_pe *pe, *parent_pe, *phb_pe;
	int rc = 0;
	const char *location = NULL;

	eeh_stats.total_mmio_ffs++;

	if (!eeh_enabled())
		return 0;

	if (!edev) {
		eeh_stats.no_dn++;
		return 0;
	}
	dev = eeh_dev_to_pci_dev(edev);
	pe = eeh_dev_to_pe(edev);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!pe) {
		eeh_stats.ignored_check++;
		pr_debug("EEH: Ignored check for %s\n",
			 eeh_pci_name(dev));
		return 0;
	}

	if (!pe->addr && !pe->config_addr) {
		eeh_stats.no_cfg_addr++;
		return 0;
	}

	/*
	 * On the PowerNV platform, we might already have a fenced
	 * PHB, and we need to take care of that first.
	 */
	ret = eeh_phb_check_failure(pe);
	if (ret > 0)
		return ret;

	/*
	 * If the PE isn't owned by us, we shouldn't check the
	 * state. Instead, let the owner handle it if the PE has
	 * been frozen.
	 */
	if (eeh_pe_passed(pe))
		return 0;

	/* If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock, as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	eeh_serialize_lock(&flags);
	rc = 1;
	if (pe->state & EEH_PE_ISOLATED) {
		pe->check_count++;
		if (pe->check_count % EEH_MAX_FAILS == 0) {
			pdn = eeh_dev_to_pdn(edev);
			if (pdn->node)
				location = of_get_property(pdn->node, "ibm,loc-code", NULL);
			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
				"location=%s driver=%s pci addr=%s\n",
				pe->check_count,
				location ? location : "unknown",
				eeh_driver_name(dev), eeh_pci_name(dev));
			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
			dump_stack();
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure.  This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = eeh_ops->get_state(pe, NULL);

	/* Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 * We will punt if any of the following holds: we failed
	 * to get the PE's state, EEH isn't supported, the PE is
	 * permanently unavailable, or the PE is in a good state.
	 */
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    ((ret & active_flags) == active_flags)) {
		eeh_stats.false_positives++;
		pe->false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/*
	 * It should be a corner case that the parent PE has been
	 * put into frozen state as well. We should take care of
	 * that first.
	 */
	parent_pe = pe->parent;
	while (parent_pe) {
		/* Hit the ceiling ? */
		if (parent_pe->type & EEH_PE_PHB)
			break;

		/* Frozen parent PE ? */
		ret = eeh_ops->get_state(parent_pe, NULL);
		if (ret > 0 &&
		    (ret & active_flags) != active_flags)
			pe = parent_pe;

		/* Next parent level */
		parent_pe = parent_pe->parent;
	}

	eeh_stats.slot_resets++;

	/* Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges.
	 */
	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	/* Most EEH events are due to device driver bugs.  Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened.  So print that out.
	 */
	phb_pe = eeh_phb_pe_get(pe->phb);
	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
	       pe->phb->global_number, pe->addr);
	pr_err("EEH: PE location: %s, PHB location: %s\n",
	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
	dump_stack();

	eeh_send_failure_event(pe);

	return 1;

dn_unlock:
	eeh_serialize_unlock(flags);
	return rc;
}

EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
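/*
 * When a driver already holds its struct pci_dev, it can skip the
 * address-cache lookup that eeh_check_failure() below performs and
 * query the EEH state directly. An illustrative sketch (the pdev
 * variable is assumed to be the caller's device):
 *
 *	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
 *
 *	if (edev && eeh_dev_check_failure(edev))
 *		... stop touching the hardware; recovery is under way ...
 */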
/**
 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
 * @token: I/O address
 *
 * Check for an EEH failure at the given I/O address.  Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze event.  This routine
 * will query firmware for the EEH status.
 *
 * Note this routine is safe to call in an interrupt context.
 */
int eeh_check_failure(const volatile void __iomem *token)
{
	unsigned long addr;
	struct eeh_dev *edev;

	/* Finding the phys addr + pci device; this is pretty quick. */
	addr = eeh_token_to_phys((unsigned long __force) token);
	edev = eeh_addr_cache_get_dev(addr);
	if (!edev) {
		eeh_stats.no_device++;
		return 0;
	}

	return eeh_dev_check_failure(edev);
}
EXPORT_SYMBOL(eeh_check_failure);


/**
 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
 * @pe: EEH PE
 * @function: the option (e.g. EEH_OPT_THAW_MMIO) to apply
 *
 * This routine should be called to reenable frozen MMIO or DMA
 * so that it would work correctly again. It's useful while doing
 * recovery or log collection on the indicated device.
 */
int eeh_pci_enable(struct eeh_pe *pe, int function)
{
	int active_flag, rc;

	/*
	 * pHyp doesn't allow enabling IO or DMA on an unfrozen PE.
	 * Also, it's pointless to enable them on an unfrozen PE. So
	 * we have to check before enabling IO or DMA.
	 */
	switch (function) {
	case EEH_OPT_THAW_MMIO:
		active_flag = EEH_STATE_MMIO_ACTIVE;
		break;
	case EEH_OPT_THAW_DMA:
		active_flag = EEH_STATE_DMA_ACTIVE;
		break;
	case EEH_OPT_DISABLE:
	case EEH_OPT_ENABLE:
	case EEH_OPT_FREEZE_PE:
		active_flag = 0;
		break;
	default:
		pr_warn("%s: Invalid function %d\n",
			__func__, function);
		return -EINVAL;
	}

	/*
	 * Check if IO or DMA has been enabled before
	 * enabling them.
	 */
	if (active_flag) {
		rc = eeh_ops->get_state(pe, NULL);
		if (rc < 0)
			return rc;

		/* Needn't enable it at all */
		if (rc == EEH_STATE_NOT_SUPPORT)
			return 0;

		/* It's already enabled */
		if (rc & active_flag)
			return 0;
	}


	/* Issue the request */
	rc = eeh_ops->set_option(pe, function);
	if (rc)
		pr_warn("%s: Unexpected state change %d on "
			"PHB#%d-PE#%x, err=%d\n",
			__func__, function, pe->phb->global_number,
			pe->addr, rc);

	/* Check if the request finished successfully */
	if (active_flag) {
		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if (rc <= 0)
			return rc;

		if (rc & active_flag)
			return 0;

		return -EIO;
	}

	return rc;
}

static void *eeh_disable_and_save_dev_state(void *data, void *userdata)
{
	struct eeh_dev *edev = data;
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	/*
	 * The caller should have disabled and saved the
	 * state for the specified device
	 */
	if (!pdev || pdev == dev)
		return NULL;

	/* Ensure we have D0 power state */
	pci_set_power_state(pdev, PCI_D0);

	/* Save device state */
	pci_save_state(pdev);

	/*
	 * Disable device to avoid any DMA traffic and
	 * interrupt from the device
	 */
	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);

	return NULL;
}

static void *eeh_restore_dev_state(void *data, void *userdata)
{
	struct eeh_dev *edev = data;
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	if (!pdev)
		return NULL;

	/* Apply customization from firmware */
	if (pdn && eeh_ops->restore_config)
		eeh_ops->restore_config(pdn);

	/* The caller should restore state for the specified device */
	if (pdev != dev)
		pci_restore_state(pdev);

	return NULL;
}
/**
 * pcibios_set_pcie_reset_state - Set PCI-E reset state
 * @dev: pci device struct
 * @state: reset state to enter
 *
 * Return value:
 *	0 on success
 */
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
	struct eeh_pe *pe = eeh_dev_to_pe(edev);

	if (!pe) {
		pr_err("%s: No PE found on PCI device %s\n",
		       __func__, pci_name(dev));
		return -EINVAL;
	}

	switch (state) {
	case pcie_deassert_reset:
		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
		eeh_unfreeze_pe(pe, false);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
		break;
	case pcie_hot_reset:
		eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		eeh_ops->reset(pe, EEH_RESET_HOT);
		break;
	case pcie_warm_reset:
		eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
		break;
	default:
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED);
		return -EINVAL;
	}

	return 0;
}
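/*
 * Drivers don't call this hook directly; the PCI core exposes it
 * through pci_set_pcie_reset_state(). A hot-reset cycle from a
 * driver's error handler might look like the sketch below; the pdev
 * variable and the 100ms hold time are illustrative assumptions:
 *
 *	pci_set_pcie_reset_state(pdev, pcie_hot_reset);
 *	msleep(100);
 *	pci_set_pcie_reset_state(pdev, pcie_deassert_reset);
 */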
/**
 * eeh_set_dev_freset - Check the required reset for the indicated device
 * @data: EEH device
 * @flag: return value
 *
 * Each device might have its preferred reset type: fundamental or
 * hot reset. The routine is used to collect the information for
 * the indicated device and its children so that the bunch of the
 * devices could be reset properly.
 */
static void *eeh_set_dev_freset(void *data, void *flag)
{
	struct pci_dev *dev;
	unsigned int *freset = (unsigned int *)flag;
	struct eeh_dev *edev = (struct eeh_dev *)data;

	dev = eeh_dev_to_pci_dev(edev);
	if (dev)
		*freset |= dev->needs_freset;

	return NULL;
}

/**
 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
 * @pe: EEH PE
 *
 * Assert the PCI #RST line for 1/4 second.
 */
static void eeh_reset_pe_once(struct eeh_pe *pe)
{
	unsigned int freset = 0;

	/* Determine the type of EEH reset required for the
	 * Partitionable Endpoint: a hot-reset (1)
	 * or a fundamental reset (3).
	 * A fundamental reset required by any device under the
	 * Partitionable Endpoint trumps hot-reset.
	 */
	eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);

	if (freset)
		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
	else
		eeh_ops->reset(pe, EEH_RESET_HOT);

	eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
}

/**
 * eeh_reset_pe - Reset the indicated PE
 * @pe: EEH PE
 *
 * This routine should be called to reset the indicated device,
 * including the PE. A PE might include multiple PCI devices and
 * sometimes PCI bridges might be involved as well.
 */
int eeh_reset_pe(struct eeh_pe *pe)
{
	int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	int i, state, ret;

	/* Mark as reset and block config space */
	eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED);

	/* Take three shots at resetting the bus */
	for (i = 0; i < 3; i++) {
		eeh_reset_pe_once(pe);

		/*
		 * EEH_PE_ISOLATED is expected to be removed after
		 * BAR restore.
		 */
		state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if ((state & flags) == flags) {
			ret = 0;
			goto out;
		}

		if (state < 0) {
			pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x",
				__func__, pe->phb->global_number, pe->addr);
			ret = -ENOTRECOVERABLE;
			goto out;
		}

		/* We might run out of credits */
		ret = -EIO;
		pr_warn("%s: Failure %d resetting PHB#%x-PE#%x (attempt %d)\n",
			__func__, state, pe->phb->global_number, pe->addr, (i + 1));
	}

out:
	eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
	return ret;
}

/**
 * eeh_save_bars - Save device bars
 * @edev: PCI device associated EEH device
 *
 * Save the values of the device bars. Unlike the restore
 * routine, this routine is *not* recursive. This is because
 * PCI devices are added individually; but, for the restore,
 * an entire slot is reset at a time.
 */
void eeh_save_bars(struct eeh_dev *edev)
{
	struct pci_dn *pdn;
	int i;

	pdn = eeh_dev_to_pdn(edev);
	if (!pdn)
		return;

	for (i = 0; i < 16; i++)
		eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]);

	/*
	 * For PCI bridges including the root port, we need to enable
	 * bus mastering explicitly. Otherwise, they can't fetch IODA
	 * table entries correctly. So we cache the bit in advance so
	 * that we can restore it after reset, either PHB range or PE
	 * range.
	 */
	if (edev->mode & EEH_DEV_BRIDGE)
		edev->config_space[1] |= PCI_COMMAND_MASTER;
}

/**
 * eeh_ops_register - Register platform dependent EEH operations
 * @ops: platform dependent EEH operations
 *
 * Register the platform dependent EEH operation callback
 * functions. The platform should call this function before
 * any other EEH operations.
 */
int __init eeh_ops_register(struct eeh_ops *ops)
{
	if (!ops->name) {
		pr_warn("%s: Invalid EEH ops name for %p\n",
			__func__, ops);
		return -EINVAL;
	}

	if (eeh_ops && eeh_ops != ops) {
		pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
			__func__, eeh_ops->name, ops->name);
		return -EEXIST;
	}

	eeh_ops = ops;

	return 0;
}

/**
 * eeh_ops_unregister - Unregister platform dependent EEH operations
 * @name: name of EEH platform operations
 *
 * Unregister the platform dependent EEH operation callback
 * functions.
 */
int __exit eeh_ops_unregister(const char *name)
{
	if (!name || !strlen(name)) {
		pr_warn("%s: Invalid EEH ops name\n",
			__func__);
		return -EINVAL;
	}

	if (eeh_ops && !strcmp(eeh_ops->name, name)) {
		eeh_ops = NULL;
		return 0;
	}

	return -EEXIST;
}
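/*
 * A platform registers its backend early in boot. A minimal sketch,
 * with hypothetical pseries_eeh_* callbacks standing in for a real
 * backend (only a subset of the eeh_ops callbacks is shown):
 *
 *	static struct eeh_ops pseries_eeh_ops = {
 *		.name		= "pseries",
 *		.init		= pseries_eeh_init,
 *		.probe		= pseries_eeh_probe,
 *		.set_option	= pseries_eeh_set_option,
 *		.get_state	= pseries_eeh_get_state,
 *		.reset		= pseries_eeh_reset,
 *		.read_config	= pseries_eeh_read_config,
 *		.write_config	= pseries_eeh_write_config,
 *	};
 *
 *	eeh_ops_register(&pseries_eeh_ops);
 */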
static int eeh_reboot_notifier(struct notifier_block *nb,
			       unsigned long action, void *unused)
{
	eeh_clear_flag(EEH_ENABLED);
	return NOTIFY_DONE;
}

static struct notifier_block eeh_reboot_nb = {
	.notifier_call = eeh_reboot_notifier,
};

/**
 * eeh_init - EEH initialization
 *
 * Initialize EEH by trying to enable it for all of the adapters in the system.
 * As a side effect we can determine here if eeh is supported at all.
 * Note that we leave EEH on so failed config cycles won't cause a machine
 * check.  If a user turns off EEH for a particular adapter they are really
 * telling Linux to ignore errors.  Some hardware (e.g. POWER5) won't
 * grant access to a slot if EEH isn't enabled, and so we always enable
 * EEH for all slots/all devices.
 *
 * The eeh-force-off option disables EEH checking globally, for all slots.
 * Even if force-off is set, the EEH hardware is still enabled, so that
 * newer systems can boot.
 */
int eeh_init(void)
{
	struct pci_controller *hose, *tmp;
	struct pci_dn *pdn;
	static int cnt = 0;
	int ret = 0;

	/*
	 * We have to delay the initialization on PowerNV until
	 * the PCI hierarchy tree has been built, because the PEs
	 * are figured out based on PCI devices instead of device
	 * tree nodes.
	 */
	if (machine_is(powernv) && cnt++ <= 0)
		return ret;

	/* Register reboot notifier */
	ret = register_reboot_notifier(&eeh_reboot_nb);
	if (ret) {
		pr_warn("%s: Failed to register notifier (%d)\n",
			__func__, ret);
		return ret;
	}

	/* Call platform initialization function */
	if (!eeh_ops) {
		pr_warn("%s: Platform EEH operation not found\n",
			__func__);
		return -EEXIST;
	} else if ((ret = eeh_ops->init()))
		return ret;

	/* Initialize EEH event */
	ret = eeh_event_init();
	if (ret)
		return ret;

	/* Enable EEH for all adapters */
	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pdn = hose->pci_data;
		traverse_pci_dn(pdn, eeh_ops->probe, NULL);
	}

	/*
	 * Call platform post-initialization. It's a good chance to
	 * inform the platform that EEH is ready to supply service
	 * once the I/O cache stuff has been built up.
	 */
	if (eeh_ops->post_init) {
		ret = eeh_ops->post_init();
		if (ret)
			return ret;
	}

	if (eeh_enabled())
		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
	else
		pr_warn("EEH: No capable adapters found\n");

	return ret;
}

core_initcall_sync(eeh_init);

/**
 * eeh_add_device_early - Enable EEH for the indicated device node
 * @pdn: PCI device node for which to set up EEH
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 * This routine must be called before any i/o is performed to the
 * adapter (including any config-space i/o).
 * Whether this actually enables EEH or not for this device depends
 * on the CEC architecture, the type of the device, earlier boot
 * command-line arguments, etc.
 */
void eeh_add_device_early(struct pci_dn *pdn)
{
	struct pci_controller *phb;
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);

	if (!edev || !eeh_enabled())
		return;

	if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
		return;

	/* USB Bus children of PCI devices will not have BUIDs */
	phb = edev->phb;
	if (NULL == phb ||
	    (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
		return;

	eeh_ops->probe(pdn, NULL);
}
/**
 * eeh_add_device_tree_early - Enable EEH for the indicated device
 * @pdn: PCI device node
 *
 * This routine must be used to perform EEH initialization for the
 * indicated PCI device that was added after system boot (e.g.
 * hotplug, dlpar).
 */
void eeh_add_device_tree_early(struct pci_dn *pdn)
{
	struct pci_dn *n;

	if (!pdn)
		return;

	list_for_each_entry(n, &pdn->child_list, list)
		eeh_add_device_tree_early(n);
	eeh_add_device_early(pdn);
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);

/**
 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
 * @dev: pci device for which to set up EEH
 *
 * This routine must be used to complete EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 */
void eeh_add_device_late(struct pci_dev *dev)
{
	struct pci_dn *pdn;
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;

	pr_debug("EEH: Adding device %s\n", pci_name(dev));

	pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
	edev = pdn_to_eeh_dev(pdn);
	if (edev->pdev == dev) {
		pr_debug("EEH: Already referenced !\n");
		return;
	}

	/*
	 * The EEH cache might not be removed correctly because of
	 * unbalanced kref to the device during unplug time, which
	 * relies on pcibios_release_device(). So we have to remove
	 * that here explicitly.
	 */
	if (edev->pdev) {
		eeh_rmv_from_parent_pe(edev);
		eeh_addr_cache_rmv_dev(edev->pdev);
		eeh_sysfs_remove_device(edev->pdev);
		edev->mode &= ~EEH_DEV_SYSFS;

		/*
		 * The PCI device should already have been removed,
		 * though that didn't happen correctly. So we needn't
		 * call into the error handler afterwards.
		 */
		edev->mode |= EEH_DEV_NO_HANDLER;

		edev->pdev = NULL;
		dev->dev.archdata.edev = NULL;
	}

	if (eeh_has_flag(EEH_PROBE_MODE_DEV))
		eeh_ops->probe(pdn, NULL);

	edev->pdev = dev;
	dev->dev.archdata.edev = edev;

	eeh_addr_cache_insert_dev(dev);
}

/**
 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_device_tree_late(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_add_device_late(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_device_tree_late(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);
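/*
 * Taken together, the hotplug/dlpar path calls the routines above in
 * a fixed order: the "early" pass before any config-space access, and
 * the "late" pass once struct pci_dev exists. A rough sketch, assuming
 * a hypothetical dlpar handler holding the new subtree in pdn/bus
 * (eeh_add_sysfs_files() is defined just below):
 *
 *	eeh_add_device_tree_early(pdn);
 *	... scan and add the new PCI devices ...
 *	eeh_add_device_tree_late(bus);
 *	eeh_add_sysfs_files(bus);
 */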
/**
 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to add EEH sysfs files for PCI
 * devices which are attached to the indicated PCI bus. The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_sysfs_files(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_sysfs_add_device(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_sysfs_files(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);

/**
 * eeh_remove_device - Undo EEH setup for the indicated pci device
 * @dev: pci device to be removed
 *
 * This routine should be called when a device is removed from
 * a running system (e.g. by hotplug or dlpar).  It unregisters
 * the PCI device from the EEH subsystem.  I/O errors affecting
 * this device will no longer be detected after this call; thus,
 * i/o errors affecting this slot may leave this device unusable.
 */
void eeh_remove_device(struct pci_dev *dev)
{
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;
	edev = pci_dev_to_eeh_dev(dev);

	/* Unregister the device with the EEH/PCI address search system */
	pr_debug("EEH: Removing device %s\n", pci_name(dev));

	if (!edev || !edev->pdev || !edev->pe) {
		pr_debug("EEH: Not referenced !\n");
		return;
	}

	/*
	 * During the hotplug for EEH error recovery, we need the EEH
	 * device attached to the parent PE in order for BAR restore
	 * a bit later. So we keep it for BAR restore and remove it
	 * from the parent PE during the BAR restore.
	 */
	edev->pdev = NULL;
	dev->dev.archdata.edev = NULL;
	if (!(edev->pe->state & EEH_PE_KEEP))
		eeh_rmv_from_parent_pe(edev);
	else
		edev->mode |= EEH_DEV_DISCONNECTED;

	/*
	 * We're removing the device from the PCI subsystem, which means
	 * the PCI device driver either doesn't support EEH or doesn't
	 * support it well. So we rely on hotplug completely to do the
	 * recovery for the specific PCI device.
	 */
	edev->mode |= EEH_DEV_NO_HANDLER;

	eeh_addr_cache_rmv_dev(dev);
	eeh_sysfs_remove_device(dev);
	edev->mode &= ~EEH_DEV_SYSFS;
}
int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
{
	int ret;

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
	if (ret) {
		pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
	if (ret) {
		pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	/* Clear software isolated state */
	if (sw_state && (pe->state & EEH_PE_ISOLATED))
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);

	return ret;
}


static struct pci_device_id eeh_reset_ids[] = {
	{ PCI_DEVICE(0x19a2, 0x0710) },	/* Emulex, BE */
	{ PCI_DEVICE(0x10df, 0xe220) },	/* Emulex, Lancer */
	{ PCI_DEVICE(0x14e4, 0x1657) },	/* Broadcom BCM5719 */
	{ 0 }
};

static int eeh_pe_change_owner(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	struct pci_device_id *id;
	int flags, ret;

	/* Check PE state */
	flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	ret = eeh_ops->get_state(pe, NULL);
	if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
		return 0;

	/* Unfrozen PE, nothing to do */
	if ((ret & flags) == flags)
		return 0;

	/* Frozen PE, check if it needs PE level reset */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
			if (id->vendor != PCI_ANY_ID &&
			    id->vendor != pdev->vendor)
				continue;
			if (id->device != PCI_ANY_ID &&
			    id->device != pdev->device)
				continue;
			if (id->subvendor != PCI_ANY_ID &&
			    id->subvendor != pdev->subsystem_vendor)
				continue;
			if (id->subdevice != PCI_ANY_ID &&
			    id->subdevice != pdev->subsystem_device)
				continue;

			goto reset;
		}
	}

	return eeh_unfreeze_pe(pe, true);

reset:
	return eeh_pe_reset_and_recover(pe);
}
/**
 * eeh_dev_open - Increase count of pass through devices for PE
 * @pdev: PCI device
 *
 * Increase the count of passed through devices for the indicated
 * PE. As a result, the EEH errors detected on the PE won't be
 * reported. The PE owner will be responsible for detection
 * and recovery.
 */
int eeh_dev_open(struct pci_dev *pdev)
{
	struct eeh_dev *edev;
	int ret = -ENODEV;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		goto out;

	/*
	 * The PE might have been put into frozen state, but we
	 * didn't detect that yet. The passed through PCI devices
	 * in frozen PE won't work properly. Clear the frozen state
	 * in advance.
	 */
	ret = eeh_pe_change_owner(edev->pe);
	if (ret)
		goto out;

	/* Increase PE's pass through count */
	atomic_inc(&edev->pe->pass_dev_cnt);
	mutex_unlock(&eeh_dev_mutex);

	return 0;
out:
	mutex_unlock(&eeh_dev_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(eeh_dev_open);

/**
 * eeh_dev_release - Decrease count of pass through devices for PE
 * @pdev: PCI device
 *
 * Decrease the count of pass through devices for the indicated PE. If
 * there is no passed through device in the PE, the EEH errors detected
 * on the PE will be reported and handled as usual.
 */
void eeh_dev_release(struct pci_dev *pdev)
{
	struct eeh_dev *edev;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
		goto out;

	/* Decrease PE's pass through count */
	atomic_dec(&edev->pe->pass_dev_cnt);
	WARN_ON(atomic_read(&edev->pe->pass_dev_cnt) < 0);
	eeh_pe_change_owner(edev->pe);
out:
	mutex_unlock(&eeh_dev_mutex);
}
EXPORT_SYMBOL(eeh_dev_release);

#ifdef CONFIG_IOMMU_API

static int dev_has_iommu_table(struct device *dev, void *data)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev **ppdev = data;

	if (!dev)
		return 0;

	if (dev->iommu_group) {
		*ppdev = pdev;
		return 1;
	}

	return 0;
}

/**
 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
 * @group: IOMMU group
 *
 * The routine is called to convert an IOMMU group to an EEH PE.
 */
struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
{
	struct pci_dev *pdev = NULL;
	struct eeh_dev *edev;
	int ret;

	/* No IOMMU group ? */
	if (!group)
		return NULL;

	ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
	if (!ret || !pdev)
		return NULL;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		return NULL;

	return edev->pe;
}
EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);

#endif /* CONFIG_IOMMU_API */
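/*
 * The open/release pair above is meant to bracket the time a device
 * is passed through to a guest, e.g. by a VFIO-style user. A sketch
 * of the expected pairing (error handling trimmed, pdev assumed to be
 * the device being handed over):
 *
 *	if (eeh_dev_open(pdev))
 *		return -ENODEV;
 *	... guest owns the device; host EEH reports are suppressed ...
 *	eeh_dev_release(pdev);
 */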
/**
 * eeh_pe_set_option - Set options for the indicated PE
 * @pe: EEH PE
 * @option: requested option
 *
 * The routine is called to enable or disable EEH functionality
 * on the indicated PE, or to enable IO or DMA for the frozen PE.
 */
int eeh_pe_set_option(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/*
	 * EEH functionality could possibly be disabled, so just
	 * return an error for that case. EEH functionality isn't
	 * expected to be disabled on one specific PE.
	 */
	switch (option) {
	case EEH_OPT_ENABLE:
		if (eeh_enabled()) {
			ret = eeh_pe_change_owner(pe);
			break;
		}
		ret = -EIO;
		break;
	case EEH_OPT_DISABLE:
		break;
	case EEH_OPT_THAW_MMIO:
	case EEH_OPT_THAW_DMA:
		if (!eeh_ops || !eeh_ops->set_option) {
			ret = -ENOENT;
			break;
		}

		ret = eeh_pci_enable(pe, option);
		break;
	default:
		pr_debug("%s: Option %d out of range (%d, %d)\n",
			 __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_set_option);

/**
 * eeh_pe_get_state - Retrieve PE's state
 * @pe: EEH PE
 *
 * Retrieve the PE's state, which covers 3 aspects: enabled
 * DMA, enabled IO and asserted reset.
 */
int eeh_pe_get_state(struct eeh_pe *pe)
{
	int result, ret = 0;
	bool rst_active, dma_en, mmio_en;

	/* Existing PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->get_state)
		return -ENOENT;

	result = eeh_ops->get_state(pe, NULL);
	rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
	dma_en = !!(result & EEH_STATE_DMA_ENABLED);
	mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);

	if (rst_active)
		ret = EEH_PE_STATE_RESET;
	else if (dma_en && mmio_en)
		ret = EEH_PE_STATE_NORMAL;
	else if (!dma_en && !mmio_en)
		ret = EEH_PE_STATE_STOPPED_IO_DMA;
	else if (!dma_en && mmio_en)
		ret = EEH_PE_STATE_STOPPED_DMA;
	else
		ret = EEH_PE_STATE_UNAVAIL;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);

static int eeh_pe_reenable_devices(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	int ret = 0;

	/* Restore config space */
	eeh_pe_restore_bars(pe);

	/*
	 * Reenable PCI devices as the devices passed
	 * through are always enabled before the reset.
	 */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		ret = pci_reenable_device(pdev);
		if (ret) {
			pr_warn("%s: Failure %d reenabling %s\n",
				__func__, ret, pci_name(pdev));
			return ret;
		}
	}

	/* The PE is still in frozen state */
	return eeh_unfreeze_pe(pe, true);
}
/**
 * eeh_pe_reset - Issue PE reset according to specified type
 * @pe: EEH PE
 * @option: reset type
 *
 * The routine is called to reset the specified PE with the
 * indicated type, either fundamental reset or hot reset.
 * PE reset is the most important part of error recovery.
 */
int eeh_pe_reset(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
		return -ENOENT;

	switch (option) {
	case EEH_RESET_DEACTIVATE:
		ret = eeh_ops->reset(pe, option);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		if (ret)
			break;

		ret = eeh_pe_reenable_devices(pe);
		break;
	case EEH_RESET_HOT:
	case EEH_RESET_FUNDAMENTAL:
		/*
		 * Proactively freeze the PE to drop all MMIO access
		 * during reset, which is forbidden as it always
		 * causes recursive EEH errors.
		 */
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);

		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		ret = eeh_ops->reset(pe, option);
		break;
	default:
		pr_debug("%s: Unsupported option %d\n",
			 __func__, option);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_reset);
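/*
 * A caller driving recovery by hand (e.g. a passthrough user) would
 * assert and then deassert the reset, letting the deactivate step
 * reenable the devices. A minimal sketch, assuming pe is a frozen PE
 * the caller owns:
 *
 *	eeh_pe_reset(pe, EEH_RESET_HOT);
 *	eeh_pe_reset(pe, EEH_RESET_DEACTIVATE);
 *	eeh_pe_configure(pe);
 */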
/**
 * eeh_pe_configure - Configure PCI bridges after PE reset
 * @pe: EEH PE
 *
 * The routine is called to restore the PCI config space for
 * those PCI devices, especially PCI bridges affected by the PE
 * reset issued previously.
 */
int eeh_pe_configure(struct eeh_pe *pe)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_configure);

/**
 * eeh_pe_inject_err - Inject the specified PCI error to the indicated PE
 * @pe: the indicated PE
 * @type: error type
 * @func: error function
 * @addr: address
 * @mask: address mask
 *
 * The routine is called to inject the specified PCI error, which
 * is determined by @type and @func, to the indicated PE for
 * testing purposes.
 */
int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
		      unsigned long addr, unsigned long mask)
{
	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/* Unsupported operation ? */
	if (!eeh_ops || !eeh_ops->err_inject)
		return -ENOENT;

	/* Check on PCI error type */
	if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
		return -EINVAL;

	/* Check on PCI error function */
	if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
		return -EINVAL;

	return eeh_ops->err_inject(pe, type, func, addr, mask);
}
EXPORT_SYMBOL_GPL(eeh_pe_inject_err);

static int proc_eeh_show(struct seq_file *m, void *v)
{
	if (!eeh_enabled()) {
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
			   "no device=%llu\n"
			   "no device node=%llu\n"
			   "no config address=%llu\n"
			   "check not wanted=%llu\n"
			   "eeh_total_mmio_ffs=%llu\n"
			   "eeh_false_positives=%llu\n"
			   "eeh_slot_resets=%llu\n",
			   eeh_stats.no_device,
			   eeh_stats.no_dn,
			   eeh_stats.no_cfg_addr,
			   eeh_stats.ignored_check,
			   eeh_stats.total_mmio_ffs,
			   eeh_stats.false_positives,
			   eeh_stats.slot_resets);
	}

	return 0;
}

static int proc_eeh_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_eeh_show, NULL);
}

static const struct file_operations proc_eeh_operations = {
	.open		= proc_eeh_open,
	.read		= seq_read,
	.llseek		= seq_lseek,
	.release	= single_release,
};

#ifdef CONFIG_DEBUG_FS
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
	if (val)
		eeh_clear_flag(EEH_FORCE_DISABLED);
	else
		eeh_add_flag(EEH_FORCE_DISABLED);

	/* Notify the backend */
	if (eeh_ops->post_init)
		eeh_ops->post_init();

	return 0;
}

static int eeh_enable_dbgfs_get(void *data, u64 *val)
{
	if (eeh_enabled())
		*val = 0x1ul;
	else
		*val = 0x0ul;
	return 0;
}

static int eeh_freeze_dbgfs_set(void *data, u64 val)
{
	eeh_max_freezes = val;
	return 0;
}

static int eeh_freeze_dbgfs_get(void *data, u64 *val)
{
	*val = eeh_max_freezes;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
			eeh_enable_dbgfs_set, "0x%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
			eeh_freeze_dbgfs_set, "0x%llx\n");
#endif

static int __init eeh_init_proc(void)
{
	if (machine_is(pseries) || machine_is(powernv)) {
		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
#ifdef CONFIG_DEBUG_FS
		debugfs_create_file("eeh_enable", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_enable_dbgfs_ops);
		debugfs_create_file("eeh_max_freezes", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_freeze_dbgfs_ops);
#endif
	}

	return 0;
}
__initcall(eeh_init_proc);
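/*
 * With the files above in place, the statistics are visible from user
 * space at /proc/powerpc/eeh, while the debugfs knobs land under the
 * powerpc debugfs root (typically /sys/kernel/debug/powerpc/eeh_enable
 * and /sys/kernel/debug/powerpc/eeh_max_freezes). Writing 0 to
 * eeh_enable sets EEH_FORCE_DISABLED; writing non-zero clears it.
 */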