/*
 * Copyright IBM Corporation 2001, 2005, 2006
 * Copyright Dave Engebretsen & Todd Inglett 2001
 * Copyright Linas Vepstas 2005, 2006
 * Copyright 2001-2012 IBM Corporation.
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License as published by
 * the Free Software Foundation; either version 2 of the License, or
 * (at your option) any later version.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
 *
 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com>
 */

#include <linux/delay.h>
#include <linux/debugfs.h>
#include <linux/sched.h>
#include <linux/init.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/iommu.h>
#include <linux/proc_fs.h>
#include <linux/rbtree.h>
#include <linux/reboot.h>
#include <linux/seq_file.h>
#include <linux/spinlock.h>
#include <linux/export.h>
#include <linux/of.h>

#include <linux/atomic.h>
#include <asm/debug.h>
#include <asm/eeh.h>
#include <asm/eeh_event.h>
#include <asm/io.h>
#include <asm/iommu.h>
#include <asm/machdep.h>
#include <asm/ppc-pci.h>
#include <asm/rtas.h>


/** Overview:
 * EEH, or "Enhanced Error Handling" is a PCI bridge technology for
 * dealing with PCI bus errors that can't be dealt with within the
 * usual PCI framework, except by check-stopping the CPU.  Systems
 * that are designed for high-availability/reliability cannot afford
 * to crash due to a "mere" PCI error, thus the need for EEH.
 * An EEH-capable bridge operates by converting a detected error
 * into a "slot freeze", taking the PCI adapter off-line, making
 * the slot behave, from the OS's point of view, as if the slot
 * were "empty": all reads return 0xff's and all writes are silently
 * ignored.  EEH slot isolation events can be triggered by parity
 * errors on the address or data busses (e.g. during posted writes),
 * which in turn might be caused by low voltage on the bus, dust,
 * vibration, humidity, radioactivity or plain-old failed hardware.
 *
 * Note, however, that one of the leading causes of EEH slot
 * freeze events is buggy device drivers, buggy device microcode,
 * or buggy device hardware.  This is because any attempt by the
 * device to bus-master data to a memory address that is not
 * assigned to the device will trigger a slot freeze.  (The idea
 * is to prevent devices-gone-wild from corrupting system memory).
 * Buggy hardware/drivers will have a miserable time co-existing
 * with EEH.
 *
 * Ideally, a PCI device driver, when suspecting that an isolation
 * event has occurred (e.g. by reading 0xff's), will then ask EEH
 * whether this is the case, and then take appropriate steps to
 * reset the PCI slot, the PCI device, and then resume operations.
 * However, until that day, the checking is done here, with the
 * eeh_check_failure() routine embedded in the MMIO macros.  If
 * the slot is found to be isolated, an "EEH Event" is synthesized
 * and sent out for processing.
 */
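/*
 * Illustrative sketch (not part of this file): a driver that reads
 * all-1s data from MMIO space can confirm a suspected slot freeze
 * via eeh_check_failure().  "priv" and MY_STATUS are hypothetical
 * driver-local names; the EEH calls are the ones exported below.
 *
 *	u32 val = readl(priv->regs + MY_STATUS);
 *	if (val == 0xffffffff &&
 *	    eeh_check_failure(priv->regs + MY_STATUS)) {
 *		// Slot is frozen; an EEH event has been queued and
 *		// the recovery core will drive the driver callbacks.
 *		return IRQ_NONE;
 *	}
 */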
/* If a device driver keeps reading an MMIO register in an interrupt
 * handler after a slot isolation event, it might be broken.
 * This sets the threshold for how many read attempts we allow
 * before printing an error message.
 */
#define EEH_MAX_FAILS	2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC	(5*60*1000)

/*
 * EEH probe mode support, which is part of the flags,
 * is to support multiple platforms for EEH.  Some platforms
 * like pSeries do PCI emulation based on device tree.
 * However, other platforms like powernv probe PCI devices
 * from hardware.  The flag is used to distinguish that.
 * In addition, struct eeh_ops::probe would be invoked for
 * a particular OF node or PCI device so that the corresponding
 * PE would be created there.
 */
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);

/*
 * EEH allowed maximal frozen times.  If one particular PE's
 * frozen count in the last hour exceeds this limit, the PE will
 * be forced to be offline permanently.
 */
int eeh_max_freezes = 5;

/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;

/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);

/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);

/* Buffer for reporting pci register dumps.  It's here in BSS, and
 * not dynamically allocated, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN	8192
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/*
 * This struct maintains the global EEH statistics, which are
 * exported to user space through procfs.
 */
struct eeh_stats {
	u64 no_device;		/* PCI device not found		*/
	u64 no_dn;		/* OF node not found		*/
	u64 no_cfg_addr;	/* Config address not found	*/
	u64 ignored_check;	/* EEH check skipped		*/
	u64 total_mmio_ffs;	/* Total EEH checks		*/
	u64 false_positives;	/* Unnecessary EEH checks	*/
	u64 slot_resets;	/* PE reset			*/
};

static struct eeh_stats eeh_stats;

static int __init eeh_setup(char *str)
{
	if (!strcmp(str, "off"))
		eeh_add_flag(EEH_FORCE_DISABLED);
	else if (!strcmp(str, "early_log"))
		eeh_add_flag(EEH_EARLY_DUMP_LOG);

	return 1;
}
__setup("eeh=", eeh_setup);
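/*
 * Usage note (illustrative): the handler above is wired to the kernel
 * command line, so booting with
 *
 *	eeh=off		- force-disable EEH error checking
 *	eeh=early_log	- dump the EEH log early during recovery
 *
 * sets the corresponding EEH_FORCE_DISABLED / EEH_EARLY_DUMP_LOG flags.
 */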
/*
 * This routine captures assorted PCI configuration space data
 * for the indicated PCI device, and puts them into a buffer
 * for RTAS error logging.
 */
static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len)
{
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	u32 cfg;
	int cap, i;
	int n = 0, l = 0;
	char buffer[128];

	n += scnprintf(buf+n, len-n, "%04x:%02x:%02x:%01x\n",
		       edev->phb->global_number, pdn->busno,
		       PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));
	pr_warn("EEH: of node=%04x:%02x:%02x:%01x\n",
		edev->phb->global_number, pdn->busno,
		PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn));

	eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg);
	n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg);
	pr_warn("EEH: PCI device/vendor: %08x\n", cfg);

	eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg);
	n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg);
	pr_warn("EEH: PCI cmd/status register: %08x\n", cfg);

	/* Gather bridge-specific registers */
	if (edev->mode & EEH_DEV_BRIDGE) {
		eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg);
		n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg);
		pr_warn("EEH: Bridge secondary status: %04x\n", cfg);

		eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg);
		n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg);
		pr_warn("EEH: Bridge control: %04x\n", cfg);
	}

	/* Dump out the PCI-X command and status regs */
	cap = edev->pcix_cap;
	if (cap) {
		eeh_ops->read_config(pdn, cap, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg);
		pr_warn("EEH: PCI-X cmd: %08x\n", cfg);

		eeh_ops->read_config(pdn, cap+4, 4, &cfg);
		n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg);
		pr_warn("EEH: PCI-X status: %08x\n", cfg);
	}

	/* If PCI-E capable, dump PCI-E cap 10 */
	cap = edev->pcie_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e cap10:\n");
		pr_warn("EEH: PCI-E capabilities and status follow:\n");

		for (i=0; i<=8; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}

		}

		pr_warn("%s\n", buffer);
	}

	/* If AER capable, dump it */
	cap = edev->aer_cap;
	if (cap) {
		n += scnprintf(buf+n, len-n, "pci-e AER:\n");
		pr_warn("EEH: PCI-E AER capability register set follows:\n");

		for (i=0; i<=13; i++) {
			eeh_ops->read_config(pdn, cap+4*i, 4, &cfg);
			n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg);

			if ((i % 4) == 0) {
				if (i != 0)
					pr_warn("%s\n", buffer);

				l = scnprintf(buffer, sizeof(buffer),
					      "EEH: PCI-E AER %02x: %08x ",
					      4*i, cfg);
			} else {
				l += scnprintf(buffer+l, sizeof(buffer)-l,
					       "%08x ", cfg);
			}
		}

		pr_warn("%s\n", buffer);
	}

	return n;
}

static void *eeh_dump_pe_log(void *data, void *flag)
{
	struct eeh_pe *pe = data;
	struct eeh_dev *edev, *tmp;
	size_t *plen = flag;

	eeh_pe_for_each_dev(pe, edev, tmp)
		*plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen,
					  EEH_PCI_REGS_LOG_LEN - *plen);

	return NULL;
}
/**
 * eeh_slot_error_detail - Generate combined log including driver log and error log
 * @pe: EEH PE
 * @severity: temporary or permanent error log
 *
 * This routine should be called to generate the combined log, which
 * comprises the driver log and the error log.  The driver log is
 * extracted from the config space of the corresponding PCI device,
 * while the error log is fetched through a platform dependent
 * function call.
 */
void eeh_slot_error_detail(struct eeh_pe *pe, int severity)
{
	size_t loglen = 0;

	/*
	 * When the PHB is fenced or dead, it's pointless to collect
	 * the data from PCI config space because it should return
	 * 0xFF's.  For ER, we still retrieve the data from the PCI
	 * config space.
	 *
	 * For pHyp, we have to enable IO for log retrieval.  Otherwise,
	 * 0xFF's are always returned from PCI config space.
	 */
	if (!(pe->type & EEH_PE_PHB)) {
		if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG))
			eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);

		/*
		 * The config space of some PCI devices can't be accessed
		 * when their PEs are in frozen state.  Otherwise, fenced
		 * PHB might be seen.  Those PEs are identified with flag
		 * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED
		 * is set automatically when the PE is put to EEH_PE_ISOLATED.
		 *
		 * Restoring BARs possibly triggers PCI config access in
		 * (OPAL) firmware and then causes fenced PHB.  If the
		 * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's
		 * pointless to restore BARs and dump config space.
		 */
		eeh_ops->configure_bridge(pe);
		if (!(pe->state & EEH_PE_CFG_BLOCKED)) {
			eeh_pe_restore_bars(pe);

			pci_regs_buf[0] = 0;
			eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen);
		}
	}

	eeh_ops->get_log(pe, severity, pci_regs_buf, loglen);
}

/**
 * eeh_token_to_phys - Convert EEH address token to phys address
 * @token: I/O token, should be address in the form 0xA....
 *
 * This routine should be called to convert a virtual I/O address
 * to a physical one.
 */
static inline unsigned long eeh_token_to_phys(unsigned long token)
{
	pte_t *ptep;
	unsigned long pa;
	int hugepage_shift;

	/*
	 * We won't find hugepages here (this is iomem).  Hence we are not
	 * worried about _PAGE_SPLITTING/collapse.  Also we will not hit
	 * page table free, because of init_mm.
	 */
	ptep = __find_linux_pte_or_hugepte(init_mm.pgd, token,
					   NULL, &hugepage_shift);
	if (!ptep)
		return token;
	WARN_ON(hugepage_shift);
	pa = pte_pfn(*ptep) << PAGE_SHIFT;

	return pa | (token & (PAGE_SIZE-1));
}
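/*
 * Worked example (illustrative): with 64K pages (PAGE_SHIFT == 16),
 * a token of 0xA0001234 whose PTE maps page frame 0x80000 yields
 *
 *	pa = 0x80000 << 16		= 0x800000000
 *	pa | (token & 0xffff)		= 0x800001234
 *
 * i.e. the physical page address with the in-page offset preserved.
 * The concrete numbers are made up; only the arithmetic matters.
 */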
/*
 * On the PowerNV platform, we might already have a fenced PHB
 * there.  For that case, it's meaningless to recover a frozen PE.
 * Instead, we have to handle the fenced PHB first.
 */
static int eeh_phb_check_failure(struct eeh_pe *pe)
{
	struct eeh_pe *phb_pe;
	unsigned long flags;
	int ret;

	if (!eeh_has_flag(EEH_PROBE_MODE_DEV))
		return -EPERM;

	/* Find the PHB PE */
	phb_pe = eeh_phb_pe_get(pe->phb);
	if (!phb_pe) {
		pr_warn("%s: Can't find PE for PHB#%d\n",
			__func__, pe->phb->global_number);
		return -EEXIST;
	}

	/* If the PHB has been in problematic state */
	eeh_serialize_lock(&flags);
	if (phb_pe->state & EEH_PE_ISOLATED) {
		ret = 0;
		goto out;
	}

	/* Check PHB state */
	ret = eeh_ops->get_state(phb_pe, NULL);
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    (ret & (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) ==
	    (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE)) {
		ret = 0;
		goto out;
	}

	/* Isolate the PHB and send event */
	eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	pr_err("EEH: PHB#%x failure detected, location: %s\n",
	       phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe));
	dump_stack();
	eeh_send_failure_event(phb_pe);

	return 1;
out:
	eeh_serialize_unlock(flags);
	return ret;
}

/**
 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze
 * @edev: eeh device
 *
 * Check for an EEH failure for the given device node.  Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze.  This routine
 * will query firmware for the EEH status.
 *
 * Returns 0 if there has not been an EEH error; otherwise returns
 * a non-zero value and queues up a slot isolation event notification.
 *
 * It is safe to call this routine in an interrupt context.
 */
int eeh_dev_check_failure(struct eeh_dev *edev)
{
	int ret;
	int active_flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	unsigned long flags;
	struct pci_dn *pdn;
	struct pci_dev *dev;
	struct eeh_pe *pe, *parent_pe, *phb_pe;
	int rc = 0;
	const char *location = NULL;

	eeh_stats.total_mmio_ffs++;

	if (!eeh_enabled())
		return 0;

	if (!edev) {
		eeh_stats.no_dn++;
		return 0;
	}
	dev = eeh_dev_to_pci_dev(edev);
	pe = eeh_dev_to_pe(edev);

	/* Access to IO BARs might get this far and still not want checking. */
	if (!pe) {
		eeh_stats.ignored_check++;
		pr_debug("EEH: Ignored check for %s\n",
			 eeh_pci_name(dev));
		return 0;
	}

	if (!pe->addr && !pe->config_addr) {
		eeh_stats.no_cfg_addr++;
		return 0;
	}

	/*
	 * On the PowerNV platform, we might already have a fenced PHB
	 * there, and we need to take care of that first.
	 */
	ret = eeh_phb_check_failure(pe);
	if (ret > 0)
		return ret;

	/*
	 * If the PE isn't owned by us, we shouldn't check the
	 * state.  Instead, let the owner handle it if the PE has
	 * been frozen.
	 */
	if (eeh_pe_passed(pe))
		return 0;

	/* If we already have a pending isolation event for this
	 * slot, we know it's bad already, we don't need to check.
	 * Do this checking under a lock; as multiple PCI devices
	 * in one slot might report errors simultaneously, and we
	 * only want one error recovery routine running.
	 */
	eeh_serialize_lock(&flags);
	rc = 1;
	if (pe->state & EEH_PE_ISOLATED) {
		pe->check_count++;
		if (pe->check_count % EEH_MAX_FAILS == 0) {
			pdn = eeh_dev_to_pdn(edev);
			if (pdn->node)
				location = of_get_property(pdn->node, "ibm,loc-code", NULL);
			printk(KERN_ERR "EEH: %d reads ignored for recovering device at "
				"location=%s driver=%s pci addr=%s\n",
				pe->check_count,
				location ? location : "unknown",
				eeh_driver_name(dev), eeh_pci_name(dev));
			printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n",
				eeh_driver_name(dev));
			dump_stack();
		}
		goto dn_unlock;
	}

	/*
	 * Now test for an EEH failure.  This is VERY expensive.
	 * Note that the eeh_config_addr may be a parent device
	 * in the case of a device behind a bridge, or it may be
	 * function zero of a multi-function device.
	 * In any case they must share a common PHB.
	 */
	ret = eeh_ops->get_state(pe, NULL);

	/* Note that config-io to empty slots may fail;
	 * they are empty when they don't have children.
	 * We will punt on any of the following conditions: failure
	 * to get the PE's state, EEH not supported, a permanently
	 * unavailable state, or the PE being in a good state.
	 */
	if ((ret < 0) ||
	    (ret == EEH_STATE_NOT_SUPPORT) ||
	    ((ret & active_flags) == active_flags)) {
		eeh_stats.false_positives++;
		pe->false_positives++;
		rc = 0;
		goto dn_unlock;
	}

	/*
	 * It's a corner case that the parent PE has been put into
	 * frozen state as well.  We should take care of that first.
	 */
	parent_pe = pe->parent;
	while (parent_pe) {
		/* Hit the ceiling ? */
		if (parent_pe->type & EEH_PE_PHB)
			break;

		/* Frozen parent PE ? */
		ret = eeh_ops->get_state(parent_pe, NULL);
		if (ret > 0 &&
		    (ret & active_flags) != active_flags)
			pe = parent_pe;

		/* Next parent level */
		parent_pe = parent_pe->parent;
	}

	eeh_stats.slot_resets++;

	/* Avoid repeated reports of this failure, including problems
	 * with other functions on this device, and functions under
	 * bridges.
	 */
	eeh_pe_state_mark(pe, EEH_PE_ISOLATED);
	eeh_serialize_unlock(flags);

	/* Most EEH events are due to device driver bugs.  Having
	 * a stack trace will help the device-driver authors figure
	 * out what happened.  So print that out.
	 */
	phb_pe = eeh_phb_pe_get(pe->phb);
	pr_err("EEH: Frozen PHB#%x-PE#%x detected\n",
	       pe->phb->global_number, pe->addr);
	pr_err("EEH: PE location: %s, PHB location: %s\n",
	       eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe));
	dump_stack();

	eeh_send_failure_event(pe);

	return 1;

dn_unlock:
	eeh_serialize_unlock(flags);
	return rc;
}
EXPORT_SYMBOL_GPL(eeh_dev_check_failure);
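/*
 * Illustrative sketch (not part of this file): a driver holding a
 * struct pci_dev can run the same check directly, without going
 * through an I/O address.  "pdev" is a hypothetical device pointer.
 *
 *	struct eeh_dev *edev = pci_dev_to_eeh_dev(pdev);
 *
 *	if (edev && eeh_dev_check_failure(edev))
 *		dev_warn(&pdev->dev, "EEH: device frozen, recovery queued\n");
 */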
/**
 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze
 * @token: I/O address
 *
 * Check for an EEH failure at the given I/O address.  Call this
 * routine if the result of a read was all 0xff's and you want to
 * find out if this is due to an EEH slot freeze event.  This routine
 * will query firmware for the EEH status.
 *
 * Note this routine is safe to call in an interrupt context.
 */
int eeh_check_failure(const volatile void __iomem *token)
{
	unsigned long addr;
	struct eeh_dev *edev;

	/* Finding the phys addr + pci device; this is pretty quick. */
	addr = eeh_token_to_phys((unsigned long __force) token);
	edev = eeh_addr_cache_get_dev(addr);
	if (!edev) {
		eeh_stats.no_device++;
		return 0;
	}

	return eeh_dev_check_failure(edev);
}
EXPORT_SYMBOL(eeh_check_failure);


/**
 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot
 * @pe: EEH PE
 * @function: EEH option to enable
 *
 * This routine should be called to reenable frozen MMIO or DMA
 * so that it would work correctly again.  It's useful while doing
 * recovery or log collection on the indicated device.
 */
int eeh_pci_enable(struct eeh_pe *pe, int function)
{
	int active_flag, rc;

	/*
	 * pHyp doesn't allow enabling IO or DMA on an unfrozen PE.
	 * Also, it's pointless to enable them on an unfrozen PE.  So
	 * we have to check before enabling IO or DMA.
	 */
	switch (function) {
	case EEH_OPT_THAW_MMIO:
		active_flag = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED;
		break;
	case EEH_OPT_THAW_DMA:
		active_flag = EEH_STATE_DMA_ACTIVE;
		break;
	case EEH_OPT_DISABLE:
	case EEH_OPT_ENABLE:
	case EEH_OPT_FREEZE_PE:
		active_flag = 0;
		break;
	default:
		pr_warn("%s: Invalid function %d\n",
			__func__, function);
		return -EINVAL;
	}

	/*
	 * Check if IO or DMA has been enabled before
	 * enabling them.
	 */
	if (active_flag) {
		rc = eeh_ops->get_state(pe, NULL);
		if (rc < 0)
			return rc;

		/* Needn't enable it at all */
		if (rc == EEH_STATE_NOT_SUPPORT)
			return 0;

		/* It's already enabled */
		if (rc & active_flag)
			return 0;
	}


	/* Issue the request */
	rc = eeh_ops->set_option(pe, function);
	if (rc)
		pr_warn("%s: Unexpected state change %d on "
			"PHB#%d-PE#%x, err=%d\n",
			__func__, function, pe->phb->global_number,
			pe->addr, rc);

	/* Check if the request is finished successfully */
	if (active_flag) {
		rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if (rc < 0)
			return rc;

		if (rc & active_flag)
			return 0;

		return -EIO;
	}

	return rc;
}

static void *eeh_disable_and_save_dev_state(void *data, void *userdata)
{
	struct eeh_dev *edev = data;
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	/*
	 * The caller should have disabled and saved the
	 * state for the specified device
	 */
	if (!pdev || pdev == dev)
		return NULL;

	/* Ensure we have D0 power state */
	pci_set_power_state(pdev, PCI_D0);

	/* Save device state */
	pci_save_state(pdev);

	/*
	 * Disable device to avoid any DMA traffic and
	 * interrupt from the device
	 */
	pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE);

	return NULL;
}

static void *eeh_restore_dev_state(void *data, void *userdata)
{
	struct eeh_dev *edev = data;
	struct pci_dn *pdn = eeh_dev_to_pdn(edev);
	struct pci_dev *pdev = eeh_dev_to_pci_dev(edev);
	struct pci_dev *dev = userdata;

	if (!pdev)
		return NULL;

	/* Apply customization from firmware */
	if (pdn && eeh_ops->restore_config)
		eeh_ops->restore_config(pdn);

	/* The caller should restore state for the specified device */
	if (pdev != dev)
		pci_restore_state(pdev);

	return NULL;
}
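/*
 * Pattern note (illustrative): the two helpers above are callbacks for
 * eeh_pe_dev_traverse(), which applies a function to every EEH device
 * in a PE.  A reset path therefore looks roughly like:
 *
 *	eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
 *	// ... reset the PE ...
 *	eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
 *
 * as done by pcibios_set_pcie_reset_state() below.
 */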
/**
 * pcibios_set_pcie_reset_state - Set PCI-E reset state
 * @dev: pci device struct
 * @state: reset state to enter
 *
 * Return value:
 *	0 on success
 */
int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state)
{
	struct eeh_dev *edev = pci_dev_to_eeh_dev(dev);
	struct eeh_pe *pe = eeh_dev_to_pe(edev);

	if (!pe) {
		pr_err("%s: No PE found on PCI device %s\n",
		       __func__, pci_name(dev));
		return -EINVAL;
	}

	switch (state) {
	case pcie_deassert_reset:
		eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
		eeh_unfreeze_pe(pe, false);
		if (!(pe->type & EEH_PE_VF))
			eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev);
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);
		break;
	case pcie_hot_reset:
		eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
		if (!(pe->type & EEH_PE_VF))
			eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		eeh_ops->reset(pe, EEH_RESET_HOT);
		break;
	case pcie_warm_reset:
		eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED);
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);
		eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev);
		if (!(pe->type & EEH_PE_VF))
			eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
		break;
	default:
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED);
		return -EINVAL;
	}

	return 0;
}
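/*
 * Illustrative sketch (not part of this file): a driver never calls
 * the pcibios hook directly; it goes through the generic PCI helper,
 * which dispatches here on powerpc.  "pdev" is hypothetical.
 *
 *	pci_set_pcie_reset_state(pdev, pcie_hot_reset);
 *	// ... wait for the reset to settle ...
 *	pci_set_pcie_reset_state(pdev, pcie_deassert_reset);
 */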
/**
 * eeh_set_dev_freset - Check the required reset for the indicated device
 * @data: EEH device
 * @flag: return value
 *
 * Each device might have its preferred reset type: fundamental or
 * hot reset.  The routine is used to collect the information for
 * the indicated device and its children so that the bunch of the
 * devices could be reset properly.
 */
static void *eeh_set_dev_freset(void *data, void *flag)
{
	struct pci_dev *dev;
	unsigned int *freset = (unsigned int *)flag;
	struct eeh_dev *edev = (struct eeh_dev *)data;

	dev = eeh_dev_to_pci_dev(edev);
	if (dev)
		*freset |= dev->needs_freset;

	return NULL;
}

/**
 * eeh_reset_pe_once - Assert the pci #RST line for 1/4 second
 * @pe: EEH PE
 *
 * Assert the PCI #RST line for 1/4 second.
 */
static void eeh_reset_pe_once(struct eeh_pe *pe)
{
	unsigned int freset = 0;

	/* Determine type of EEH reset required for
	 * Partitionable Endpoint, a hot-reset (1)
	 * or a fundamental reset (3).
	 * A fundamental reset required by any device under
	 * Partitionable Endpoint trumps hot-reset.
	 */
	eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset);

	if (freset)
		eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL);
	else
		eeh_ops->reset(pe, EEH_RESET_HOT);

	eeh_ops->reset(pe, EEH_RESET_DEACTIVATE);
}

/**
 * eeh_reset_pe - Reset the indicated PE
 * @pe: EEH PE
 *
 * This routine should be called to reset the indicated device,
 * including the PE.  A PE might include multiple PCI devices and
 * sometimes PCI bridges might be involved as well.
 */
int eeh_reset_pe(struct eeh_pe *pe)
{
	int flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	int i, state, ret;

	/* Mark as reset and block config space */
	eeh_pe_state_mark(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED);

	/* Take three shots at resetting the bus */
	for (i = 0; i < 3; i++) {
		eeh_reset_pe_once(pe);

		/*
		 * EEH_PE_ISOLATED is expected to be removed after
		 * BAR restore.
		 */
		state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC);
		if ((state & flags) == flags) {
			ret = 0;
			goto out;
		}

		if (state < 0) {
			pr_warn("%s: Unrecoverable slot failure on PHB#%d-PE#%x\n",
				__func__, pe->phb->global_number, pe->addr);
			ret = -ENOTRECOVERABLE;
			goto out;
		}

		/* We might run out of credits */
		ret = -EIO;
		pr_warn("%s: Failure %d resetting PHB#%x-PE#%x (%d)\n",
			__func__, state, pe->phb->global_number, pe->addr, (i + 1));
	}

out:
	eeh_pe_state_clear(pe, EEH_PE_RESET | EEH_PE_CFG_BLOCKED);
	return ret;
}

/**
 * eeh_save_bars - Save device bars
 * @edev: PCI device associated EEH device
 *
 * Save the values of the device bars.  Unlike the restore
 * routine, this routine is *not* recursive.  This is because
 * PCI devices are added individually; but, for the restore,
 * an entire slot is reset at a time.
 */
void eeh_save_bars(struct eeh_dev *edev)
{
	struct pci_dn *pdn;
	int i;

	pdn = eeh_dev_to_pdn(edev);
	if (!pdn)
		return;

	for (i = 0; i < 16; i++)
		eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]);

	/*
	 * For PCI bridges including the root port, we need to enable
	 * bus mastering explicitly.  Otherwise, it can't fetch IODA
	 * table entries correctly.  So we cache the bit in advance
	 * so that we can restore it after reset, either PHB range
	 * or PE range.
	 */
	if (edev->mode & EEH_DEV_BRIDGE)
		edev->config_space[1] |= PCI_COMMAND_MASTER;
}

/**
 * eeh_ops_register - Register platform dependent EEH operations
 * @ops: platform dependent EEH operations
 *
 * Register the platform dependent EEH operation callback
 * functions.  The platform should call this function before
 * any other EEH operations.
 */
int __init eeh_ops_register(struct eeh_ops *ops)
{
	if (!ops->name) {
		pr_warn("%s: Invalid EEH ops name for %p\n",
			__func__, ops);
		return -EINVAL;
	}

	if (eeh_ops && eeh_ops != ops) {
		pr_warn("%s: EEH ops of platform %s already existing (%s)\n",
			__func__, eeh_ops->name, ops->name);
		return -EEXIST;
	}

	eeh_ops = ops;

	return 0;
}
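/*
 * Illustrative sketch (not part of this file): a platform registers
 * its callbacks once, early in boot.  "foo" and the two callbacks are
 * hypothetical; the real ops tables live in the pseries/powernv code.
 *
 *	static struct eeh_ops foo_eeh_ops = {
 *		.name		= "foo",
 *		.init		= foo_eeh_init,
 *		.get_state	= foo_eeh_get_state,
 *		// ... reset, get_log, read_config, etc. ...
 *	};
 *
 *	eeh_ops_register(&foo_eeh_ops);
 */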
/**
 * eeh_ops_unregister - Unregister platform dependent EEH operations
 * @name: name of EEH platform operations
 *
 * Unregister the platform dependent EEH operation callback
 * functions.
 */
int __exit eeh_ops_unregister(const char *name)
{
	if (!name || !strlen(name)) {
		pr_warn("%s: Invalid EEH ops name\n",
			__func__);
		return -EINVAL;
	}

	if (eeh_ops && !strcmp(eeh_ops->name, name)) {
		eeh_ops = NULL;
		return 0;
	}

	return -EEXIST;
}

static int eeh_reboot_notifier(struct notifier_block *nb,
			       unsigned long action, void *unused)
{
	eeh_clear_flag(EEH_ENABLED);
	return NOTIFY_DONE;
}

static struct notifier_block eeh_reboot_nb = {
	.notifier_call = eeh_reboot_notifier,
};

/**
 * eeh_init - EEH initialization
 *
 * Initialize EEH by trying to enable it for all of the adapters in the system.
 * As a side effect we can determine here if eeh is supported at all.
 * Note that we leave EEH on so failed config cycles won't cause a machine
 * check.  If a user turns off EEH for a particular adapter they are really
 * telling Linux to ignore errors.  Some hardware (e.g. POWER5) won't
 * grant access to a slot if EEH isn't enabled, and so we always enable
 * EEH for all slots/all devices.
 *
 * The eeh-force-off option disables EEH checking globally, for all slots.
 * Even if force-off is set, the EEH hardware is still enabled, so that
 * newer systems can boot.
 */
int eeh_init(void)
{
	struct pci_controller *hose, *tmp;
	struct pci_dn *pdn;
	static int cnt = 0;
	int ret = 0;

	/*
	 * We have to delay the initialization on PowerNV until after
	 * the PCI hierarchy tree has been built, because the PEs
	 * are figured out based on PCI devices instead of device
	 * tree nodes.
	 */
	if (machine_is(powernv) && cnt++ <= 0)
		return ret;

	/* Register reboot notifier */
	ret = register_reboot_notifier(&eeh_reboot_nb);
	if (ret) {
		pr_warn("%s: Failed to register notifier (%d)\n",
			__func__, ret);
		return ret;
	}

	/* Call platform initialization function */
	if (!eeh_ops) {
		pr_warn("%s: Platform EEH operation not found\n",
			__func__);
		return -EEXIST;
	} else if ((ret = eeh_ops->init()))
		return ret;

	/* Initialize EEH event */
	ret = eeh_event_init();
	if (ret)
		return ret;

	/* Enable EEH for all adapters */
	list_for_each_entry_safe(hose, tmp, &hose_list, list_node) {
		pdn = hose->pci_data;
		traverse_pci_dn(pdn, eeh_ops->probe, NULL);
	}

	/*
	 * Call platform post-initialization.  It's a good chance to
	 * inform the platform that EEH is ready to supply service
	 * once the I/O cache stuff has been built up.
	 */
	if (eeh_ops->post_init) {
		ret = eeh_ops->post_init();
		if (ret)
			return ret;
	}

	if (eeh_enabled())
		pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n");
	else
		pr_warn("EEH: No capable adapters found\n");

	return ret;
}
core_initcall_sync(eeh_init);
/**
 * eeh_add_device_early - Enable EEH for the indicated device node
 * @pdn: PCI device node for which to set up EEH
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 * This routine must be called before any i/o is performed to the
 * adapter (including any config-space i/o).
 * Whether this actually enables EEH or not for this device depends
 * on the CEC architecture, the type of the device, earlier boot
 * command-line arguments, etc.
 */
void eeh_add_device_early(struct pci_dn *pdn)
{
	struct pci_controller *phb;
	struct eeh_dev *edev = pdn_to_eeh_dev(pdn);

	if (!edev)
		return;

	if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE))
		return;

	/* USB Bus children of PCI devices will not have BUIDs */
	phb = edev->phb;
	if (NULL == phb ||
	    (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid))
		return;

	eeh_ops->probe(pdn, NULL);
}

/**
 * eeh_add_device_tree_early - Enable EEH for the indicated device
 * @pdn: PCI device node
 *
 * This routine must be used to perform EEH initialization for the
 * indicated PCI device that was added after system boot (e.g.
 * hotplug, dlpar).
 */
void eeh_add_device_tree_early(struct pci_dn *pdn)
{
	struct pci_dn *n;

	if (!pdn)
		return;

	list_for_each_entry(n, &pdn->child_list, list)
		eeh_add_device_tree_early(n);
	eeh_add_device_early(pdn);
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_early);
/**
 * eeh_add_device_late - Perform EEH initialization for the indicated pci device
 * @dev: pci device for which to set up EEH
 *
 * This routine must be used to complete EEH initialization for PCI
 * devices that were added after system boot (e.g. hotplug, dlpar).
 */
void eeh_add_device_late(struct pci_dev *dev)
{
	struct pci_dn *pdn;
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;

	pr_debug("EEH: Adding device %s\n", pci_name(dev));

	pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn);
	edev = pdn_to_eeh_dev(pdn);
	if (edev->pdev == dev) {
		pr_debug("EEH: Already referenced !\n");
		return;
	}

	/*
	 * The EEH cache might not be removed correctly because of
	 * unbalanced kref to the device during unplug time, which
	 * relies on pcibios_release_device().  So we have to remove
	 * that here explicitly.
	 */
	if (edev->pdev) {
		eeh_rmv_from_parent_pe(edev);
		eeh_addr_cache_rmv_dev(edev->pdev);
		eeh_sysfs_remove_device(edev->pdev);
		edev->mode &= ~EEH_DEV_SYSFS;

		/*
		 * The PCI device should have been removed, though it
		 * wasn't done correctly.  So we needn't call into the
		 * error handler afterwards.
		 */
		edev->mode |= EEH_DEV_NO_HANDLER;

		edev->pdev = NULL;
		dev->dev.archdata.edev = NULL;
	}

	if (eeh_has_flag(EEH_PROBE_MODE_DEV))
		eeh_ops->probe(pdn, NULL);

	edev->pdev = dev;
	dev->dev.archdata.edev = edev;

	eeh_addr_cache_insert_dev(dev);
}

/**
 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to perform EEH initialization for PCI
 * devices which are attached to the indicated PCI bus.  The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_device_tree_late(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_add_device_late(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_device_tree_late(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_device_tree_late);

/**
 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus
 * @bus: PCI bus
 *
 * This routine must be used to add EEH sysfs files for PCI
 * devices which are attached to the indicated PCI bus.  The PCI bus
 * is added after system boot through hotplug or dlpar.
 */
void eeh_add_sysfs_files(struct pci_bus *bus)
{
	struct pci_dev *dev;

	list_for_each_entry(dev, &bus->devices, bus_list) {
		eeh_sysfs_add_device(dev);
		if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) {
			struct pci_bus *subbus = dev->subordinate;
			if (subbus)
				eeh_add_sysfs_files(subbus);
		}
	}
}
EXPORT_SYMBOL_GPL(eeh_add_sysfs_files);
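/*
 * Illustrative sketch (not part of this file): the hotplug path calls
 * the three helpers above in order: before config-space access, after
 * the bus has been scanned, and once the devices exist in sysfs.
 * "pdn" and "bus" are hypothetical hotplug-supplied handles.
 *
 *	eeh_add_device_tree_early(pdn);	// before any config i/o
 *	// ... pci_scan_*() populates the bus ...
 *	eeh_add_device_tree_late(bus);	// bind eeh_dev to pci_dev
 *	eeh_add_sysfs_files(bus);	// expose EEH attributes
 */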
/**
 * eeh_remove_device - Undo EEH setup for the indicated pci device
 * @dev: pci device to be removed
 *
 * This routine should be called when a device is removed from
 * a running system (e.g. by hotplug or dlpar).  It unregisters
 * the PCI device from the EEH subsystem.  I/O errors affecting
 * this device will no longer be detected after this call; thus,
 * i/o errors affecting this slot may leave this device unusable.
 */
void eeh_remove_device(struct pci_dev *dev)
{
	struct eeh_dev *edev;

	if (!dev || !eeh_enabled())
		return;
	edev = pci_dev_to_eeh_dev(dev);

	/* Unregister the device with the EEH/PCI address search system */
	pr_debug("EEH: Removing device %s\n", pci_name(dev));

	if (!edev || !edev->pdev || !edev->pe) {
		pr_debug("EEH: Not referenced !\n");
		return;
	}

	/*
	 * During the hotplug for EEH error recovery, we need the EEH
	 * device attached to the parent PE in order for BAR restore
	 * a bit later.  So we keep it for BAR restore and remove it
	 * from the parent PE during the BAR restore.
	 */
	edev->pdev = NULL;

	/*
	 * The flag "in_error" is used to trace EEH devices for VFs
	 * in error state or not.  It's set in eeh_report_error().  If
	 * it's not set, eeh_report_{reset,resume}() won't be called
	 * for the VF EEH device.
	 */
	edev->in_error = false;
	dev->dev.archdata.edev = NULL;
	if (!(edev->pe->state & EEH_PE_KEEP))
		eeh_rmv_from_parent_pe(edev);
	else
		edev->mode |= EEH_DEV_DISCONNECTED;

	/*
	 * We're removing from the PCI subsystem, which means
	 * the PCI device driver doesn't support EEH or doesn't
	 * support it well.  So we rely on hotplug completely to
	 * do recovery for the specific PCI device.
	 */
	edev->mode |= EEH_DEV_NO_HANDLER;

	eeh_addr_cache_rmv_dev(dev);
	eeh_sysfs_remove_device(dev);
	edev->mode &= ~EEH_DEV_SYSFS;
}

int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state)
{
	int ret;

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);
	if (ret) {
		pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);
	if (ret) {
		pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n",
			__func__, ret, pe->phb->global_number, pe->addr);
		return ret;
	}

	/* Clear software isolated state */
	if (sw_state && (pe->state & EEH_PE_ISOLATED))
		eeh_pe_state_clear(pe, EEH_PE_ISOLATED);

	return ret;
}


static struct pci_device_id eeh_reset_ids[] = {
	{ PCI_DEVICE(0x19a2, 0x0710) },	/* Emulex, BE       */
	{ PCI_DEVICE(0x10df, 0xe220) },	/* Emulex, Lancer   */
	{ PCI_DEVICE(0x14e4, 0x1657) },	/* Broadcom BCM5719 */
	{ 0 }
};
static int eeh_pe_change_owner(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	struct pci_device_id *id;
	int flags, ret;

	/* Check PE state */
	flags = (EEH_STATE_MMIO_ACTIVE | EEH_STATE_DMA_ACTIVE);
	ret = eeh_ops->get_state(pe, NULL);
	if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT)
		return 0;

	/* Unfrozen PE, nothing to do */
	if ((ret & flags) == flags)
		return 0;

	/* Frozen PE, check if it needs PE level reset */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) {
			if (id->vendor != PCI_ANY_ID &&
			    id->vendor != pdev->vendor)
				continue;
			if (id->device != PCI_ANY_ID &&
			    id->device != pdev->device)
				continue;
			if (id->subvendor != PCI_ANY_ID &&
			    id->subvendor != pdev->subsystem_vendor)
				continue;
			if (id->subdevice != PCI_ANY_ID &&
			    id->subdevice != pdev->subsystem_device)
				continue;

			return eeh_pe_reset_and_recover(pe);
		}
	}

	return eeh_unfreeze_pe(pe, true);
}

/**
 * eeh_dev_open - Increase count of pass through devices for PE
 * @pdev: PCI device
 *
 * Increase the count of passed through devices for the indicated
 * PE.  As a result, the EEH errors detected on the PE won't be
 * reported.  The PE owner will be responsible for detection
 * and recovery.
 */
int eeh_dev_open(struct pci_dev *pdev)
{
	struct eeh_dev *edev;
	int ret = -ENODEV;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		goto out;

	/*
	 * The PE might have been put into frozen state, but we
	 * didn't detect that yet.  The passed through PCI devices
	 * in frozen PE won't work properly.  Clear the frozen state
	 * in advance.
	 */
	ret = eeh_pe_change_owner(edev->pe);
	if (ret)
		goto out;

	/* Increase PE's pass through count */
	atomic_inc(&edev->pe->pass_dev_cnt);
	mutex_unlock(&eeh_dev_mutex);

	return 0;
out:
	mutex_unlock(&eeh_dev_mutex);
	return ret;
}
EXPORT_SYMBOL_GPL(eeh_dev_open);

/**
 * eeh_dev_release - Decrease count of pass through devices for PE
 * @pdev: PCI device
 *
 * Decrease the count of pass through devices for the indicated PE.  If
 * there is no passed through device in the PE, the EEH errors detected
 * on the PE will be reported and handled as usual.
 */
void eeh_dev_release(struct pci_dev *pdev)
{
	struct eeh_dev *edev;

	mutex_lock(&eeh_dev_mutex);

	/* No PCI device ? */
	if (!pdev)
		goto out;

	/* No EEH device ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe || !eeh_pe_passed(edev->pe))
		goto out;

	/* Decrease PE's pass through count */
	WARN_ON(atomic_dec_if_positive(&edev->pe->pass_dev_cnt) < 0);
	eeh_pe_change_owner(edev->pe);
out:
	mutex_unlock(&eeh_dev_mutex);
}
EXPORT_SYMBOL(eeh_dev_release);
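/*
 * Illustrative sketch (not part of this file): a device-passthrough
 * host (e.g. VFIO on powerpc) brackets guest ownership with the two
 * calls above, so the kernel stops reporting EEH errors while the
 * guest owns the device.  "pdev" is a hypothetical assigned device.
 *
 *	if (eeh_dev_open(pdev))
 *		return -ENODEV;		// PE unusable, refuse assignment
 *	// ... guest drives the device; guest owns EEH recovery ...
 *	eeh_dev_release(pdev);		// host resumes EEH handling
 */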
#ifdef CONFIG_IOMMU_API

static int dev_has_iommu_table(struct device *dev, void *data)
{
	struct pci_dev *pdev = to_pci_dev(dev);
	struct pci_dev **ppdev = data;

	if (!dev)
		return 0;

	if (dev->iommu_group) {
		*ppdev = pdev;
		return 1;
	}

	return 0;
}

/**
 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE
 * @group: IOMMU group
 *
 * The routine is called to convert an IOMMU group to an EEH PE.
 */
struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group)
{
	struct pci_dev *pdev = NULL;
	struct eeh_dev *edev;
	int ret;

	/* No IOMMU group ? */
	if (!group)
		return NULL;

	ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table);
	if (!ret || !pdev)
		return NULL;

	/* No EEH device or PE ? */
	edev = pci_dev_to_eeh_dev(pdev);
	if (!edev || !edev->pe)
		return NULL;

	return edev->pe;
}
EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe);

#endif /* CONFIG_IOMMU_API */

/**
 * eeh_pe_set_option - Set options for the indicated PE
 * @pe: EEH PE
 * @option: requested option
 *
 * The routine is called to enable or disable EEH functionality
 * on the indicated PE, or to enable IO or DMA for a frozen PE.
 */
int eeh_pe_set_option(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/*
	 * EEH functionality could possibly be disabled; just
	 * return an error in that case.  EEH functionality
	 * isn't expected to be disabled on one specific PE.
	 */
	switch (option) {
	case EEH_OPT_ENABLE:
		if (eeh_enabled()) {
			ret = eeh_pe_change_owner(pe);
			break;
		}
		ret = -EIO;
		break;
	case EEH_OPT_DISABLE:
		break;
	case EEH_OPT_THAW_MMIO:
	case EEH_OPT_THAW_DMA:
		if (!eeh_ops || !eeh_ops->set_option) {
			ret = -ENOENT;
			break;
		}

		ret = eeh_pci_enable(pe, option);
		break;
	default:
		pr_debug("%s: Option %d out of range (%d, %d)\n",
			 __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_set_option);

/**
 * eeh_pe_get_state - Retrieve PE's state
 * @pe: EEH PE
 *
 * Retrieve the PE's state, which covers 3 aspects: enabled
 * DMA, enabled IO and asserted reset.
 */
int eeh_pe_get_state(struct eeh_pe *pe)
{
	int result, ret = 0;
	bool rst_active, dma_en, mmio_en;

	/* Existing PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->get_state)
		return -ENOENT;

	/*
	 * If the parent PE is owned by the host kernel and is undergoing
	 * error recovery, we should return the PE state as temporarily
	 * unavailable so that the error recovery on the guest is suspended
	 * until the recovery completes on the host.
	 */
	if (pe->parent &&
	    !(pe->state & EEH_PE_REMOVED) &&
	    (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING)))
		return EEH_PE_STATE_UNAVAIL;

	result = eeh_ops->get_state(pe, NULL);
	rst_active = !!(result & EEH_STATE_RESET_ACTIVE);
	dma_en = !!(result & EEH_STATE_DMA_ENABLED);
	mmio_en = !!(result & EEH_STATE_MMIO_ENABLED);

	if (rst_active)
		ret = EEH_PE_STATE_RESET;
	else if (dma_en && mmio_en)
		ret = EEH_PE_STATE_NORMAL;
	else if (!dma_en && !mmio_en)
		ret = EEH_PE_STATE_STOPPED_IO_DMA;
	else if (!dma_en && mmio_en)
		ret = EEH_PE_STATE_STOPPED_DMA;
	else
		ret = EEH_PE_STATE_UNAVAIL;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_get_state);
static int eeh_pe_reenable_devices(struct eeh_pe *pe)
{
	struct eeh_dev *edev, *tmp;
	struct pci_dev *pdev;
	int ret = 0;

	/* Restore config space */
	eeh_pe_restore_bars(pe);

	/*
	 * Reenable PCI devices as the devices passed
	 * through are always enabled before the reset.
	 */
	eeh_pe_for_each_dev(pe, edev, tmp) {
		pdev = eeh_dev_to_pci_dev(edev);
		if (!pdev)
			continue;

		ret = pci_reenable_device(pdev);
		if (ret) {
			pr_warn("%s: Failure %d reenabling %s\n",
				__func__, ret, pci_name(pdev));
			return ret;
		}
	}

	/* The PE is still in frozen state */
	return eeh_unfreeze_pe(pe, true);
}

/**
 * eeh_pe_reset - Issue PE reset according to specified type
 * @pe: EEH PE
 * @option: reset type
 *
 * The routine is called to reset the specified PE with the
 * indicated type, either fundamental reset or hot reset.
 * PE reset is the most important part of error recovery.
 */
int eeh_pe_reset(struct eeh_pe *pe, int option)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset)
		return -ENOENT;

	switch (option) {
	case EEH_RESET_DEACTIVATE:
		ret = eeh_ops->reset(pe, option);
		eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED);
		if (ret)
			break;

		ret = eeh_pe_reenable_devices(pe);
		break;
	case EEH_RESET_HOT:
	case EEH_RESET_FUNDAMENTAL:
		/*
		 * Proactively freeze the PE to drop all MMIO access
		 * during reset, which should be banned as it always
		 * causes a recursive EEH error.
		 */
		eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE);

		eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED);
		ret = eeh_ops->reset(pe, option);
		break;
	default:
		pr_debug("%s: Unsupported option %d\n",
			 __func__, option);
		ret = -EINVAL;
	}

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_reset);

/**
 * eeh_pe_configure - Configure PCI bridges after PE reset
 * @pe: EEH PE
 *
 * The routine is called to restore the PCI config space for
 * those PCI devices, especially PCI bridges, affected by the
 * PE reset issued previously.
 */
int eeh_pe_configure(struct eeh_pe *pe)
{
	int ret = 0;

	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	return ret;
}
EXPORT_SYMBOL_GPL(eeh_pe_configure);

/**
 * eeh_pe_inject_err - Inject the specified PCI error to the indicated PE
 * @pe: the indicated PE
 * @type: error type
 * @func: error function
 * @addr: address
 * @mask: address mask
 *
 * The routine is called to inject the specified PCI error, which
 * is determined by @type and @func, to the indicated PE for
 * testing purposes.
 */
int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func,
		      unsigned long addr, unsigned long mask)
{
	/* Invalid PE ? */
	if (!pe)
		return -ENODEV;

	/* Unsupported operation ? */
	if (!eeh_ops || !eeh_ops->err_inject)
		return -ENOENT;

	/* Check on PCI error type */
	if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64)
		return -EINVAL;

	/* Check on PCI error function */
	if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX)
		return -EINVAL;

	return eeh_ops->err_inject(pe, type, func, addr, mask);
}
EXPORT_SYMBOL_GPL(eeh_pe_inject_err);
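/*
 * Illustrative sketch (not part of this file): an error-injection
 * test harness could force a 32-bit error on a PE to exercise the
 * recovery path.  The function code and addresses are hypothetical;
 * valid codes are platform defined within
 * [EEH_ERR_FUNC_MIN, EEH_ERR_FUNC_MAX].
 *
 *	ret = eeh_pe_inject_err(pe, EEH_ERR_TYPE_32, func, addr, mask);
 *	if (ret)
 *		pr_err("EEH: error injection failed (%d)\n", ret);
 */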
static int proc_eeh_show(struct seq_file *m, void *v)
{
	if (!eeh_enabled()) {
		seq_printf(m, "EEH Subsystem is globally disabled\n");
		seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs);
	} else {
		seq_printf(m, "EEH Subsystem is enabled\n");
		seq_printf(m,
			   "no device=%llu\n"
			   "no device node=%llu\n"
			   "no config address=%llu\n"
			   "check not wanted=%llu\n"
			   "eeh_total_mmio_ffs=%llu\n"
			   "eeh_false_positives=%llu\n"
			   "eeh_slot_resets=%llu\n",
			   eeh_stats.no_device,
			   eeh_stats.no_dn,
			   eeh_stats.no_cfg_addr,
			   eeh_stats.ignored_check,
			   eeh_stats.total_mmio_ffs,
			   eeh_stats.false_positives,
			   eeh_stats.slot_resets);
	}

	return 0;
}

static int proc_eeh_open(struct inode *inode, struct file *file)
{
	return single_open(file, proc_eeh_show, NULL);
}

static const struct file_operations proc_eeh_operations = {
	.open      = proc_eeh_open,
	.read      = seq_read,
	.llseek    = seq_lseek,
	.release   = single_release,
};

#ifdef CONFIG_DEBUG_FS
static int eeh_enable_dbgfs_set(void *data, u64 val)
{
	if (val)
		eeh_clear_flag(EEH_FORCE_DISABLED);
	else
		eeh_add_flag(EEH_FORCE_DISABLED);

	/* Notify the backend */
	if (eeh_ops->post_init)
		eeh_ops->post_init();

	return 0;
}

static int eeh_enable_dbgfs_get(void *data, u64 *val)
{
	if (eeh_enabled())
		*val = 0x1ul;
	else
		*val = 0x0ul;
	return 0;
}

static int eeh_freeze_dbgfs_set(void *data, u64 val)
{
	eeh_max_freezes = val;
	return 0;
}

static int eeh_freeze_dbgfs_get(void *data, u64 *val)
{
	*val = eeh_max_freezes;
	return 0;
}

DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get,
			eeh_enable_dbgfs_set, "0x%llx\n");
DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get,
			eeh_freeze_dbgfs_set, "0x%llx\n");
#endif

static int __init eeh_init_proc(void)
{
	if (machine_is(pseries) || machine_is(powernv)) {
		proc_create("powerpc/eeh", 0, NULL, &proc_eeh_operations);
#ifdef CONFIG_DEBUG_FS
		debugfs_create_file("eeh_enable", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_enable_dbgfs_ops);
		debugfs_create_file("eeh_max_freezes", 0600,
				    powerpc_debugfs_root, NULL,
				    &eeh_freeze_dbgfs_ops);
#endif
	}

	return 0;
}
__initcall(eeh_init_proc);