1 /* 2 * Copyright IBM Corporation 2001, 2005, 2006 3 * Copyright Dave Engebretsen & Todd Inglett 2001 4 * Copyright Linas Vepstas 2005, 2006 5 * Copyright 2001-2012 IBM Corporation. 6 * 7 * This program is free software; you can redistribute it and/or modify 8 * it under the terms of the GNU General Public License as published by 9 * the Free Software Foundation; either version 2 of the License, or 10 * (at your option) any later version. 11 * 12 * This program is distributed in the hope that it will be useful, 13 * but WITHOUT ANY WARRANTY; without even the implied warranty of 14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the 15 * GNU General Public License for more details. 16 * 17 * You should have received a copy of the GNU General Public License 18 * along with this program; if not, write to the Free Software 19 * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA 20 * 21 * Please address comments and feedback to Linas Vepstas <linas@austin.ibm.com> 22 */ 23 24 #include <linux/delay.h> 25 #include <linux/sched.h> 26 #include <linux/init.h> 27 #include <linux/list.h> 28 #include <linux/pci.h> 29 #include <linux/iommu.h> 30 #include <linux/proc_fs.h> 31 #include <linux/rbtree.h> 32 #include <linux/reboot.h> 33 #include <linux/seq_file.h> 34 #include <linux/spinlock.h> 35 #include <linux/export.h> 36 #include <linux/of.h> 37 38 #include <linux/atomic.h> 39 #include <asm/debugfs.h> 40 #include <asm/eeh.h> 41 #include <asm/eeh_event.h> 42 #include <asm/io.h> 43 #include <asm/iommu.h> 44 #include <asm/machdep.h> 45 #include <asm/ppc-pci.h> 46 #include <asm/rtas.h> 47 #include <asm/pte-walk.h> 48 49 50 /** Overview: 51 * EEH, or "Enhanced Error Handling" is a PCI bridge technology for 52 * dealing with PCI bus errors that can't be dealt with within the 53 * usual PCI framework, except by check-stopping the CPU. 
Systems 54 * that are designed for high-availability/reliability cannot afford 55 * to crash due to a "mere" PCI error, thus the need for EEH. 56 * An EEH-capable bridge operates by converting a detected error 57 * into a "slot freeze", taking the PCI adapter off-line, making 58 * the slot behave, from the OS'es point of view, as if the slot 59 * were "empty": all reads return 0xff's and all writes are silently 60 * ignored. EEH slot isolation events can be triggered by parity 61 * errors on the address or data busses (e.g. during posted writes), 62 * which in turn might be caused by low voltage on the bus, dust, 63 * vibration, humidity, radioactivity or plain-old failed hardware. 64 * 65 * Note, however, that one of the leading causes of EEH slot 66 * freeze events are buggy device drivers, buggy device microcode, 67 * or buggy device hardware. This is because any attempt by the 68 * device to bus-master data to a memory address that is not 69 * assigned to the device will trigger a slot freeze. (The idea 70 * is to prevent devices-gone-wild from corrupting system memory). 71 * Buggy hardware/drivers will have a miserable time co-existing 72 * with EEH. 73 * 74 * Ideally, a PCI device driver, when suspecting that an isolation 75 * event has occurred (e.g. by reading 0xff's), will then ask EEH 76 * whether this is the case, and then take appropriate steps to 77 * reset the PCI slot, the PCI device, and then resume operations. 78 * However, until that day, the checking is done here, with the 79 * eeh_check_failure() routine embedded in the MMIO macros. If 80 * the slot is found to be isolated, an "EEH Event" is synthesized 81 * and sent out for processing. 82 */ 83 84 /* If a device driver keeps reading an MMIO register in an interrupt 85 * handler after a slot isolation event, it might be broken. 86 * This sets the threshold for how many read attempts we allow 87 * before printing an error message. 
 */
#define EEH_MAX_FAILS	2100000

/* Time to wait for a PCI slot to report status, in milliseconds */
#define PCI_BUS_RESET_WAIT_MSEC (5*60*1000)

/*
 * EEH probe mode support, which is part of the flags,
 * is to support multiple platforms for EEH. Some platforms
 * like pSeries do PCI enumeration based on device tree.
 * However, other platforms like powernv probe PCI devices
 * from hardware. The flag is used to distinguish that.
 * In addition, struct eeh_ops::probe would be invoked for
 * particular OF node or PCI device so that the corresponding
 * PE would be created there.
 */
int eeh_subsystem_flags;
EXPORT_SYMBOL(eeh_subsystem_flags);

/*
 * EEH allowed maximal frozen times. If one particular PE's
 * frozen count in last hour exceeds this limit, the PE will
 * be forced to be offline permanently.
 */
int eeh_max_freezes = 5;

/* Platform dependent EEH operations */
struct eeh_ops *eeh_ops = NULL;

/* Lock to avoid races due to multiple reports of an error */
DEFINE_RAW_SPINLOCK(confirm_error_lock);
EXPORT_SYMBOL_GPL(confirm_error_lock);

/* Lock to protect passed flags */
static DEFINE_MUTEX(eeh_dev_mutex);

/* Buffer for reporting pci register dumps. It's here in BSS, and
 * not dynamically allocated, so that it ends up in RMO where RTAS
 * can access it.
 */
#define EEH_PCI_REGS_LOG_LEN 8192
static unsigned char pci_regs_buf[EEH_PCI_REGS_LOG_LEN];

/*
 * The struct is used to maintain the EEH global statistic
 * information.
Besides, the EEH global statistics will be 134 * exported to user space through procfs 135 */ 136 struct eeh_stats { 137 u64 no_device; /* PCI device not found */ 138 u64 no_dn; /* OF node not found */ 139 u64 no_cfg_addr; /* Config address not found */ 140 u64 ignored_check; /* EEH check skipped */ 141 u64 total_mmio_ffs; /* Total EEH checks */ 142 u64 false_positives; /* Unnecessary EEH checks */ 143 u64 slot_resets; /* PE reset */ 144 }; 145 146 static struct eeh_stats eeh_stats; 147 148 static int __init eeh_setup(char *str) 149 { 150 if (!strcmp(str, "off")) 151 eeh_add_flag(EEH_FORCE_DISABLED); 152 else if (!strcmp(str, "early_log")) 153 eeh_add_flag(EEH_EARLY_DUMP_LOG); 154 155 return 1; 156 } 157 __setup("eeh=", eeh_setup); 158 159 /* 160 * This routine captures assorted PCI configuration space data 161 * for the indicated PCI device, and puts them into a buffer 162 * for RTAS error logging. 163 */ 164 static size_t eeh_dump_dev_log(struct eeh_dev *edev, char *buf, size_t len) 165 { 166 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 167 u32 cfg; 168 int cap, i; 169 int n = 0, l = 0; 170 char buffer[128]; 171 172 n += scnprintf(buf+n, len-n, "%04x:%02x:%02x.%01x\n", 173 pdn->phb->global_number, pdn->busno, 174 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); 175 pr_warn("EEH: of node=%04x:%02x:%02x.%01x\n", 176 pdn->phb->global_number, pdn->busno, 177 PCI_SLOT(pdn->devfn), PCI_FUNC(pdn->devfn)); 178 179 eeh_ops->read_config(pdn, PCI_VENDOR_ID, 4, &cfg); 180 n += scnprintf(buf+n, len-n, "dev/vend:%08x\n", cfg); 181 pr_warn("EEH: PCI device/vendor: %08x\n", cfg); 182 183 eeh_ops->read_config(pdn, PCI_COMMAND, 4, &cfg); 184 n += scnprintf(buf+n, len-n, "cmd/stat:%x\n", cfg); 185 pr_warn("EEH: PCI cmd/status register: %08x\n", cfg); 186 187 /* Gather bridge-specific registers */ 188 if (edev->mode & EEH_DEV_BRIDGE) { 189 eeh_ops->read_config(pdn, PCI_SEC_STATUS, 2, &cfg); 190 n += scnprintf(buf+n, len-n, "sec stat:%x\n", cfg); 191 pr_warn("EEH: Bridge secondary status: 
%04x\n", cfg); 192 193 eeh_ops->read_config(pdn, PCI_BRIDGE_CONTROL, 2, &cfg); 194 n += scnprintf(buf+n, len-n, "brdg ctl:%x\n", cfg); 195 pr_warn("EEH: Bridge control: %04x\n", cfg); 196 } 197 198 /* Dump out the PCI-X command and status regs */ 199 cap = edev->pcix_cap; 200 if (cap) { 201 eeh_ops->read_config(pdn, cap, 4, &cfg); 202 n += scnprintf(buf+n, len-n, "pcix-cmd:%x\n", cfg); 203 pr_warn("EEH: PCI-X cmd: %08x\n", cfg); 204 205 eeh_ops->read_config(pdn, cap+4, 4, &cfg); 206 n += scnprintf(buf+n, len-n, "pcix-stat:%x\n", cfg); 207 pr_warn("EEH: PCI-X status: %08x\n", cfg); 208 } 209 210 /* If PCI-E capable, dump PCI-E cap 10 */ 211 cap = edev->pcie_cap; 212 if (cap) { 213 n += scnprintf(buf+n, len-n, "pci-e cap10:\n"); 214 pr_warn("EEH: PCI-E capabilities and status follow:\n"); 215 216 for (i=0; i<=8; i++) { 217 eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); 218 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 219 220 if ((i % 4) == 0) { 221 if (i != 0) 222 pr_warn("%s\n", buffer); 223 224 l = scnprintf(buffer, sizeof(buffer), 225 "EEH: PCI-E %02x: %08x ", 226 4*i, cfg); 227 } else { 228 l += scnprintf(buffer+l, sizeof(buffer)-l, 229 "%08x ", cfg); 230 } 231 232 } 233 234 pr_warn("%s\n", buffer); 235 } 236 237 /* If AER capable, dump it */ 238 cap = edev->aer_cap; 239 if (cap) { 240 n += scnprintf(buf+n, len-n, "pci-e AER:\n"); 241 pr_warn("EEH: PCI-E AER capability register set follows:\n"); 242 243 for (i=0; i<=13; i++) { 244 eeh_ops->read_config(pdn, cap+4*i, 4, &cfg); 245 n += scnprintf(buf+n, len-n, "%02x:%x\n", 4*i, cfg); 246 247 if ((i % 4) == 0) { 248 if (i != 0) 249 pr_warn("%s\n", buffer); 250 251 l = scnprintf(buffer, sizeof(buffer), 252 "EEH: PCI-E AER %02x: %08x ", 253 4*i, cfg); 254 } else { 255 l += scnprintf(buffer+l, sizeof(buffer)-l, 256 "%08x ", cfg); 257 } 258 } 259 260 pr_warn("%s\n", buffer); 261 } 262 263 return n; 264 } 265 266 static void *eeh_dump_pe_log(struct eeh_pe *pe, void *flag) 267 { 268 struct eeh_dev *edev, *tmp; 269 
size_t *plen = flag; 270 271 eeh_pe_for_each_dev(pe, edev, tmp) 272 *plen += eeh_dump_dev_log(edev, pci_regs_buf + *plen, 273 EEH_PCI_REGS_LOG_LEN - *plen); 274 275 return NULL; 276 } 277 278 /** 279 * eeh_slot_error_detail - Generate combined log including driver log and error log 280 * @pe: EEH PE 281 * @severity: temporary or permanent error log 282 * 283 * This routine should be called to generate the combined log, which 284 * is comprised of driver log and error log. The driver log is figured 285 * out from the config space of the corresponding PCI device, while 286 * the error log is fetched through platform dependent function call. 287 */ 288 void eeh_slot_error_detail(struct eeh_pe *pe, int severity) 289 { 290 size_t loglen = 0; 291 292 /* 293 * When the PHB is fenced or dead, it's pointless to collect 294 * the data from PCI config space because it should return 295 * 0xFF's. For ER, we still retrieve the data from the PCI 296 * config space. 297 * 298 * For pHyp, we have to enable IO for log retrieval. Otherwise, 299 * 0xFF's is always returned from PCI config space. 300 * 301 * When the @severity is EEH_LOG_PERM, the PE is going to be 302 * removed. Prior to that, the drivers for devices included in 303 * the PE will be closed. The drivers rely on working IO path 304 * to bring the devices to quiet state. Otherwise, PCI traffic 305 * from those devices after they are removed is like to cause 306 * another unexpected EEH error. 307 */ 308 if (!(pe->type & EEH_PE_PHB)) { 309 if (eeh_has_flag(EEH_ENABLE_IO_FOR_LOG) || 310 severity == EEH_LOG_PERM) 311 eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 312 313 /* 314 * The config space of some PCI devices can't be accessed 315 * when their PEs are in frozen state. Otherwise, fenced 316 * PHB might be seen. Those PEs are identified with flag 317 * EEH_PE_CFG_RESTRICTED, indicating EEH_PE_CFG_BLOCKED 318 * is set automatically when the PE is put to EEH_PE_ISOLATED. 
319 * 320 * Restoring BARs possibly triggers PCI config access in 321 * (OPAL) firmware and then causes fenced PHB. If the 322 * PCI config is blocked with flag EEH_PE_CFG_BLOCKED, it's 323 * pointless to restore BARs and dump config space. 324 */ 325 eeh_ops->configure_bridge(pe); 326 if (!(pe->state & EEH_PE_CFG_BLOCKED)) { 327 eeh_pe_restore_bars(pe); 328 329 pci_regs_buf[0] = 0; 330 eeh_pe_traverse(pe, eeh_dump_pe_log, &loglen); 331 } 332 } 333 334 eeh_ops->get_log(pe, severity, pci_regs_buf, loglen); 335 } 336 337 /** 338 * eeh_token_to_phys - Convert EEH address token to phys address 339 * @token: I/O token, should be address in the form 0xA.... 340 * 341 * This routine should be called to convert virtual I/O address 342 * to physical one. 343 */ 344 static inline unsigned long eeh_token_to_phys(unsigned long token) 345 { 346 pte_t *ptep; 347 unsigned long pa; 348 int hugepage_shift; 349 350 /* 351 * We won't find hugepages here(this is iomem). Hence we are not 352 * worried about _PAGE_SPLITTING/collapse. Also we will not hit 353 * page table free, because of init_mm. 354 */ 355 ptep = find_init_mm_pte(token, &hugepage_shift); 356 if (!ptep) 357 return token; 358 WARN_ON(hugepage_shift); 359 pa = pte_pfn(*ptep) << PAGE_SHIFT; 360 361 return pa | (token & (PAGE_SIZE-1)); 362 } 363 364 /* 365 * On PowerNV platform, we might already have fenced PHB there. 366 * For that case, it's meaningless to recover frozen PE. Intead, 367 * We have to handle fenced PHB firstly. 
368 */ 369 static int eeh_phb_check_failure(struct eeh_pe *pe) 370 { 371 struct eeh_pe *phb_pe; 372 unsigned long flags; 373 int ret; 374 375 if (!eeh_has_flag(EEH_PROBE_MODE_DEV)) 376 return -EPERM; 377 378 /* Find the PHB PE */ 379 phb_pe = eeh_phb_pe_get(pe->phb); 380 if (!phb_pe) { 381 pr_warn("%s Can't find PE for PHB#%x\n", 382 __func__, pe->phb->global_number); 383 return -EEXIST; 384 } 385 386 /* If the PHB has been in problematic state */ 387 eeh_serialize_lock(&flags); 388 if (phb_pe->state & EEH_PE_ISOLATED) { 389 ret = 0; 390 goto out; 391 } 392 393 /* Check PHB state */ 394 ret = eeh_ops->get_state(phb_pe, NULL); 395 if ((ret < 0) || 396 (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { 397 ret = 0; 398 goto out; 399 } 400 401 /* Isolate the PHB and send event */ 402 eeh_pe_state_mark(phb_pe, EEH_PE_ISOLATED); 403 eeh_serialize_unlock(flags); 404 405 pr_err("EEH: PHB#%x failure detected, location: %s\n", 406 phb_pe->phb->global_number, eeh_pe_loc_get(phb_pe)); 407 dump_stack(); 408 eeh_send_failure_event(phb_pe); 409 410 return 1; 411 out: 412 eeh_serialize_unlock(flags); 413 return ret; 414 } 415 416 /** 417 * eeh_dev_check_failure - Check if all 1's data is due to EEH slot freeze 418 * @edev: eeh device 419 * 420 * Check for an EEH failure for the given device node. Call this 421 * routine if the result of a read was all 0xff's and you want to 422 * find out if this is due to an EEH slot freeze. This routine 423 * will query firmware for the EEH status. 424 * 425 * Returns 0 if there has not been an EEH error; otherwise returns 426 * a non-zero value and queues up a slot isolation event notification. 427 * 428 * It is safe to call this routine in an interrupt context. 
429 */ 430 int eeh_dev_check_failure(struct eeh_dev *edev) 431 { 432 int ret; 433 unsigned long flags; 434 struct device_node *dn; 435 struct pci_dev *dev; 436 struct eeh_pe *pe, *parent_pe, *phb_pe; 437 int rc = 0; 438 const char *location = NULL; 439 440 eeh_stats.total_mmio_ffs++; 441 442 if (!eeh_enabled()) 443 return 0; 444 445 if (!edev) { 446 eeh_stats.no_dn++; 447 return 0; 448 } 449 dev = eeh_dev_to_pci_dev(edev); 450 pe = eeh_dev_to_pe(edev); 451 452 /* Access to IO BARs might get this far and still not want checking. */ 453 if (!pe) { 454 eeh_stats.ignored_check++; 455 pr_debug("EEH: Ignored check for %s\n", 456 eeh_pci_name(dev)); 457 return 0; 458 } 459 460 if (!pe->addr && !pe->config_addr) { 461 eeh_stats.no_cfg_addr++; 462 return 0; 463 } 464 465 /* 466 * On PowerNV platform, we might already have fenced PHB 467 * there and we need take care of that firstly. 468 */ 469 ret = eeh_phb_check_failure(pe); 470 if (ret > 0) 471 return ret; 472 473 /* 474 * If the PE isn't owned by us, we shouldn't check the 475 * state. Instead, let the owner handle it if the PE has 476 * been frozen. 477 */ 478 if (eeh_pe_passed(pe)) 479 return 0; 480 481 /* If we already have a pending isolation event for this 482 * slot, we know it's bad already, we don't need to check. 483 * Do this checking under a lock; as multiple PCI devices 484 * in one slot might report errors simultaneously, and we 485 * only want one error recovery routine running. 486 */ 487 eeh_serialize_lock(&flags); 488 rc = 1; 489 if (pe->state & EEH_PE_ISOLATED) { 490 pe->check_count++; 491 if (pe->check_count % EEH_MAX_FAILS == 0) { 492 dn = pci_device_to_OF_node(dev); 493 if (dn) 494 location = of_get_property(dn, "ibm,loc-code", 495 NULL); 496 printk(KERN_ERR "EEH: %d reads ignored for recovering device at " 497 "location=%s driver=%s pci addr=%s\n", 498 pe->check_count, 499 location ? 
location : "unknown", 500 eeh_driver_name(dev), eeh_pci_name(dev)); 501 printk(KERN_ERR "EEH: Might be infinite loop in %s driver\n", 502 eeh_driver_name(dev)); 503 dump_stack(); 504 } 505 goto dn_unlock; 506 } 507 508 /* 509 * Now test for an EEH failure. This is VERY expensive. 510 * Note that the eeh_config_addr may be a parent device 511 * in the case of a device behind a bridge, or it may be 512 * function zero of a multi-function device. 513 * In any case they must share a common PHB. 514 */ 515 ret = eeh_ops->get_state(pe, NULL); 516 517 /* Note that config-io to empty slots may fail; 518 * they are empty when they don't have children. 519 * We will punt with the following conditions: Failure to get 520 * PE's state, EEH not support and Permanently unavailable 521 * state, PE is in good state. 522 */ 523 if ((ret < 0) || 524 (ret == EEH_STATE_NOT_SUPPORT) || eeh_state_active(ret)) { 525 eeh_stats.false_positives++; 526 pe->false_positives++; 527 rc = 0; 528 goto dn_unlock; 529 } 530 531 /* 532 * It should be corner case that the parent PE has been 533 * put into frozen state as well. We should take care 534 * that at first. 535 */ 536 parent_pe = pe->parent; 537 while (parent_pe) { 538 /* Hit the ceiling ? */ 539 if (parent_pe->type & EEH_PE_PHB) 540 break; 541 542 /* Frozen parent PE ? */ 543 ret = eeh_ops->get_state(parent_pe, NULL); 544 if (ret > 0 && !eeh_state_active(ret)) { 545 pe = parent_pe; 546 pr_err("EEH: Failure of PHB#%x-PE#%x will be handled at parent PHB#%x-PE#%x.\n", 547 pe->phb->global_number, pe->addr, 548 pe->phb->global_number, parent_pe->addr); 549 } 550 551 /* Next parent level */ 552 parent_pe = parent_pe->parent; 553 } 554 555 eeh_stats.slot_resets++; 556 557 /* Avoid repeated reports of this failure, including problems 558 * with other functions on this device, and functions under 559 * bridges. 
560 */ 561 eeh_pe_state_mark(pe, EEH_PE_ISOLATED); 562 eeh_serialize_unlock(flags); 563 564 /* Most EEH events are due to device driver bugs. Having 565 * a stack trace will help the device-driver authors figure 566 * out what happened. So print that out. 567 */ 568 phb_pe = eeh_phb_pe_get(pe->phb); 569 pr_err("EEH: Frozen PHB#%x-PE#%x detected\n", 570 pe->phb->global_number, pe->addr); 571 pr_err("EEH: PE location: %s, PHB location: %s\n", 572 eeh_pe_loc_get(pe), eeh_pe_loc_get(phb_pe)); 573 dump_stack(); 574 575 eeh_send_failure_event(pe); 576 577 return 1; 578 579 dn_unlock: 580 eeh_serialize_unlock(flags); 581 return rc; 582 } 583 584 EXPORT_SYMBOL_GPL(eeh_dev_check_failure); 585 586 /** 587 * eeh_check_failure - Check if all 1's data is due to EEH slot freeze 588 * @token: I/O address 589 * 590 * Check for an EEH failure at the given I/O address. Call this 591 * routine if the result of a read was all 0xff's and you want to 592 * find out if this is due to an EEH slot freeze event. This routine 593 * will query firmware for the EEH status. 594 * 595 * Note this routine is safe to call in an interrupt context. 596 */ 597 int eeh_check_failure(const volatile void __iomem *token) 598 { 599 unsigned long addr; 600 struct eeh_dev *edev; 601 602 /* Finding the phys addr + pci device; this is pretty quick. */ 603 addr = eeh_token_to_phys((unsigned long __force) token); 604 edev = eeh_addr_cache_get_dev(addr); 605 if (!edev) { 606 eeh_stats.no_device++; 607 return 0; 608 } 609 610 return eeh_dev_check_failure(edev); 611 } 612 EXPORT_SYMBOL(eeh_check_failure); 613 614 615 /** 616 * eeh_pci_enable - Enable MMIO or DMA transfers for this slot 617 * @pe: EEH PE 618 * 619 * This routine should be called to reenable frozen MMIO or DMA 620 * so that it would work correctly again. It's useful while doing 621 * recovery or log collection on the indicated device. 
622 */ 623 int eeh_pci_enable(struct eeh_pe *pe, int function) 624 { 625 int active_flag, rc; 626 627 /* 628 * pHyp doesn't allow to enable IO or DMA on unfrozen PE. 629 * Also, it's pointless to enable them on unfrozen PE. So 630 * we have to check before enabling IO or DMA. 631 */ 632 switch (function) { 633 case EEH_OPT_THAW_MMIO: 634 active_flag = EEH_STATE_MMIO_ACTIVE | EEH_STATE_MMIO_ENABLED; 635 break; 636 case EEH_OPT_THAW_DMA: 637 active_flag = EEH_STATE_DMA_ACTIVE; 638 break; 639 case EEH_OPT_DISABLE: 640 case EEH_OPT_ENABLE: 641 case EEH_OPT_FREEZE_PE: 642 active_flag = 0; 643 break; 644 default: 645 pr_warn("%s: Invalid function %d\n", 646 __func__, function); 647 return -EINVAL; 648 } 649 650 /* 651 * Check if IO or DMA has been enabled before 652 * enabling them. 653 */ 654 if (active_flag) { 655 rc = eeh_ops->get_state(pe, NULL); 656 if (rc < 0) 657 return rc; 658 659 /* Needn't enable it at all */ 660 if (rc == EEH_STATE_NOT_SUPPORT) 661 return 0; 662 663 /* It's already enabled */ 664 if (rc & active_flag) 665 return 0; 666 } 667 668 669 /* Issue the request */ 670 rc = eeh_ops->set_option(pe, function); 671 if (rc) 672 pr_warn("%s: Unexpected state change %d on " 673 "PHB#%x-PE#%x, err=%d\n", 674 __func__, function, pe->phb->global_number, 675 pe->addr, rc); 676 677 /* Check if the request is finished successfully */ 678 if (active_flag) { 679 rc = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 680 if (rc < 0) 681 return rc; 682 683 if (rc & active_flag) 684 return 0; 685 686 return -EIO; 687 } 688 689 return rc; 690 } 691 692 static void *eeh_disable_and_save_dev_state(struct eeh_dev *edev, 693 void *userdata) 694 { 695 struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); 696 struct pci_dev *dev = userdata; 697 698 /* 699 * The caller should have disabled and saved the 700 * state for the specified device 701 */ 702 if (!pdev || pdev == dev) 703 return NULL; 704 705 /* Ensure we have D0 power state */ 706 pci_set_power_state(pdev, PCI_D0); 707 708 
/* Save device state */ 709 pci_save_state(pdev); 710 711 /* 712 * Disable device to avoid any DMA traffic and 713 * interrupt from the device 714 */ 715 pci_write_config_word(pdev, PCI_COMMAND, PCI_COMMAND_INTX_DISABLE); 716 717 return NULL; 718 } 719 720 static void *eeh_restore_dev_state(struct eeh_dev *edev, void *userdata) 721 { 722 struct pci_dn *pdn = eeh_dev_to_pdn(edev); 723 struct pci_dev *pdev = eeh_dev_to_pci_dev(edev); 724 struct pci_dev *dev = userdata; 725 726 if (!pdev) 727 return NULL; 728 729 /* Apply customization from firmware */ 730 if (pdn && eeh_ops->restore_config) 731 eeh_ops->restore_config(pdn); 732 733 /* The caller should restore state for the specified device */ 734 if (pdev != dev) 735 pci_restore_state(pdev); 736 737 return NULL; 738 } 739 740 int eeh_restore_vf_config(struct pci_dn *pdn) 741 { 742 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 743 u32 devctl, cmd, cap2, aer_capctl; 744 int old_mps; 745 746 if (edev->pcie_cap) { 747 /* Restore MPS */ 748 old_mps = (ffs(pdn->mps) - 8) << 5; 749 eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, 750 2, &devctl); 751 devctl &= ~PCI_EXP_DEVCTL_PAYLOAD; 752 devctl |= old_mps; 753 eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, 754 2, devctl); 755 756 /* Disable Completion Timeout if possible */ 757 eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCAP2, 758 4, &cap2); 759 if (cap2 & PCI_EXP_DEVCAP2_COMP_TMOUT_DIS) { 760 eeh_ops->read_config(pdn, 761 edev->pcie_cap + PCI_EXP_DEVCTL2, 762 4, &cap2); 763 cap2 |= PCI_EXP_DEVCTL2_COMP_TMOUT_DIS; 764 eeh_ops->write_config(pdn, 765 edev->pcie_cap + PCI_EXP_DEVCTL2, 766 4, cap2); 767 } 768 } 769 770 /* Enable SERR and parity checking */ 771 eeh_ops->read_config(pdn, PCI_COMMAND, 2, &cmd); 772 cmd |= (PCI_COMMAND_PARITY | PCI_COMMAND_SERR); 773 eeh_ops->write_config(pdn, PCI_COMMAND, 2, cmd); 774 775 /* Enable report various errors */ 776 if (edev->pcie_cap) { 777 eeh_ops->read_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, 778 
2, &devctl); 779 devctl &= ~PCI_EXP_DEVCTL_CERE; 780 devctl |= (PCI_EXP_DEVCTL_NFERE | 781 PCI_EXP_DEVCTL_FERE | 782 PCI_EXP_DEVCTL_URRE); 783 eeh_ops->write_config(pdn, edev->pcie_cap + PCI_EXP_DEVCTL, 784 2, devctl); 785 } 786 787 /* Enable ECRC generation and check */ 788 if (edev->pcie_cap && edev->aer_cap) { 789 eeh_ops->read_config(pdn, edev->aer_cap + PCI_ERR_CAP, 790 4, &aer_capctl); 791 aer_capctl |= (PCI_ERR_CAP_ECRC_GENE | PCI_ERR_CAP_ECRC_CHKE); 792 eeh_ops->write_config(pdn, edev->aer_cap + PCI_ERR_CAP, 793 4, aer_capctl); 794 } 795 796 return 0; 797 } 798 799 /** 800 * pcibios_set_pcie_reset_state - Set PCI-E reset state 801 * @dev: pci device struct 802 * @state: reset state to enter 803 * 804 * Return value: 805 * 0 if success 806 */ 807 int pcibios_set_pcie_reset_state(struct pci_dev *dev, enum pcie_reset_state state) 808 { 809 struct eeh_dev *edev = pci_dev_to_eeh_dev(dev); 810 struct eeh_pe *pe = eeh_dev_to_pe(edev); 811 812 if (!pe) { 813 pr_err("%s: No PE found on PCI device %s\n", 814 __func__, pci_name(dev)); 815 return -EINVAL; 816 } 817 818 switch (state) { 819 case pcie_deassert_reset: 820 eeh_ops->reset(pe, EEH_RESET_DEACTIVATE); 821 eeh_unfreeze_pe(pe, false); 822 if (!(pe->type & EEH_PE_VF)) 823 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); 824 eeh_pe_dev_traverse(pe, eeh_restore_dev_state, dev); 825 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 826 break; 827 case pcie_hot_reset: 828 eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); 829 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 830 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 831 if (!(pe->type & EEH_PE_VF)) 832 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 833 eeh_ops->reset(pe, EEH_RESET_HOT); 834 break; 835 case pcie_warm_reset: 836 eeh_pe_state_mark_with_cfg(pe, EEH_PE_ISOLATED); 837 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 838 eeh_pe_dev_traverse(pe, eeh_disable_and_save_dev_state, dev); 839 if (!(pe->type & EEH_PE_VF)) 840 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 841 
eeh_ops->reset(pe, EEH_RESET_FUNDAMENTAL); 842 break; 843 default: 844 eeh_pe_state_clear(pe, EEH_PE_ISOLATED | EEH_PE_CFG_BLOCKED); 845 return -EINVAL; 846 }; 847 848 return 0; 849 } 850 851 /** 852 * eeh_set_pe_freset - Check the required reset for the indicated device 853 * @data: EEH device 854 * @flag: return value 855 * 856 * Each device might have its preferred reset type: fundamental or 857 * hot reset. The routine is used to collected the information for 858 * the indicated device and its children so that the bunch of the 859 * devices could be reset properly. 860 */ 861 static void *eeh_set_dev_freset(struct eeh_dev *edev, void *flag) 862 { 863 struct pci_dev *dev; 864 unsigned int *freset = (unsigned int *)flag; 865 866 dev = eeh_dev_to_pci_dev(edev); 867 if (dev) 868 *freset |= dev->needs_freset; 869 870 return NULL; 871 } 872 873 /** 874 * eeh_pe_reset_full - Complete a full reset process on the indicated PE 875 * @pe: EEH PE 876 * 877 * This function executes a full reset procedure on a PE, including setting 878 * the appropriate flags, performing a fundamental or hot reset, and then 879 * deactivating the reset status. It is designed to be used within the EEH 880 * subsystem, as opposed to eeh_pe_reset which is exported to drivers and 881 * only performs a single operation at a time. 882 * 883 * This function will attempt to reset a PE three times before failing. 884 */ 885 int eeh_pe_reset_full(struct eeh_pe *pe) 886 { 887 int reset_state = (EEH_PE_RESET | EEH_PE_CFG_BLOCKED); 888 int type = EEH_RESET_HOT; 889 unsigned int freset = 0; 890 int i, state, ret; 891 892 /* 893 * Determine the type of reset to perform - hot or fundamental. 894 * Hot reset is the default operation, unless any device under the 895 * PE requires a fundamental reset. 
896 */ 897 eeh_pe_dev_traverse(pe, eeh_set_dev_freset, &freset); 898 899 if (freset) 900 type = EEH_RESET_FUNDAMENTAL; 901 902 /* Mark the PE as in reset state and block config space accesses */ 903 eeh_pe_state_mark(pe, reset_state); 904 905 /* Make three attempts at resetting the bus */ 906 for (i = 0; i < 3; i++) { 907 ret = eeh_pe_reset(pe, type); 908 if (ret) 909 break; 910 911 ret = eeh_pe_reset(pe, EEH_RESET_DEACTIVATE); 912 if (ret) 913 break; 914 915 /* Wait until the PE is in a functioning state */ 916 state = eeh_ops->wait_state(pe, PCI_BUS_RESET_WAIT_MSEC); 917 if (eeh_state_active(state)) 918 break; 919 920 if (state < 0) { 921 pr_warn("%s: Unrecoverable slot failure on PHB#%x-PE#%x", 922 __func__, pe->phb->global_number, pe->addr); 923 ret = -ENOTRECOVERABLE; 924 break; 925 } 926 927 /* Set error in case this is our last attempt */ 928 ret = -EIO; 929 pr_warn("%s: Failure %d resetting PHB#%x-PE#%x\n (%d)\n", 930 __func__, state, pe->phb->global_number, pe->addr, (i + 1)); 931 } 932 933 eeh_pe_state_clear(pe, reset_state); 934 return ret; 935 } 936 937 /** 938 * eeh_save_bars - Save device bars 939 * @edev: PCI device associated EEH device 940 * 941 * Save the values of the device bars. Unlike the restore 942 * routine, this routine is *not* recursive. This is because 943 * PCI devices are added individually; but, for the restore, 944 * an entire slot is reset at a time. 945 */ 946 void eeh_save_bars(struct eeh_dev *edev) 947 { 948 struct pci_dn *pdn; 949 int i; 950 951 pdn = eeh_dev_to_pdn(edev); 952 if (!pdn) 953 return; 954 955 for (i = 0; i < 16; i++) 956 eeh_ops->read_config(pdn, i * 4, 4, &edev->config_space[i]); 957 958 /* 959 * For PCI bridges including root port, we need enable bus 960 * master explicitly. Otherwise, it can't fetch IODA table 961 * entries correctly. So we cache the bit in advance so that 962 * we can restore it after reset, either PHB range or PE range. 
963 */ 964 if (edev->mode & EEH_DEV_BRIDGE) 965 edev->config_space[1] |= PCI_COMMAND_MASTER; 966 } 967 968 /** 969 * eeh_ops_register - Register platform dependent EEH operations 970 * @ops: platform dependent EEH operations 971 * 972 * Register the platform dependent EEH operation callback 973 * functions. The platform should call this function before 974 * any other EEH operations. 975 */ 976 int __init eeh_ops_register(struct eeh_ops *ops) 977 { 978 if (!ops->name) { 979 pr_warn("%s: Invalid EEH ops name for %p\n", 980 __func__, ops); 981 return -EINVAL; 982 } 983 984 if (eeh_ops && eeh_ops != ops) { 985 pr_warn("%s: EEH ops of platform %s already existing (%s)\n", 986 __func__, eeh_ops->name, ops->name); 987 return -EEXIST; 988 } 989 990 eeh_ops = ops; 991 992 return 0; 993 } 994 995 /** 996 * eeh_ops_unregister - Unreigster platform dependent EEH operations 997 * @name: name of EEH platform operations 998 * 999 * Unregister the platform dependent EEH operation callback 1000 * functions. 
1001 */ 1002 int __exit eeh_ops_unregister(const char *name) 1003 { 1004 if (!name || !strlen(name)) { 1005 pr_warn("%s: Invalid EEH ops name\n", 1006 __func__); 1007 return -EINVAL; 1008 } 1009 1010 if (eeh_ops && !strcmp(eeh_ops->name, name)) { 1011 eeh_ops = NULL; 1012 return 0; 1013 } 1014 1015 return -EEXIST; 1016 } 1017 1018 static int eeh_reboot_notifier(struct notifier_block *nb, 1019 unsigned long action, void *unused) 1020 { 1021 eeh_clear_flag(EEH_ENABLED); 1022 return NOTIFY_DONE; 1023 } 1024 1025 static struct notifier_block eeh_reboot_nb = { 1026 .notifier_call = eeh_reboot_notifier, 1027 }; 1028 1029 void eeh_probe_devices(void) 1030 { 1031 struct pci_controller *hose, *tmp; 1032 struct pci_dn *pdn; 1033 1034 /* Enable EEH for all adapters */ 1035 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) { 1036 pdn = hose->pci_data; 1037 traverse_pci_dn(pdn, eeh_ops->probe, NULL); 1038 } 1039 } 1040 1041 /** 1042 * eeh_init - EEH initialization 1043 * 1044 * Initialize EEH by trying to enable it for all of the adapters in the system. 1045 * As a side effect we can determine here if eeh is supported at all. 1046 * Note that we leave EEH on so failed config cycles won't cause a machine 1047 * check. If a user turns off EEH for a particular adapter they are really 1048 * telling Linux to ignore errors. Some hardware (e.g. POWER5) won't 1049 * grant access to a slot if EEH isn't enabled, and so we always enable 1050 * EEH for all slots/all devices. 1051 * 1052 * The eeh-force-off option disables EEH checking globally, for all slots. 1053 * Even if force-off is set, the EEH hardware is still enabled, so that 1054 * newer systems can boot. 
1055 */ 1056 static int eeh_init(void) 1057 { 1058 struct pci_controller *hose, *tmp; 1059 int ret = 0; 1060 1061 /* Register reboot notifier */ 1062 ret = register_reboot_notifier(&eeh_reboot_nb); 1063 if (ret) { 1064 pr_warn("%s: Failed to register notifier (%d)\n", 1065 __func__, ret); 1066 return ret; 1067 } 1068 1069 /* call platform initialization function */ 1070 if (!eeh_ops) { 1071 pr_warn("%s: Platform EEH operation not found\n", 1072 __func__); 1073 return -EEXIST; 1074 } else if ((ret = eeh_ops->init())) 1075 return ret; 1076 1077 /* Initialize PHB PEs */ 1078 list_for_each_entry_safe(hose, tmp, &hose_list, list_node) 1079 eeh_dev_phb_init_dynamic(hose); 1080 1081 /* Initialize EEH event */ 1082 ret = eeh_event_init(); 1083 if (ret) 1084 return ret; 1085 1086 eeh_probe_devices(); 1087 1088 if (eeh_enabled()) 1089 pr_info("EEH: PCI Enhanced I/O Error Handling Enabled\n"); 1090 else if (!eeh_has_flag(EEH_POSTPONED_PROBE)) 1091 pr_info("EEH: No capable adapters found\n"); 1092 1093 return ret; 1094 } 1095 1096 core_initcall_sync(eeh_init); 1097 1098 /** 1099 * eeh_add_device_early - Enable EEH for the indicated device node 1100 * @pdn: PCI device node for which to set up EEH 1101 * 1102 * This routine must be used to perform EEH initialization for PCI 1103 * devices that were added after system boot (e.g. hotplug, dlpar). 1104 * This routine must be called before any i/o is performed to the 1105 * adapter (inluding any config-space i/o). 1106 * Whether this actually enables EEH or not for this device depends 1107 * on the CEC architecture, type of the device, on earlier boot 1108 * command-line arguments & etc. 1109 */ 1110 void eeh_add_device_early(struct pci_dn *pdn) 1111 { 1112 struct pci_controller *phb = pdn ? 
pdn->phb : NULL; 1113 struct eeh_dev *edev = pdn_to_eeh_dev(pdn); 1114 1115 if (!edev) 1116 return; 1117 1118 if (!eeh_has_flag(EEH_PROBE_MODE_DEVTREE)) 1119 return; 1120 1121 /* USB Bus children of PCI devices will not have BUID's */ 1122 if (NULL == phb || 1123 (eeh_has_flag(EEH_PROBE_MODE_DEVTREE) && 0 == phb->buid)) 1124 return; 1125 1126 eeh_ops->probe(pdn, NULL); 1127 } 1128 1129 /** 1130 * eeh_add_device_tree_early - Enable EEH for the indicated device 1131 * @pdn: PCI device node 1132 * 1133 * This routine must be used to perform EEH initialization for the 1134 * indicated PCI device that was added after system boot (e.g. 1135 * hotplug, dlpar). 1136 */ 1137 void eeh_add_device_tree_early(struct pci_dn *pdn) 1138 { 1139 struct pci_dn *n; 1140 1141 if (!pdn) 1142 return; 1143 1144 list_for_each_entry(n, &pdn->child_list, list) 1145 eeh_add_device_tree_early(n); 1146 eeh_add_device_early(pdn); 1147 } 1148 EXPORT_SYMBOL_GPL(eeh_add_device_tree_early); 1149 1150 /** 1151 * eeh_add_device_late - Perform EEH initialization for the indicated pci device 1152 * @dev: pci device for which to set up EEH 1153 * 1154 * This routine must be used to complete EEH initialization for PCI 1155 * devices that were added after system boot (e.g. hotplug, dlpar). 1156 */ 1157 void eeh_add_device_late(struct pci_dev *dev) 1158 { 1159 struct pci_dn *pdn; 1160 struct eeh_dev *edev; 1161 1162 if (!dev || !eeh_enabled()) 1163 return; 1164 1165 pr_debug("EEH: Adding device %s\n", pci_name(dev)); 1166 1167 pdn = pci_get_pdn_by_devfn(dev->bus, dev->devfn); 1168 edev = pdn_to_eeh_dev(pdn); 1169 if (edev->pdev == dev) { 1170 pr_debug("EEH: Already referenced !\n"); 1171 return; 1172 } 1173 1174 /* 1175 * The EEH cache might not be removed correctly because of 1176 * unbalanced kref to the device during unplug time, which 1177 * relies on pcibios_release_device(). So we have to remove 1178 * that here explicitly. 
1179 */ 1180 if (edev->pdev) { 1181 eeh_rmv_from_parent_pe(edev); 1182 eeh_addr_cache_rmv_dev(edev->pdev); 1183 eeh_sysfs_remove_device(edev->pdev); 1184 edev->mode &= ~EEH_DEV_SYSFS; 1185 1186 /* 1187 * We definitely should have the PCI device removed 1188 * though it wasn't correctly. So we needn't call 1189 * into error handler afterwards. 1190 */ 1191 edev->mode |= EEH_DEV_NO_HANDLER; 1192 1193 edev->pdev = NULL; 1194 dev->dev.archdata.edev = NULL; 1195 } 1196 1197 if (eeh_has_flag(EEH_PROBE_MODE_DEV)) 1198 eeh_ops->probe(pdn, NULL); 1199 1200 edev->pdev = dev; 1201 dev->dev.archdata.edev = edev; 1202 1203 eeh_addr_cache_insert_dev(dev); 1204 } 1205 1206 /** 1207 * eeh_add_device_tree_late - Perform EEH initialization for the indicated PCI bus 1208 * @bus: PCI bus 1209 * 1210 * This routine must be used to perform EEH initialization for PCI 1211 * devices which are attached to the indicated PCI bus. The PCI bus 1212 * is added after system boot through hotplug or dlpar. 1213 */ 1214 void eeh_add_device_tree_late(struct pci_bus *bus) 1215 { 1216 struct pci_dev *dev; 1217 1218 list_for_each_entry(dev, &bus->devices, bus_list) { 1219 eeh_add_device_late(dev); 1220 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1221 struct pci_bus *subbus = dev->subordinate; 1222 if (subbus) 1223 eeh_add_device_tree_late(subbus); 1224 } 1225 } 1226 } 1227 EXPORT_SYMBOL_GPL(eeh_add_device_tree_late); 1228 1229 /** 1230 * eeh_add_sysfs_files - Add EEH sysfs files for the indicated PCI bus 1231 * @bus: PCI bus 1232 * 1233 * This routine must be used to add EEH sysfs files for PCI 1234 * devices which are attached to the indicated PCI bus. The PCI bus 1235 * is added after system boot through hotplug or dlpar. 
1236 */ 1237 void eeh_add_sysfs_files(struct pci_bus *bus) 1238 { 1239 struct pci_dev *dev; 1240 1241 list_for_each_entry(dev, &bus->devices, bus_list) { 1242 eeh_sysfs_add_device(dev); 1243 if (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE) { 1244 struct pci_bus *subbus = dev->subordinate; 1245 if (subbus) 1246 eeh_add_sysfs_files(subbus); 1247 } 1248 } 1249 } 1250 EXPORT_SYMBOL_GPL(eeh_add_sysfs_files); 1251 1252 /** 1253 * eeh_remove_device - Undo EEH setup for the indicated pci device 1254 * @dev: pci device to be removed 1255 * 1256 * This routine should be called when a device is removed from 1257 * a running system (e.g. by hotplug or dlpar). It unregisters 1258 * the PCI device from the EEH subsystem. I/O errors affecting 1259 * this device will no longer be detected after this call; thus, 1260 * i/o errors affecting this slot may leave this device unusable. 1261 */ 1262 void eeh_remove_device(struct pci_dev *dev) 1263 { 1264 struct eeh_dev *edev; 1265 1266 if (!dev || !eeh_enabled()) 1267 return; 1268 edev = pci_dev_to_eeh_dev(dev); 1269 1270 /* Unregister the device with the EEH/PCI address search system */ 1271 pr_debug("EEH: Removing device %s\n", pci_name(dev)); 1272 1273 if (!edev || !edev->pdev || !edev->pe) { 1274 pr_debug("EEH: Not referenced !\n"); 1275 return; 1276 } 1277 1278 /* 1279 * During the hotplug for EEH error recovery, we need the EEH 1280 * device attached to the parent PE in order for BAR restore 1281 * a bit later. So we keep it for BAR restore and remove it 1282 * from the parent PE during the BAR resotre. 1283 */ 1284 edev->pdev = NULL; 1285 1286 /* 1287 * The flag "in_error" is used to trace EEH devices for VFs 1288 * in error state or not. It's set in eeh_report_error(). If 1289 * it's not set, eeh_report_{reset,resume}() won't be called 1290 * for the VF EEH device. 
1291 */ 1292 edev->in_error = false; 1293 dev->dev.archdata.edev = NULL; 1294 if (!(edev->pe->state & EEH_PE_KEEP)) 1295 eeh_rmv_from_parent_pe(edev); 1296 else 1297 edev->mode |= EEH_DEV_DISCONNECTED; 1298 1299 /* 1300 * We're removing from the PCI subsystem, that means 1301 * the PCI device driver can't support EEH or not 1302 * well. So we rely on hotplug completely to do recovery 1303 * for the specific PCI device. 1304 */ 1305 edev->mode |= EEH_DEV_NO_HANDLER; 1306 1307 eeh_addr_cache_rmv_dev(dev); 1308 eeh_sysfs_remove_device(dev); 1309 edev->mode &= ~EEH_DEV_SYSFS; 1310 } 1311 1312 int eeh_unfreeze_pe(struct eeh_pe *pe, bool sw_state) 1313 { 1314 int ret; 1315 1316 ret = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO); 1317 if (ret) { 1318 pr_warn("%s: Failure %d enabling IO on PHB#%x-PE#%x\n", 1319 __func__, ret, pe->phb->global_number, pe->addr); 1320 return ret; 1321 } 1322 1323 ret = eeh_pci_enable(pe, EEH_OPT_THAW_DMA); 1324 if (ret) { 1325 pr_warn("%s: Failure %d enabling DMA on PHB#%x-PE#%x\n", 1326 __func__, ret, pe->phb->global_number, pe->addr); 1327 return ret; 1328 } 1329 1330 /* Clear software isolated state */ 1331 if (sw_state && (pe->state & EEH_PE_ISOLATED)) 1332 eeh_pe_state_clear(pe, EEH_PE_ISOLATED); 1333 1334 return ret; 1335 } 1336 1337 1338 static struct pci_device_id eeh_reset_ids[] = { 1339 { PCI_DEVICE(0x19a2, 0x0710) }, /* Emulex, BE */ 1340 { PCI_DEVICE(0x10df, 0xe220) }, /* Emulex, Lancer */ 1341 { PCI_DEVICE(0x14e4, 0x1657) }, /* Broadcom BCM5719 */ 1342 { 0 } 1343 }; 1344 1345 static int eeh_pe_change_owner(struct eeh_pe *pe) 1346 { 1347 struct eeh_dev *edev, *tmp; 1348 struct pci_dev *pdev; 1349 struct pci_device_id *id; 1350 int ret; 1351 1352 /* Check PE state */ 1353 ret = eeh_ops->get_state(pe, NULL); 1354 if (ret < 0 || ret == EEH_STATE_NOT_SUPPORT) 1355 return 0; 1356 1357 /* Unfrozen PE, nothing to do */ 1358 if (eeh_state_active(ret)) 1359 return 0; 1360 1361 /* Frozen PE, check if it needs PE level reset */ 1362 
eeh_pe_for_each_dev(pe, edev, tmp) { 1363 pdev = eeh_dev_to_pci_dev(edev); 1364 if (!pdev) 1365 continue; 1366 1367 for (id = &eeh_reset_ids[0]; id->vendor != 0; id++) { 1368 if (id->vendor != PCI_ANY_ID && 1369 id->vendor != pdev->vendor) 1370 continue; 1371 if (id->device != PCI_ANY_ID && 1372 id->device != pdev->device) 1373 continue; 1374 if (id->subvendor != PCI_ANY_ID && 1375 id->subvendor != pdev->subsystem_vendor) 1376 continue; 1377 if (id->subdevice != PCI_ANY_ID && 1378 id->subdevice != pdev->subsystem_device) 1379 continue; 1380 1381 return eeh_pe_reset_and_recover(pe); 1382 } 1383 } 1384 1385 return eeh_unfreeze_pe(pe, true); 1386 } 1387 1388 /** 1389 * eeh_dev_open - Increase count of pass through devices for PE 1390 * @pdev: PCI device 1391 * 1392 * Increase count of passed through devices for the indicated 1393 * PE. In the result, the EEH errors detected on the PE won't be 1394 * reported. The PE owner will be responsible for detection 1395 * and recovery. 1396 */ 1397 int eeh_dev_open(struct pci_dev *pdev) 1398 { 1399 struct eeh_dev *edev; 1400 int ret = -ENODEV; 1401 1402 mutex_lock(&eeh_dev_mutex); 1403 1404 /* No PCI device ? */ 1405 if (!pdev) 1406 goto out; 1407 1408 /* No EEH device or PE ? */ 1409 edev = pci_dev_to_eeh_dev(pdev); 1410 if (!edev || !edev->pe) 1411 goto out; 1412 1413 /* 1414 * The PE might have been put into frozen state, but we 1415 * didn't detect that yet. The passed through PCI devices 1416 * in frozen PE won't work properly. Clear the frozen state 1417 * in advance. 
1418 */ 1419 ret = eeh_pe_change_owner(edev->pe); 1420 if (ret) 1421 goto out; 1422 1423 /* Increase PE's pass through count */ 1424 atomic_inc(&edev->pe->pass_dev_cnt); 1425 mutex_unlock(&eeh_dev_mutex); 1426 1427 return 0; 1428 out: 1429 mutex_unlock(&eeh_dev_mutex); 1430 return ret; 1431 } 1432 EXPORT_SYMBOL_GPL(eeh_dev_open); 1433 1434 /** 1435 * eeh_dev_release - Decrease count of pass through devices for PE 1436 * @pdev: PCI device 1437 * 1438 * Decrease count of pass through devices for the indicated PE. If 1439 * there is no passed through device in PE, the EEH errors detected 1440 * on the PE will be reported and handled as usual. 1441 */ 1442 void eeh_dev_release(struct pci_dev *pdev) 1443 { 1444 struct eeh_dev *edev; 1445 1446 mutex_lock(&eeh_dev_mutex); 1447 1448 /* No PCI device ? */ 1449 if (!pdev) 1450 goto out; 1451 1452 /* No EEH device ? */ 1453 edev = pci_dev_to_eeh_dev(pdev); 1454 if (!edev || !edev->pe || !eeh_pe_passed(edev->pe)) 1455 goto out; 1456 1457 /* Decrease PE's pass through count */ 1458 WARN_ON(atomic_dec_if_positive(&edev->pe->pass_dev_cnt) < 0); 1459 eeh_pe_change_owner(edev->pe); 1460 out: 1461 mutex_unlock(&eeh_dev_mutex); 1462 } 1463 EXPORT_SYMBOL(eeh_dev_release); 1464 1465 #ifdef CONFIG_IOMMU_API 1466 1467 static int dev_has_iommu_table(struct device *dev, void *data) 1468 { 1469 struct pci_dev *pdev = to_pci_dev(dev); 1470 struct pci_dev **ppdev = data; 1471 1472 if (!dev) 1473 return 0; 1474 1475 if (dev->iommu_group) { 1476 *ppdev = pdev; 1477 return 1; 1478 } 1479 1480 return 0; 1481 } 1482 1483 /** 1484 * eeh_iommu_group_to_pe - Convert IOMMU group to EEH PE 1485 * @group: IOMMU group 1486 * 1487 * The routine is called to convert IOMMU group to EEH PE. 1488 */ 1489 struct eeh_pe *eeh_iommu_group_to_pe(struct iommu_group *group) 1490 { 1491 struct pci_dev *pdev = NULL; 1492 struct eeh_dev *edev; 1493 int ret; 1494 1495 /* No IOMMU group ? 
*/ 1496 if (!group) 1497 return NULL; 1498 1499 ret = iommu_group_for_each_dev(group, &pdev, dev_has_iommu_table); 1500 if (!ret || !pdev) 1501 return NULL; 1502 1503 /* No EEH device or PE ? */ 1504 edev = pci_dev_to_eeh_dev(pdev); 1505 if (!edev || !edev->pe) 1506 return NULL; 1507 1508 return edev->pe; 1509 } 1510 EXPORT_SYMBOL_GPL(eeh_iommu_group_to_pe); 1511 1512 #endif /* CONFIG_IOMMU_API */ 1513 1514 /** 1515 * eeh_pe_set_option - Set options for the indicated PE 1516 * @pe: EEH PE 1517 * @option: requested option 1518 * 1519 * The routine is called to enable or disable EEH functionality 1520 * on the indicated PE, to enable IO or DMA for the frozen PE. 1521 */ 1522 int eeh_pe_set_option(struct eeh_pe *pe, int option) 1523 { 1524 int ret = 0; 1525 1526 /* Invalid PE ? */ 1527 if (!pe) 1528 return -ENODEV; 1529 1530 /* 1531 * EEH functionality could possibly be disabled, just 1532 * return error for the case. And the EEH functinality 1533 * isn't expected to be disabled on one specific PE. 1534 */ 1535 switch (option) { 1536 case EEH_OPT_ENABLE: 1537 if (eeh_enabled()) { 1538 ret = eeh_pe_change_owner(pe); 1539 break; 1540 } 1541 ret = -EIO; 1542 break; 1543 case EEH_OPT_DISABLE: 1544 break; 1545 case EEH_OPT_THAW_MMIO: 1546 case EEH_OPT_THAW_DMA: 1547 case EEH_OPT_FREEZE_PE: 1548 if (!eeh_ops || !eeh_ops->set_option) { 1549 ret = -ENOENT; 1550 break; 1551 } 1552 1553 ret = eeh_pci_enable(pe, option); 1554 break; 1555 default: 1556 pr_debug("%s: Option %d out of range (%d, %d)\n", 1557 __func__, option, EEH_OPT_DISABLE, EEH_OPT_THAW_DMA); 1558 ret = -EINVAL; 1559 } 1560 1561 return ret; 1562 } 1563 EXPORT_SYMBOL_GPL(eeh_pe_set_option); 1564 1565 /** 1566 * eeh_pe_get_state - Retrieve PE's state 1567 * @pe: EEH PE 1568 * 1569 * Retrieve the PE's state, which includes 3 aspects: enabled 1570 * DMA, enabled IO and asserted reset. 
1571 */ 1572 int eeh_pe_get_state(struct eeh_pe *pe) 1573 { 1574 int result, ret = 0; 1575 bool rst_active, dma_en, mmio_en; 1576 1577 /* Existing PE ? */ 1578 if (!pe) 1579 return -ENODEV; 1580 1581 if (!eeh_ops || !eeh_ops->get_state) 1582 return -ENOENT; 1583 1584 /* 1585 * If the parent PE is owned by the host kernel and is undergoing 1586 * error recovery, we should return the PE state as temporarily 1587 * unavailable so that the error recovery on the guest is suspended 1588 * until the recovery completes on the host. 1589 */ 1590 if (pe->parent && 1591 !(pe->state & EEH_PE_REMOVED) && 1592 (pe->parent->state & (EEH_PE_ISOLATED | EEH_PE_RECOVERING))) 1593 return EEH_PE_STATE_UNAVAIL; 1594 1595 result = eeh_ops->get_state(pe, NULL); 1596 rst_active = !!(result & EEH_STATE_RESET_ACTIVE); 1597 dma_en = !!(result & EEH_STATE_DMA_ENABLED); 1598 mmio_en = !!(result & EEH_STATE_MMIO_ENABLED); 1599 1600 if (rst_active) 1601 ret = EEH_PE_STATE_RESET; 1602 else if (dma_en && mmio_en) 1603 ret = EEH_PE_STATE_NORMAL; 1604 else if (!dma_en && !mmio_en) 1605 ret = EEH_PE_STATE_STOPPED_IO_DMA; 1606 else if (!dma_en && mmio_en) 1607 ret = EEH_PE_STATE_STOPPED_DMA; 1608 else 1609 ret = EEH_PE_STATE_UNAVAIL; 1610 1611 return ret; 1612 } 1613 EXPORT_SYMBOL_GPL(eeh_pe_get_state); 1614 1615 static int eeh_pe_reenable_devices(struct eeh_pe *pe) 1616 { 1617 struct eeh_dev *edev, *tmp; 1618 struct pci_dev *pdev; 1619 int ret = 0; 1620 1621 /* Restore config space */ 1622 eeh_pe_restore_bars(pe); 1623 1624 /* 1625 * Reenable PCI devices as the devices passed 1626 * through are always enabled before the reset. 
1627 */ 1628 eeh_pe_for_each_dev(pe, edev, tmp) { 1629 pdev = eeh_dev_to_pci_dev(edev); 1630 if (!pdev) 1631 continue; 1632 1633 ret = pci_reenable_device(pdev); 1634 if (ret) { 1635 pr_warn("%s: Failure %d reenabling %s\n", 1636 __func__, ret, pci_name(pdev)); 1637 return ret; 1638 } 1639 } 1640 1641 /* The PE is still in frozen state */ 1642 return eeh_unfreeze_pe(pe, true); 1643 } 1644 1645 1646 /** 1647 * eeh_pe_reset - Issue PE reset according to specified type 1648 * @pe: EEH PE 1649 * @option: reset type 1650 * 1651 * The routine is called to reset the specified PE with the 1652 * indicated type, either fundamental reset or hot reset. 1653 * PE reset is the most important part for error recovery. 1654 */ 1655 int eeh_pe_reset(struct eeh_pe *pe, int option) 1656 { 1657 int ret = 0; 1658 1659 /* Invalid PE ? */ 1660 if (!pe) 1661 return -ENODEV; 1662 1663 if (!eeh_ops || !eeh_ops->set_option || !eeh_ops->reset) 1664 return -ENOENT; 1665 1666 switch (option) { 1667 case EEH_RESET_DEACTIVATE: 1668 ret = eeh_ops->reset(pe, option); 1669 eeh_pe_state_clear(pe, EEH_PE_CFG_BLOCKED); 1670 if (ret) 1671 break; 1672 1673 ret = eeh_pe_reenable_devices(pe); 1674 break; 1675 case EEH_RESET_HOT: 1676 case EEH_RESET_FUNDAMENTAL: 1677 /* 1678 * Proactively freeze the PE to drop all MMIO access 1679 * during reset, which should be banned as it's always 1680 * cause recursive EEH error. 1681 */ 1682 eeh_ops->set_option(pe, EEH_OPT_FREEZE_PE); 1683 1684 eeh_pe_state_mark(pe, EEH_PE_CFG_BLOCKED); 1685 ret = eeh_ops->reset(pe, option); 1686 break; 1687 default: 1688 pr_debug("%s: Unsupported option %d\n", 1689 __func__, option); 1690 ret = -EINVAL; 1691 } 1692 1693 return ret; 1694 } 1695 EXPORT_SYMBOL_GPL(eeh_pe_reset); 1696 1697 /** 1698 * eeh_pe_configure - Configure PCI bridges after PE reset 1699 * @pe: EEH PE 1700 * 1701 * The routine is called to restore the PCI config space for 1702 * those PCI devices, especially PCI bridges affected by PE 1703 * reset issued previously. 
1704 */ 1705 int eeh_pe_configure(struct eeh_pe *pe) 1706 { 1707 int ret = 0; 1708 1709 /* Invalid PE ? */ 1710 if (!pe) 1711 return -ENODEV; 1712 1713 return ret; 1714 } 1715 EXPORT_SYMBOL_GPL(eeh_pe_configure); 1716 1717 /** 1718 * eeh_pe_inject_err - Injecting the specified PCI error to the indicated PE 1719 * @pe: the indicated PE 1720 * @type: error type 1721 * @function: error function 1722 * @addr: address 1723 * @mask: address mask 1724 * 1725 * The routine is called to inject the specified PCI error, which 1726 * is determined by @type and @function, to the indicated PE for 1727 * testing purpose. 1728 */ 1729 int eeh_pe_inject_err(struct eeh_pe *pe, int type, int func, 1730 unsigned long addr, unsigned long mask) 1731 { 1732 /* Invalid PE ? */ 1733 if (!pe) 1734 return -ENODEV; 1735 1736 /* Unsupported operation ? */ 1737 if (!eeh_ops || !eeh_ops->err_inject) 1738 return -ENOENT; 1739 1740 /* Check on PCI error type */ 1741 if (type != EEH_ERR_TYPE_32 && type != EEH_ERR_TYPE_64) 1742 return -EINVAL; 1743 1744 /* Check on PCI error function */ 1745 if (func < EEH_ERR_FUNC_MIN || func > EEH_ERR_FUNC_MAX) 1746 return -EINVAL; 1747 1748 return eeh_ops->err_inject(pe, type, func, addr, mask); 1749 } 1750 EXPORT_SYMBOL_GPL(eeh_pe_inject_err); 1751 1752 static int proc_eeh_show(struct seq_file *m, void *v) 1753 { 1754 if (!eeh_enabled()) { 1755 seq_printf(m, "EEH Subsystem is globally disabled\n"); 1756 seq_printf(m, "eeh_total_mmio_ffs=%llu\n", eeh_stats.total_mmio_ffs); 1757 } else { 1758 seq_printf(m, "EEH Subsystem is enabled\n"); 1759 seq_printf(m, 1760 "no device=%llu\n" 1761 "no device node=%llu\n" 1762 "no config address=%llu\n" 1763 "check not wanted=%llu\n" 1764 "eeh_total_mmio_ffs=%llu\n" 1765 "eeh_false_positives=%llu\n" 1766 "eeh_slot_resets=%llu\n", 1767 eeh_stats.no_device, 1768 eeh_stats.no_dn, 1769 eeh_stats.no_cfg_addr, 1770 eeh_stats.ignored_check, 1771 eeh_stats.total_mmio_ffs, 1772 eeh_stats.false_positives, 1773 eeh_stats.slot_resets); 1774 
} 1775 1776 return 0; 1777 } 1778 1779 #ifdef CONFIG_DEBUG_FS 1780 static int eeh_enable_dbgfs_set(void *data, u64 val) 1781 { 1782 if (val) 1783 eeh_clear_flag(EEH_FORCE_DISABLED); 1784 else 1785 eeh_add_flag(EEH_FORCE_DISABLED); 1786 1787 return 0; 1788 } 1789 1790 static int eeh_enable_dbgfs_get(void *data, u64 *val) 1791 { 1792 if (eeh_enabled()) 1793 *val = 0x1ul; 1794 else 1795 *val = 0x0ul; 1796 return 0; 1797 } 1798 1799 static int eeh_freeze_dbgfs_set(void *data, u64 val) 1800 { 1801 eeh_max_freezes = val; 1802 return 0; 1803 } 1804 1805 static int eeh_freeze_dbgfs_get(void *data, u64 *val) 1806 { 1807 *val = eeh_max_freezes; 1808 return 0; 1809 } 1810 1811 DEFINE_SIMPLE_ATTRIBUTE(eeh_enable_dbgfs_ops, eeh_enable_dbgfs_get, 1812 eeh_enable_dbgfs_set, "0x%llx\n"); 1813 DEFINE_SIMPLE_ATTRIBUTE(eeh_freeze_dbgfs_ops, eeh_freeze_dbgfs_get, 1814 eeh_freeze_dbgfs_set, "0x%llx\n"); 1815 #endif 1816 1817 static int __init eeh_init_proc(void) 1818 { 1819 if (machine_is(pseries) || machine_is(powernv)) { 1820 proc_create_single("powerpc/eeh", 0, NULL, proc_eeh_show); 1821 #ifdef CONFIG_DEBUG_FS 1822 debugfs_create_file("eeh_enable", 0600, 1823 powerpc_debugfs_root, NULL, 1824 &eeh_enable_dbgfs_ops); 1825 debugfs_create_file("eeh_max_freezes", 0600, 1826 powerpc_debugfs_root, NULL, 1827 &eeh_freeze_dbgfs_ops); 1828 #endif 1829 } 1830 1831 return 0; 1832 } 1833 __initcall(eeh_init_proc); 1834