xref: /linux/arch/powerpc/kernel/eeh_driver.c (revision 9d535e200f09ce347afc38c81ec7f2901187e5f0)
1 /*
2  * PCI Error Recovery Driver for RPA-compliant PPC64 platform.
3  * Copyright IBM Corp. 2004 2005
4  * Copyright Linas Vepstas <linas@linas.org> 2004, 2005
5  *
6  * All rights reserved.
7  *
8  * This program is free software; you can redistribute it and/or modify
9  * it under the terms of the GNU General Public License as published by
10  * the Free Software Foundation; either version 2 of the License, or (at
11  * your option) any later version.
12  *
13  * This program is distributed in the hope that it will be useful, but
14  * WITHOUT ANY WARRANTY; without even the implied warranty of
15  * MERCHANTABILITY OR FITNESS FOR A PARTICULAR PURPOSE, GOOD TITLE or
16  * NON INFRINGEMENT.  See the GNU General Public License for more
17  * details.
18  *
19  * You should have received a copy of the GNU General Public License
20  * along with this program; if not, write to the Free Software
21  * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
22  *
23  * Send comments and feedback to Linas Vepstas <linas@austin.ibm.com>
24  */
25 #include <linux/delay.h>
26 #include <linux/interrupt.h>
27 #include <linux/irq.h>
28 #include <linux/module.h>
29 #include <linux/pci.h>
30 #include <asm/eeh.h>
31 #include <asm/eeh_event.h>
32 #include <asm/ppc-pci.h>
33 #include <asm/pci-bridge.h>
34 #include <asm/prom.h>
35 #include <asm/rtas.h>
36 
/*
 * Bookkeeping passed as userdata to eeh_rmv_device() while devices are
 * removed ahead of a PE reset.
 */
struct eeh_rmv_data {
	/* EEH devices of removed VFs, linked through their rmv_entry member */
	struct list_head removed_vf_list;
	/* Incremented by eeh_rmv_device() for each device it removes */
	int removed_dev_count;
};
41 
42 static int eeh_result_priority(enum pci_ers_result result)
43 {
44 	switch (result) {
45 	case PCI_ERS_RESULT_NONE:
46 		return 1;
47 	case PCI_ERS_RESULT_NO_AER_DRIVER:
48 		return 2;
49 	case PCI_ERS_RESULT_RECOVERED:
50 		return 3;
51 	case PCI_ERS_RESULT_CAN_RECOVER:
52 		return 4;
53 	case PCI_ERS_RESULT_DISCONNECT:
54 		return 5;
55 	case PCI_ERS_RESULT_NEED_RESET:
56 		return 6;
57 	default:
58 		WARN_ONCE(1, "Unknown pci_ers_result value: %d\n", (int)result);
59 		return 0;
60 	}
61 };
62 
63 static const char *pci_ers_result_name(enum pci_ers_result result)
64 {
65 	switch (result) {
66 	case PCI_ERS_RESULT_NONE:
67 		return "none";
68 	case PCI_ERS_RESULT_CAN_RECOVER:
69 		return "can recover";
70 	case PCI_ERS_RESULT_NEED_RESET:
71 		return "need reset";
72 	case PCI_ERS_RESULT_DISCONNECT:
73 		return "disconnect";
74 	case PCI_ERS_RESULT_RECOVERED:
75 		return "recovered";
76 	case PCI_ERS_RESULT_NO_AER_DRIVER:
77 		return "no AER driver";
78 	default:
79 		WARN_ONCE(1, "Unknown result type: %d\n", (int)result);
80 		return "unknown";
81 	}
82 };
83 
84 static enum pci_ers_result pci_ers_merge_result(enum pci_ers_result old,
85 						enum pci_ers_result new)
86 {
87 	if (eeh_result_priority(new) > eeh_result_priority(old))
88 		return new;
89 	return old;
90 }
91 
92 static bool eeh_dev_removed(struct eeh_dev *edev)
93 {
94 	return !edev || (edev->mode & EEH_DEV_REMOVED);
95 }
96 
97 static bool eeh_edev_actionable(struct eeh_dev *edev)
98 {
99 	return (edev->pdev && !eeh_dev_removed(edev) &&
100 		!eeh_pe_passed(edev->pe));
101 }
102 
103 /**
104  * eeh_pcid_get - Get the PCI device driver
105  * @pdev: PCI device
106  *
107  * The function is used to retrieve the PCI device driver for
108  * the indicated PCI device. Besides, we will increase the reference
109  * of the PCI device driver to prevent that being unloaded on
110  * the fly. Otherwise, kernel crash would be seen.
111  */
112 static inline struct pci_driver *eeh_pcid_get(struct pci_dev *pdev)
113 {
114 	if (!pdev || !pdev->driver)
115 		return NULL;
116 
117 	if (!try_module_get(pdev->driver->driver.owner))
118 		return NULL;
119 
120 	return pdev->driver;
121 }
122 
123 /**
124  * eeh_pcid_put - Dereference on the PCI device driver
125  * @pdev: PCI device
126  *
127  * The function is called to do dereference on the PCI device
128  * driver of the indicated PCI device.
129  */
130 static inline void eeh_pcid_put(struct pci_dev *pdev)
131 {
132 	if (!pdev || !pdev->driver)
133 		return;
134 
135 	module_put(pdev->driver->driver.owner);
136 }
137 
138 /**
139  * eeh_disable_irq - Disable interrupt for the recovering device
140  * @dev: PCI device
141  *
142  * This routine must be called when reporting temporary or permanent
143  * error to the particular PCI device to disable interrupt of that
144  * device. If the device has enabled MSI or MSI-X interrupt, we needn't
145  * do real work because EEH should freeze DMA transfers for those PCI
146  * devices encountering EEH errors, which includes MSI or MSI-X.
147  */
148 static void eeh_disable_irq(struct eeh_dev *edev)
149 {
150 	/* Don't disable MSI and MSI-X interrupts. They are
151 	 * effectively disabled by the DMA Stopped state
152 	 * when an EEH error occurs.
153 	 */
154 	if (edev->pdev->msi_enabled || edev->pdev->msix_enabled)
155 		return;
156 
157 	if (!irq_has_action(edev->pdev->irq))
158 		return;
159 
160 	edev->mode |= EEH_DEV_IRQ_DISABLED;
161 	disable_irq_nosync(edev->pdev->irq);
162 }
163 
164 /**
165  * eeh_enable_irq - Enable interrupt for the recovering device
166  * @dev: PCI device
167  *
168  * This routine must be called to enable interrupt while failed
169  * device could be resumed.
170  */
171 static void eeh_enable_irq(struct eeh_dev *edev)
172 {
173 	if ((edev->mode) & EEH_DEV_IRQ_DISABLED) {
174 		edev->mode &= ~EEH_DEV_IRQ_DISABLED;
175 		/*
176 		 * FIXME !!!!!
177 		 *
178 		 * This is just ass backwards. This maze has
179 		 * unbalanced irq_enable/disable calls. So instead of
180 		 * finding the root cause it works around the warning
181 		 * in the irq_enable code by conditionally calling
182 		 * into it.
183 		 *
184 		 * That's just wrong.The warning in the core code is
185 		 * there to tell people to fix their asymmetries in
186 		 * their own code, not by abusing the core information
187 		 * to avoid it.
188 		 *
189 		 * I so wish that the assymetry would be the other way
190 		 * round and a few more irq_disable calls render that
191 		 * shit unusable forever.
192 		 *
193 		 *	tglx
194 		 */
195 		if (irqd_irq_disabled(irq_get_irq_data(edev->pdev->irq)))
196 			enable_irq(edev->pdev->irq);
197 	}
198 }
199 
200 static void eeh_dev_save_state(struct eeh_dev *edev, void *userdata)
201 {
202 	struct pci_dev *pdev;
203 
204 	if (!edev)
205 		return;
206 
207 	/*
208 	 * We cannot access the config space on some adapters.
209 	 * Otherwise, it will cause fenced PHB. We don't save
210 	 * the content in their config space and will restore
211 	 * from the initial config space saved when the EEH
212 	 * device is created.
213 	 */
214 	if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED))
215 		return;
216 
217 	pdev = eeh_dev_to_pci_dev(edev);
218 	if (!pdev)
219 		return;
220 
221 	pci_save_state(pdev);
222 }
223 
224 static void eeh_set_channel_state(struct eeh_pe *root, enum pci_channel_state s)
225 {
226 	struct eeh_pe *pe;
227 	struct eeh_dev *edev, *tmp;
228 
229 	eeh_for_each_pe(root, pe)
230 		eeh_pe_for_each_dev(pe, edev, tmp)
231 			if (eeh_edev_actionable(edev))
232 				edev->pdev->error_state = s;
233 }
234 
235 static void eeh_set_irq_state(struct eeh_pe *root, bool enable)
236 {
237 	struct eeh_pe *pe;
238 	struct eeh_dev *edev, *tmp;
239 
240 	eeh_for_each_pe(root, pe) {
241 		eeh_pe_for_each_dev(pe, edev, tmp) {
242 			if (!eeh_edev_actionable(edev))
243 				continue;
244 
245 			if (!eeh_pcid_get(edev->pdev))
246 				continue;
247 
248 			if (enable)
249 				eeh_enable_irq(edev);
250 			else
251 				eeh_disable_irq(edev);
252 
253 			eeh_pcid_put(edev->pdev);
254 		}
255 	}
256 }
257 
258 typedef enum pci_ers_result (*eeh_report_fn)(struct eeh_dev *,
259 					     struct pci_dev *,
260 					     struct pci_driver *);
261 static void eeh_pe_report_edev(struct eeh_dev *edev, eeh_report_fn fn,
262 			       enum pci_ers_result *result)
263 {
264 	struct pci_dev *pdev;
265 	struct pci_driver *driver;
266 	enum pci_ers_result new_result;
267 
268 	pci_lock_rescan_remove();
269 	pdev = edev->pdev;
270 	if (pdev)
271 		get_device(&pdev->dev);
272 	pci_unlock_rescan_remove();
273 	if (!pdev) {
274 		eeh_edev_info(edev, "no device");
275 		return;
276 	}
277 	device_lock(&pdev->dev);
278 	if (eeh_edev_actionable(edev)) {
279 		driver = eeh_pcid_get(pdev);
280 
281 		if (!driver)
282 			eeh_edev_info(edev, "no driver");
283 		else if (!driver->err_handler)
284 			eeh_edev_info(edev, "driver not EEH aware");
285 		else if (edev->mode & EEH_DEV_NO_HANDLER)
286 			eeh_edev_info(edev, "driver bound too late");
287 		else {
288 			new_result = fn(edev, pdev, driver);
289 			eeh_edev_info(edev, "%s driver reports: '%s'",
290 				      driver->name,
291 				      pci_ers_result_name(new_result));
292 			if (result)
293 				*result = pci_ers_merge_result(*result,
294 							       new_result);
295 		}
296 		if (driver)
297 			eeh_pcid_put(pdev);
298 	} else {
299 		eeh_edev_info(edev, "not actionable (%d,%d,%d)", !!pdev,
300 			      !eeh_dev_removed(edev), !eeh_pe_passed(edev->pe));
301 	}
302 	device_unlock(&pdev->dev);
303 	if (edev->pdev != pdev)
304 		eeh_edev_warn(edev, "Device changed during processing!\n");
305 	put_device(&pdev->dev);
306 }
307 
308 static void eeh_pe_report(const char *name, struct eeh_pe *root,
309 			  eeh_report_fn fn, enum pci_ers_result *result)
310 {
311 	struct eeh_pe *pe;
312 	struct eeh_dev *edev, *tmp;
313 
314 	pr_info("EEH: Beginning: '%s'\n", name);
315 	eeh_for_each_pe(root, pe) eeh_pe_for_each_dev(pe, edev, tmp)
316 		eeh_pe_report_edev(edev, fn, result);
317 	if (result)
318 		pr_info("EEH: Finished:'%s' with aggregate recovery state:'%s'\n",
319 			name, pci_ers_result_name(*result));
320 	else
321 		pr_info("EEH: Finished:'%s'", name);
322 }
323 
324 /**
325  * eeh_report_error - Report pci error to each device driver
326  * @edev: eeh device
327  * @driver: device's PCI driver
328  *
329  * Report an EEH error to each device driver.
330  */
331 static enum pci_ers_result eeh_report_error(struct eeh_dev *edev,
332 					    struct pci_dev *pdev,
333 					    struct pci_driver *driver)
334 {
335 	enum pci_ers_result rc;
336 
337 	if (!driver->err_handler->error_detected)
338 		return PCI_ERS_RESULT_NONE;
339 
340 	eeh_edev_info(edev, "Invoking %s->error_detected(IO frozen)",
341 		      driver->name);
342 	rc = driver->err_handler->error_detected(pdev, pci_channel_io_frozen);
343 
344 	edev->in_error = true;
345 	pci_uevent_ers(pdev, PCI_ERS_RESULT_NONE);
346 	return rc;
347 }
348 
349 /**
350  * eeh_report_mmio_enabled - Tell drivers that MMIO has been enabled
351  * @edev: eeh device
352  * @driver: device's PCI driver
353  *
354  * Tells each device driver that IO ports, MMIO and config space I/O
355  * are now enabled.
356  */
357 static enum pci_ers_result eeh_report_mmio_enabled(struct eeh_dev *edev,
358 						   struct pci_dev *pdev,
359 						   struct pci_driver *driver)
360 {
361 	if (!driver->err_handler->mmio_enabled)
362 		return PCI_ERS_RESULT_NONE;
363 	eeh_edev_info(edev, "Invoking %s->mmio_enabled()", driver->name);
364 	return driver->err_handler->mmio_enabled(pdev);
365 }
366 
367 /**
368  * eeh_report_reset - Tell device that slot has been reset
369  * @edev: eeh device
370  * @driver: device's PCI driver
371  *
372  * This routine must be called while EEH tries to reset particular
373  * PCI device so that the associated PCI device driver could take
374  * some actions, usually to save data the driver needs so that the
375  * driver can work again while the device is recovered.
376  */
377 static enum pci_ers_result eeh_report_reset(struct eeh_dev *edev,
378 					    struct pci_dev *pdev,
379 					    struct pci_driver *driver)
380 {
381 	if (!driver->err_handler->slot_reset || !edev->in_error)
382 		return PCI_ERS_RESULT_NONE;
383 	eeh_edev_info(edev, "Invoking %s->slot_reset()", driver->name);
384 	return driver->err_handler->slot_reset(pdev);
385 }
386 
387 static void eeh_dev_restore_state(struct eeh_dev *edev, void *userdata)
388 {
389 	struct pci_dev *pdev;
390 
391 	if (!edev)
392 		return;
393 
394 	/*
395 	 * The content in the config space isn't saved because
396 	 * the blocked config space on some adapters. We have
397 	 * to restore the initial saved config space when the
398 	 * EEH device is created.
399 	 */
400 	if (edev->pe && (edev->pe->state & EEH_PE_CFG_RESTRICTED)) {
401 		if (list_is_last(&edev->entry, &edev->pe->edevs))
402 			eeh_pe_restore_bars(edev->pe);
403 
404 		return;
405 	}
406 
407 	pdev = eeh_dev_to_pci_dev(edev);
408 	if (!pdev)
409 		return;
410 
411 	pci_restore_state(pdev);
412 }
413 
414 /**
415  * eeh_report_resume - Tell device to resume normal operations
416  * @edev: eeh device
417  * @driver: device's PCI driver
418  *
419  * This routine must be called to notify the device driver that it
420  * could resume so that the device driver can do some initialization
421  * to make the recovered device work again.
422  */
423 static enum pci_ers_result eeh_report_resume(struct eeh_dev *edev,
424 					     struct pci_dev *pdev,
425 					     struct pci_driver *driver)
426 {
427 	if (!driver->err_handler->resume || !edev->in_error)
428 		return PCI_ERS_RESULT_NONE;
429 
430 	eeh_edev_info(edev, "Invoking %s->resume()", driver->name);
431 	driver->err_handler->resume(pdev);
432 
433 	pci_uevent_ers(edev->pdev, PCI_ERS_RESULT_RECOVERED);
434 #ifdef CONFIG_PCI_IOV
435 	if (eeh_ops->notify_resume && eeh_dev_to_pdn(edev))
436 		eeh_ops->notify_resume(eeh_dev_to_pdn(edev));
437 #endif
438 	return PCI_ERS_RESULT_NONE;
439 }
440 
441 /**
442  * eeh_report_failure - Tell device driver that device is dead.
443  * @edev: eeh device
444  * @driver: device's PCI driver
445  *
446  * This informs the device driver that the device is permanently
447  * dead, and that no further recovery attempts will be made on it.
448  */
449 static enum pci_ers_result eeh_report_failure(struct eeh_dev *edev,
450 					      struct pci_dev *pdev,
451 					      struct pci_driver *driver)
452 {
453 	enum pci_ers_result rc;
454 
455 	if (!driver->err_handler->error_detected)
456 		return PCI_ERS_RESULT_NONE;
457 
458 	eeh_edev_info(edev, "Invoking %s->error_detected(permanent failure)",
459 		      driver->name);
460 	rc = driver->err_handler->error_detected(pdev,
461 						 pci_channel_io_perm_failure);
462 
463 	pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
464 	return rc;
465 }
466 
467 static void *eeh_add_virt_device(struct eeh_dev *edev)
468 {
469 	struct pci_driver *driver;
470 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
471 
472 	if (!(edev->physfn)) {
473 		eeh_edev_warn(edev, "Not for VF\n");
474 		return NULL;
475 	}
476 
477 	driver = eeh_pcid_get(dev);
478 	if (driver) {
479 		if (driver->err_handler) {
480 			eeh_pcid_put(dev);
481 			return NULL;
482 		}
483 		eeh_pcid_put(dev);
484 	}
485 
486 #ifdef CONFIG_PCI_IOV
487 	pci_iov_add_virtfn(edev->physfn, eeh_dev_to_pdn(edev)->vf_index);
488 #endif
489 	return NULL;
490 }
491 
492 static void eeh_rmv_device(struct eeh_dev *edev, void *userdata)
493 {
494 	struct pci_driver *driver;
495 	struct pci_dev *dev = eeh_dev_to_pci_dev(edev);
496 	struct eeh_rmv_data *rmv_data = (struct eeh_rmv_data *)userdata;
497 
498 	/*
499 	 * Actually, we should remove the PCI bridges as well.
500 	 * However, that's lots of complexity to do that,
501 	 * particularly some of devices under the bridge might
502 	 * support EEH. So we just care about PCI devices for
503 	 * simplicity here.
504 	 */
505 	if (!eeh_edev_actionable(edev) ||
506 	    (dev->hdr_type == PCI_HEADER_TYPE_BRIDGE))
507 		return;
508 
509 	if (rmv_data) {
510 		driver = eeh_pcid_get(dev);
511 		if (driver) {
512 			if (driver->err_handler &&
513 			    driver->err_handler->error_detected &&
514 			    driver->err_handler->slot_reset) {
515 				eeh_pcid_put(dev);
516 				return;
517 			}
518 			eeh_pcid_put(dev);
519 		}
520 	}
521 
522 	/* Remove it from PCI subsystem */
523 	pr_info("EEH: Removing %s without EEH sensitive driver\n",
524 		pci_name(dev));
525 	edev->mode |= EEH_DEV_DISCONNECTED;
526 	if (rmv_data)
527 		rmv_data->removed_dev_count++;
528 
529 	if (edev->physfn) {
530 #ifdef CONFIG_PCI_IOV
531 		struct pci_dn *pdn = eeh_dev_to_pdn(edev);
532 
533 		pci_iov_remove_virtfn(edev->physfn, pdn->vf_index);
534 		edev->pdev = NULL;
535 
536 		/*
537 		 * We have to set the VF PE number to invalid one, which is
538 		 * required to plug the VF successfully.
539 		 */
540 		pdn->pe_number = IODA_INVALID_PE;
541 #endif
542 		if (rmv_data)
543 			list_add(&edev->rmv_entry, &rmv_data->removed_vf_list);
544 	} else {
545 		pci_lock_rescan_remove();
546 		pci_stop_and_remove_bus_device(dev);
547 		pci_unlock_rescan_remove();
548 	}
549 }
550 
551 static void *eeh_pe_detach_dev(struct eeh_pe *pe, void *userdata)
552 {
553 	struct eeh_dev *edev, *tmp;
554 
555 	eeh_pe_for_each_dev(pe, edev, tmp) {
556 		if (!(edev->mode & EEH_DEV_DISCONNECTED))
557 			continue;
558 
559 		edev->mode &= ~(EEH_DEV_DISCONNECTED | EEH_DEV_IRQ_DISABLED);
560 		eeh_rmv_from_parent_pe(edev);
561 	}
562 
563 	return NULL;
564 }
565 
566 /*
567  * Explicitly clear PE's frozen state for PowerNV where
568  * we have frozen PE until BAR restore is completed. It's
569  * harmless to clear it for pSeries. To be consistent with
570  * PE reset (for 3 times), we try to clear the frozen state
571  * for 3 times as well.
572  */
573 static int eeh_clear_pe_frozen_state(struct eeh_pe *root, bool include_passed)
574 {
575 	struct eeh_pe *pe;
576 	int i;
577 
578 	eeh_for_each_pe(root, pe) {
579 		if (include_passed || !eeh_pe_passed(pe)) {
580 			for (i = 0; i < 3; i++)
581 				if (!eeh_unfreeze_pe(pe))
582 					break;
583 			if (i >= 3)
584 				return -EIO;
585 		}
586 	}
587 	eeh_pe_state_clear(root, EEH_PE_ISOLATED, include_passed);
588 	return 0;
589 }
590 
591 int eeh_pe_reset_and_recover(struct eeh_pe *pe)
592 {
593 	int ret;
594 
595 	/* Bail if the PE is being recovered */
596 	if (pe->state & EEH_PE_RECOVERING)
597 		return 0;
598 
599 	/* Put the PE into recovery mode */
600 	eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
601 
602 	/* Save states */
603 	eeh_pe_dev_traverse(pe, eeh_dev_save_state, NULL);
604 
605 	/* Issue reset */
606 	ret = eeh_pe_reset_full(pe, true);
607 	if (ret) {
608 		eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
609 		return ret;
610 	}
611 
612 	/* Unfreeze the PE */
613 	ret = eeh_clear_pe_frozen_state(pe, true);
614 	if (ret) {
615 		eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
616 		return ret;
617 	}
618 
619 	/* Restore device state */
620 	eeh_pe_dev_traverse(pe, eeh_dev_restore_state, NULL);
621 
622 	/* Clear recovery mode */
623 	eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
624 
625 	return 0;
626 }
627 
628 /**
629  * eeh_reset_device - Perform actual reset of a pci slot
630  * @driver_eeh_aware: Does the device's driver provide EEH support?
631  * @pe: EEH PE
632  * @bus: PCI bus corresponding to the isolcated slot
633  * @rmv_data: Optional, list to record removed devices
634  *
635  * This routine must be called to do reset on the indicated PE.
636  * During the reset, udev might be invoked because those affected
637  * PCI devices will be removed and then added.
638  */
639 static int eeh_reset_device(struct eeh_pe *pe, struct pci_bus *bus,
640 			    struct eeh_rmv_data *rmv_data,
641 			    bool driver_eeh_aware)
642 {
643 	time64_t tstamp;
644 	int cnt, rc;
645 	struct eeh_dev *edev;
646 	struct eeh_pe *tmp_pe;
647 	bool any_passed = false;
648 
649 	eeh_for_each_pe(pe, tmp_pe)
650 		any_passed |= eeh_pe_passed(tmp_pe);
651 
652 	/* pcibios will clear the counter; save the value */
653 	cnt = pe->freeze_count;
654 	tstamp = pe->tstamp;
655 
656 	/*
657 	 * We don't remove the corresponding PE instances because
658 	 * we need the information afterwords. The attached EEH
659 	 * devices are expected to be attached soon when calling
660 	 * into pci_hp_add_devices().
661 	 */
662 	eeh_pe_state_mark(pe, EEH_PE_KEEP);
663 	if (any_passed || driver_eeh_aware || (pe->type & EEH_PE_VF)) {
664 		eeh_pe_dev_traverse(pe, eeh_rmv_device, rmv_data);
665 	} else {
666 		pci_lock_rescan_remove();
667 		pci_hp_remove_devices(bus);
668 		pci_unlock_rescan_remove();
669 	}
670 
671 	/*
672 	 * Reset the pci controller. (Asserts RST#; resets config space).
673 	 * Reconfigure bridges and devices. Don't try to bring the system
674 	 * up if the reset failed for some reason.
675 	 *
676 	 * During the reset, it's very dangerous to have uncontrolled PCI
677 	 * config accesses. So we prefer to block them. However, controlled
678 	 * PCI config accesses initiated from EEH itself are allowed.
679 	 */
680 	rc = eeh_pe_reset_full(pe, false);
681 	if (rc)
682 		return rc;
683 
684 	pci_lock_rescan_remove();
685 
686 	/* Restore PE */
687 	eeh_ops->configure_bridge(pe);
688 	eeh_pe_restore_bars(pe);
689 
690 	/* Clear frozen state */
691 	rc = eeh_clear_pe_frozen_state(pe, false);
692 	if (rc) {
693 		pci_unlock_rescan_remove();
694 		return rc;
695 	}
696 
697 	/* Give the system 5 seconds to finish running the user-space
698 	 * hotplug shutdown scripts, e.g. ifdown for ethernet.  Yes,
699 	 * this is a hack, but if we don't do this, and try to bring
700 	 * the device up before the scripts have taken it down,
701 	 * potentially weird things happen.
702 	 */
703 	if (!driver_eeh_aware || rmv_data->removed_dev_count) {
704 		pr_info("EEH: Sleep 5s ahead of %s hotplug\n",
705 			(driver_eeh_aware ? "partial" : "complete"));
706 		ssleep(5);
707 
708 		/*
709 		 * The EEH device is still connected with its parent
710 		 * PE. We should disconnect it so the binding can be
711 		 * rebuilt when adding PCI devices.
712 		 */
713 		edev = list_first_entry(&pe->edevs, struct eeh_dev, entry);
714 		eeh_pe_traverse(pe, eeh_pe_detach_dev, NULL);
715 		if (pe->type & EEH_PE_VF) {
716 			eeh_add_virt_device(edev);
717 		} else {
718 			if (!driver_eeh_aware)
719 				eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
720 			pci_hp_add_devices(bus);
721 		}
722 	}
723 	eeh_pe_state_clear(pe, EEH_PE_KEEP, true);
724 
725 	pe->tstamp = tstamp;
726 	pe->freeze_count = cnt;
727 
728 	pci_unlock_rescan_remove();
729 	return 0;
730 }
731 
/* The longest amount of time to wait for a pci device
 * to come back online, in seconds. eeh_handle_normal_event()
 * multiplies it by 1000 before handing it to eeh_wait_state().
 */
#define MAX_WAIT_FOR_RECOVERY 300
736 
/**
 * eeh_handle_normal_event - Handle EEH events on a specific PE
 * @pe: EEH PE - which should not be used after we return, as it may
 * have been invalidated.
 *
 * Attempts to recover the given PE.  If recovery fails or the PE has failed
 * too many times, remove the PE.
 *
 * While PHB detects address or data parity errors on particular PCI
 * slot, the associated PE will be frozen. Besides, DMA's occurring
 * to wild addresses (which usually happen due to bugs in device
 * drivers or in PCI adapter firmware) can cause EEH error. #SERR,
 * #PERR or other misc PCI-related errors also can trigger EEH errors.
 *
 * Recovery process consists of unplugging the device driver (which
 * generated hotplug events to userspace), then issuing a PCI #RST to
 * the device, then reconfiguring the PCI config space for all bridges
 * & devices under this slot, and then finally restarting the device
 * drivers (which cause a second set of hotplug events to go out to
 * userspace).
 *
 * The function is a small state machine driven by the local "result":
 * each stage below only runs when "result" has the value it expects,
 * and may move "result" on to the next stage or to
 * PCI_ERS_RESULT_DISCONNECT, which selects the permanent-failure path.
 */
void eeh_handle_normal_event(struct eeh_pe *pe)
{
	struct pci_bus *bus;
	struct eeh_dev *edev, *tmp;
	struct eeh_pe *tmp_pe;
	int rc = 0;
	enum pci_ers_result result = PCI_ERS_RESULT_NONE;
	/* Filled in by eeh_rmv_device() during a reset without hotplug */
	struct eeh_rmv_data rmv_data =
		{LIST_HEAD_INIT(rmv_data.removed_vf_list), 0};

	bus = eeh_pe_bus_get(pe);
	if (!bus) {
		pr_err("%s: Cannot find PCI bus for PHB#%x-PE#%x\n",
			__func__, pe->phb->global_number, pe->addr);
		return;
	}

	eeh_pe_state_mark(pe, EEH_PE_RECOVERING);

	/* Account for this freeze; too many of them disables the PE */
	eeh_pe_update_time_stamp(pe);
	pe->freeze_count++;
	if (pe->freeze_count > eeh_max_freezes) {
		pr_err("EEH: PHB#%x-PE#%x has failed %d times in the last hour and has been permanently disabled.\n",
		       pe->phb->global_number, pe->addr,
		       pe->freeze_count);
		result = PCI_ERS_RESULT_DISCONNECT;
	}

	/* Allow every device in the tree to receive error callbacks */
	eeh_for_each_pe(pe, tmp_pe)
		eeh_pe_for_each_dev(tmp_pe, edev, tmp)
			edev->mode &= ~EEH_DEV_NO_HANDLER;

	/* Walk the various device drivers attached to this slot through
	 * a reset sequence, giving each an opportunity to do what it needs
	 * to accomplish the reset.  Each child gets a report of the
	 * status ... if any child can't handle the reset, then the entire
	 * slot is dlpar removed and added.
	 *
	 * When the PHB is fenced, we have to issue a reset to recover from
	 * the error. Override the result if necessary to have partially
	 * hotplug for this case.
	 */
	if (result != PCI_ERS_RESULT_DISCONNECT) {
		pr_warn("EEH: This PCI device has failed %d times in the last hour and will be permanently disabled after %d failures.\n",
			pe->freeze_count, eeh_max_freezes);
		pr_info("EEH: Notify device drivers to shutdown\n");
		eeh_set_channel_state(pe, pci_channel_io_frozen);
		eeh_set_irq_state(pe, false);
		eeh_pe_report("error_detected(IO frozen)", pe,
			      eeh_report_error, &result);
		/* For a PHB PE, any driver answer other than "none" becomes a reset */
		if ((pe->type & EEH_PE_PHB) &&
		    result != PCI_ERS_RESULT_NONE &&
		    result != PCI_ERS_RESULT_NEED_RESET)
			result = PCI_ERS_RESULT_NEED_RESET;
	}

	/* Get the current PCI slot state. This can take a long time,
	 * sometimes over 300 seconds for certain systems.
	 */
	if (result != PCI_ERS_RESULT_DISCONNECT) {
		rc = eeh_wait_state(pe, MAX_WAIT_FOR_RECOVERY*1000);
		if (rc < 0 || rc == EEH_STATE_NOT_SUPPORT) {
			pr_warn("EEH: Permanent failure\n");
			result = PCI_ERS_RESULT_DISCONNECT;
		}
	}

	/* Since rtas may enable MMIO when posting the error log,
	 * don't post the error log until after all dev drivers
	 * have been informed.
	 */
	if (result != PCI_ERS_RESULT_DISCONNECT) {
		pr_info("EEH: Collect temporary log\n");
		eeh_slot_error_detail(pe, EEH_LOG_TEMP);
	}

	/* If all device drivers were EEH-unaware, then shut
	 * down all of the device drivers, and hope they
	 * go down willingly, without panicking the system.
	 */
	if (result == PCI_ERS_RESULT_NONE) {
		pr_info("EEH: Reset with hotplug activity\n");
		rc = eeh_reset_device(pe, bus, NULL, false);
		if (rc) {
			pr_warn("%s: Unable to reset, err=%d\n",
				__func__, rc);
			result = PCI_ERS_RESULT_DISCONNECT;
		}
	}

	/* If all devices reported they can proceed, then re-enable MMIO */
	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
		pr_info("EEH: Enable I/O for affected devices\n");
		rc = eeh_pci_enable(pe, EEH_OPT_THAW_MMIO);

		/* Negative rc: give up; positive rc: fall back to a reset */
		if (rc < 0) {
			result = PCI_ERS_RESULT_DISCONNECT;
		} else if (rc) {
			result = PCI_ERS_RESULT_NEED_RESET;
		} else {
			pr_info("EEH: Notify device drivers to resume I/O\n");
			eeh_pe_report("mmio_enabled", pe,
				      eeh_report_mmio_enabled, &result);
		}
	}

	/* If all devices reported they can proceed, then re-enable DMA */
	if (result == PCI_ERS_RESULT_CAN_RECOVER) {
		pr_info("EEH: Enabled DMA for affected devices\n");
		rc = eeh_pci_enable(pe, EEH_OPT_THAW_DMA);

		/* Negative rc: give up; positive rc: fall back to a reset */
		if (rc < 0) {
			result = PCI_ERS_RESULT_DISCONNECT;
		} else if (rc) {
			result = PCI_ERS_RESULT_NEED_RESET;
		} else {
			/*
			 * We didn't do PE reset for the case. The PE
			 * is still in frozen state. Clear it before
			 * resuming the PE.
			 */
			eeh_pe_state_clear(pe, EEH_PE_ISOLATED, true);
			result = PCI_ERS_RESULT_RECOVERED;
		}
	}

	/* If any device called out for a reset, then reset the slot */
	if (result == PCI_ERS_RESULT_NEED_RESET) {
		pr_info("EEH: Reset without hotplug activity\n");
		rc = eeh_reset_device(pe, bus, &rmv_data, true);
		if (rc) {
			pr_warn("%s: Cannot reset, err=%d\n",
				__func__, rc);
			result = PCI_ERS_RESULT_DISCONNECT;
		} else {
			/* Start from "none" and let the drivers vote again */
			result = PCI_ERS_RESULT_NONE;
			eeh_set_channel_state(pe, pci_channel_io_normal);
			eeh_set_irq_state(pe, true);
			eeh_pe_report("slot_reset", pe, eeh_report_reset,
				      &result);
		}
	}

	if ((result == PCI_ERS_RESULT_RECOVERED) ||
	    (result == PCI_ERS_RESULT_NONE)) {
		/*
		 * For those hot removed VFs, we should add back them after PF
		 * get recovered properly.
		 */
		list_for_each_entry_safe(edev, tmp, &rmv_data.removed_vf_list,
					 rmv_entry) {
			eeh_add_virt_device(edev);
			list_del(&edev->rmv_entry);
		}

		/* Tell all device drivers that they can resume operations */
		pr_info("EEH: Notify device driver to resume\n");
		eeh_set_channel_state(pe, pci_channel_io_normal);
		eeh_set_irq_state(pe, true);
		eeh_pe_report("resume", pe, eeh_report_resume, NULL);
		/* Recovery is over: reset the per-device error bookkeeping */
		eeh_for_each_pe(pe, tmp_pe) {
			eeh_pe_for_each_dev(tmp_pe, edev, tmp) {
				edev->mode &= ~EEH_DEV_NO_HANDLER;
				edev->in_error = false;
			}
		}

		pr_info("EEH: Recovery successful.\n");
	} else  {
		/*
		 * About 90% of all real-life EEH failures in the field
		 * are due to poorly seated PCI cards. Only 10% or so are
		 * due to actual, failed cards.
		 */
		pr_err("EEH: Unable to recover from failure from PHB#%x-PE#%x.\n"
		       "Please try reseating or replacing it\n",
			pe->phb->global_number, pe->addr);

		eeh_slot_error_detail(pe, EEH_LOG_PERM);

		/* Notify all devices that they're about to go down. */
		eeh_set_channel_state(pe, pci_channel_io_perm_failure);
		eeh_set_irq_state(pe, false);
		eeh_pe_report("error_detected(permanent failure)", pe,
			      eeh_report_failure, NULL);

		/* Mark the PE to be removed permanently */
		eeh_pe_state_mark(pe, EEH_PE_REMOVED);

		/*
		 * Shut down the device drivers for good. We mark
		 * all removed devices correctly to avoid accessing
		 * their PCI config space any more.
		 */
		if (pe->type & EEH_PE_VF) {
			eeh_pe_dev_traverse(pe, eeh_rmv_device, NULL);
			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);
		} else {
			eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
			eeh_pe_dev_mode_mark(pe, EEH_DEV_REMOVED);

			pci_lock_rescan_remove();
			pci_hp_remove_devices(bus);
			pci_unlock_rescan_remove();
			/* The passed PE should no longer be used */
			return;
		}
	}
	/* Not reached on the non-VF permanent-failure path above */
	eeh_pe_state_clear(pe, EEH_PE_RECOVERING, true);
}
968 
969 /**
970  * eeh_handle_special_event - Handle EEH events without a specific failing PE
971  *
972  * Called when an EEH event is detected but can't be narrowed down to a
973  * specific PE.  Iterates through possible failures and handles them as
974  * necessary.
975  */
976 void eeh_handle_special_event(void)
977 {
978 	struct eeh_pe *pe, *phb_pe, *tmp_pe;
979 	struct eeh_dev *edev, *tmp_edev;
980 	struct pci_bus *bus;
981 	struct pci_controller *hose;
982 	unsigned long flags;
983 	int rc;
984 
985 
986 	do {
987 		rc = eeh_ops->next_error(&pe);
988 
989 		switch (rc) {
990 		case EEH_NEXT_ERR_DEAD_IOC:
991 			/* Mark all PHBs in dead state */
992 			eeh_serialize_lock(&flags);
993 
994 			/* Purge all events */
995 			eeh_remove_event(NULL, true);
996 
997 			list_for_each_entry(hose, &hose_list, list_node) {
998 				phb_pe = eeh_phb_pe_get(hose);
999 				if (!phb_pe) continue;
1000 
1001 				eeh_pe_mark_isolated(phb_pe);
1002 			}
1003 
1004 			eeh_serialize_unlock(flags);
1005 
1006 			break;
1007 		case EEH_NEXT_ERR_FROZEN_PE:
1008 		case EEH_NEXT_ERR_FENCED_PHB:
1009 		case EEH_NEXT_ERR_DEAD_PHB:
1010 			/* Mark the PE in fenced state */
1011 			eeh_serialize_lock(&flags);
1012 
1013 			/* Purge all events of the PHB */
1014 			eeh_remove_event(pe, true);
1015 
1016 			if (rc != EEH_NEXT_ERR_DEAD_PHB)
1017 				eeh_pe_state_mark(pe, EEH_PE_RECOVERING);
1018 			eeh_pe_mark_isolated(pe);
1019 
1020 			eeh_serialize_unlock(flags);
1021 
1022 			break;
1023 		case EEH_NEXT_ERR_NONE:
1024 			return;
1025 		default:
1026 			pr_warn("%s: Invalid value %d from next_error()\n",
1027 				__func__, rc);
1028 			return;
1029 		}
1030 
1031 		/*
1032 		 * For fenced PHB and frozen PE, it's handled as normal
1033 		 * event. We have to remove the affected PHBs for dead
1034 		 * PHB and IOC
1035 		 */
1036 		if (rc == EEH_NEXT_ERR_FROZEN_PE ||
1037 		    rc == EEH_NEXT_ERR_FENCED_PHB) {
1038 			eeh_handle_normal_event(pe);
1039 		} else {
1040 			pci_lock_rescan_remove();
1041 			list_for_each_entry(hose, &hose_list, list_node) {
1042 				phb_pe = eeh_phb_pe_get(hose);
1043 				if (!phb_pe ||
1044 				    !(phb_pe->state & EEH_PE_ISOLATED) ||
1045 				    (phb_pe->state & EEH_PE_RECOVERING))
1046 					continue;
1047 
1048 				eeh_for_each_pe(pe, tmp_pe)
1049 					eeh_pe_for_each_dev(tmp_pe, edev, tmp_edev)
1050 						edev->mode &= ~EEH_DEV_NO_HANDLER;
1051 
1052 				/* Notify all devices to be down */
1053 				eeh_pe_state_clear(pe, EEH_PE_PRI_BUS, true);
1054 				eeh_set_channel_state(pe, pci_channel_io_perm_failure);
1055 				eeh_pe_report(
1056 					"error_detected(permanent failure)", pe,
1057 					eeh_report_failure, NULL);
1058 				bus = eeh_pe_bus_get(phb_pe);
1059 				if (!bus) {
1060 					pr_err("%s: Cannot find PCI bus for "
1061 					       "PHB#%x-PE#%x\n",
1062 					       __func__,
1063 					       pe->phb->global_number,
1064 					       pe->addr);
1065 					break;
1066 				}
1067 				pci_hp_remove_devices(bus);
1068 			}
1069 			pci_unlock_rescan_remove();
1070 		}
1071 
1072 		/*
1073 		 * If we have detected dead IOC, we needn't proceed
1074 		 * any more since all PHBs would have been removed
1075 		 */
1076 		if (rc == EEH_NEXT_ERR_DEAD_IOC)
1077 			break;
1078 	} while (rc != EEH_NEXT_ERR_NONE);
1079 }
1080