12e28bc84SOza Pawandeep // SPDX-License-Identifier: GPL-2.0 22e28bc84SOza Pawandeep /* 32e28bc84SOza Pawandeep * This file implements the error recovery as a core part of PCIe error 42e28bc84SOza Pawandeep * reporting. When a PCIe error is delivered, an error message will be 52e28bc84SOza Pawandeep * collected and printed to console, then, an error recovery procedure 62e28bc84SOza Pawandeep * will be executed by following the PCI error recovery rules. 72e28bc84SOza Pawandeep * 82e28bc84SOza Pawandeep * Copyright (C) 2006 Intel Corp. 92e28bc84SOza Pawandeep * Tom Long Nguyen (tom.l.nguyen@intel.com) 102e28bc84SOza Pawandeep * Zhang Yanmin (yanmin.zhang@intel.com) 112e28bc84SOza Pawandeep */ 122e28bc84SOza Pawandeep 138d077c3cSBjorn Helgaas #define dev_fmt(fmt) "AER: " fmt 148d077c3cSBjorn Helgaas 152e28bc84SOza Pawandeep #include <linux/pci.h> 162e28bc84SOza Pawandeep #include <linux/module.h> 172e28bc84SOza Pawandeep #include <linux/kernel.h> 182e28bc84SOza Pawandeep #include <linux/errno.h> 192e28bc84SOza Pawandeep #include <linux/aer.h> 202e28bc84SOza Pawandeep #include "portdrv.h" 212e28bc84SOza Pawandeep #include "../pci.h" 222e28bc84SOza Pawandeep 232e28bc84SOza Pawandeep static pci_ers_result_t merge_result(enum pci_ers_result orig, 242e28bc84SOza Pawandeep enum pci_ers_result new) 252e28bc84SOza Pawandeep { 262e28bc84SOza Pawandeep if (new == PCI_ERS_RESULT_NO_AER_DRIVER) 272e28bc84SOza Pawandeep return PCI_ERS_RESULT_NO_AER_DRIVER; 282e28bc84SOza Pawandeep 292e28bc84SOza Pawandeep if (new == PCI_ERS_RESULT_NONE) 302e28bc84SOza Pawandeep return orig; 312e28bc84SOza Pawandeep 322e28bc84SOza Pawandeep switch (orig) { 332e28bc84SOza Pawandeep case PCI_ERS_RESULT_CAN_RECOVER: 342e28bc84SOza Pawandeep case PCI_ERS_RESULT_RECOVERED: 352e28bc84SOza Pawandeep orig = new; 362e28bc84SOza Pawandeep break; 372e28bc84SOza Pawandeep case PCI_ERS_RESULT_DISCONNECT: 382e28bc84SOza Pawandeep if (new == PCI_ERS_RESULT_NEED_RESET) 392e28bc84SOza Pawandeep orig = PCI_ERS_RESULT_NEED_RESET; 402e28bc84SOza Pawandeep break; 412e28bc84SOza Pawandeep default: 422e28bc84SOza Pawandeep break; 432e28bc84SOza Pawandeep } 442e28bc84SOza Pawandeep 452e28bc84SOza Pawandeep return orig; 462e28bc84SOza Pawandeep } 472e28bc84SOza Pawandeep 48542aeb9cSKeith Busch static int report_error_detected(struct pci_dev *dev, 4916d79cd4SLuc Van Oostenryck pci_channel_state_t state, 50542aeb9cSKeith Busch enum pci_ers_result *result) 512e28bc84SOza Pawandeep { 522e28bc84SOza Pawandeep pci_ers_result_t vote; 532e28bc84SOza Pawandeep const struct pci_error_handlers *err_handler; 542e28bc84SOza Pawandeep 552e28bc84SOza Pawandeep device_lock(&dev->dev); 56a6bd101bSKeith Busch if (!pci_dev_set_io_state(dev, state) || 57a6bd101bSKeith Busch !dev->driver || 582e28bc84SOza Pawandeep !dev->driver->err_handler || 592e28bc84SOza Pawandeep !dev->driver->err_handler->error_detected) { 602e28bc84SOza Pawandeep /* 61bfcb79fcSKeith Busch * If any device in the subtree does not have an error_detected 62bfcb79fcSKeith Busch * callback, PCI_ERS_RESULT_NO_AER_DRIVER prevents subsequent 63bfcb79fcSKeith Busch * error callbacks of "any" device in the subtree, and will 64bfcb79fcSKeith Busch * exit in the disconnected error state. 652e28bc84SOza Pawandeep */ 6601daacfbSYicong Yang if (dev->hdr_type != PCI_HEADER_TYPE_BRIDGE) { 672e28bc84SOza Pawandeep vote = PCI_ERS_RESULT_NO_AER_DRIVER; 688d077c3cSBjorn Helgaas pci_info(dev, "can't recover (no error_detected callback)\n"); 6901daacfbSYicong Yang } else { 702e28bc84SOza Pawandeep vote = PCI_ERS_RESULT_NONE; 7101daacfbSYicong Yang } 722e28bc84SOza Pawandeep } else { 732e28bc84SOza Pawandeep err_handler = dev->driver->err_handler; 74542aeb9cSKeith Busch vote = err_handler->error_detected(dev, state); 752e28bc84SOza Pawandeep } 767b42d97eSKeith Busch pci_uevent_ers(dev, vote); 77542aeb9cSKeith Busch *result = merge_result(*result, vote); 782e28bc84SOza Pawandeep device_unlock(&dev->dev); 792e28bc84SOza Pawandeep return 0; 802e28bc84SOza Pawandeep } 812e28bc84SOza Pawandeep 82542aeb9cSKeith Busch static int report_frozen_detected(struct pci_dev *dev, void *data) 83542aeb9cSKeith Busch { 84542aeb9cSKeith Busch return report_error_detected(dev, pci_channel_io_frozen, data); 85542aeb9cSKeith Busch } 86542aeb9cSKeith Busch 87542aeb9cSKeith Busch static int report_normal_detected(struct pci_dev *dev, void *data) 88542aeb9cSKeith Busch { 89542aeb9cSKeith Busch return report_error_detected(dev, pci_channel_io_normal, data); 90542aeb9cSKeith Busch } 91542aeb9cSKeith Busch 922e28bc84SOza Pawandeep static int report_mmio_enabled(struct pci_dev *dev, void *data) 932e28bc84SOza Pawandeep { 94542aeb9cSKeith Busch pci_ers_result_t vote, *result = data; 952e28bc84SOza Pawandeep const struct pci_error_handlers *err_handler; 962e28bc84SOza Pawandeep 972e28bc84SOza Pawandeep device_lock(&dev->dev); 982e28bc84SOza Pawandeep if (!dev->driver || 992e28bc84SOza Pawandeep !dev->driver->err_handler || 1002e28bc84SOza Pawandeep !dev->driver->err_handler->mmio_enabled) 1012e28bc84SOza Pawandeep goto out; 1022e28bc84SOza Pawandeep 1032e28bc84SOza Pawandeep err_handler = dev->driver->err_handler; 1042e28bc84SOza Pawandeep vote = err_handler->mmio_enabled(dev); 105542aeb9cSKeith Busch *result = merge_result(*result, vote); 1062e28bc84SOza Pawandeep out: 1072e28bc84SOza Pawandeep device_unlock(&dev->dev); 1082e28bc84SOza Pawandeep return 0; 1092e28bc84SOza Pawandeep } 1102e28bc84SOza Pawandeep 1112e28bc84SOza Pawandeep static int report_slot_reset(struct pci_dev *dev, void *data) 1122e28bc84SOza Pawandeep { 113542aeb9cSKeith Busch pci_ers_result_t vote, *result = data; 1142e28bc84SOza Pawandeep const struct pci_error_handlers *err_handler; 1152e28bc84SOza Pawandeep 1162e28bc84SOza Pawandeep device_lock(&dev->dev); 1172e28bc84SOza Pawandeep if (!dev->driver || 1182e28bc84SOza Pawandeep !dev->driver->err_handler || 1192e28bc84SOza Pawandeep !dev->driver->err_handler->slot_reset) 1202e28bc84SOza Pawandeep goto out; 1212e28bc84SOza Pawandeep 1222e28bc84SOza Pawandeep err_handler = dev->driver->err_handler; 1232e28bc84SOza Pawandeep vote = err_handler->slot_reset(dev); 124542aeb9cSKeith Busch *result = merge_result(*result, vote); 1252e28bc84SOza Pawandeep out: 1262e28bc84SOza Pawandeep device_unlock(&dev->dev); 1272e28bc84SOza Pawandeep return 0; 1282e28bc84SOza Pawandeep } 1292e28bc84SOza Pawandeep 1302e28bc84SOza Pawandeep static int report_resume(struct pci_dev *dev, void *data) 1312e28bc84SOza Pawandeep { 1322e28bc84SOza Pawandeep const struct pci_error_handlers *err_handler; 1332e28bc84SOza Pawandeep 1342e28bc84SOza Pawandeep device_lock(&dev->dev); 135a6bd101bSKeith Busch if (!pci_dev_set_io_state(dev, pci_channel_io_normal) || 136a6bd101bSKeith Busch !dev->driver || 1372e28bc84SOza Pawandeep !dev->driver->err_handler || 1382e28bc84SOza Pawandeep !dev->driver->err_handler->resume) 1392e28bc84SOza Pawandeep goto out; 1402e28bc84SOza Pawandeep 1412e28bc84SOza Pawandeep err_handler = dev->driver->err_handler; 1422e28bc84SOza Pawandeep err_handler->resume(dev); 1432e28bc84SOza Pawandeep out: 1447b42d97eSKeith Busch pci_uevent_ers(dev, PCI_ERS_RESULT_RECOVERED); 1452e28bc84SOza Pawandeep device_unlock(&dev->dev); 1462e28bc84SOza Pawandeep return 0; 1472e28bc84SOza Pawandeep } 1482e28bc84SOza Pawandeep 14905e9ae19SSean V Kelley /** 15005e9ae19SSean V Kelley * pci_walk_bridge - walk bridges potentially AER affected 15157908622SQiuxu Zhuo * @bridge: bridge which may be a Port, an RCEC, or an RCiEP 15205e9ae19SSean V Kelley * @cb: callback to be called for each device found 15305e9ae19SSean V Kelley * @userdata: arbitrary pointer to be passed to callback 15405e9ae19SSean V Kelley * 15505e9ae19SSean V Kelley * If the device provided is a bridge, walk the subordinate bus, including 15605e9ae19SSean V Kelley * any bridged devices on buses under this bus. Call the provided callback 15705e9ae19SSean V Kelley * on each device found. 158a175102bSSean V Kelley * 15957908622SQiuxu Zhuo * If the device provided has no subordinate bus, e.g., an RCEC or RCiEP, 16057908622SQiuxu Zhuo * call the callback on the device itself. 16105e9ae19SSean V Kelley */ 16205e9ae19SSean V Kelley static void pci_walk_bridge(struct pci_dev *bridge, 16305e9ae19SSean V Kelley int (*cb)(struct pci_dev *, void *), 16405e9ae19SSean V Kelley void *userdata) 16505e9ae19SSean V Kelley { 16605e9ae19SSean V Kelley if (bridge->subordinate) 16705e9ae19SSean V Kelley pci_walk_bus(bridge->subordinate, cb, userdata); 168a175102bSSean V Kelley else 169a175102bSSean V Kelley cb(bridge, userdata); 17005e9ae19SSean V Kelley } 17105e9ae19SSean V Kelley 172e8e5ff2aSKuppuswamy Sathyanarayanan pci_ers_result_t pcie_do_recovery(struct pci_dev *dev, 17316d79cd4SLuc Van Oostenryck pci_channel_state_t state, 1748f1bbfbcSSean V Kelley pci_ers_result_t (*reset_subordinates)(struct pci_dev *pdev)) 1752e28bc84SOza Pawandeep { 176480ef7cbSSean V Kelley int type = pci_pcie_type(dev); 1770791721dSSean V Kelley struct pci_dev *bridge; 1780791721dSSean V Kelley pci_ers_result_t status = PCI_ERS_RESULT_CAN_RECOVER; 179aa344bc8SSean V Kelley struct pci_host_bridge *host = pci_find_host_bridge(dev->bus); 1802e28bc84SOza Pawandeep 181bfcb79fcSKeith Busch /* 18257908622SQiuxu Zhuo * If the error was detected by a Root Port, Downstream Port, RCEC, 18357908622SQiuxu Zhuo * or RCiEP, recovery runs on the device itself. For Ports, that 18457908622SQiuxu Zhuo * also includes any subordinate devices. 185a175102bSSean V Kelley * 186a175102bSSean V Kelley * If it was detected by another device (Endpoint, etc), recovery 187a175102bSSean V Kelley * runs on the device and anything else under the same Port, i.e., 188a175102bSSean V Kelley * everything under "bridge". 189bfcb79fcSKeith Busch */ 1903d7d8fc7SSean V Kelley if (type == PCI_EXP_TYPE_ROOT_PORT || 191a175102bSSean V Kelley type == PCI_EXP_TYPE_DOWNSTREAM || 19257908622SQiuxu Zhuo type == PCI_EXP_TYPE_RC_EC || 19357908622SQiuxu Zhuo type == PCI_EXP_TYPE_RC_END) 1940791721dSSean V Kelley bridge = dev; 1953d7d8fc7SSean V Kelley else 1963d7d8fc7SSean V Kelley bridge = pci_upstream_bridge(dev); 197bfcb79fcSKeith Busch 1980791721dSSean V Kelley pci_dbg(bridge, "broadcast error_detected message\n"); 199b5dfbeacSKuppuswamy Sathyanarayanan if (state == pci_channel_io_frozen) { 20005e9ae19SSean V Kelley pci_walk_bridge(bridge, report_frozen_detected, &status); 201*387c72cdSKeith Busch if (reset_subordinates(bridge) != PCI_ERS_RESULT_RECOVERED) { 2020791721dSSean V Kelley pci_warn(bridge, "subordinate device reset failed\n"); 203bdb5ac85SKeith Busch goto failed; 204b6cf1a42SKuppuswamy Sathyanarayanan } 205b5dfbeacSKuppuswamy Sathyanarayanan } else { 20605e9ae19SSean V Kelley pci_walk_bridge(bridge, report_normal_detected, &status); 207b5dfbeacSKuppuswamy Sathyanarayanan } 208bdb5ac85SKeith Busch 209542aeb9cSKeith Busch if (status == PCI_ERS_RESULT_CAN_RECOVER) { 210542aeb9cSKeith Busch status = PCI_ERS_RESULT_RECOVERED; 2110791721dSSean V Kelley pci_dbg(bridge, "broadcast mmio_enabled message\n"); 21205e9ae19SSean V Kelley pci_walk_bridge(bridge, report_mmio_enabled, &status); 213542aeb9cSKeith Busch } 2142e28bc84SOza Pawandeep 2152e28bc84SOza Pawandeep if (status == PCI_ERS_RESULT_NEED_RESET) { 2162e28bc84SOza Pawandeep /* 2172e28bc84SOza Pawandeep * TODO: Should call platform-specific 2182e28bc84SOza Pawandeep * functions to reset slot before calling 2192e28bc84SOza Pawandeep * drivers' slot_reset callbacks? 2202e28bc84SOza Pawandeep */ 221542aeb9cSKeith Busch status = PCI_ERS_RESULT_RECOVERED; 2220791721dSSean V Kelley pci_dbg(bridge, "broadcast slot_reset message\n"); 22305e9ae19SSean V Kelley pci_walk_bridge(bridge, report_slot_reset, &status); 2242e28bc84SOza Pawandeep } 2252e28bc84SOza Pawandeep 2262e28bc84SOza Pawandeep if (status != PCI_ERS_RESULT_RECOVERED) 2272e28bc84SOza Pawandeep goto failed; 2282e28bc84SOza Pawandeep 2290791721dSSean V Kelley pci_dbg(bridge, "broadcast resume message\n"); 23005e9ae19SSean V Kelley pci_walk_bridge(bridge, report_resume, &status); 2312e28bc84SOza Pawandeep 232aa344bc8SSean V Kelley /* 2337d7cbeabSKeith Busch * If we have native control of AER, clear error status in the device 2347d7cbeabSKeith Busch * that detected the error. If the platform retained control of AER, 2357d7cbeabSKeith Busch * it is responsible for clearing this status. In that case, the 2367d7cbeabSKeith Busch * signaling device may not even be visible to the OS. 237aa344bc8SSean V Kelley */ 238aa344bc8SSean V Kelley if (host->native_aer || pcie_ports_native) { 2397d7cbeabSKeith Busch pcie_clear_device_status(dev); 2407d7cbeabSKeith Busch pci_aer_clear_nonfatal_status(dev); 241aa344bc8SSean V Kelley } 2420791721dSSean V Kelley pci_info(bridge, "device recovery successful\n"); 243e8e5ff2aSKuppuswamy Sathyanarayanan return status; 2442e28bc84SOza Pawandeep 2452e28bc84SOza Pawandeep failed: 2460791721dSSean V Kelley pci_uevent_ers(bridge, PCI_ERS_RESULT_DISCONNECT); 2472e28bc84SOza Pawandeep 2482e28bc84SOza Pawandeep /* TODO: Should kernel panic here? */ 2490791721dSSean V Kelley pci_info(bridge, "device recovery failed\n"); 250e8e5ff2aSKuppuswamy Sathyanarayanan 251e8e5ff2aSKuppuswamy Sathyanarayanan return status; 2522e28bc84SOza Pawandeep } 253