// SPDX-License-Identifier: GPL-2.0
/*
 * Copyright IBM Corp. 2012
 *
 * Author(s):
 *   Jan Glauber <jang@linux.vnet.ibm.com>
 */

#define KMSG_COMPONENT "zpci"
#define pr_fmt(fmt) KMSG_COMPONENT ": " fmt

#include <linux/kernel.h>
#include <linux/pci.h>
#include <asm/pci_debug.h>
#include <asm/pci_dma.h>
#include <asm/sclp.h>

#include "pci_bus.h"
#include "pci_report.h"

/* Content Code Description for PCI Function Error */
struct zpci_ccdf_err {
	u32 reserved1;
	u32 fh;			/* function handle */
	u32 fid;		/* function id */
	u32 ett	:  4;		/* expected table type */
	u32 mvn	: 12;		/* MSI vector number */
	u32 dmaas :  8;		/* DMA address space */
	u32	:  6;
	u32 q	:  1;		/* event qualifier */
	u32 rw	:  1;		/* read/write */
	u64 faddr;		/* failing address */
	u32 reserved3;
	u16 reserved4;
	u16 pec;		/* PCI event code */
} __packed;

/* Content Code Description for PCI Function Availability */
struct zpci_ccdf_avail {
	u32 reserved1;
	u32 fh;			/* function handle */
	u32 fid;		/* function id */
	u32 reserved2;
	u32 reserved3;
	u32 reserved4;
	u32 reserved5;
	u16 reserved6;
	u16 pec;		/* PCI event code */
} __packed;

static inline bool ers_result_indicates_abort(pci_ers_result_t ers_res)
{
	switch (ers_res) {
	case PCI_ERS_RESULT_CAN_RECOVER:
	case PCI_ERS_RESULT_RECOVERED:
	case PCI_ERS_RESULT_NEED_RESET:
	case PCI_ERS_RESULT_NONE:
		return false;
	default:
		return true;
	}
}

static bool is_passed_through(struct pci_dev *pdev)
{
	struct zpci_dev *zdev = to_zpci(pdev);
	bool ret;

	mutex_lock(&zdev->kzdev_lock);
	ret = !!zdev->kzdev;
	mutex_unlock(&zdev->kzdev_lock);

	return ret;
}

static bool is_driver_supported(struct pci_driver *driver)
{
	if (!driver || !driver->err_handler)
		return false;
	if (!driver->err_handler->error_detected)
		return false;
	return true;
}

static pci_ers_result_t zpci_event_notify_error_detected(struct pci_dev *pdev,
							 struct pci_driver *driver)
{
	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;

	ers_res = driver->err_handler->error_detected(pdev, pdev->error_state);
	pci_uevent_ers(pdev, ers_res);
	if (ers_result_indicates_abort(ers_res))
		pr_info("%s: Automatic recovery failed after initial reporting\n", pci_name(pdev));
	else if (ers_res == PCI_ERS_RESULT_NEED_RESET)
		pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));

	return ers_res;
}

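/* zpci_event_do_error_state_clear - Unblock the function and clear its error state
 * @pdev: PCI function currently in the error state
 * @driver: the PCI driver bound to @pdev
 *
 * Re-enables load/store access so that the driver's mmio_enabled() callback,
 * if any, can examine the device, then clears the error state to unblock DMA.
 * Failures to unblock the device are turned into PCI_ERS_RESULT_NEED_RESET so
 * that recovery falls back to a full reset.
 */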
static pci_ers_result_t zpci_event_do_error_state_clear(struct pci_dev *pdev,
							struct pci_driver *driver)
{
	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
	struct zpci_dev *zdev = to_zpci(pdev);
	int rc;

	/* The underlying device may have been disabled by the event */
	if (!zdev_enabled(zdev))
		return PCI_ERS_RESULT_NEED_RESET;

	pr_info("%s: Unblocking device access for examination\n", pci_name(pdev));
	rc = zpci_reset_load_store_blocked(zdev);
	if (rc) {
		pr_err("%s: Unblocking device access failed\n", pci_name(pdev));
		/* Let's try a full reset instead */
		return PCI_ERS_RESULT_NEED_RESET;
	}

	if (driver->err_handler->mmio_enabled)
		ers_res = driver->err_handler->mmio_enabled(pdev);
	else
		ers_res = PCI_ERS_RESULT_NONE;

	if (ers_result_indicates_abort(ers_res)) {
		pr_info("%s: Automatic recovery failed after MMIO re-enable\n",
			pci_name(pdev));
		return ers_res;
	} else if (ers_res == PCI_ERS_RESULT_NEED_RESET) {
		pr_debug("%s: Driver needs reset to recover\n", pci_name(pdev));
		return ers_res;
	}

	pr_debug("%s: Unblocking DMA\n", pci_name(pdev));
	rc = zpci_clear_error_state(zdev);
	if (!rc) {
		pdev->error_state = pci_channel_io_normal;
	} else {
		pr_err("%s: Unblocking DMA failed\n", pci_name(pdev));
		/* Let's try a full reset instead */
		return PCI_ERS_RESULT_NEED_RESET;
	}

	return ers_res;
}

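/* zpci_event_do_reset - Reset the PCI function and let the driver reinitialize it
 * @pdev: PCI function currently in the error state
 * @driver: the PCI driver bound to @pdev
 *
 * Performs a hot reset of the function and, if implemented, lets the driver's
 * slot_reset() callback vote on the outcome. If the reset request itself
 * fails, PCI_ERS_RESULT_DISCONNECT is returned.
 */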
static pci_ers_result_t zpci_event_do_reset(struct pci_dev *pdev,
					    struct pci_driver *driver)
{
	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;

	pr_info("%s: Initiating reset\n", pci_name(pdev));
	if (zpci_hot_reset_device(to_zpci(pdev))) {
		pr_err("%s: The reset request failed\n", pci_name(pdev));
		return ers_res;
	}
	pdev->error_state = pci_channel_io_normal;

	if (driver->err_handler->slot_reset)
		ers_res = driver->err_handler->slot_reset(pdev);
	else
		ers_res = PCI_ERS_RESULT_NONE;

	if (ers_result_indicates_abort(ers_res)) {
		pr_info("%s: Automatic recovery failed after slot reset\n", pci_name(pdev));
		return ers_res;
	}

	return ers_res;
}

/* zpci_event_attempt_error_recovery - Try to recover the given PCI function
 * @pdev: PCI function to recover currently in the error state
 *
 * We follow the scheme outlined in Documentation/PCI/pci-error-recovery.rst
 * with the simplification that recovery always happens per function and the
 * platform determines which functions are affected for multi-function
 * devices.
 */
static pci_ers_result_t zpci_event_attempt_error_recovery(struct pci_dev *pdev)
{
	pci_ers_result_t ers_res = PCI_ERS_RESULT_DISCONNECT;
	struct zpci_dev *zdev = to_zpci(pdev);
	char *status_str = "success";
	struct pci_driver *driver;

	/*
	 * Ensure that the PCI function is not removed concurrently, that no
	 * driver is unbound or probed, and that userspace can't access its
	 * configuration space while we perform recovery.
	 */
	pci_dev_lock(pdev);
	if (pdev->error_state == pci_channel_io_perm_failure) {
		ers_res = PCI_ERS_RESULT_DISCONNECT;
		goto out_unlock;
	}
	pdev->error_state = pci_channel_io_frozen;

	if (is_passed_through(pdev)) {
		pr_info("%s: Cannot be recovered in the host because it is a pass-through device\n",
			pci_name(pdev));
		status_str = "failed (pass-through)";
		goto out_unlock;
	}

	driver = to_pci_driver(pdev->dev.driver);
	if (!is_driver_supported(driver)) {
		if (!driver) {
			pr_info("%s: Cannot be recovered because no driver is bound to the device\n",
				pci_name(pdev));
			status_str = "failed (no driver)";
		} else {
			pr_info("%s: The %s driver bound to the device does not support error recovery\n",
				pci_name(pdev),
				driver->name);
			status_str = "failed (no driver support)";
		}
		goto out_unlock;
	}

	ers_res = zpci_event_notify_error_detected(pdev, driver);
	if (ers_result_indicates_abort(ers_res)) {
		status_str = "failed (abort on detection)";
		goto out_unlock;
	}

	if (ers_res != PCI_ERS_RESULT_NEED_RESET) {
		ers_res = zpci_event_do_error_state_clear(pdev, driver);
		if (ers_result_indicates_abort(ers_res)) {
			status_str = "failed (abort on MMIO enable)";
			goto out_unlock;
		}
	}

	if (ers_res == PCI_ERS_RESULT_NEED_RESET)
		ers_res = zpci_event_do_reset(pdev, driver);

	/*
	 * ers_res can be PCI_ERS_RESULT_NONE either because the driver
	 * decided to return it, indicating that it abstains from voting
	 * on how to recover, or because it didn't implement the callback.
	 * Both cases assume that, if there is nothing else causing a
	 * disconnect, we recovered successfully.
	 */
	if (ers_res == PCI_ERS_RESULT_NONE)
		ers_res = PCI_ERS_RESULT_RECOVERED;

	if (ers_res != PCI_ERS_RESULT_RECOVERED) {
		pci_uevent_ers(pdev, PCI_ERS_RESULT_DISCONNECT);
		pr_err("%s: Automatic recovery failed; operator intervention is required\n",
		       pci_name(pdev));
		status_str = "failed (driver can't recover)";
		goto out_unlock;
	}

	pr_info("%s: The device is ready to resume operations\n", pci_name(pdev));
	if (driver->err_handler->resume)
		driver->err_handler->resume(pdev);
	pci_uevent_ers(pdev, PCI_ERS_RESULT_RECOVERED);
out_unlock:
	pci_dev_unlock(pdev);
	zpci_report_status(zdev, "recovery", status_str);

	return ers_res;
}

/* zpci_event_io_failure - Report PCI channel failure state to driver
 * @pdev: PCI function for which to report
 * @es: PCI channel failure state to report
 */
static void zpci_event_io_failure(struct pci_dev *pdev, pci_channel_state_t es)
{
	struct pci_driver *driver;

	pci_dev_lock(pdev);
	pdev->error_state = es;
	/*
	 * While vfio-pci's error_detected callback notifies user-space, QEMU
	 * reacts to this by freezing the guest. In an s390 environment PCI
	 * errors are rarely fatal, so this is overkill. Instead, in the future
	 * we will inject the error event and let the guest recover the device
	 * itself.
	 */
	if (is_passed_through(pdev))
		goto out;
	driver = to_pci_driver(pdev->dev.driver);
	if (driver && driver->err_handler && driver->err_handler->error_detected)
		driver->err_handler->error_detected(pdev, pdev->error_state);
out:
	pci_dev_unlock(pdev);
}

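/* __zpci_event_error - Handle a PCI error event
 * @ccdf: PCI function error event data
 *
 * Looks up the function by FID and ignores events carrying a stale function
 * handle. Depending on the PCI event code the event is either ignored (FMB
 * errors), the function is marked as permanently failed, or automatic error
 * recovery is attempted.
 */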
static void __zpci_event_error(struct zpci_ccdf_err *ccdf)
{
	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
	struct pci_dev *pdev = NULL;
	pci_ers_result_t ers_res;
	u32 fh = 0;
	int rc;

	zpci_dbg(3, "err fid:%x, fh:%x, pec:%x\n",
		 ccdf->fid, ccdf->fh, ccdf->pec);
	zpci_err("error CCDF:\n");
	zpci_err_hex(ccdf, sizeof(*ccdf));

	if (zdev) {
		mutex_lock(&zdev->state_lock);
		rc = clp_refresh_fh(zdev->fid, &fh);
		if (rc)
			goto no_pdev;
		if (!fh || ccdf->fh != fh) {
			/* Ignore events with stale handles */
			zpci_dbg(3, "err fid:%x, fh:%x (stale %x)\n",
				 ccdf->fid, fh, ccdf->fh);
			goto no_pdev;
		}
		zpci_update_fh(zdev, ccdf->fh);
		if (zdev->zbus->bus)
			pdev = pci_get_slot(zdev->zbus->bus, zdev->devfn);
	}

	pr_err("%s: Event 0x%x reports an error for PCI function 0x%x\n",
	       pdev ? pci_name(pdev) : "n/a", ccdf->pec, ccdf->fid);

	if (!pdev)
		goto no_pdev;

	switch (ccdf->pec) {
	case 0x002a: /* Error event concerns FMB */
	case 0x002b:
	case 0x002c:
		break;
	case 0x0040: /* Service Action or Error Recovery Failed */
	case 0x003b:
		zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
		break;
	default: /* PCI function left in the error state, attempt to recover */
		ers_res = zpci_event_attempt_error_recovery(pdev);
		if (ers_res != PCI_ERS_RESULT_RECOVERED)
			zpci_event_io_failure(pdev, pci_channel_io_perm_failure);
		break;
	}
	pci_dev_put(pdev);
no_pdev:
	if (zdev)
		mutex_unlock(&zdev->state_lock);
	zpci_zdev_put(zdev);
}

void zpci_event_error(void *data)
{
	if (zpci_is_enabled())
		__zpci_event_error(data);
}

static void zpci_event_hard_deconfigured(struct zpci_dev *zdev, u32 fh)
{
	zpci_update_fh(zdev, fh);
	/* Give the driver a hint that the function is
	 * already unusable.
	 */
	zpci_bus_remove_device(zdev, true);
	/* Even though the device is already gone we still
	 * need to free zPCI resources as part of the disable.
	 */
	if (zdev_enabled(zdev))
		zpci_disable_device(zdev);
	zdev->state = ZPCI_FN_STATE_STANDBY;
}

static void zpci_event_reappear(struct zpci_dev *zdev)
{
	lockdep_assert_held(&zdev->state_lock);
	/*
	 * The zdev is in the reserved state. This means that it was presumed to
	 * go away but there are still undropped references. Now, the platform
	 * announced its availability again. Bring back the lingering zdev
	 * to standby. This is safe because we hold a temporary reference
	 * now so that it won't go away. Account for the re-appearance of the
	 * underlying device by incrementing the reference count.
	 */
	zdev->state = ZPCI_FN_STATE_STANDBY;
	zpci_zdev_get(zdev);
	zpci_dbg(1, "rea fid:%x, fh:%x\n", zdev->fid, zdev->fh);
}

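/* __zpci_event_availability - Handle a PCI availability event
 * @ccdf: PCI function availability event data
 *
 * Depending on the PCI event code, creates the zdev for a newly announced
 * function or transitions an existing one between the configured, standby
 * and reserved states.
 */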
static void __zpci_event_availability(struct zpci_ccdf_avail *ccdf)
{
	struct zpci_dev *zdev = get_zdev_by_fid(ccdf->fid);
	bool existing_zdev = !!zdev;
	enum zpci_state state;

	zpci_dbg(3, "avl fid:%x, fh:%x, pec:%x\n",
		 ccdf->fid, ccdf->fh, ccdf->pec);

	if (existing_zdev)
		mutex_lock(&zdev->state_lock);

	switch (ccdf->pec) {
	case 0x0301: /* Reserved|Standby -> Configured */
		if (!zdev) {
			zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_CONFIGURED);
			if (IS_ERR(zdev))
				break;
			if (zpci_add_device(zdev)) {
				kfree(zdev);
				break;
			}
		} else {
			if (zdev->state == ZPCI_FN_STATE_RESERVED)
				zpci_event_reappear(zdev);
			/* the configuration request may be stale */
			else if (zdev->state != ZPCI_FN_STATE_STANDBY)
				break;
			zdev->state = ZPCI_FN_STATE_CONFIGURED;
		}
		zpci_scan_configured_device(zdev, ccdf->fh);
		break;
	case 0x0302: /* Reserved -> Standby */
		if (!zdev) {
			zdev = zpci_create_device(ccdf->fid, ccdf->fh, ZPCI_FN_STATE_STANDBY);
			if (IS_ERR(zdev))
				break;
			if (zpci_add_device(zdev)) {
				kfree(zdev);
				break;
			}
		} else {
			if (zdev->state == ZPCI_FN_STATE_RESERVED)
				zpci_event_reappear(zdev);
			zpci_update_fh(zdev, ccdf->fh);
		}
		break;
	case 0x0303: /* Deconfiguration requested */
		if (zdev) {
			/* The event may have been queued before we configured
			 * the device.
			 */
			if (zdev->state != ZPCI_FN_STATE_CONFIGURED)
				break;
			zpci_update_fh(zdev, ccdf->fh);
			zpci_deconfigure_device(zdev);
		}
		break;
	case 0x0304: /* Configured -> Standby|Reserved */
		if (zdev) {
			/* The event may have been queued before we configured
			 * the device.
			 */
			if (zdev->state == ZPCI_FN_STATE_CONFIGURED)
				zpci_event_hard_deconfigured(zdev, ccdf->fh);
			/* The 0x0304 event may immediately reserve the device */
			if (!clp_get_state(zdev->fid, &state) &&
			    state == ZPCI_FN_STATE_RESERVED) {
				zpci_device_reserved(zdev);
			}
		}
		break;
	case 0x0306: /* 0x308 or 0x302 for multiple devices */
		zpci_remove_reserved_devices();
		zpci_scan_devices();
		break;
	case 0x0308: /* Standby -> Reserved */
		if (!zdev)
			break;
		zpci_device_reserved(zdev);
		break;
	default:
		break;
	}
	if (existing_zdev) {
		mutex_unlock(&zdev->state_lock);
		zpci_zdev_put(zdev);
	}
}

void zpci_event_availability(void *data)
{
	if (zpci_is_enabled())
		__zpci_event_availability(data);
}