// SPDX-License-Identifier: GPL-2.0
/*
 * PCI Express Downstream Port Containment services driver
 * Author: Keith Busch <keith.busch@intel.com>
 *
 * Copyright (C) 2016 Intel Corp.
 */

#define dev_fmt(fmt) "DPC: " fmt

#include <linux/aer.h>
#include <linux/bitfield.h>
#include <linux/delay.h>
#include <linux/interrupt.h>
#include <linux/init.h>
#include <linux/pci.h>

#include "portdrv.h"
#include "../pci.h"

/*
 * Union of the ERR_FATAL and ERR_NONFATAL trigger-enable bits of the DPC
 * Control register.  dpc_enable() clears this whole mask before setting
 * PCI_EXP_DPC_CTL_EN_FATAL.
 */
#define PCI_EXP_DPC_CTL_EN_MASK	(PCI_EXP_DPC_CTL_EN_FATAL | \
				 PCI_EXP_DPC_CTL_EN_NONFATAL)

/*
 * Human-readable names for RP PIO status bits, indexed by bit position.
 * dpc_process_rp_pio_error() prints entry i when bit i is set in the
 * unmasked RP PIO status.
 *
 * NOTE(review): the NULL entries (presumably reserved bit positions) are
 * passed straight to a "%s" format if such a bit is ever set - confirm that
 * this is acceptable.
 */
static const char * const rp_pio_error_string[] = {
	"Configuration Request received UR Completion",	/* Bit Position 0 */
	"Configuration Request received CA Completion",	/* Bit Position 1 */
	"Configuration Request Completion Timeout",	/* Bit Position 2 */
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"I/O Request received UR Completion",		/* Bit Position 8 */
	"I/O Request received CA Completion",		/* Bit Position 9 */
	"I/O Request Completion Timeout",		/* Bit Position 10 */
	NULL,
	NULL,
	NULL,
	NULL,
	NULL,
	"Memory Request received UR Completion",	/* Bit Position 16 */
	"Memory Request received CA Completion",	/* Bit Position 17 */
	"Memory Request Completion Timeout",		/* Bit Position 18 */
};

/**
 * pci_save_dpc_state - save the DPC Control register
 * @dev: PCI device
 *
 * Read the 16-bit DPC Control register into the saved-state buffer
 * registered for PCI_EXT_CAP_ID_DPC.  Does nothing if @dev is not a PCIe
 * device or if no such buffer is found (dpc_probe() is what adds it).
 */
void pci_save_dpc_state(struct pci_dev *dev)
{
	struct pci_cap_saved_state *save_state;
	u16 *cap;

	if (!pci_is_pcie(dev))
		return;

	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
	if (!save_state)
		return;

	/* The buffer holds a single u16: the DPC Control value */
	cap = (u16 *)&save_state->cap.data[0];
	pci_read_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, cap);
}

/**
 * pci_restore_dpc_state - restore the DPC Control register
 * @dev: PCI device
 *
 * Write the value stored by pci_save_dpc_state() back to the DPC Control
 * register.  Does nothing if @dev is not a PCIe device or if no saved-state
 * buffer for PCI_EXT_CAP_ID_DPC is found.
 */
void pci_restore_dpc_state(struct pci_dev *dev)
{
	struct pci_cap_saved_state *save_state;
	u16 *cap;

	if (!pci_is_pcie(dev))
		return;

	save_state = pci_find_saved_ext_cap(dev, PCI_EXT_CAP_ID_DPC);
	if (!save_state)
		return;

	cap = (u16 *)&save_state->cap.data[0];
	pci_write_config_word(dev, dev->dpc_cap + PCI_EXP_DPC_CTL, *cap);
}

/*
 * pci_dpc_recovered() sleeps on this queue; dpc_reset_link() and
 * dpc_handle_surprise_removal() wake it once they are done.
 */
static DECLARE_WAIT_QUEUE_HEAD(dpc_completed_waitqueue);

#ifdef CONFIG_HOTPLUG_PCI_PCIE
/*
 * Wait condition for pci_dpc_recovered(): returns false while DPC handling
 * for @pdev still appears to be in flight, true otherwise.
 */
static bool dpc_completed(struct pci_dev *pdev)
{
	u16 status;

	/*
	 * Trigger Status still set means the Port has not left DPC yet.  The
	 * bit is ignored when PCI_POSSIBLE_ERROR() flags the value read.
	 * NOTE(review): presumably that macro detects an all-ones read from
	 * an inaccessible device - confirm.
	 */
	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_STATUS, &status);
	if ((!PCI_POSSIBLE_ERROR(status)) && (status & PCI_EXP_DPC_STATUS_TRIGGER))
		return false;

	/* dpc_reset_link() holds this flag for its whole duration */
	if (test_bit(PCI_DPC_RECOVERING, &pdev->priv_flags))
		return false;

	return true;
}

/**
 * pci_dpc_recovered - whether DPC triggered and has recovered successfully
 * @pdev: PCI device
 *
 * Return true if DPC was triggered for @pdev and has recovered successfully.
 * Wait for recovery if it hasn't completed yet.  Called from the PCIe hotplug
 * driver to recognize and ignore Link Down/Up events caused by DPC.
 */
bool pci_dpc_recovered(struct pci_dev *pdev)
{
	struct pci_host_bridge *host;

	if (!pdev->dpc_cap)
		return false;

	/*
	 * Synchronization between hotplug and DPC is not supported
	 * if DPC is owned by firmware and EDR is not enabled.
	 */
	host = pci_find_host_bridge(pdev->bus);
	if (!host->native_dpc && !IS_ENABLED(CONFIG_PCIE_EDR))
		return false;

	/*
	 * Need a timeout in case DPC never completes due to failure of
	 * dpc_wait_rp_inactive().  The spec doesn't mandate a time limit,
	 * but reports indicate that DPC completes within 4 seconds.
	 */
	wait_event_timeout(dpc_completed_waitqueue, dpc_completed(pdev),
			   msecs_to_jiffies(4000));

	/* The RECOVERED flag is consumed (cleared) by this query */
	return test_and_clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
}
#endif /* CONFIG_HOTPLUG_PCI_PCIE */

/*
 * Poll the DPC Status register until PCI_EXP_DPC_RP_BUSY clears, sleeping
 * 10 ms between reads and giving up once HZ jiffies have passed.
 *
 * Returns 0 if the bit is clear, or -EBUSY (after logging a warning) if it
 * is still set when polling stops.
 */
static int dpc_wait_rp_inactive(struct pci_dev *pdev)
{
	unsigned long timeout = jiffies + HZ;
	u16 cap = pdev->dpc_cap, status;

	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
	while (status & PCI_EXP_DPC_RP_BUSY &&
					!time_after(jiffies, timeout)) {
		msleep(10);
		pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);
	}
	/* Decide on the last value read, not on whether the deadline passed */
	if (status & PCI_EXP_DPC_RP_BUSY) {
		pci_warn(pdev, "root port still busy\n");
		return -EBUSY;
	}
	return 0;
}

/**
 * dpc_reset_link - release a Port from DPC and wait for its secondary bus
 * @pdev: PCI device whose DPC triggered
 *
 * Passed to pcie_do_recovery() by dpc_handler().  PCI_DPC_RECOVERING is set
 * in @pdev->priv_flags on entry and cleared on exit; PCI_DPC_RECOVERED is
 * set on success and cleared on failure.  Sleepers on
 * dpc_completed_waitqueue are woken before returning.
 *
 * Return: PCI_ERS_RESULT_RECOVERED on success, PCI_ERS_RESULT_DISCONNECT if
 * the root port stays busy or the wait for the secondary bus fails.
 */
pci_ers_result_t dpc_reset_link(struct pci_dev *pdev)
{
	pci_ers_result_t ret;
	u16 cap;

	set_bit(PCI_DPC_RECOVERING, &pdev->priv_flags);

	/*
	 * DPC disables the Link automatically in hardware, so it has
	 * already been reset by the time we get here.
	 */
	cap = pdev->dpc_cap;

	/*
	 * Wait until the Link is inactive, then clear DPC Trigger Status
	 * to allow the Port to leave DPC.
	 */
	if (!pcie_wait_for_link(pdev, false))
		pci_info(pdev, "Data Link Layer Link Active not cleared in 1000 msec\n");

	/* Only Ports with RP Extensions are polled for RP Busy */
	if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev)) {
		clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
		ret = PCI_ERS_RESULT_DISCONNECT;
		goto out;
	}

	pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
			      PCI_EXP_DPC_STATUS_TRIGGER);

	/* A non-zero return is treated as failure to come back */
	if (pci_bridge_wait_for_secondary_bus(pdev, "DPC")) {
		clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
		ret = PCI_ERS_RESULT_DISCONNECT;
	} else {
		set_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
		ret = PCI_ERS_RESULT_RECOVERED;
	}
out:
	/* Drop RECOVERING before waking so dpc_completed() can succeed */
	clear_bit(PCI_DPC_RECOVERING, &pdev->priv_flags);
	wake_up_all(&dpc_completed_waitqueue);
	return ret;
}

/*
 * Log the RP PIO registers of @pdev: status and mask, severity, syserror
 * and exception, each unmasked status bit by name (marking the one the
 * First Error Pointer selects), and - when dpc_rp_log_size is large enough -
 * the TLP log and the ImpSpec log.  Finally the status value that was read
 * is written back to the RP PIO Status register.
 */
static void dpc_process_rp_pio_error(struct pci_dev *pdev)
{
	u16 cap = pdev->dpc_cap, dpc_status, first_error;
	u32 status, mask, sev, syserr, exc, log;
	struct pcie_tlp_log tlp_log;
	int i;

	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, &status);
	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_MASK, &mask);
	pci_err(pdev, "rp_pio_status: %#010x, rp_pio_mask: %#010x\n",
		status, mask);

	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_SEVERITY, &sev);
	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_SYSERROR, &syserr);
	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_EXCEPTION, &exc);
	pci_err(pdev, "RP PIO severity=%#010x, syserror=%#010x, exception=%#010x\n",
		sev, syserr, exc);

	/* Get First Error Pointer */
	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &dpc_status);
	first_error = FIELD_GET(PCI_EXP_DPC_RP_PIO_FEP, dpc_status);

	/* Only bits covered by the name table are examined */
	for (i = 0; i < ARRAY_SIZE(rp_pio_error_string); i++) {
		if ((status & ~mask) & (1 << i))
			pci_err(pdev, "[%2d] %s%s\n", i, rp_pio_error_string[i],
				first_error == i ? " (First)" : "");
	}

	/* Skip the TLP log when the recorded log size cannot hold a header */
	if (pdev->dpc_rp_log_size < PCIE_STD_NUM_TLP_HEADERLOG)
		goto clear_status;
	/*
	 * NOTE(review): pdev->subordinate is dereferenced without a NULL
	 * check - confirm it is always set when this path is reached.
	 */
	pcie_read_tlp_log(pdev, cap + PCI_EXP_DPC_RP_PIO_HEADER_LOG,
			  cap + PCI_EXP_DPC_RP_PIO_TLPPREFIX_LOG,
			  dpc_tlp_log_len(pdev),
			  pdev->subordinate->flit_mode,
			  &tlp_log);
	pcie_print_tlp_log(pdev, &tlp_log, KERN_ERR, dev_fmt(""));

	/* The ImpSpec log needs one more entry than the header log */
	if (pdev->dpc_rp_log_size < PCIE_STD_NUM_TLP_HEADERLOG + 1)
		goto clear_status;
	pci_read_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_IMPSPEC_LOG, &log);
	pci_err(pdev, "RP PIO ImpSpec Log %#010x\n", log);

 clear_status:
	/* Presumably write-1-to-clear: only the bits we saw set are written */
	pci_write_config_dword(pdev, cap + PCI_EXP_DPC_RP_PIO_STATUS, status);
}

/*
 * Classify the unmasked AER uncorrectable errors currently logged on @dev.
 *
 * Returns 0 and leaves @info untouched if no unmasked uncorrectable status
 * bit is set.  Otherwise fills @info for a single device (@dev) and returns
 * 1: severity is AER_FATAL if any unmasked status bit is also set in the
 * Uncorrectable Error Severity register, else AER_NONFATAL.
 */
static int dpc_get_aer_uncorrect_severity(struct pci_dev *dev,
					  struct aer_err_info *info)
{
	int pos = dev->aer_cap;
	u32 status, mask, sev;

	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_STATUS, &status);
	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_MASK, &mask);
	status &= ~mask;
	if (!status)
		return 0;

	pci_read_config_dword(dev, pos + PCI_ERR_UNCOR_SEVER, &sev);
	status &= sev;
	if (status)
		info->severity = AER_FATAL;
	else
		info->severity = AER_NONFATAL;

	info->level = KERN_ERR;

	info->dev[0] = dev;
	info->error_dev_num = 1;
	info->ratelimit_print[0] = 1;

	return 1;
}

/**
 * dpc_process_error - log why DPC triggered on a Port
 * @pdev: PCI device whose DPC triggered
 *
 * Decode the trigger reason from the DPC Status register and log it.  For an
 * unmasked uncorrectable error the AER details are printed and the AER
 * non-fatal and fatal status are cleared; for ERR_NONFATAL/ERR_FATAL the
 * source ID is reported; for an extended reason the RP PIO details are
 * dumped when the reason is an RP PIO error and the Port has RP Extensions.
 * A reason value matching none of the cases is not logged.
 */
void dpc_process_error(struct pci_dev *pdev)
{
	u16 cap = pdev->dpc_cap, status, source, reason, ext_reason;
	struct aer_err_info info = {};

	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);

	reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN;

	switch (reason) {
	case PCI_EXP_DPC_STATUS_TRIGGER_RSN_UNCOR:
		pci_warn(pdev, "containment event, status:%#06x: unmasked uncorrectable error detected\n",
			 status);
		/* Print and clear only if AER has something to report */
		if (dpc_get_aer_uncorrect_severity(pdev, &info) &&
		    aer_get_device_error_info(&info, 0)) {
			aer_print_error(&info, 0);
			pci_aer_clear_nonfatal_status(pdev);
			pci_aer_clear_fatal_status(pdev);
		}
		break;
	case PCI_EXP_DPC_STATUS_TRIGGER_RSN_NFE:
	case PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE:
		/* The Source ID is decoded below as bus/slot/function */
		pci_read_config_word(pdev, cap + PCI_EXP_DPC_SOURCE_ID,
				     &source);
		pci_warn(pdev, "containment event, status:%#06x, %s received from %04x:%02x:%02x.%d\n",
			 status,
			 (reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_FE) ?
				"ERR_FATAL" : "ERR_NONFATAL",
			 pci_domain_nr(pdev->bus), PCI_BUS_NUM(source),
			 PCI_SLOT(source), PCI_FUNC(source));
		break;
	case PCI_EXP_DPC_STATUS_TRIGGER_RSN_IN_EXT:
		ext_reason = status & PCI_EXP_DPC_STATUS_TRIGGER_RSN_EXT;
		pci_warn(pdev, "containment event, status:%#06x: %s detected\n",
			 status,
			 (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO) ?
			 "RP PIO error" :
			 (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_SW_TRIGGER) ?
			 "software trigger" :
			 "reserved error");
		/* show RP PIO error detail information */
		if (ext_reason == PCI_EXP_DPC_STATUS_TRIGGER_RSN_RP_PIO &&
		    pdev->dpc_rp_extensions)
			dpc_process_rp_pio_error(pdev);
		break;
	}
}

/*
 * Clear error state left behind by a Surprise Down event: all RP PIO status
 * bits (only when the Port has RP Extensions), all bits of the PCI Status
 * register, and Fatal Error Detected in the Device Status register.
 */
static void pci_clear_surpdn_errors(struct pci_dev *pdev)
{
	if (pdev->dpc_rp_extensions)
		pci_write_config_dword(pdev, pdev->dpc_cap +
				       PCI_EXP_DPC_RP_PIO_STATUS, ~0);

	/*
	 * In practice, Surprise Down errors have been observed to also set
	 * error bits in the Status Register as well as the Fatal Error
	 * Detected bit in the Device Status Register.
	 */
	pci_write_config_word(pdev, PCI_STATUS, 0xffff);

	pcie_capability_write_word(pdev, PCI_EXP_DEVSTA, PCI_EXP_DEVSTA_FED);
}

/*
 * Handle a DPC trigger that dpc_is_surprise_removal() attributed to surprise
 * removal: wait for the Link to go inactive (and, with RP Extensions, for the
 * root port to stop being busy), clear the AER and related error status, and
 * write the DPC Trigger Status bit.  If either wait fails, the clearing steps
 * are skipped.  In all cases PCI_DPC_RECOVERED is cleared and sleepers on
 * dpc_completed_waitqueue are woken.
 */
static void dpc_handle_surprise_removal(struct pci_dev *pdev)
{
	if (!pcie_wait_for_link(pdev, false)) {
		pci_info(pdev, "Data Link Layer Link Active not cleared in 1000 msec\n");
		goto out;
	}

	if (pdev->dpc_rp_extensions && dpc_wait_rp_inactive(pdev))
		goto out;

	pci_aer_raw_clear_status(pdev);
	pci_clear_surpdn_errors(pdev);

	pci_write_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_STATUS,
			      PCI_EXP_DPC_STATUS_TRIGGER);

out:
	/* Never report this event as a successful DPC recovery */
	clear_bit(PCI_DPC_RECOVERED, &pdev->priv_flags);
	wake_up_all(&dpc_completed_waitqueue);
}

/*
 * Return true if @pdev is a hotplug bridge whose AER Uncorrectable Error
 * Status has the Surprise Down bit set.  Returns false if the config read
 * itself fails.
 *
 * NOTE(review): pdev->aer_cap is not checked for zero here, and only the low
 * 16 bits of the status register are read - confirm both are intended.
 */
static bool dpc_is_surprise_removal(struct pci_dev *pdev)
{
	u16 status;

	if (!pdev->is_hotplug_bridge)
		return false;

	if (pci_read_config_word(pdev, pdev->aer_cap + PCI_ERR_UNCOR_STATUS,
				 &status))
		return false;

	return status & PCI_ERR_UNC_SURPDN;
}

/*
 * Threaded half of the DPC interrupt (registered in dpc_probe()); runs after
 * dpc_irq() returns IRQ_WAKE_THREAD.  @context is the struct pci_dev of the
 * Port.  Always returns IRQ_HANDLED.
 */
static irqreturn_t dpc_handler(int irq, void *context)
{
	struct pci_dev *pdev = context;

	/*
	 * According to PCIe r6.0 sec 6.7.6, errors are an expected side effect
	 * of async removal and should be ignored by software.
	 */
	if (dpc_is_surprise_removal(pdev)) {
		dpc_handle_surprise_removal(pdev);
		return IRQ_HANDLED;
	}

	/* Hold a reference on the device across reporting and recovery */
	pci_dev_get(pdev);
	dpc_process_error(pdev);

	/* We configure DPC so it only triggers on ERR_FATAL */
	pcie_do_recovery(pdev, pci_channel_io_frozen, dpc_reset_link);

	pci_dev_put(pdev);
	return IRQ_HANDLED;
}

/*
 * Hard-IRQ half of the DPC interrupt (registered in dpc_probe() with
 * IRQF_SHARED).  @context is the struct pci_dev of the Port.
 *
 * Returns IRQ_NONE if DPC Interrupt Status is not set or the status read is
 * flagged by PCI_POSSIBLE_ERROR(); IRQ_WAKE_THREAD if DPC Trigger Status is
 * set, so that dpc_handler() runs; IRQ_HANDLED otherwise.
 */
static irqreturn_t dpc_irq(int irq, void *context)
{
	struct pci_dev *pdev = context;
	u16 cap = pdev->dpc_cap, status;

	pci_read_config_word(pdev, cap + PCI_EXP_DPC_STATUS, &status);

	if (!(status & PCI_EXP_DPC_STATUS_INTERRUPT) || PCI_POSSIBLE_ERROR(status))
		return IRQ_NONE;

	/* Same write dpc_enable() uses to clear DPC Interrupt Status */
	pci_write_config_word(pdev, cap + PCI_EXP_DPC_STATUS,
			      PCI_EXP_DPC_STATUS_INTERRUPT);
	if (status & PCI_EXP_DPC_STATUS_TRIGGER)
		return IRQ_WAKE_THREAD;
	return IRQ_HANDLED;
}

/**
 * pci_dpc_init - discover the DPC capability of a device
 * @pdev: PCI device
 *
 * Record the offset of the DPC extended capability in @pdev->dpc_cap (zero
 * if absent).  If the capability advertises RP Extensions, set
 * @pdev->dpc_rp_extensions and, unless dpc_rp_log_size is already non-zero,
 * derive the RP PIO log size from the capability register.  A size outside
 * the accepted range is reported and replaced by 0.
 */
void pci_dpc_init(struct pci_dev *pdev)
{
	u16 cap;

	pdev->dpc_cap = pci_find_ext_capability(pdev, PCI_EXT_CAP_ID_DPC);
	if (!pdev->dpc_cap)
		return;

	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap);
	if (!(cap & PCI_EXP_DPC_CAP_RP_EXT))
		return;

	pdev->dpc_rp_extensions = true;

	/* Quirks may set dpc_rp_log_size if device or firmware is buggy */
	if (!pdev->dpc_rp_log_size) {
		u16 flags;
		int ret;

		/* On read failure the log size is left at 0 */
		ret = pcie_capability_read_word(pdev, PCI_EXP_FLAGS, &flags);
		if (ret)
			return;

		pdev->dpc_rp_log_size =
				FIELD_GET(PCI_EXP_DPC_RP_PIO_LOG_SIZE, cap);
		/* With the FLIT flag set, LOG_SIZE4 supplies bits 4 and up */
		if (FIELD_GET(PCI_EXP_FLAGS_FLIT, flags))
			pdev->dpc_rp_log_size += FIELD_GET(PCI_EXP_DPC_RP_PIO_LOG_SIZE4,
							   cap) << 4;

		if (pdev->dpc_rp_log_size < PCIE_STD_NUM_TLP_HEADERLOG ||
		    pdev->dpc_rp_log_size > PCIE_STD_MAX_TLP_HEADERLOG + 1) {
			pci_err(pdev, "RP PIO log size %u is invalid\n",
				pdev->dpc_rp_log_size);
			pdev->dpc_rp_log_size = 0;
		}
	}
}

/*
 * Enable DPC on the Port behind @dev: clear any stale interrupt status, then
 * program DPC Control to trigger on ERR_FATAL only, with the DPC interrupt
 * enabled.  Other DPC Control bits are preserved.
 */
static void dpc_enable(struct pcie_device *dev)
{
	struct pci_dev *pdev = dev->port;
	int dpc = pdev->dpc_cap;
	u16 ctl;

	/*
	 * Clear DPC Interrupt Status so we don't get an interrupt for an
	 * old event when setting DPC Interrupt Enable.
	 */
	pci_write_config_word(pdev, dpc + PCI_EXP_DPC_STATUS,
			      PCI_EXP_DPC_STATUS_INTERRUPT);

	pci_read_config_word(pdev, dpc + PCI_EXP_DPC_CTL, &ctl);
	/* Drop both trigger-enable bits before selecting ERR_FATAL */
	ctl &= ~PCI_EXP_DPC_CTL_EN_MASK;
	ctl |= PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN;
	pci_write_config_word(pdev, dpc + PCI_EXP_DPC_CTL, ctl);
}

/*
 * Undo dpc_enable(): clear the ERR_FATAL trigger-enable and interrupt-enable
 * bits in DPC Control, leaving the other bits as they were.
 */
static void dpc_disable(struct pcie_device *dev)
{
	struct pci_dev *pdev = dev->port;
	int dpc = pdev->dpc_cap;
	u16 ctl;

	/* Disable DPC triggering and DPC interrupts */
	pci_read_config_word(pdev, dpc + PCI_EXP_DPC_CTL, &ctl);
	ctl &= ~(PCI_EXP_DPC_CTL_EN_FATAL | PCI_EXP_DPC_CTL_INT_EN);
	pci_write_config_word(pdev, dpc + PCI_EXP_DPC_CTL, ctl);
}

/* Render capability bit(s) @y of @x as '+' (set) or '-' (clear) for logging */
#define FLAG(x, y) (((x) & (y)) ? '+' : '-')
/*
 * Port service probe: bail out with -ENOTSUPP unless AER is native for the
 * Port or pcie_ports_dpc_native is set; otherwise request the shared
 * threaded interrupt (dpc_irq() / dpc_handler()), enable DPC, log the
 * capabilities and register a u16 save buffer for the DPC capability.
 *
 * Returns 0 on success or the error from devm_request_threaded_irq().
 */
static int dpc_probe(struct pcie_device *dev)
{
	struct pci_dev *pdev = dev->port;
	struct device *device = &dev->device;
	int status;
	u16 cap;

	if (!pcie_aer_is_native(pdev) && !pcie_ports_dpc_native)
		return -ENOTSUPP;

	status = devm_request_threaded_irq(device, dev->irq, dpc_irq,
					   dpc_handler, IRQF_SHARED,
					   "pcie-dpc", pdev);
	if (status) {
		pci_warn(pdev, "request IRQ%d failed: %d\n", dev->irq,
			 status);
		return status;
	}

	/* The capability register is read only for the log line below */
	pci_read_config_word(pdev, pdev->dpc_cap + PCI_EXP_DPC_CAP, &cap);
	dpc_enable(dev);

	pci_info(pdev, "enabled with IRQ %d\n", dev->irq);
	pci_info(pdev, "error containment capabilities: Int Msg #%d, RPExt%c PoisonedTLP%c SwTrigger%c RP PIO Log %d, DL_ActiveErr%c\n",
		 cap & PCI_EXP_DPC_IRQ, FLAG(cap, PCI_EXP_DPC_CAP_RP_EXT),
		 FLAG(cap, PCI_EXP_DPC_CAP_POISONED_TLP),
		 FLAG(cap, PCI_EXP_DPC_CAP_SW_TRIGGER), pdev->dpc_rp_log_size,
		 FLAG(cap, PCI_EXP_DPC_CAP_DL_ACTIVE));

	/* Room for the one u16 pci_save_dpc_state() stores */
	pci_add_ext_cap_save_buffer(pdev, PCI_EXT_CAP_ID_DPC, sizeof(u16));
	/* status is 0 here: the only non-zero case returned above */
	return status;
}

/* Suspend callback: turn DPC triggering and interrupts off. Always returns 0. */
static int dpc_suspend(struct pcie_device *dev)
{
	dpc_disable(dev);
	return 0;
}

/* Resume callback: re-enable DPC as in dpc_probe(). Always returns 0. */
static int dpc_resume(struct pcie_device *dev)
{
	dpc_enable(dev);
	return 0;
}

/* Remove callback: turn DPC triggering and interrupts off. */
static void dpc_remove(struct pcie_device *dev)
{
	dpc_disable(dev);
}

/* Port service driver descriptor binding the DPC service to its callbacks */
static struct pcie_port_service_driver dpcdriver = {
	.name		= "dpc",
	.port_type	= PCIE_ANY_PORT,
	.service	= PCIE_PORT_SERVICE_DPC,
	.probe		= dpc_probe,
	.suspend	= dpc_suspend,
	.resume		= dpc_resume,
	.remove		= dpc_remove,
};

/* Register the DPC port service driver; returns the registration result. */
int __init pcie_dpc_init(void)
{
	return pcie_port_service_register(&dpcdriver);
}