// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
#include <linux/units.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/device.h>
#include <linux/delay.h>
#include <linux/pci.h>
#include <linux/pci-doe.h>
#include <linux/aer.h>
#include <cxlpci.h>
#include <cxlmem.h>
#include <cxl.h>
#include "core.h"
#include "trace.h"

/**
 * DOC: cxl core pci
 *
 * Compute Express Link protocols are layered on top of PCIe. CXL core provides
 * a set of helpers for CXL interactions which occur via PCIe.
 */

static unsigned short media_ready_timeout = 60;
module_param(media_ready_timeout, ushort, 0644);
MODULE_PARM_DESC(media_ready_timeout, "seconds to wait for media ready");

static int pci_get_port_num(struct pci_dev *pdev)
{
	u32 lnkcap;
	int type;

	type = pci_pcie_type(pdev);
	if (type != PCI_EXP_TYPE_DOWNSTREAM && type != PCI_EXP_TYPE_ROOT_PORT)
		return -EINVAL;

	if (pci_read_config_dword(pdev, pci_pcie_cap(pdev) + PCI_EXP_LNKCAP,
				  &lnkcap))
		return -ENXIO;

	return FIELD_GET(PCI_EXP_LNKCAP_PN, lnkcap);
}

/**
 * __devm_cxl_add_dport_by_dev - allocate a dport by dport device
 * @port: cxl_port that hosts the dport
 * @dport_dev: 'struct device' of the dport
 *
 * Returns the allocated dport on success or ERR_PTR() of -errno on error
 */
struct cxl_dport *__devm_cxl_add_dport_by_dev(struct cxl_port *port,
					      struct device *dport_dev)
{
	struct cxl_register_map map;
	struct pci_dev *pdev;
	int port_num, rc;

	if (!dev_is_pci(dport_dev))
		return ERR_PTR(-EINVAL);

	pdev = to_pci_dev(dport_dev);
	port_num = pci_get_port_num(pdev);
	if (port_num < 0)
		return ERR_PTR(port_num);

	rc = cxl_find_regblock(pdev, CXL_REGLOC_RBI_COMPONENT, &map);
	if (rc)
		return ERR_PTR(rc);

	device_lock_assert(&port->dev);
	return devm_cxl_add_dport(port, dport_dev, port_num, map.resource);
}
EXPORT_SYMBOL_NS_GPL(__devm_cxl_add_dport_by_dev, "CXL");
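
/*
 * Illustrative usage sketch (hypothetical caller, not taken from an
 * in-tree user): a port driver that already holds device_lock(&port->dev)
 * and has resolved the PCIe device backing a Root Port or Downstream
 * Switch Port could register it as a dport with:
 *
 *	struct cxl_dport *dport;
 *
 *	dport = __devm_cxl_add_dport_by_dev(port, &pdev->dev);
 *	if (IS_ERR(dport))
 *		return PTR_ERR(dport);
 */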

static int cxl_dvsec_mem_range_valid(struct cxl_dev_state *cxlds, int id)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int d = cxlds->cxl_dvsec;
	bool valid = false;
	int rc, i;
	u32 temp;

	if (id > CXL_DVSEC_RANGE_MAX)
		return -EINVAL;

	/* Check MEM INFO VALID bit first, give up after 1s */
	i = 1;
	do {
		rc = pci_read_config_dword(pdev,
					   d + CXL_DVSEC_RANGE_SIZE_LOW(id),
					   &temp);
		if (rc)
			return rc;

		valid = FIELD_GET(CXL_DVSEC_MEM_INFO_VALID, temp);
		if (valid)
			break;
		msleep(1000);
	} while (i--);

	if (!valid) {
		dev_err(&pdev->dev,
			"Timeout awaiting memory range %d valid after 1s.\n",
			id);
		return -ETIMEDOUT;
	}

	return 0;
}

static int cxl_dvsec_mem_range_active(struct cxl_dev_state *cxlds, int id)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int d = cxlds->cxl_dvsec;
	bool active = false;
	int rc, i;
	u32 temp;

	if (id > CXL_DVSEC_RANGE_MAX)
		return -EINVAL;

	/* Check MEM ACTIVE bit, up to 60s timeout by default */
	for (i = media_ready_timeout; i; i--) {
		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(id), &temp);
		if (rc)
			return rc;

		active = FIELD_GET(CXL_DVSEC_MEM_ACTIVE, temp);
		if (active)
			break;
		msleep(1000);
	}

	if (!active) {
		dev_err(&pdev->dev,
			"timeout awaiting memory active after %d seconds\n",
			media_ready_timeout);
		return -ETIMEDOUT;
	}

	return 0;
}

/*
 * Wait up to @media_ready_timeout for the device to report memory
 * active.
 */
int cxl_await_media_ready(struct cxl_dev_state *cxlds)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int d = cxlds->cxl_dvsec;
	int rc, i, hdm_count;
	u64 md_status;
	u16 cap;

	rc = pci_read_config_word(pdev,
				  d + CXL_DVSEC_CAP_OFFSET, &cap);
	if (rc)
		return rc;

	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
	for (i = 0; i < hdm_count; i++) {
		rc = cxl_dvsec_mem_range_valid(cxlds, i);
		if (rc)
			return rc;
	}

	for (i = 0; i < hdm_count; i++) {
		rc = cxl_dvsec_mem_range_active(cxlds, i);
		if (rc)
			return rc;
	}

	md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
	if (!CXLMDEV_READY(md_status))
		return -EIO;

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_await_media_ready, "CXL");
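
/*
 * Return convention for cxl_set_mem_enable(), as implemented below: 1 if
 * the Mem_Enable control already matches @val (nothing to change), 0 after
 * a successful update, or a negative value when a config space access
 * fails (mirroring the rc < 0 checks).
 */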

static int cxl_set_mem_enable(struct cxl_dev_state *cxlds, u16 val)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int d = cxlds->cxl_dvsec;
	u16 ctrl;
	int rc;

	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
	if (rc < 0)
		return rc;

	if ((ctrl & CXL_DVSEC_MEM_ENABLE) == val)
		return 1;
	ctrl &= ~CXL_DVSEC_MEM_ENABLE;
	ctrl |= val;

	rc = pci_write_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, ctrl);
	if (rc < 0)
		return rc;

	return 0;
}

static void clear_mem_enable(void *cxlds)
{
	cxl_set_mem_enable(cxlds, 0);
}

static int devm_cxl_enable_mem(struct device *host, struct cxl_dev_state *cxlds)
{
	int rc;

	rc = cxl_set_mem_enable(cxlds, CXL_DVSEC_MEM_ENABLE);
	if (rc < 0)
		return rc;
	if (rc > 0)
		return 0;
	return devm_add_action_or_reset(host, clear_mem_enable, cxlds);
}

/* require dvsec ranges to be covered by a locked platform window */
static int dvsec_range_allowed(struct device *dev, const void *arg)
{
	const struct range *dev_range = arg;
	struct cxl_decoder *cxld;

	if (!is_root_decoder(dev))
		return 0;

	cxld = to_cxl_decoder(dev);

	if (!(cxld->flags & CXL_DECODER_F_RAM))
		return 0;

	return range_contains(&cxld->hpa_range, dev_range);
}

static void disable_hdm(void *_cxlhdm)
{
	u32 global_ctrl;
	struct cxl_hdm *cxlhdm = _cxlhdm;
	void __iomem *hdm = cxlhdm->regs.hdm_decoder;

	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
	writel(global_ctrl & ~CXL_HDM_DECODER_ENABLE,
	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);
}

static int devm_cxl_enable_hdm(struct device *host, struct cxl_hdm *cxlhdm)
{
	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
	u32 global_ctrl;

	global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);
	writel(global_ctrl | CXL_HDM_DECODER_ENABLE,
	       hdm + CXL_HDM_DECODER_CTRL_OFFSET);

	return devm_add_action_or_reset(host, disable_hdm, cxlhdm);
}
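
/*
 * Summary of the decode flow below: cache the DVSEC CXL Range register
 * state into @info. Returns -ENXIO when the device has no CXL DVSEC or is
 * not MEM capable, and returns 0 without populating any ranges when
 * CXL.mem is not (yet) enabled, in which case range decode is left to the
 * HDM Decoder Capability path.
 */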

int cxl_dvsec_rr_decode(struct cxl_dev_state *cxlds,
			struct cxl_endpoint_dvsec_info *info)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	struct device *dev = cxlds->dev;
	int hdm_count, rc, i, ranges = 0;
	int d = cxlds->cxl_dvsec;
	u16 cap, ctrl;

	if (!d) {
		dev_dbg(dev, "No DVSEC Capability\n");
		return -ENXIO;
	}

	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CAP_OFFSET, &cap);
	if (rc)
		return rc;

	if (!(cap & CXL_DVSEC_MEM_CAPABLE)) {
		dev_dbg(dev, "Not MEM Capable\n");
		return -ENXIO;
	}

	/*
	 * It is not allowed by spec for MEM.capable to be set and have 0 legacy
	 * HDM decoders (values > 2 are also undefined as of CXL 2.0). As this
	 * driver is for a spec defined class code which must be CXL.mem
	 * capable, there is no point in continuing to enable CXL.mem.
	 */
	hdm_count = FIELD_GET(CXL_DVSEC_HDM_COUNT_MASK, cap);
	if (!hdm_count || hdm_count > 2)
		return -EINVAL;

	/*
	 * The current DVSEC values are moot if the memory capability is
	 * disabled, and they will remain moot after the HDM Decoder
	 * capability is enabled.
	 */
	rc = pci_read_config_word(pdev, d + CXL_DVSEC_CTRL_OFFSET, &ctrl);
	if (rc)
		return rc;

	info->mem_enabled = FIELD_GET(CXL_DVSEC_MEM_ENABLE, ctrl);
	if (!info->mem_enabled)
		return 0;

	for (i = 0; i < hdm_count; i++) {
		u64 base, size;
		u32 temp;

		rc = cxl_dvsec_mem_range_valid(cxlds, i);
		if (rc)
			return rc;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_SIZE_HIGH(i), &temp);
		if (rc)
			return rc;

		size = (u64)temp << 32;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_SIZE_LOW(i), &temp);
		if (rc)
			return rc;

		size |= temp & CXL_DVSEC_MEM_SIZE_LOW_MASK;
		if (!size)
			continue;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_BASE_HIGH(i), &temp);
		if (rc)
			return rc;

		base = (u64)temp << 32;

		rc = pci_read_config_dword(
			pdev, d + CXL_DVSEC_RANGE_BASE_LOW(i), &temp);
		if (rc)
			return rc;

		base |= temp & CXL_DVSEC_MEM_BASE_LOW_MASK;

		info->dvsec_range[ranges++] = (struct range) {
			.start = base,
			.end = base + size - 1
		};
	}

	info->ranges = ranges;

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_dvsec_rr_decode, "CXL");

/**
 * cxl_hdm_decode_init() - Setup HDM decoding for the endpoint
 * @cxlds: Device state
 * @cxlhdm: Mapped HDM decoder Capability
 * @info: Cached DVSEC range registers info
 *
 * Try to enable the endpoint's HDM Decoder Capability
 */
int cxl_hdm_decode_init(struct cxl_dev_state *cxlds, struct cxl_hdm *cxlhdm,
			struct cxl_endpoint_dvsec_info *info)
{
	void __iomem *hdm = cxlhdm->regs.hdm_decoder;
	struct cxl_port *port = cxlhdm->port;
	struct device *dev = cxlds->dev;
	struct cxl_port *root;
	int i, rc, allowed;
	u32 global_ctrl = 0;

	if (hdm)
		global_ctrl = readl(hdm + CXL_HDM_DECODER_CTRL_OFFSET);

	/*
	 * If the HDM Decoder Capability is already enabled then assume
	 * that some other agent like platform firmware set it up.
	 */
	if (global_ctrl & CXL_HDM_DECODER_ENABLE || (!hdm && info->mem_enabled))
		return devm_cxl_enable_mem(&port->dev, cxlds);

	/*
	 * If the HDM Decoder Capability does not exist and DVSEC was
	 * not setup, the DVSEC based emulation cannot be used.
	 */
	if (!hdm)
		return -ENODEV;

	/* The HDM Decoder Capability exists but is globally disabled. */

	/*
	 * If the DVSEC CXL Range registers are not enabled, just
	 * enable and use the HDM Decoder Capability registers.
	 */
	if (!info->mem_enabled) {
		rc = devm_cxl_enable_hdm(&port->dev, cxlhdm);
		if (rc)
			return rc;

		return devm_cxl_enable_mem(&port->dev, cxlds);
	}

	/*
	 * Per CXL 2.0 Section 8.1.3.8.3 and 8.1.3.8.4 (DVSEC CXL Range 1 Base
	 * [High,Low]), when HDM operation is enabled the range register values
	 * are ignored by the device, but the spec also recommends matching the
	 * DVSEC Range 1,2 to HDM Decoder Range 0,1. So, non-zero info->ranges
	 * are expected even though Linux does not require or maintain that
	 * match. Check if at least one DVSEC range is enabled and allowed by
	 * the platform. That is, the DVSEC range must be covered by a locked
	 * platform window (CFMWS). Fail otherwise as the endpoint's decoders
	 * cannot be used.
	 */

	root = to_cxl_port(port->dev.parent);
	while (!is_cxl_root(root) && is_cxl_port(root->dev.parent))
		root = to_cxl_port(root->dev.parent);
	if (!is_cxl_root(root)) {
		dev_err(dev, "Failed to acquire root port for HDM enable\n");
		return -ENODEV;
	}

	for (i = 0, allowed = 0; i < info->ranges; i++) {
		struct device *cxld_dev;

		cxld_dev = device_find_child(&root->dev, &info->dvsec_range[i],
					     dvsec_range_allowed);
		if (!cxld_dev) {
			dev_dbg(dev, "DVSEC Range%d denied by platform\n", i);
			continue;
		}
		dev_dbg(dev, "DVSEC Range%d allowed by platform\n", i);
		put_device(cxld_dev);
		allowed++;
	}

	if (!allowed) {
		dev_err(dev, "Range register decodes outside platform defined CXL ranges.\n");
		return -ENXIO;
	}

	return 0;
}
EXPORT_SYMBOL_NS_GPL(cxl_hdm_decode_init, "CXL");
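
/*
 * CDAT retrieval uses the PCIe Data Object Exchange (DOE) Table Access
 * protocol. Each request is a single DW: bits[7:0] carry the request code
 * (0 == read entry), bits[15:8] the table type (0 == CDAT), and
 * bits[31:16] the handle of the entry to read. The response header echoes
 * the handle of the next entry, with 0xffff indicating the final entry.
 */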

#define CXL_DOE_TABLE_ACCESS_REQ_CODE		0x000000ff
#define CXL_DOE_TABLE_ACCESS_REQ_CODE_READ	0
#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE		0x0000ff00
#define CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA	0
#define CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE	0xffff0000
#define CXL_DOE_TABLE_ACCESS_LAST_ENTRY		0xffff
#define CXL_DOE_PROTOCOL_TABLE_ACCESS		2

#define CDAT_DOE_REQ(entry_handle) cpu_to_le32				\
	(FIELD_PREP(CXL_DOE_TABLE_ACCESS_REQ_CODE,			\
		    CXL_DOE_TABLE_ACCESS_REQ_CODE_READ) |		\
	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_TABLE_TYPE,			\
		    CXL_DOE_TABLE_ACCESS_TABLE_TYPE_CDATA) |		\
	 FIELD_PREP(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE, (entry_handle)))

static int cxl_cdat_get_length(struct device *dev,
			       struct pci_doe_mb *doe_mb,
			       size_t *length)
{
	__le32 request = CDAT_DOE_REQ(0);
	__le32 response[2];
	int rc;

	rc = pci_doe(doe_mb, PCI_VENDOR_ID_CXL,
		     CXL_DOE_PROTOCOL_TABLE_ACCESS,
		     &request, sizeof(request),
		     &response, sizeof(response));
	if (rc < 0) {
		dev_err(dev, "DOE failed: %d", rc);
		return rc;
	}
	if (rc < sizeof(response))
		return -EIO;

	*length = le32_to_cpu(response[1]);
	dev_dbg(dev, "CDAT length %zu\n", *length);

	return 0;
}

static int cxl_cdat_read_table(struct device *dev,
			       struct pci_doe_mb *doe_mb,
			       struct cdat_doe_rsp *rsp, size_t *length)
{
	size_t received, remaining = *length;
	unsigned int entry_handle = 0;
	union cdat_data *data;
	__le32 saved_dw = 0;

	do {
		__le32 request = CDAT_DOE_REQ(entry_handle);
		int rc;

		rc = pci_doe(doe_mb, PCI_VENDOR_ID_CXL,
			     CXL_DOE_PROTOCOL_TABLE_ACCESS,
			     &request, sizeof(request),
			     rsp, sizeof(*rsp) + remaining);
		if (rc < 0) {
			dev_err(dev, "DOE failed: %d", rc);
			return rc;
		}

		if (rc < sizeof(*rsp))
			return -EIO;

		data = (union cdat_data *)rsp->data;
		received = rc - sizeof(*rsp);

		if (entry_handle == 0) {
			if (received != sizeof(data->header))
				return -EIO;
		} else {
			if (received < sizeof(data->entry) ||
			    received != le16_to_cpu(data->entry.length))
				return -EIO;
		}

		/* Get the CXL table access header entry handle */
		entry_handle = FIELD_GET(CXL_DOE_TABLE_ACCESS_ENTRY_HANDLE,
					 le32_to_cpu(rsp->doe_header));

		/*
		 * Table Access Response Header overwrote the last DW of
		 * previous entry, so restore that DW
		 */
		rsp->doe_header = saved_dw;
		remaining -= received;
		rsp = (void *)rsp + received;
		saved_dw = rsp->doe_header;
	} while (entry_handle != CXL_DOE_TABLE_ACCESS_LAST_ENTRY);

	/* Length in CDAT header may exceed concatenation of CDAT entries */
	*length -= remaining;

	return 0;
}
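
/*
 * As with ACPI tables, a well-formed CDAT is expected to sum to zero over
 * its full length (the header checksum byte compensates for the rest), so
 * any non-zero result from cdat_checksum() indicates a corrupt or
 * truncated table.
 */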

static unsigned char cdat_checksum(void *buf, size_t size)
{
	unsigned char sum, *data = buf;
	size_t i;

	for (sum = 0, i = 0; i < size; i++)
		sum += data[i];
	return sum;
}

/**
 * read_cdat_data - Read the CDAT data on this port
 * @port: Port to read data from
 *
 * This call will sleep waiting for responses from the DOE mailbox.
 */
void read_cdat_data(struct cxl_port *port)
{
	struct device *uport = port->uport_dev;
	struct device *dev = &port->dev;
	struct pci_doe_mb *doe_mb;
	struct pci_dev *pdev = NULL;
	struct cxl_memdev *cxlmd;
	struct cdat_doe_rsp *buf;
	size_t table_length, length;
	int rc;

	if (is_cxl_memdev(uport)) {
		struct device *host;

		cxlmd = to_cxl_memdev(uport);
		host = cxlmd->dev.parent;
		if (dev_is_pci(host))
			pdev = to_pci_dev(host);
	} else if (dev_is_pci(uport)) {
		pdev = to_pci_dev(uport);
	}

	if (!pdev)
		return;

	doe_mb = pci_find_doe_mailbox(pdev, PCI_VENDOR_ID_CXL,
				      CXL_DOE_PROTOCOL_TABLE_ACCESS);
	if (!doe_mb) {
		dev_dbg(dev, "No CDAT mailbox\n");
		return;
	}

	port->cdat_available = true;

	if (cxl_cdat_get_length(dev, doe_mb, &length)) {
		dev_dbg(dev, "No CDAT length\n");
		return;
	}

	/*
	 * The beginning of the CDAT buffer needs space for an additional 4
	 * bytes for the DOE header. Table data starts afterwards.
	 */
	buf = devm_kzalloc(dev, sizeof(*buf) + length, GFP_KERNEL);
	if (!buf)
		goto err;

	table_length = length;

	rc = cxl_cdat_read_table(dev, doe_mb, buf, &length);
	if (rc)
		goto err;

	if (table_length != length)
		dev_warn(dev, "Malformed CDAT table length (%zu:%zu), discarding trailing data\n",
			 table_length, length);

	if (cdat_checksum(buf->data, length))
		goto err;

	port->cdat.table = buf->data;
	port->cdat.length = length;

	return;
err:
	/* Don't leave table data allocated on error */
	devm_kfree(dev, buf);
	dev_err(dev, "Failed to read/validate CDAT.\n");
}
EXPORT_SYMBOL_NS_GPL(read_cdat_data, "CXL");

static void __cxl_handle_cor_ras(struct cxl_dev_state *cxlds,
				 void __iomem *ras_base)
{
	void __iomem *addr;
	u32 status;

	if (!ras_base)
		return;

	addr = ras_base + CXL_RAS_CORRECTABLE_STATUS_OFFSET;
	status = readl(addr);
	if (status & CXL_RAS_CORRECTABLE_STATUS_MASK) {
		writel(status & CXL_RAS_CORRECTABLE_STATUS_MASK, addr);
		trace_cxl_aer_correctable_error(cxlds->cxlmd, status);
	}
}

static void cxl_handle_endpoint_cor_ras(struct cxl_dev_state *cxlds)
{
	return __cxl_handle_cor_ras(cxlds, cxlds->regs.ras);
}

/* CXL spec rev3.0 8.2.4.16.1 */
static void header_log_copy(void __iomem *ras_base, u32 *log)
{
	void __iomem *addr;
	u32 *log_addr;
	int i, log_u32_size = CXL_HEADERLOG_SIZE / sizeof(u32);

	addr = ras_base + CXL_RAS_HEADER_LOG_OFFSET;
	log_addr = log;

	for (i = 0; i < log_u32_size; i++) {
		*log_addr = readl(addr);
		log_addr++;
		addr += sizeof(u32);
	}
}

/*
 * Log the state of the RAS status registers and prepare them to log the
 * next error status. Return 1 if reset needed.
 */
static bool __cxl_handle_ras(struct cxl_dev_state *cxlds,
			     void __iomem *ras_base)
{
	u32 hl[CXL_HEADERLOG_SIZE_U32];
	void __iomem *addr;
	u32 status;
	u32 fe;

	if (!ras_base)
		return false;

	addr = ras_base + CXL_RAS_UNCORRECTABLE_STATUS_OFFSET;
	status = readl(addr);
	if (!(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK))
		return false;

	/* If multiple errors, log header points to first error from ctrl reg */
	if (hweight32(status) > 1) {
		void __iomem *rcc_addr =
			ras_base + CXL_RAS_CAP_CONTROL_OFFSET;

		fe = BIT(FIELD_GET(CXL_RAS_CAP_CONTROL_FE_MASK,
				   readl(rcc_addr)));
	} else {
		fe = status;
	}

	header_log_copy(ras_base, hl);
	trace_cxl_aer_uncorrectable_error(cxlds->cxlmd, status, fe, hl);
	writel(status & CXL_RAS_UNCORRECTABLE_STATUS_MASK, addr);

	return true;
}

static bool cxl_handle_endpoint_ras(struct cxl_dev_state *cxlds)
{
	return __cxl_handle_ras(cxlds, cxlds->regs.ras);
}
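
/*
 * Everything inside the CONFIG_PCIEAER_CXL block below maps and services
 * RAS/AER state for the dport side of the link (a Root Port or an RCH
 * Downstream Port) and therefore depends on the PCIe AER core. When that
 * support is not built in, the empty cxl_handle_rdport_errors() stub keeps
 * the error-detected callbacks further down compilable.
 */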

#ifdef CONFIG_PCIEAER_CXL

static void cxl_dport_map_rch_aer(struct cxl_dport *dport)
{
	resource_size_t aer_phys;
	struct device *host;
	u16 aer_cap;

	aer_cap = cxl_rcrb_to_aer(dport->dport_dev, dport->rcrb.base);
	if (aer_cap) {
		host = dport->reg_map.host;
		aer_phys = aer_cap + dport->rcrb.base;
		dport->regs.dport_aer = devm_cxl_iomap_block(host, aer_phys,
				sizeof(struct aer_capability_regs));
	}
}

static void cxl_dport_map_ras(struct cxl_dport *dport)
{
	struct cxl_register_map *map = &dport->reg_map;
	struct device *dev = dport->dport_dev;

	if (!map->component_map.ras.valid)
		dev_dbg(dev, "RAS registers not found\n");
	else if (cxl_map_component_regs(map, &dport->regs.component,
					BIT(CXL_CM_CAP_CAP_ID_RAS)))
		dev_dbg(dev, "Failed to map RAS capability.\n");
}

static void cxl_disable_rch_root_ints(struct cxl_dport *dport)
{
	void __iomem *aer_base = dport->regs.dport_aer;
	u32 aer_cmd_mask, aer_cmd;

	if (!aer_base)
		return;

	/*
	 * Disable RCH root port command interrupts.
	 * CXL 3.0 12.2.1.1 - RCH Downstream Port-detected Errors
	 *
	 * This sequence may not be necessary. CXL spec states disabling
	 * the root cmd register's interrupts is required. But, PCI spec
	 * shows these are disabled by default on reset.
	 */
	aer_cmd_mask = (PCI_ERR_ROOT_CMD_COR_EN |
			PCI_ERR_ROOT_CMD_NONFATAL_EN |
			PCI_ERR_ROOT_CMD_FATAL_EN);
	aer_cmd = readl(aer_base + PCI_ERR_ROOT_COMMAND);
	aer_cmd &= ~aer_cmd_mask;
	writel(aer_cmd, aer_base + PCI_ERR_ROOT_COMMAND);
}

/**
 * cxl_dport_init_ras_reporting - Setup CXL RAS report on this dport
 * @dport: the cxl_dport that needs to be initialized
 * @host: host device for devm operations
 */
void cxl_dport_init_ras_reporting(struct cxl_dport *dport, struct device *host)
{
	dport->reg_map.host = host;
	cxl_dport_map_ras(dport);

	if (dport->rch) {
		struct pci_host_bridge *host_bridge = to_pci_host_bridge(dport->dport_dev);

		if (!host_bridge->native_aer)
			return;

		cxl_dport_map_rch_aer(dport);
		cxl_disable_rch_root_ints(dport);
	}
}
EXPORT_SYMBOL_NS_GPL(cxl_dport_init_ras_reporting, "CXL");

static void cxl_handle_rdport_cor_ras(struct cxl_dev_state *cxlds,
				      struct cxl_dport *dport)
{
	return __cxl_handle_cor_ras(cxlds, dport->regs.ras);
}

static bool cxl_handle_rdport_ras(struct cxl_dev_state *cxlds,
				  struct cxl_dport *dport)
{
	return __cxl_handle_ras(cxlds, dport->regs.ras);
}

/*
 * Copy the AER capability registers using 32 bit read accesses.
 * This is necessary because RCRB AER capability is MMIO mapped. Clear the
 * status after copying.
 *
 * @aer_base: base address of AER capability block in RCRB
 * @aer_regs: destination for copying AER capability
 */
static bool cxl_rch_get_aer_info(void __iomem *aer_base,
				 struct aer_capability_regs *aer_regs)
{
	int read_cnt = sizeof(struct aer_capability_regs) / sizeof(u32);
	u32 *aer_regs_buf = (u32 *)aer_regs;
	int n;

	if (!aer_base)
		return false;

	/* Use readl() to guarantee 32-bit accesses */
	for (n = 0; n < read_cnt; n++)
		aer_regs_buf[n] = readl(aer_base + n * sizeof(u32));

	writel(aer_regs->uncor_status, aer_base + PCI_ERR_UNCOR_STATUS);
	writel(aer_regs->cor_status, aer_base + PCI_ERR_COR_STATUS);

	return true;
}

/* Get AER severity. Return false if there is no error. */
static bool cxl_rch_get_aer_severity(struct aer_capability_regs *aer_regs,
				     int *severity)
{
	if (aer_regs->uncor_status & ~aer_regs->uncor_mask) {
		if (aer_regs->uncor_status & PCI_ERR_ROOT_FATAL_RCV)
			*severity = AER_FATAL;
		else
			*severity = AER_NONFATAL;
		return true;
	}

	if (aer_regs->cor_status & ~aer_regs->cor_mask) {
		*severity = AER_CORRECTABLE;
		return true;
	}

	return false;
}

static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	struct aer_capability_regs aer_regs;
	struct cxl_dport *dport;
	int severity;

	struct cxl_port *port __free(put_cxl_port) =
		cxl_pci_find_port(pdev, &dport);
	if (!port)
		return;

	if (!cxl_rch_get_aer_info(dport->regs.dport_aer, &aer_regs))
		return;

	if (!cxl_rch_get_aer_severity(&aer_regs, &severity))
		return;

	pci_print_aer(pdev, severity, &aer_regs);

	if (severity == AER_CORRECTABLE)
		cxl_handle_rdport_cor_ras(cxlds, dport);
	else
		cxl_handle_rdport_ras(cxlds, dport);
}

#else
static void cxl_handle_rdport_errors(struct cxl_dev_state *cxlds) { }
#endif

void cxl_cor_error_detected(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct device *dev = &cxlds->cxlmd->dev;

	scoped_guard(device, dev) {
		if (!dev->driver) {
			dev_warn(&pdev->dev,
				 "%s: memdev disabled, abort error handling\n",
				 dev_name(dev));
			return;
		}

		if (cxlds->rcd)
			cxl_handle_rdport_errors(cxlds);

		cxl_handle_endpoint_cor_ras(cxlds);
	}
}
EXPORT_SYMBOL_NS_GPL(cxl_cor_error_detected, "CXL");

pci_ers_result_t cxl_error_detected(struct pci_dev *pdev,
				    pci_channel_state_t state)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &cxlmd->dev;
	bool ue;

	scoped_guard(device, dev) {
		if (!dev->driver) {
			dev_warn(&pdev->dev,
				 "%s: memdev disabled, abort error handling\n",
				 dev_name(dev));
			return PCI_ERS_RESULT_DISCONNECT;
		}

		if (cxlds->rcd)
			cxl_handle_rdport_errors(cxlds);
		/*
		 * A frozen channel indicates an impending reset which is
		 * fatal to CXL.mem operation, and will likely crash the
		 * system. On the off chance the situation is recoverable,
		 * dump the status of the RAS capability registers and
		 * bounce the active state of the memdev.
		 */
		ue = cxl_handle_endpoint_ras(cxlds);
	}

	switch (state) {
	case pci_channel_io_normal:
		if (ue) {
			device_release_driver(dev);
			return PCI_ERS_RESULT_NEED_RESET;
		}
		return PCI_ERS_RESULT_CAN_RECOVER;
	case pci_channel_io_frozen:
		dev_warn(&pdev->dev,
			 "%s: frozen state error detected, disable CXL.mem\n",
			 dev_name(dev));
		device_release_driver(dev);
		return PCI_ERS_RESULT_NEED_RESET;
	case pci_channel_io_perm_failure:
		dev_warn(&pdev->dev,
			 "failure state error detected, request disconnect\n");
		return PCI_ERS_RESULT_DISCONNECT;
	}
	return PCI_ERS_RESULT_NEED_RESET;
}
EXPORT_SYMBOL_NS_GPL(cxl_error_detected, "CXL");

static int cxl_flit_size(struct pci_dev *pdev)
{
	if (cxl_pci_flit_256(pdev))
		return 256;

	return 68;
}

/**
 * cxl_pci_get_latency - calculate the link latency for the PCIe link
 * @pdev: PCI device
 *
 * return: calculated latency or 0 for no latency
 *
 * CXL Memory Device SW Guide v1.0 2.11.4 Link latency calculation
 * Link latency = LinkPropagationLatency + FlitLatency + RetimerLatency
 * LinkPropagationLatency is negligible, so 0 will be used
 * RetimerLatency is assumed to be negligible and 0 will be used
 * FlitLatency = FlitSize / LinkBandwidth
 * FlitSize is defined by spec. CXL rev3.0 4.2.1.
 * A 68B flit is used up to 32GT/s; above 32GT/s, a 256B flit size is used.
 * The FlitLatency is converted to picoseconds.
 */
long cxl_pci_get_latency(struct pci_dev *pdev)
{
	long bw;

	bw = pcie_link_speed_mbps(pdev);
	if (bw < 0)
		return 0;
	bw /= BITS_PER_BYTE;

	return cxl_flit_size(pdev) * MEGA / bw;
}
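
/*
 * Worked example for the calculation above (illustrative numbers): at
 * 32 GT/s pcie_link_speed_mbps() reports 32000 Mb/s, which becomes
 * 4000 MB/s after dividing by BITS_PER_BYTE. A 68B flit then takes
 * 68 * MEGA / 4000 = 17000 picoseconds of link transfer time.
 */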

static int __cxl_endpoint_decoder_reset_detected(struct device *dev, void *data)
{
	struct cxl_port *port = data;
	struct cxl_decoder *cxld;
	struct cxl_hdm *cxlhdm;
	void __iomem *hdm;
	u32 ctrl;

	if (!is_endpoint_decoder(dev))
		return 0;

	cxld = to_cxl_decoder(dev);
	if ((cxld->flags & CXL_DECODER_F_ENABLE) == 0)
		return 0;

	cxlhdm = dev_get_drvdata(&port->dev);
	hdm = cxlhdm->regs.hdm_decoder;
	ctrl = readl(hdm + CXL_HDM_DECODER0_CTRL_OFFSET(cxld->id));

	return !FIELD_GET(CXL_HDM_DECODER0_CTRL_COMMITTED, ctrl);
}

bool cxl_endpoint_decoder_reset_detected(struct cxl_port *port)
{
	return device_for_each_child(&port->dev, port,
				     __cxl_endpoint_decoder_reset_detected);
}
EXPORT_SYMBOL_NS_GPL(cxl_endpoint_decoder_reset_detected, "CXL");

int cxl_pci_get_bandwidth(struct pci_dev *pdev, struct access_coordinate *c)
{
	int speed, bw;
	u16 lnksta;
	u32 width;

	speed = pcie_link_speed_mbps(pdev);
	if (speed < 0)
		return speed;
	speed /= BITS_PER_BYTE;

	pcie_capability_read_word(pdev, PCI_EXP_LNKSTA, &lnksta);
	width = FIELD_GET(PCI_EXP_LNKSTA_NLW, lnksta);
	bw = speed * width;

	for (int i = 0; i < ACCESS_COORDINATE_MAX; i++) {
		c[i].read_bandwidth = bw;
		c[i].write_bandwidth = bw;
	}

	return 0;
}

/*
 * Set max timeout such that platforms will optimize GPF flow to avoid
 * the implied worst-case scenario delays. On a sane platform, all
 * devices should always complete GPF within the energy budget of
 * the GPF flow. The kernel does not have enough information to pick
 * anything better than "maximize timeouts and hope it works".
 *
 * A misbehaving device could block forward progress of GPF for all
 * the other devices, exhausting the energy budget of the platform.
 * However, the spec seems to assume that moving on from slow to respond
 * devices is a virtue. It is not possible to know whether the
 * slow-to-respond device is in fact *the* most critical device in the
 * system to wait for.
 */
#define GPF_TIMEOUT_BASE_MAX 2
#define GPF_TIMEOUT_SCALE_MAX 7 /* 10 seconds */
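
/*
 * Judging from the scale comment above (a scale of 7 selects 10 second
 * units) and the "%d0 secs" message in update_gpf_port_dvsec(), the
 * maximum programmed timeout works out to roughly 2 * 10s = 20 seconds
 * per GPF phase.
 */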

u16 cxl_gpf_get_dvsec(struct device *dev)
{
	struct pci_dev *pdev;
	bool is_port = true;
	u16 dvsec;

	if (!dev_is_pci(dev))
		return 0;

	pdev = to_pci_dev(dev);
	if (pci_pcie_type(pdev) == PCI_EXP_TYPE_ENDPOINT)
		is_port = false;

	dvsec = pci_find_dvsec_capability(pdev, PCI_VENDOR_ID_CXL,
			is_port ? CXL_DVSEC_PORT_GPF : CXL_DVSEC_DEVICE_GPF);
	if (!dvsec)
		dev_warn(dev, "%s GPF DVSEC not present\n",
			 is_port ? "Port" : "Device");
	return dvsec;
}
EXPORT_SYMBOL_NS_GPL(cxl_gpf_get_dvsec, "CXL");

static int update_gpf_port_dvsec(struct pci_dev *pdev, int dvsec, int phase)
{
	u64 base, scale;
	int rc, offset;
	u16 ctrl;

	switch (phase) {
	case 1:
		offset = CXL_DVSEC_PORT_GPF_PHASE_1_CONTROL_OFFSET;
		base = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_BASE_MASK;
		scale = CXL_DVSEC_PORT_GPF_PHASE_1_TMO_SCALE_MASK;
		break;
	case 2:
		offset = CXL_DVSEC_PORT_GPF_PHASE_2_CONTROL_OFFSET;
		base = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_BASE_MASK;
		scale = CXL_DVSEC_PORT_GPF_PHASE_2_TMO_SCALE_MASK;
		break;
	default:
		return -EINVAL;
	}

	rc = pci_read_config_word(pdev, dvsec + offset, &ctrl);
	if (rc)
		return rc;

	if (FIELD_GET(base, ctrl) == GPF_TIMEOUT_BASE_MAX &&
	    FIELD_GET(scale, ctrl) == GPF_TIMEOUT_SCALE_MAX)
		return 0;

	ctrl = FIELD_PREP(base, GPF_TIMEOUT_BASE_MAX);
	ctrl |= FIELD_PREP(scale, GPF_TIMEOUT_SCALE_MAX);

	rc = pci_write_config_word(pdev, dvsec + offset, ctrl);
	if (!rc)
		pci_dbg(pdev, "Port GPF phase %d timeout: %d0 secs\n",
			phase, GPF_TIMEOUT_BASE_MAX);

	return rc;
}

int cxl_gpf_port_setup(struct cxl_dport *dport)
{
	if (!dport)
		return -EINVAL;

	if (!dport->gpf_dvsec) {
		struct pci_dev *pdev;
		int dvsec;

		dvsec = cxl_gpf_get_dvsec(dport->dport_dev);
		if (!dvsec)
			return -EINVAL;

		dport->gpf_dvsec = dvsec;
		pdev = to_pci_dev(dport->dport_dev);
		update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 1);
		update_gpf_port_dvsec(pdev, dport->gpf_dvsec, 2);
	}

	return 0;
}

struct cxl_walk_context {
	struct pci_bus *bus;
	struct cxl_port *port;
	int type;
	int error;
	int count;
};

static int count_dports(struct pci_dev *pdev, void *data)
{
	struct cxl_walk_context *ctx = data;
	int type = pci_pcie_type(pdev);

	if (pdev->bus != ctx->bus)
		return 0;
	if (!pci_is_pcie(pdev))
		return 0;
	if (type != ctx->type)
		return 0;

	ctx->count++;
	return 0;
}

int cxl_port_get_possible_dports(struct cxl_port *port)
{
	struct pci_bus *bus = cxl_port_to_pci_bus(port);
	struct cxl_walk_context ctx;
	int type;

	if (!bus) {
		dev_err(&port->dev, "No PCI bus found for port %s\n",
			dev_name(&port->dev));
		return -ENXIO;
	}

	if (pci_is_root_bus(bus))
		type = PCI_EXP_TYPE_ROOT_PORT;
	else
		type = PCI_EXP_TYPE_DOWNSTREAM;

	ctx = (struct cxl_walk_context) {
		.bus = bus,
		.type = type,
	};
	pci_walk_bus(bus, count_dports, &ctx);

	return ctx.count;
}