// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
#include <linux/unaligned.h>
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/moduleparam.h>
#include <linux/module.h>
#include <linux/delay.h>
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/aer.h>
#include <linux/io.h>
#include <cxl/mailbox.h>
#include "cxlmem.h"
#include "cxlpci.h"
#include "cxl.h"
#include "pmu.h"

/**
 * DOC: cxl pci
 *
 * This implements the PCI exclusive functionality for a CXL device as it is
 * defined by the Compute Express Link specification. CXL devices may surface
 * certain functionality even if it isn't CXL enabled. While this driver is
 * focused around the PCI specific aspects of a CXL device, it binds to the
 * specific CXL memory device class code, and therefore the implementation of
 * cxl_pci is focused around CXL memory devices.
 *
 * The driver has several responsibilities, mainly:
 *  - Create the memX device and register on the CXL bus.
 *  - Enumerate device's register interface and map them.
 *  - Registers nvdimm bridge device with cxl_core.
 *  - Registers a CXL mailbox with cxl_core.
 */

/* True while the primary mailbox doorbell is set (command in flight) */
#define cxl_doorbell_busy(cxlds)                                                \
	(readl((cxlds)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &                  \
	 CXLDEV_MBOX_CTRL_DOORBELL)

/* CXL 2.0 - 8.2.8.4 */
/*
 * NOTE(review): despite the "_MS" suffix this value is in jiffies (2 * HZ);
 * it is only ever added to a jiffies timestamp in
 * cxl_pci_mbox_wait_for_doorbell() below, so behavior is correct.
 */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)

/*
 * CXL 2.0 ECN "Add Mailbox Ready Time" defines a capability field to
 * dictate how long to wait for the mailbox to become ready. The new
 * field allows the device to tell software the amount of time to wait
 * before mailbox ready. This field per the spec theoretically allows
 * for up to 255 seconds. 255 seconds is unreasonably long, its longer
 * than the maximum SATA port link recovery wait. Default to 60 seconds
 * until someone builds a CXL device that needs more time in practice.
 */
static unsigned short mbox_ready_timeout = 60;
module_param(mbox_ready_timeout, ushort, 0644);
MODULE_PARM_DESC(mbox_ready_timeout, "seconds to wait for mailbox ready");

/*
 * Busy-wait (cpu_relax()) for the mailbox doorbell to clear.
 *
 * Returns 0 once the doorbell is clear, or -ETIMEDOUT after ~2 seconds
 * (CXL_MAILBOX_TIMEOUT_MS worth of jiffies) of continuous busy status.
 */
static int cxl_pci_mbox_wait_for_doorbell(struct cxl_dev_state *cxlds)
{
	const unsigned long start = jiffies;
	unsigned long end = start;

	while (cxl_doorbell_busy(cxlds)) {
		end = jiffies;

		if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
			/* Check again in case preempted before timeout test */
			if (!cxl_doorbell_busy(cxlds))
				break;
			return -ETIMEDOUT;
		}
		cpu_relax();
	}

	dev_dbg(cxlds->dev, "Doorbell wait took %dms",
		jiffies_to_msecs(end) - jiffies_to_msecs(start));
	return 0;
}

/* Rate-limited error print decorated with fatal/firmware-halt device state */
#define cxl_err(dev, status, msg)                                               \
	dev_err_ratelimited(dev, msg ", device state %s%s\n",                   \
			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",         \
			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")

/* As cxl_err(), but also reports the failing mailbox command opcode */
#define cxl_cmd_err(dev, cmd, status, msg)                                      \
	dev_err_ratelimited(dev, msg " (opcode: %#x), device state %s%s\n",     \
			    (cmd)->opcode,                                      \
			    status & CXLMDEV_DEV_FATAL ? " fatal" : "",         \
			    status & CXLMDEV_FW_HALT ? " firmware-halt" : "")

/*
 * Threaded irq dev_id's must be globally unique. cxl_dev_id provides a unique
 * wrapper object for each irq within the same cxlds.
 */
struct cxl_dev_id {
	struct cxl_dev_state *cxlds;
};

/*
 * Request a devm-managed, shared, threaded irq whose dev_id is a fresh
 * cxl_dev_id wrapper (see comment above) pointing back at @cxlds.
 */
static int cxl_request_irq(struct cxl_dev_state *cxlds, int irq,
			   irq_handler_t thread_fn)
{
	struct device *dev = cxlds->dev;
	struct cxl_dev_id *dev_id;

	dev_id = devm_kzalloc(dev, sizeof(*dev_id), GFP_KERNEL);
	if (!dev_id)
		return -ENOMEM;
	dev_id->cxlds = cxlds;

	return devm_request_threaded_irq(dev, irq, NULL, thread_fn,
					 IRQF_SHARED | IRQF_ONESHOT, NULL,
					 dev_id);
}

/* A background command is complete when its status reports 100 percent */
static bool cxl_mbox_background_complete(struct cxl_dev_state *cxlds)
{
	u64 reg;

	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
	return FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_PCT_MASK, reg) == 100;
}

/*
 * Background-command completion interrupt. Sanitize completions are routed
 * to the polling worker (which owns the sysfs notification); every other
 * background command wakes the synchronous waiter in
 * __cxl_pci_mbox_send_cmd().
 */
static irqreturn_t cxl_pci_mbox_irq(int irq, void *id)
{
	u64 reg;
	u16 opcode;
	struct cxl_dev_id *dev_id = id;
	struct cxl_dev_state *cxlds = dev_id->cxlds;
	struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);

	/* irq is shared; not ours if no background command has completed */
	if (!cxl_mbox_background_complete(cxlds))
		return IRQ_NONE;

	reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
	opcode = FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_OPCODE_MASK, reg);
	if (opcode == CXL_MBOX_OP_SANITIZE) {
		mutex_lock(&cxl_mbox->mbox_mutex);
		if (mds->security.sanitize_node)
			/* run the poll worker immediately to finalize state */
			mod_delayed_work(system_percpu_wq, &mds->security.poll_dwork, 0);
		mutex_unlock(&cxl_mbox->mbox_mutex);
	} else {
		/* short-circuit the wait in __cxl_pci_mbox_send_cmd() */
		rcuwait_wake_up(&cxl_mbox->mbox_wait);
	}

	return IRQ_HANDLED;
}

/*
 * Sanitization operation polling mode.
 */
static void cxl_mbox_sanitize_work(struct work_struct *work)
{
	struct cxl_memdev_state *mds =
		container_of(work, typeof(*mds), security.poll_dwork.work);
	struct cxl_dev_state *cxlds = &mds->cxlds;
	struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;

	mutex_lock(&cxl_mbox->mbox_mutex);
	if (cxl_mbox_background_complete(cxlds)) {
		/* done: clear polling state and notify any sysfs waiter */
		mds->security.poll_tmo_secs = 0;
		if (mds->security.sanitize_node)
			sysfs_notify_dirent(mds->security.sanitize_node);
		mds->security.sanitize_active = false;

		dev_dbg(cxlds->dev, "Sanitization operation ended\n");
	} else {
		/* back off by 10s per retry, capped at 15 minutes */
		int timeout = mds->security.poll_tmo_secs + 10;

		mds->security.poll_tmo_secs = min(15 * 60, timeout);
		schedule_delayed_work(&mds->security.poll_dwork, timeout * HZ);
	}
	mutex_unlock(&cxl_mbox->mbox_mutex);
}

/**
 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
 * @cxl_mbox: CXL mailbox context
 * @mbox_cmd: Command to send to the memory device.
 *
 * Context: Any context. Expects mbox_mutex to be held.
 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
 *         Caller should check the return code in @mbox_cmd to make sure it
 *         succeeded.
 *
 * This is a generic form of the CXL mailbox send command thus only using the
 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
 * devices, and perhaps other types of CXL devices may have further information
 * available upon error conditions. Driver facilities wishing to send mailbox
 * commands should use the wrapper command.
 *
 * The CXL spec allows for up to two mailboxes. The intention is for the primary
 * mailbox to be OS controlled and the secondary mailbox to be used by system
 * firmware. This allows the OS and firmware to communicate with the device and
 * not need to coordinate with each other. The driver only uses the primary
 * mailbox.
 */
static int __cxl_pci_mbox_send_cmd(struct cxl_mailbox *cxl_mbox,
				   struct cxl_mbox_cmd *mbox_cmd)
{
	struct cxl_dev_state *cxlds = mbox_to_cxlds(cxl_mbox);
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
	void __iomem *payload = cxlds->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
	struct device *dev = cxlds->dev;
	u64 cmd_reg, status_reg;
	size_t out_len;
	int rc;

	lockdep_assert_held(&cxl_mbox->mbox_mutex);

	/*
	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
	 * 1. Caller reads MB Control Register to verify doorbell is clear
	 * 2. Caller writes Command Register
	 * 3. Caller writes Command Payload Registers if input payload is non-empty
	 * 4. Caller writes MB Control Register to set doorbell
	 * 5. Caller either polls for doorbell to be clear or waits for interrupt if configured
	 * 6. Caller reads MB Status Register to fetch Return code
	 * 7. If command successful, Caller reads Command Register to get Payload Length
	 * 8. If output payload is non-empty, host reads Command Payload Registers
	 *
	 * Hardware is free to do whatever it wants before the doorbell is rung,
	 * and isn't allowed to change anything after it clears the doorbell. As
	 * such, steps 2 and 3 can happen in any order, and steps 6, 7, 8 can
	 * also happen in any order (though some orders might not make sense).
	 */

	/* #1 */
	if (cxl_doorbell_busy(cxlds)) {
		u64 md_status =
			readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);

		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status,
			    "mailbox queue busy");
		return -EBUSY;
	}

	/*
	 * With sanitize polling, hardware might be done and the poller still
	 * not be in sync. Ensure no new command comes in until so. Keep the
	 * hardware semantics and only allow device health status.
	 */
	if (mds->security.poll_tmo_secs > 0) {
		if (mbox_cmd->opcode != CXL_MBOX_OP_GET_HEALTH_INFO)
			return -EBUSY;
	}

	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
			     mbox_cmd->opcode);
	if (mbox_cmd->size_in) {
		if (WARN_ON(!mbox_cmd->payload_in))
			return -EINVAL;

		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
				      mbox_cmd->size_in);
		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
	}

	/* #2, #3 */
	writeq(cmd_reg, cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);

	/* #4 */
	dev_dbg(dev, "Sending command: 0x%04x\n", mbox_cmd->opcode);
	writel(CXLDEV_MBOX_CTRL_DOORBELL,
	       cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);

	/* #5 */
	rc = cxl_pci_mbox_wait_for_doorbell(cxlds);
	if (rc == -ETIMEDOUT) {
		u64 md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);

		cxl_cmd_err(cxlds->dev, mbox_cmd, md_status, "mailbox timeout");
		return rc;
	}

	/* #6 */
	status_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
	mbox_cmd->return_code =
		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);

	/*
	 * Handle the background command in a synchronous manner.
	 *
	 * All other mailbox commands will serialize/queue on the mbox_mutex,
	 * which we currently hold. Furthermore this also guarantees that
	 * cxl_mbox_background_complete() checks are safe amongst each other,
	 * in that no new bg operation can occur in between.
	 *
	 * Background operations are timesliced in accordance with the nature
	 * of the command. In the event of timeout, the mailbox state is
	 * indeterminate until the next successful command submission and the
	 * driver can get back in sync with the hardware state.
	 */
	if (mbox_cmd->return_code == CXL_MBOX_CMD_RC_BACKGROUND) {
		u64 bg_status_reg;
		int i, timeout;

		/*
		 * Sanitization is a special case which monopolizes the device
		 * and cannot be timesliced. Handle asynchronously instead,
		 * and allow userspace to poll(2) for completion.
		 */
		if (mbox_cmd->opcode == CXL_MBOX_OP_SANITIZE) {
			if (mds->security.sanitize_active)
				return -EBUSY;

			/* give first timeout a second */
			timeout = 1;
			mds->security.poll_tmo_secs = timeout;
			mds->security.sanitize_active = true;
			schedule_delayed_work(&mds->security.poll_dwork,
					      timeout * HZ);
			dev_dbg(dev, "Sanitization operation started\n");
			goto success;
		}

		dev_dbg(dev, "Mailbox background operation (0x%04x) started\n",
			mbox_cmd->opcode);

		/*
		 * Wait in poll_interval_ms slices; the irq handler (when
		 * enabled) short-circuits the wait via rcuwait_wake_up().
		 */
		timeout = mbox_cmd->poll_interval_ms;
		for (i = 0; i < mbox_cmd->poll_count; i++) {
			if (rcuwait_wait_event_timeout(&cxl_mbox->mbox_wait,
				       cxl_mbox_background_complete(cxlds),
				       TASK_UNINTERRUPTIBLE,
				       msecs_to_jiffies(timeout)) > 0)
				break;
		}

		if (!cxl_mbox_background_complete(cxlds)) {
			dev_err(dev, "timeout waiting for background (%d ms)\n",
				timeout * mbox_cmd->poll_count);
			return -ETIMEDOUT;
		}

		bg_status_reg = readq(cxlds->regs.mbox +
				      CXLDEV_MBOX_BG_CMD_STATUS_OFFSET);
		mbox_cmd->return_code =
			FIELD_GET(CXLDEV_MBOX_BG_CMD_COMMAND_RC_MASK,
				  bg_status_reg);
		dev_dbg(dev,
			"Mailbox background operation (0x%04x) completed\n",
			mbox_cmd->opcode);
	}

	if (mbox_cmd->return_code != CXL_MBOX_CMD_RC_SUCCESS) {
		dev_dbg(dev, "Mailbox operation had an error: %s\n",
			cxl_mbox_cmd_rc2str(mbox_cmd));
		return 0; /* completed but caller must check return_code */
	}

success:
	/* #7 */
	cmd_reg = readq(cxlds->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);

	/* #8 */
	if (out_len && mbox_cmd->payload_out) {
		/*
		 * Sanitize the copy. If hardware misbehaves, out_len per the
		 * spec can actually be greater than the max allowed size (21
		 * bits available but spec defined 1M max). The caller also may
		 * have requested less data than the hardware supplied even
		 * within spec.
		 */
		size_t n;

		n = min3(mbox_cmd->size_out, cxl_mbox->payload_size, out_len);
		memcpy_fromio(mbox_cmd->payload_out, payload, n);
		mbox_cmd->size_out = n;
	} else {
		mbox_cmd->size_out = 0;
	}

	return 0;
}

/*
 * Serialize all mailbox submissions on mbox_mutex; registered as the
 * cxl_mbox->mbox_send operation in cxl_pci_setup_mailbox().
 */
static int cxl_pci_mbox_send(struct cxl_mailbox *cxl_mbox,
			     struct cxl_mbox_cmd *cmd)
{
	int rc;

	mutex_lock(&cxl_mbox->mbox_mutex);
	rc = __cxl_pci_mbox_send_cmd(cxl_mbox, cmd);
	mutex_unlock(&cxl_mbox->mbox_mutex);

	return rc;
}

/*
 * Wait for the mailbox interface to become ready, size the payload, install
 * the send operation, and optionally enable the background-command irq.
 *
 * Returns 0 on success, -ETIMEDOUT if the interface never becomes ready or
 * idle, -ENXIO if the advertised payload size is below the 256 byte minimum.
 */
static int cxl_pci_setup_mailbox(struct cxl_memdev_state *mds, bool irq_avail)
{
	struct cxl_dev_state *cxlds = &mds->cxlds;
	struct cxl_mailbox *cxl_mbox = &cxlds->cxl_mbox;
	const int cap = readl(cxlds->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);
	struct device *dev = cxlds->dev;
	unsigned long timeout;
	int irq, msgnum;
	u64 md_status;
	u32 ctrl;

	/* Poll Mailbox Interface Ready, rechecking every 100ms */
	timeout = jiffies + mbox_ready_timeout * HZ;
	do {
		md_status = readq(cxlds->regs.memdev + CXLMDEV_STATUS_OFFSET);
		if (md_status & CXLMDEV_MBOX_IF_READY)
			break;
		if (msleep_interruptible(100))
			break;
	} while (!time_after(jiffies, timeout));

	if (!(md_status & CXLMDEV_MBOX_IF_READY)) {
		cxl_err(dev, md_status, "timeout awaiting mailbox ready");
		return -ETIMEDOUT;
	}

	/*
	 * A command may be in flight from a previous driver instance,
	 * think kexec, do one doorbell wait so that
	 * __cxl_pci_mbox_send_cmd() can assume that it is the only
	 * source for future doorbell busy events.
419 */ 420 if (cxl_pci_mbox_wait_for_doorbell(cxlds) != 0) { 421 cxl_err(dev, md_status, "timeout awaiting mailbox idle"); 422 return -ETIMEDOUT; 423 } 424 425 cxl_mbox->mbox_send = cxl_pci_mbox_send; 426 cxl_mbox->payload_size = 427 1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap); 428 429 /* 430 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register 431 * 432 * If the size is too small, mandatory commands will not work and so 433 * there's no point in going forward. If the size is too large, there's 434 * no harm is soft limiting it. 435 */ 436 cxl_mbox->payload_size = min_t(size_t, cxl_mbox->payload_size, SZ_1M); 437 if (cxl_mbox->payload_size < 256) { 438 dev_err(dev, "Mailbox is too small (%zub)", 439 cxl_mbox->payload_size); 440 return -ENXIO; 441 } 442 443 dev_dbg(dev, "Mailbox payload sized %zu", cxl_mbox->payload_size); 444 445 INIT_DELAYED_WORK(&mds->security.poll_dwork, cxl_mbox_sanitize_work); 446 447 /* background command interrupts are optional */ 448 if (!(cap & CXLDEV_MBOX_CAP_BG_CMD_IRQ) || !irq_avail) 449 return 0; 450 451 msgnum = FIELD_GET(CXLDEV_MBOX_CAP_IRQ_MSGNUM_MASK, cap); 452 irq = pci_irq_vector(to_pci_dev(cxlds->dev), msgnum); 453 if (irq < 0) 454 return 0; 455 456 if (cxl_request_irq(cxlds, irq, cxl_pci_mbox_irq)) 457 return 0; 458 459 dev_dbg(cxlds->dev, "Mailbox interrupts enabled\n"); 460 /* enable background command mbox irq support */ 461 ctrl = readl(cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); 462 ctrl |= CXLDEV_MBOX_CTRL_BG_CMD_IRQ; 463 writel(ctrl, cxlds->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET); 464 465 return 0; 466 } 467 468 /* 469 * Assume that any RCIEP that emits the CXL memory expander class code 470 * is an RCD 471 */ 472 static bool is_cxl_restricted(struct pci_dev *pdev) 473 { 474 return pci_pcie_type(pdev) == PCI_EXP_TYPE_RC_END; 475 } 476 477 static int cxl_rcrb_get_comp_regs(struct pci_dev *pdev, 478 struct cxl_register_map *map, 479 struct cxl_dport *dport) 480 { 481 resource_size_t component_reg_phys; 482 483 *map = 
(struct cxl_register_map) { 484 .host = &pdev->dev, 485 .resource = CXL_RESOURCE_NONE, 486 }; 487 488 struct cxl_port *port __free(put_cxl_port) = 489 cxl_pci_find_port(pdev, &dport); 490 if (!port) 491 return -EPROBE_DEFER; 492 493 component_reg_phys = cxl_rcd_component_reg_phys(&pdev->dev, dport); 494 if (component_reg_phys == CXL_RESOURCE_NONE) 495 return -ENXIO; 496 497 map->resource = component_reg_phys; 498 map->reg_type = CXL_REGLOC_RBI_COMPONENT; 499 map->max_size = CXL_COMPONENT_REG_BLOCK_SIZE; 500 501 return 0; 502 } 503 504 static int cxl_pci_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type, 505 struct cxl_register_map *map) 506 { 507 int rc; 508 509 rc = cxl_find_regblock(pdev, type, map); 510 511 /* 512 * If the Register Locator DVSEC does not exist, check if it 513 * is an RCH and try to extract the Component Registers from 514 * an RCRB. 515 */ 516 if (rc && type == CXL_REGLOC_RBI_COMPONENT && is_cxl_restricted(pdev)) { 517 struct cxl_dport *dport; 518 struct cxl_port *port __free(put_cxl_port) = 519 cxl_pci_find_port(pdev, &dport); 520 if (!port) 521 return -EPROBE_DEFER; 522 523 rc = cxl_rcrb_get_comp_regs(pdev, map, dport); 524 if (rc) 525 return rc; 526 527 rc = cxl_dport_map_rcd_linkcap(pdev, dport); 528 if (rc) 529 return rc; 530 531 } else if (rc) { 532 return rc; 533 } 534 535 return cxl_setup_regs(map); 536 } 537 538 static void free_event_buf(void *buf) 539 { 540 kvfree(buf); 541 } 542 543 /* 544 * There is a single buffer for reading event logs from the mailbox. All logs 545 * share this buffer protected by the mds->event_log_lock. 
 */
static int cxl_mem_alloc_event_buf(struct cxl_memdev_state *mds)
{
	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
	struct cxl_get_event_payload *buf;

	/* one mailbox-payload-sized buffer, freed via devm action */
	buf = kvmalloc(cxl_mbox->payload_size, GFP_KERNEL);
	if (!buf)
		return -ENOMEM;
	mds->event.buf = buf;

	return devm_add_action_or_reset(mds->cxlds.dev, free_event_buf, buf);
}

/*
 * Allocate MSI/MSI-X vectors for the device. Returns false (and the driver
 * proceeds without interrupt support) if no vector could be allocated.
 */
static bool cxl_alloc_irq_vectors(struct pci_dev *pdev)
{
	int nvecs;

	/*
	 * Per CXL 3.0 3.1.1 CXL.io Endpoint a function on a CXL device must
	 * not generate INTx messages if that function participates in
	 * CXL.cache or CXL.mem.
	 *
	 * Additionally pci_alloc_irq_vectors() handles calling
	 * pci_free_irq_vectors() automatically despite not being called
	 * pcim_*. See pci_setup_msi_context().
	 */
	nvecs = pci_alloc_irq_vectors(pdev, 1, CXL_PCI_DEFAULT_MAX_VECTORS,
				      PCI_IRQ_MSIX | PCI_IRQ_MSI);
	if (nvecs < 1) {
		dev_dbg(&pdev->dev, "Failed to alloc irq vectors: %d\n", nvecs);
		return false;
	}
	return true;
}

/*
 * Threaded event-log interrupt handler: drain event records for every log
 * whose status bit is set, re-reading status until all known bits clear.
 */
static irqreturn_t cxl_event_thread(int irq, void *id)
{
	struct cxl_dev_id *dev_id = id;
	struct cxl_dev_state *cxlds = dev_id->cxlds;
	struct cxl_memdev_state *mds = to_cxl_memdev_state(cxlds);
	u32 status;

	do {
		/*
		 * CXL 3.0 8.2.8.3.1: The lower 32 bits are the status;
		 * ignore the reserved upper 32 bits
		 */
		status = readl(cxlds->regs.status + CXLDEV_DEV_EVENT_STATUS_OFFSET);
		/* Ignore logs unknown to the driver */
		status &= CXLDEV_EVENT_STATUS_ALL;
		if (!status)
			break;
		cxl_mem_get_event_records(mds, status);
		cond_resched();
	} while (status);

	return IRQ_HANDLED;
}

/*
 * Translate one event-log interrupt policy @setting into a vector number and
 * request the corresponding irq. Only the MSI/MSI-X mode is supported.
 */
static int cxl_event_req_irq(struct cxl_dev_state *cxlds, u8 setting)
{
	struct pci_dev *pdev = to_pci_dev(cxlds->dev);
	int irq;

	if (FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting) != CXL_INT_MSI_MSIX)
		return -ENXIO;

	irq = pci_irq_vector(pdev,
			     FIELD_GET(CXLDEV_EVENT_INT_MSGNUM_MASK, setting));
	if (irq < 0)
		return irq;

	return cxl_request_irq(cxlds, irq, cxl_event_thread);
}

/* Read the device's current event interrupt policy into @policy */
static int cxl_event_get_int_policy(struct cxl_memdev_state *mds,
				    struct cxl_event_interrupt_policy *policy)
{
	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
	struct cxl_mbox_cmd mbox_cmd = {
		.opcode = CXL_MBOX_OP_GET_EVT_INT_POLICY,
		.payload_out = policy,
		.size_out = sizeof(*policy),
	};
	int rc;

	rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
	if (rc < 0)
		dev_err(mds->cxlds.dev,
			"Failed to get event interrupt policy : %d", rc);

	return rc;
}

/*
 * Request MSI/MSI-X interrupt mode for all four event logs, then read back
 * the policy the device actually accepted into @policy.
 */
static int cxl_event_config_msgnums(struct cxl_memdev_state *mds,
				    struct cxl_event_interrupt_policy *policy)
{
	struct cxl_mailbox *cxl_mbox = &mds->cxlds.cxl_mbox;
	struct cxl_mbox_cmd mbox_cmd;
	int rc;

	*policy = (struct cxl_event_interrupt_policy) {
		.info_settings = CXL_INT_MSI_MSIX,
		.warn_settings = CXL_INT_MSI_MSIX,
		.failure_settings = CXL_INT_MSI_MSIX,
		.fatal_settings = CXL_INT_MSI_MSIX,
	};

	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_SET_EVT_INT_POLICY,
		.payload_in = policy,
		.size_in = sizeof(*policy),
	};

	rc = cxl_internal_send_cmd(cxl_mbox, &mbox_cmd);
	if (rc < 0) {
		dev_err(mds->cxlds.dev, "Failed to set event interrupt policy : %d",
			rc);
		return rc;
	}

	/* Retrieve final interrupt settings */
	return cxl_event_get_int_policy(mds, policy);
}

/* Configure the policy and request one irq per event log */
static int cxl_event_irqsetup(struct cxl_memdev_state *mds)
{
	struct cxl_dev_state *cxlds = &mds->cxlds;
	struct cxl_event_interrupt_policy policy;
	int rc;

	rc = cxl_event_config_msgnums(mds, &policy);
	if (rc)
		return rc;

	rc = cxl_event_req_irq(cxlds, policy.info_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Info log\n");
		return rc;
	}

	rc = cxl_event_req_irq(cxlds, policy.warn_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Warn log\n");
		return rc;
	}

	rc = cxl_event_req_irq(cxlds, policy.failure_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Failure log\n");
		return rc;
	}

	rc = cxl_event_req_irq(cxlds, policy.fatal_settings);
	if (rc) {
		dev_err(cxlds->dev, "Failed to get interrupt for event Fatal log\n");
		return rc;
	}

	return 0;
}

/* True if the policy setting routes the event log interrupt to firmware */
static bool cxl_event_int_is_fw(u8 setting)
{
	u8 mode = FIELD_GET(CXLDEV_EVENT_INT_MODE_MASK, setting);

	return mode == CXL_INT_FW;
}

/*
 * Top-level event configuration: verify the OS owns event handling, allocate
 * the shared buffer, wire up irqs, and drain any records already pending.
 */
static int cxl_event_config(struct pci_host_bridge *host_bridge,
			    struct cxl_memdev_state *mds, bool irq_avail)
{
	struct cxl_event_interrupt_policy policy;
	int rc;

	/*
	 * When BIOS maintains CXL error reporting control, it will process
	 * event records. Only one agent can do so.
	 */
	if (!host_bridge->native_cxl_error)
		return 0;

	if (!irq_avail) {
		dev_info(mds->cxlds.dev, "No interrupt support, disable event processing.\n");
		return 0;
	}

	rc = cxl_event_get_int_policy(mds, &policy);
	if (rc)
		return rc;

	/* refuse to take over if any log still interrupts firmware */
	if (cxl_event_int_is_fw(policy.info_settings) ||
	    cxl_event_int_is_fw(policy.warn_settings) ||
	    cxl_event_int_is_fw(policy.failure_settings) ||
	    cxl_event_int_is_fw(policy.fatal_settings)) {
		dev_err(mds->cxlds.dev,
			"FW still in control of Event Logs despite _OSC settings\n");
		return -EBUSY;
	}

	rc = cxl_mem_alloc_event_buf(mds);
	if (rc)
		return rc;

	rc = cxl_event_irqsetup(mds);
	if (rc)
		return rc;

	/* drain anything that was logged before irqs were enabled */
	cxl_mem_get_event_records(mds, CXLDEV_EVENT_STATUS_ALL);

	return 0;
}

/* Register the device's primary mailbox with cxl_core */
static int cxl_pci_type3_init_mailbox(struct cxl_dev_state *cxlds)
{
	int rc;

	/*
	 * Fail the init if there's no mailbox. For a type3 this is out of spec.
	 */
	if (!cxlds->reg_map.device_map.mbox.valid)
		return -ENODEV;

	rc = cxl_mailbox_init(&cxlds->cxl_mbox, cxlds->dev);
	if (rc)
		return rc;

	return 0;
}

/*
 * Emit one RCD PCIe capability register (2 or 4 bytes wide at @offset) for
 * sysfs. Holds a reference/lock on the root device so the register mapping
 * cannot be torn down mid-read.
 */
static ssize_t rcd_pcie_cap_emit(struct device *dev, u16 offset, char *buf, size_t width)
{
	struct cxl_dev_state *cxlds = dev_get_drvdata(dev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *root_dev;
	struct cxl_dport *dport;
	struct cxl_port *root __free(put_cxl_port) =
		cxl_mem_find_port(cxlmd, &dport);

	if (!root)
		return -ENXIO;

	root_dev = root->uport_dev;
	if (!root_dev)
		return -ENXIO;

	if (!dport->regs.rcd_pcie_cap)
		return -ENXIO;

	/* mapping is only valid while the root device remains bound */
	guard(device)(root_dev);
	if (!root_dev->driver)
		return -ENXIO;

	switch (width) {
	case 2:
		return sysfs_emit(buf, "%#x\n",
				  readw(dport->regs.rcd_pcie_cap + offset));
	case 4:
		return sysfs_emit(buf, "%#x\n",
				  readl(dport->regs.rcd_pcie_cap + offset));
	default:
		return -EINVAL;
	}
}

static ssize_t rcd_link_cap_show(struct device *dev,
				 struct device_attribute *attr, char *buf)
{
	return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCAP, buf, sizeof(u32));
}
static DEVICE_ATTR_RO(rcd_link_cap);

static ssize_t rcd_link_ctrl_show(struct device *dev,
				  struct device_attribute *attr, char *buf)
{
	return rcd_pcie_cap_emit(dev, PCI_EXP_LNKCTL, buf, sizeof(u16));
}
static DEVICE_ATTR_RO(rcd_link_ctrl);

static ssize_t rcd_link_status_show(struct device *dev,
				    struct device_attribute *attr, char *buf)
{
	return rcd_pcie_cap_emit(dev, PCI_EXP_LNKSTA, buf, sizeof(u16));
}
static DEVICE_ATTR_RO(rcd_link_status);

static struct attribute *cxl_rcd_attrs[] = {
	&dev_attr_rcd_link_cap.attr,
	&dev_attr_rcd_link_ctrl.attr,
	&dev_attr_rcd_link_status.attr,
	NULL
};

/* Only expose the rcd_* attributes on restricted (RCD) devices */
static umode_t cxl_rcd_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = kobj_to_dev(kobj);
	struct pci_dev *pdev = to_pci_dev(dev);

	if (is_cxl_restricted(pdev))
		return a->mode;

	return 0;
}

static struct attribute_group cxl_rcd_group = {
	.attrs = cxl_rcd_attrs,
	.is_visible = cxl_rcd_visible,
};
__ATTRIBUTE_GROUPS(cxl_rcd);

/*
 * Main probe: map registers, bring up the mailbox, enumerate device state,
 * and register the memdev plus its auxiliary services on the CXL bus.
 */
static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct pci_host_bridge *host_bridge = pci_find_host_bridge(pdev->bus);
	struct cxl_dpa_info range_info = { 0 };
	struct cxl_memdev_state *mds;
	struct cxl_dev_state *cxlds;
	struct cxl_register_map map;
	struct cxl_memdev *cxlmd;
	int rc, pmu_count;
	unsigned int i;
	bool irq_avail;

	rc = pcim_enable_device(pdev);
	if (rc)
		return rc;
	pci_set_master(pdev);

	mds = cxl_memdev_state_create(&pdev->dev);
	if (IS_ERR(mds))
		return PTR_ERR(mds);
	cxlds = &mds->cxlds;
	pci_set_drvdata(pdev, cxlds);

	cxlds->rcd = is_cxl_restricted(pdev);
	cxlds->serial = pci_get_dsn(pdev);
	cxlds->cxl_dvsec = pci_find_dvsec_capability(
		pdev, PCI_VENDOR_ID_CXL, PCI_DVSEC_CXL_DEVICE);
	if (!cxlds->cxl_dvsec)
		dev_warn(&pdev->dev,
			 "Device DVSEC not present, skip CXL.mem init\n");

	rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
	if (rc)
		return rc;

	rc = cxl_map_device_regs(&map, &cxlds->regs);
	if (rc)
		return rc;

	/*
	 * If the component registers can't be found, the cxl_pci driver may
	 * still be useful for management functions so don't return an error.
	 */
	rc = cxl_pci_setup_regs(pdev, CXL_REGLOC_RBI_COMPONENT,
				&cxlds->reg_map);
	if (rc)
		dev_warn(&pdev->dev, "No component registers (%d)\n", rc);
	else if (!cxlds->reg_map.component_map.ras.valid)
		dev_dbg(&pdev->dev, "RAS registers not found\n");

	rc = cxl_pci_type3_init_mailbox(cxlds);
	if (rc)
		return rc;

	/* media not being ready is not fatal to enumeration */
	rc = cxl_await_media_ready(cxlds);
	if (rc == 0)
		cxlds->media_ready = true;
	else
		dev_warn(&pdev->dev, "Media not active (%d)\n", rc);

	irq_avail = cxl_alloc_irq_vectors(pdev);

	rc = cxl_pci_setup_mailbox(mds, irq_avail);
	if (rc)
		return rc;

	rc = cxl_enumerate_cmds(mds);
	if (rc)
		return rc;

	rc = cxl_set_timestamp(mds);
	if (rc)
		return rc;

	rc = cxl_poison_state_init(mds);
	if (rc)
		return rc;

	rc = cxl_dev_state_identify(mds);
	if (rc)
		return rc;

	rc = cxl_mem_dpa_fetch(mds, &range_info);
	if (rc)
		return rc;

	rc = cxl_dpa_setup(cxlds, &range_info);
	if (rc)
		return rc;

	/* features are optional; log and continue on failure */
	rc = devm_cxl_setup_features(cxlds);
	if (rc)
		dev_dbg(&pdev->dev, "No CXL Features discovered\n");

	cxlmd = devm_cxl_add_memdev(cxlds, NULL);
	if (IS_ERR(cxlmd))
		return PTR_ERR(cxlmd);

	rc = devm_cxl_setup_fw_upload(&pdev->dev, mds);
	if (rc)
		return rc;

	rc = devm_cxl_sanitize_setup_notifier(&pdev->dev, cxlmd);
	if (rc)
		return rc;

	/* fwctl is optional; log and continue on failure */
	rc = devm_cxl_setup_fwctl(&pdev->dev, cxlmd);
	if (rc)
		dev_dbg(&pdev->dev, "No CXL FWCTL setup\n");

	pmu_count = cxl_count_regblock(pdev, CXL_REGLOC_RBI_PMU);
	if (pmu_count < 0)
		return pmu_count;

	/* register each PMU instance; a failure stops further PMU setup only */
	for (i = 0; i < pmu_count; i++) {
		struct cxl_pmu_regs pmu_regs;

		rc = cxl_find_regblock_instance(pdev, CXL_REGLOC_RBI_PMU, &map, i);
		if (rc) {
			dev_dbg(&pdev->dev, "Could not find PMU regblock\n");
			break;
		}

		rc = cxl_map_pmu_regs(&map, &pmu_regs);
		if (rc) {
			dev_dbg(&pdev->dev, "Could not map PMU regs\n");
			break;
		}

		rc = devm_cxl_pmu_add(cxlds->dev, &pmu_regs, cxlmd->id, i, CXL_PMU_MEMDEV);
		if (rc) {
			dev_dbg(&pdev->dev, "Could not add PMU instance\n");
			break;
		}
	}

	rc = cxl_event_config(host_bridge, mds, irq_avail);
	if (rc)
		return rc;

	/* saved state is restored on slot reset, see cxl_slot_reset() */
	pci_save_state(pdev);

	return rc;
}

static const struct pci_device_id cxl_mem_pci_tbl[] = {
	/* PCI class code for CXL.mem Type-3 Devices */
	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
	{ /* terminate list */ },
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);

/*
 * AER slot reset recovery: restore the PCI state saved at probe and re-attach
 * the memdev so the CXL.mem stack comes back up.
 */
static pci_ers_result_t cxl_slot_reset(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &cxlmd->dev;

	dev_info(&pdev->dev, "%s: restart CXL.mem after slot reset\n",
		 dev_name(dev));
	pci_restore_state(pdev);
	if (device_attach(dev) <= 0)
		return PCI_ERS_RESULT_DISCONNECT;
	return PCI_ERS_RESULT_RECOVERED;
}

/* AER resume callback: report whether the memdev survived recovery */
static void cxl_error_resume(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &cxlmd->dev;

	dev_info(&pdev->dev, "%s: error resume %s\n", dev_name(dev),
		 dev->driver ? "successful" : "failed");
}

static void cxl_reset_done(struct pci_dev *pdev)
{
	struct cxl_dev_state *cxlds = pci_get_drvdata(pdev);
	struct cxl_memdev *cxlmd = cxlds->cxlmd;
	struct device *dev = &pdev->dev;

	/*
	 * FLR does not expect to touch the HDM decoders and related
	 * registers. SBR, however, will wipe all device configurations.
	 * Issue a warning if there was an active decoder before the reset
	 * that no longer exists.
	 */
	guard(device)(&cxlmd->dev);
	if (cxlmd->endpoint &&
	    cxl_endpoint_decoder_reset_detected(cxlmd->endpoint)) {
		dev_crit(dev, "SBR happened without memory regions removal.\n");
		dev_crit(dev, "System may be unstable if regions hosted system memory.\n");
		add_taint(TAINT_USER, LOCKDEP_STILL_OK);
	}
}

static const struct pci_error_handlers cxl_error_handlers = {
	.error_detected	= cxl_error_detected,
	.slot_reset	= cxl_slot_reset,
	.resume		= cxl_error_resume,
	.cor_error_detected	= cxl_cor_error_detected,
	.reset_done	= cxl_reset_done,
};

static struct pci_driver cxl_pci_driver = {
	.name			= KBUILD_MODNAME,
	.id_table		= cxl_mem_pci_tbl,
	.probe			= cxl_pci_probe,
	.err_handler		= &cxl_error_handlers,
	.dev_groups		= cxl_rcd_groups,
	.driver	= {
		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,
	},
};

#define CXL_EVENT_HDR_FLAGS_REC_SEVERITY GENMASK(1, 0)
/*
 * Route one firmware-reported CPER event record to the matching cxl_pci
 * device's trace machinery. Silently drops records for devices not bound
 * to this driver.
 */
static void cxl_handle_cper_event(enum cxl_event_type ev_type,
				  struct cxl_cper_event_rec *rec)
{
	struct cper_cxl_event_devid *device_id = &rec->hdr.device_id;
	struct pci_dev *pdev __free(pci_dev_put) = NULL;
	enum cxl_event_log_type log_type;
	struct cxl_dev_state *cxlds;
	unsigned int devfn;
	u32 hdr_flags;

	pr_debug("CPER event %d for device %u:%u:%u.%u\n", ev_type,
		 device_id->segment_num, device_id->bus_num,
		 device_id->device_num, device_id->func_num);

	devfn = PCI_DEVFN(device_id->device_num, device_id->func_num);
	pdev = pci_get_domain_bus_and_slot(device_id->segment_num,
					   device_id->bus_num, devfn);
	if (!pdev)
		return;

	/* hold the device lock so drvdata cannot vanish underneath us */
	guard(device)(&pdev->dev);
	if (pdev->driver != &cxl_pci_driver)
		return;

	cxlds = pci_get_drvdata(pdev);
	if (!cxlds)
		return;

	/* Fabricate a log type */
	hdr_flags = get_unaligned_le24(rec->event.generic.hdr.flags);
	log_type = FIELD_GET(CXL_EVENT_HDR_FLAGS_REC_SEVERITY, hdr_flags);

	cxl_event_trace_record(cxlds->cxlmd, log_type, ev_type,
			       &uuid_null, &rec->event);
}

/* Drain all queued CPER records handed over by the GHES code */
static void cxl_cper_work_fn(struct work_struct *work)
{
	struct cxl_cper_work_data wd;

	while (cxl_cper_kfifo_get(&wd))
		cxl_handle_cper_event(wd.event_type, &wd.rec);
}
static DECLARE_WORK(cxl_cper_work, cxl_cper_work_fn);

static int __init cxl_pci_driver_init(void)
{
	int rc;

	rc = pci_register_driver(&cxl_pci_driver);
	if (rc)
		return rc;

	/* unwind driver registration if CPER work registration fails */
	rc = cxl_cper_register_work(&cxl_cper_work);
	if (rc)
		pci_unregister_driver(&cxl_pci_driver);

	return rc;
}

static void __exit cxl_pci_driver_exit(void)
{
	cxl_cper_unregister_work(&cxl_cper_work);
	cancel_work_sync(&cxl_cper_work);
	pci_unregister_driver(&cxl_pci_driver);
}

module_init(cxl_pci_driver_init);
module_exit(cxl_pci_driver_exit);
MODULE_DESCRIPTION("CXL: PCI manageability");
MODULE_LICENSE("GPL v2");
MODULE_IMPORT_NS("CXL");