// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. All rights reserved. */
#include <linux/io-64-nonatomic-lo-hi.h>
#include <linux/module.h>
#include <linux/sizes.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/pci.h>
#include <linux/io.h>
#include "cxlmem.h"
#include "pci.h"
#include "cxl.h"

/**
 * DOC: cxl pci
 *
 * This implements the PCI-exclusive functionality for a CXL device as it is
 * defined by the Compute Express Link specification. CXL devices may surface
 * certain functionality even when CXL operation is not enabled. While this
 * driver is focused on the PCI-specific aspects of a CXL device, it binds to
 * the specific CXL memory device class code, and therefore the implementation
 * of cxl_pci is focused on CXL memory devices.
 *
 * The driver has several responsibilities, mainly:
 *  - Create the memX device and register it on the CXL bus.
 *  - Enumerate the device's register interface and map it.
 *  - Register an nvdimm bridge device with cxl_core.
 *  - Register a CXL mailbox with cxl_core.
 */

#define cxl_doorbell_busy(cxlm)                                                \
	(readl((cxlm)->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET) &                  \
	 CXLDEV_MBOX_CTRL_DOORBELL)

/* CXL 2.0 - 8.2.8.4: 2 second mailbox timeout (value expressed in jiffies) */
#define CXL_MAILBOX_TIMEOUT_MS (2 * HZ)

static int cxl_pci_mbox_wait_for_doorbell(struct cxl_mem *cxlm)
{
	const unsigned long start = jiffies;
	unsigned long end = start;

	while (cxl_doorbell_busy(cxlm)) {
		end = jiffies;

		if (time_after(end, start + CXL_MAILBOX_TIMEOUT_MS)) {
			/* Check again in case preempted before timeout test */
			if (!cxl_doorbell_busy(cxlm))
				break;
			return -ETIMEDOUT;
		}
		cpu_relax();
	}

	dev_dbg(cxlm->dev, "Doorbell wait took %dms",
		jiffies_to_msecs(end) - jiffies_to_msecs(start));
	return 0;
}

static void cxl_pci_mbox_timeout(struct cxl_mem *cxlm,
				 struct cxl_mbox_cmd *mbox_cmd)
{
	struct device *dev = cxlm->dev;

	dev_dbg(dev, "Mailbox command (opcode: %#x size: %zub) timed out\n",
		mbox_cmd->opcode, mbox_cmd->size_in);
}

/**
 * __cxl_pci_mbox_send_cmd() - Execute a mailbox command
 * @cxlm: The CXL memory device to communicate with.
 * @mbox_cmd: Command to send to the memory device.
 *
 * Context: Any context. Expects mbox_mutex to be held.
 * Return: -ETIMEDOUT if timeout occurred waiting for completion. 0 on success.
 *         Caller should check the return code in @mbox_cmd to make sure it
 *         succeeded.
 *
 * This is a generic form of the CXL mailbox send command; it uses only the
 * registers defined by the mailbox capability ID - CXL 2.0 8.2.8.4. Memory
 * devices, and perhaps other types of CXL devices, may have further
 * information available upon error conditions. Driver facilities wishing to
 * send mailbox commands should use the wrapper command.
 *
 * The CXL spec allows for up to two mailboxes. The intention is for the
 * primary mailbox to be OS controlled and the secondary mailbox to be used by
 * system firmware. This allows the OS and firmware to communicate with the
 * device without needing to coordinate with each other. The driver only uses
 * the primary mailbox.
 */
static int __cxl_pci_mbox_send_cmd(struct cxl_mem *cxlm,
				   struct cxl_mbox_cmd *mbox_cmd)
{
	void __iomem *payload = cxlm->regs.mbox + CXLDEV_MBOX_PAYLOAD_OFFSET;
	struct device *dev = cxlm->dev;
	u64 cmd_reg, status_reg;
	size_t out_len;
	int rc;

	lockdep_assert_held(&cxlm->mbox_mutex);

	/*
	 * Here are the steps from 8.2.8.4 of the CXL 2.0 spec.
	 * 1. Caller reads MB Control Register to verify doorbell is clear
	 * 2. Caller writes Command Register
	 * 3. Caller writes Command Payload Registers if input payload is non-empty
	 * 4. Caller writes MB Control Register to set doorbell
	 * 5. Caller either polls for doorbell to be clear or waits for interrupt if configured
	 * 6. Caller reads MB Status Register to fetch Return code
	 * 7. If command successful, Caller reads Command Register to get Payload Length
	 * 8. If output payload is non-empty, host reads Command Payload Registers
	 *
	 * Hardware is free to do whatever it wants before the doorbell is
	 * rung, and isn't allowed to change anything after it clears the
	 * doorbell. As such, steps 2 and 3 can happen in any order, and steps
	 * 6, 7, 8 can also happen in any order (though some orders might not
	 * make sense).
	 */

	/* #1 */
	if (cxl_doorbell_busy(cxlm)) {
		dev_err_ratelimited(dev, "Mailbox re-busy after acquiring\n");
		return -EBUSY;
	}

	cmd_reg = FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK,
			     mbox_cmd->opcode);
	if (mbox_cmd->size_in) {
		if (WARN_ON(!mbox_cmd->payload_in))
			return -EINVAL;

		cmd_reg |= FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK,
				      mbox_cmd->size_in);
		memcpy_toio(payload, mbox_cmd->payload_in, mbox_cmd->size_in);
	}

	/* #2, #3 */
	writeq(cmd_reg, cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);

	/* #4 */
	dev_dbg(dev, "Sending command\n");
	writel(CXLDEV_MBOX_CTRL_DOORBELL,
	       cxlm->regs.mbox + CXLDEV_MBOX_CTRL_OFFSET);

	/* #5 */
	rc = cxl_pci_mbox_wait_for_doorbell(cxlm);
	if (rc == -ETIMEDOUT) {
		cxl_pci_mbox_timeout(cxlm, mbox_cmd);
		return rc;
	}

	/* #6 */
	status_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_STATUS_OFFSET);
	mbox_cmd->return_code =
		FIELD_GET(CXLDEV_MBOX_STATUS_RET_CODE_MASK, status_reg);

	if (mbox_cmd->return_code != 0) {
		dev_dbg(dev, "Mailbox operation had an error\n");
		return 0;
	}

	/* #7 */
	cmd_reg = readq(cxlm->regs.mbox + CXLDEV_MBOX_CMD_OFFSET);
	out_len = FIELD_GET(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, cmd_reg);

	/* #8 */
	if (out_len && mbox_cmd->payload_out) {
		/*
		 * Sanitize the copy. If hardware misbehaves, out_len per the
		 * spec can actually be greater than the max allowed size (21
		 * bits available but spec defined 1M max). The caller also may
		 * have requested less data than the hardware supplied even
		 * within spec.
		 */
		size_t n = min3(mbox_cmd->size_out, cxlm->payload_size, out_len);

		memcpy_fromio(mbox_cmd->payload_out, payload, n);
		mbox_cmd->size_out = n;
	} else {
		mbox_cmd->size_out = 0;
	}

	return 0;
}
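
/*
 * Worked example of the Command Register encoding performed above,
 * assuming cxlmem.h places the opcode in bits 15:0 and the payload
 * length in bits 36:16 of the 64-bit Command Register (CXL 2.0
 * 8.2.8.4). For a hypothetical opcode 0x4300 with an 8 byte input
 * payload:
 *
 *	FIELD_PREP(CXLDEV_MBOX_CMD_COMMAND_OPCODE_MASK, 0x4300) |
 *	FIELD_PREP(CXLDEV_MBOX_CMD_PAYLOAD_LENGTH_MASK, 8)
 *
 * evaluates to 0x84300: the opcode occupies the low 16 bits and the
 * length of 8 lands at bit 16. The opcode value is illustrative only.
 */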

/**
 * cxl_pci_mbox_get() - Acquire exclusive access to the mailbox.
 * @cxlm: The memory device to gain access to.
 *
 * Context: Any context. Takes the mbox_mutex.
 * Return: 0 if exclusive access was acquired.
 */
static int cxl_pci_mbox_get(struct cxl_mem *cxlm)
{
	struct device *dev = cxlm->dev;
	u64 md_status;
	int rc;

	mutex_lock_io(&cxlm->mbox_mutex);

	/*
	 * XXX: There is some amount of ambiguity in the 2.0 version of the
	 * spec around the mailbox interface ready bit (8.2.8.5.1.1). The
	 * purpose of the bit is to allow firmware running on the device to
	 * notify the driver that it's ready to receive commands. It is
	 * unclear if the bit needs to be read for each mailbox transaction,
	 * i.e. whether the firmware can switch it on and off as needed.
	 * Second, there is no defined timeout for mailbox ready, like there
	 * is for the doorbell interface.
	 *
	 * Assumptions:
	 * 1. The firmware might toggle the Mailbox Interface Ready bit; check
	 *    it for every command.
	 *
	 * 2. If the doorbell is clear, the firmware should have first set the
	 *    Mailbox Interface Ready bit. Therefore, waiting for the doorbell
	 *    to become clear is sufficient.
	 */
	rc = cxl_pci_mbox_wait_for_doorbell(cxlm);
	if (rc) {
		dev_warn(dev, "Mailbox interface not ready\n");
		goto out;
	}

	md_status = readq(cxlm->regs.memdev + CXLMDEV_STATUS_OFFSET);
	if (!((md_status & CXLMDEV_MBOX_IF_READY) && CXLMDEV_READY(md_status))) {
		dev_err(dev, "mbox: reported doorbell ready, but not mbox ready\n");
		rc = -EBUSY;
		goto out;
	}

	/*
	 * Hardware shouldn't allow a ready status but also have failure bits
	 * set. Spit out an error; this should prompt a bug report.
	 */
	rc = -EFAULT;
	if (md_status & CXLMDEV_DEV_FATAL) {
		dev_err(dev, "mbox: reported ready, but fatal\n");
		goto out;
	}
	if (md_status & CXLMDEV_FW_HALT) {
		dev_err(dev, "mbox: reported ready, but halted\n");
		goto out;
	}
	if (CXLMDEV_RESET_NEEDED(md_status)) {
		dev_err(dev, "mbox: reported ready, but reset needed\n");
		goto out;
	}

	/* with lock held */
	return 0;

out:
	mutex_unlock(&cxlm->mbox_mutex);
	return rc;
}

/**
 * cxl_pci_mbox_put() - Release exclusive access to the mailbox.
 * @cxlm: The CXL memory device to communicate with.
 *
 * Context: Any context. Expects mbox_mutex to be held.
 */
static void cxl_pci_mbox_put(struct cxl_mem *cxlm)
{
	mutex_unlock(&cxlm->mbox_mutex);
}

static int cxl_pci_mbox_send(struct cxl_mem *cxlm, struct cxl_mbox_cmd *cmd)
{
	int rc;

	rc = cxl_pci_mbox_get(cxlm);
	if (rc)
		return rc;

	rc = __cxl_pci_mbox_send_cmd(cxlm, cmd);
	cxl_pci_mbox_put(cxlm);

	return rc;
}

static int cxl_pci_setup_mailbox(struct cxl_mem *cxlm)
{
	const int cap = readl(cxlm->regs.mbox + CXLDEV_MBOX_CAPS_OFFSET);

	cxlm->mbox_send = cxl_pci_mbox_send;
	cxlm->payload_size =
		1 << FIELD_GET(CXLDEV_MBOX_CAP_PAYLOAD_SIZE_MASK, cap);

	/*
	 * CXL 2.0 8.2.8.4.3 Mailbox Capabilities Register
	 *
	 * If the size is too small, mandatory commands will not work and so
	 * there's no point in going forward. If the size is too large, there's
	 * no harm in soft limiting it.
	 */
	cxlm->payload_size = min_t(size_t, cxlm->payload_size, SZ_1M);
	if (cxlm->payload_size < 256) {
		dev_err(cxlm->dev, "Mailbox is too small (%zub)",
			cxlm->payload_size);
		return -ENXIO;
	}

	dev_dbg(cxlm->dev, "Mailbox payload sized %zu", cxlm->payload_size);

	return 0;
}
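
/*
 * Sketch of how a cxl_core facility might drive the ->mbox_send()
 * operation wired up by cxl_pci_setup_mailbox() above. This is an
 * illustrative example only, kept out of compilation; the helper name
 * and opcode are hypothetical, while the struct cxl_mbox_cmd fields
 * match those consumed by __cxl_pci_mbox_send_cmd():
 *
 *	static int example_query(struct cxl_mem *cxlm, void *out, size_t len)
 *	{
 *		struct cxl_mbox_cmd cmd = {
 *			.opcode = 0x4300,	// hypothetical opcode
 *			.payload_out = out,
 *			.size_out = len,
 *		};
 *		int rc;
 *
 *		rc = cxlm->mbox_send(cxlm, &cmd);
 *		if (rc)
 *			return rc;
 *
 *		// transport succeeded; check the hardware return code
 *		return cmd.return_code ? -ENXIO : 0;
 *	}
 */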

static int cxl_map_regblock(struct pci_dev *pdev, struct cxl_register_map *map)
{
	void __iomem *addr;
	int bar = map->barno;
	struct device *dev = &pdev->dev;
	resource_size_t offset = map->block_offset;

	/* Basic sanity check that BAR is big enough */
	if (pci_resource_len(pdev, bar) < offset) {
		dev_err(dev, "BAR%d: %pr: too small (offset: %pa)\n", bar,
			&pdev->resource[bar], &offset);
		return -ENXIO;
	}

	addr = pci_iomap(pdev, bar, 0);
	if (!addr) {
		dev_err(dev, "failed to map registers\n");
		return -ENOMEM;
	}

	dev_dbg(dev, "Mapped CXL Memory Device resource bar %u @ %pa\n",
		bar, &offset);

	map->base = addr + map->block_offset;
	return 0;
}

static void cxl_unmap_regblock(struct pci_dev *pdev,
			       struct cxl_register_map *map)
{
	pci_iounmap(pdev, map->base - map->block_offset);
	map->base = NULL;
}

static int cxl_probe_regs(struct pci_dev *pdev, struct cxl_register_map *map)
{
	struct cxl_component_reg_map *comp_map;
	struct cxl_device_reg_map *dev_map;
	struct device *dev = &pdev->dev;
	void __iomem *base = map->base;

	switch (map->reg_type) {
	case CXL_REGLOC_RBI_COMPONENT:
		comp_map = &map->component_map;
		cxl_probe_component_regs(dev, base, comp_map);
		if (!comp_map->hdm_decoder.valid) {
			dev_err(dev, "HDM decoder registers not found\n");
			return -ENXIO;
		}

		dev_dbg(dev, "Set up component registers\n");
		break;
	case CXL_REGLOC_RBI_MEMDEV:
		dev_map = &map->device_map;
		cxl_probe_device_regs(dev, base, dev_map);
		if (!dev_map->status.valid || !dev_map->mbox.valid ||
		    !dev_map->memdev.valid) {
			dev_err(dev, "registers not found: %s%s%s\n",
				!dev_map->status.valid ? "status " : "",
				!dev_map->mbox.valid ? "mbox " : "",
				!dev_map->memdev.valid ? "memdev " : "");
			return -ENXIO;
		}

		dev_dbg(dev, "Set up device registers\n");
		break;
	default:
		break;
	}

	return 0;
}

static int cxl_map_regs(struct cxl_mem *cxlm, struct cxl_register_map *map)
{
	struct device *dev = cxlm->dev;
	struct pci_dev *pdev = to_pci_dev(dev);

	switch (map->reg_type) {
	case CXL_REGLOC_RBI_COMPONENT:
		cxl_map_component_regs(pdev, &cxlm->regs.component, map);
		dev_dbg(dev, "Mapping component registers...\n");
		break;
	case CXL_REGLOC_RBI_MEMDEV:
		cxl_map_device_regs(pdev, &cxlm->regs.device_regs, map);
		dev_dbg(dev, "Mapping device registers...\n");
		break;
	default:
		break;
	}

	return 0;
}

static void cxl_decode_regblock(u32 reg_lo, u32 reg_hi,
				struct cxl_register_map *map)
{
	map->block_offset =
		((u64)reg_hi << 32) | (reg_lo & CXL_REGLOC_ADDR_MASK);
	map->barno = FIELD_GET(CXL_REGLOC_BIR_MASK, reg_lo);
	map->reg_type = FIELD_GET(CXL_REGLOC_RBI_MASK, reg_lo);
}

/**
 * cxl_find_regblock() - Locate register blocks by type
 * @pdev: The CXL PCI device to enumerate.
 * @type: Register Block Indicator id
 * @map: Enumeration output, clobbered on error
 *
 * Return: 0 if register block enumerated, negative error code otherwise
 *
 * A CXL DVSEC may point to one or more register blocks; search for them
 * by @type.
 */
static int cxl_find_regblock(struct pci_dev *pdev, enum cxl_regloc_type type,
			     struct cxl_register_map *map)
{
	u32 regloc_size, regblocks;
	int regloc, i;

	regloc = pci_find_dvsec_capability(pdev, PCI_DVSEC_VENDOR_ID_CXL,
					   PCI_DVSEC_ID_CXL_REGLOC_DVSEC_ID);
	if (!regloc)
		return -ENXIO;

	pci_read_config_dword(pdev, regloc + PCI_DVSEC_HEADER1, &regloc_size);
	regloc_size = FIELD_GET(PCI_DVSEC_HEADER1_LENGTH_MASK, regloc_size);

	regloc += PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET;
	regblocks = (regloc_size - PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET) / 8;

	for (i = 0; i < regblocks; i++, regloc += 8) {
		u32 reg_lo, reg_hi;

		pci_read_config_dword(pdev, regloc, &reg_lo);
		pci_read_config_dword(pdev, regloc + 4, &reg_hi);

		cxl_decode_regblock(reg_lo, reg_hi, map);

		if (map->reg_type == type)
			return 0;
	}

	return -ENODEV;
}
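
/*
 * Worked example for the walk above, with assumed values: if the
 * Register Locator DVSEC reports a length of 0x24 bytes, and
 * PCI_DVSEC_ID_CXL_REGLOC_BLOCK1_OFFSET is 0xC (its value in the
 * local pci.h at the time of writing), then (0x24 - 0xC) / 8 = 3
 * register block entries follow. An entry of reg_lo = 0x00010302,
 * reg_hi = 0 would decode via cxl_decode_regblock() as BAR 2
 * (bits 2:0), register block type 3 / CXL_REGLOC_RBI_MEMDEV
 * (bits 15:8), and a block offset of 0x10000 (bits 31:16 of reg_lo
 * plus the upper dword). Values are illustrative, not from real
 * hardware.
 */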

static int cxl_setup_regs(struct pci_dev *pdev, enum cxl_regloc_type type,
			  struct cxl_register_map *map)
{
	int rc;

	rc = cxl_find_regblock(pdev, type, map);
	if (rc)
		return rc;

	rc = cxl_map_regblock(pdev, map);
	if (rc)
		return rc;

	rc = cxl_probe_regs(pdev, map);
	cxl_unmap_regblock(pdev, map);

	return rc;
}

static int cxl_pci_probe(struct pci_dev *pdev, const struct pci_device_id *id)
{
	struct cxl_register_map map;
	struct cxl_memdev *cxlmd;
	struct cxl_mem *cxlm;
	int rc;

	/*
	 * Double check the anonymous union trickery in struct cxl_regs
	 * FIXME switch to struct_group()
	 */
	BUILD_BUG_ON(offsetof(struct cxl_regs, memdev) !=
		     offsetof(struct cxl_regs, device_regs.memdev));

	rc = pcim_enable_device(pdev);
	if (rc)
		return rc;

	cxlm = cxl_mem_create(&pdev->dev);
	if (IS_ERR(cxlm))
		return PTR_ERR(cxlm);

	rc = cxl_setup_regs(pdev, CXL_REGLOC_RBI_MEMDEV, &map);
	if (rc)
		return rc;

	rc = cxl_map_regs(cxlm, &map);
	if (rc)
		return rc;

	rc = cxl_pci_setup_mailbox(cxlm);
	if (rc)
		return rc;

	rc = cxl_mem_enumerate_cmds(cxlm);
	if (rc)
		return rc;

	rc = cxl_mem_identify(cxlm);
	if (rc)
		return rc;

	rc = cxl_mem_create_range_info(cxlm);
	if (rc)
		return rc;

	cxlmd = devm_cxl_add_memdev(cxlm);
	if (IS_ERR(cxlmd))
		return PTR_ERR(cxlmd);

	if (range_len(&cxlm->pmem_range) && IS_ENABLED(CONFIG_CXL_PMEM))
		rc = devm_cxl_add_nvdimm(&pdev->dev, cxlmd);

	return rc;
}
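
/*
 * The class code match below evaluates to 0x050210, assuming the
 * definitions at the time of writing (PCI_CLASS_MEMORY_CXL == 0x0502
 * in pci_ids.h, CXL_MEMORY_PROGIF == 0x10 in the local headers):
 * base class 05h (memory controller), subclass 02h (CXL memory
 * device), programming interface 10h. The ~0 mask requires an exact
 * match on all three bytes.
 */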
static const struct pci_device_id cxl_mem_pci_tbl[] = {
	/* PCI class code for CXL.mem Type-3 Devices */
	{ PCI_DEVICE_CLASS((PCI_CLASS_MEMORY_CXL << 8 | CXL_MEMORY_PROGIF), ~0)},
	{ /* terminate list */ },
};
MODULE_DEVICE_TABLE(pci, cxl_mem_pci_tbl);

static struct pci_driver cxl_pci_driver = {
	.name			= KBUILD_MODNAME,
	.id_table		= cxl_mem_pci_tbl,
	.probe			= cxl_pci_probe,
	.driver	= {
		.probe_type	= PROBE_PREFER_ASYNCHRONOUS,
	},
};

MODULE_LICENSE("GPL v2");
module_pci_driver(cxl_pci_driver);
MODULE_IMPORT_NS(CXL);