// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2020 Intel Corporation. */

#include <linux/device.h>
#include <linux/slab.h>
#include <linux/idr.h>
#include <linux/pci.h>
#include <cxlmem.h>
#include "trace.h"
#include "core.h"

static DECLARE_RWSEM(cxl_memdev_rwsem);

/*
 * An entire PCI topology full of devices should be enough for any
 * config
 */
#define CXL_MEM_MAX_DEVS 65536

static int cxl_mem_major;
static DEFINE_IDA(cxl_memdev_ida);

static void cxl_memdev_release(struct device *dev)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);

	ida_free(&cxl_memdev_ida, cxlmd->id);
	kfree(cxlmd);
}

static char *cxl_memdev_devnode(const struct device *dev, umode_t *mode, kuid_t *uid,
				kgid_t *gid)
{
	return kasprintf(GFP_KERNEL, "cxl/%s", dev_name(dev));
}

static ssize_t firmware_version_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	return sysfs_emit(buf, "%.16s\n", cxlds->firmware_version);
}
static DEVICE_ATTR_RO(firmware_version);

static ssize_t payload_max_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	return sysfs_emit(buf, "%zu\n", cxlds->payload_size);
}
static DEVICE_ATTR_RO(payload_max);

static ssize_t label_storage_size_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	return sysfs_emit(buf, "%zu\n", cxlds->lsa_size);
}
static DEVICE_ATTR_RO(label_storage_size);

static ssize_t ram_size_show(struct device *dev, struct device_attribute *attr,
			     char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	unsigned long long len = resource_size(&cxlds->ram_res);

	return sysfs_emit(buf, "%#llx\n", len);
}

static struct device_attribute dev_attr_ram_size =
	__ATTR(size, 0444, ram_size_show, NULL);

static ssize_t pmem_size_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	unsigned long long len = resource_size(&cxlds->pmem_res);

	return sysfs_emit(buf, "%#llx\n", len);
}

static struct device_attribute dev_attr_pmem_size =
	__ATTR(size, 0444, pmem_size_show, NULL);

static ssize_t serial_show(struct device *dev, struct device_attribute *attr,
			   char *buf)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	return sysfs_emit(buf, "%#llx\n", cxlds->serial);
}
static DEVICE_ATTR_RO(serial);

static ssize_t numa_node_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	return sprintf(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);

static int cxl_get_poison_by_memdev(struct cxl_memdev *cxlmd)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	u64 offset, length;
	int rc = 0;

	/* CXL 3.0 Spec 8.2.9.8.4.1 Separate pmem and ram poison requests */
	if (resource_size(&cxlds->pmem_res)) {
		offset = cxlds->pmem_res.start;
		length = resource_size(&cxlds->pmem_res);
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		if (rc)
			return rc;
	}
	if (resource_size(&cxlds->ram_res)) {
		offset = cxlds->ram_res.start;
		length = resource_size(&cxlds->ram_res);
		rc = cxl_mem_get_poison(cxlmd, offset, length, NULL);
		/*
		 * Invalid Physical Address is not an error for
		 * volatile addresses. Device support is optional.
		 */
		if (rc == -EFAULT)
			rc = 0;
	}
	return rc;
}

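/**
 * cxl_trigger_poison_list() - Read back the poison list for a memdev
 * @cxlmd: the memory device to query
 *
 * Only valid for endpoint devices. When the endpoint port has committed
 * region decoders the poison is collected per endpoint via
 * cxl_get_poison_by_endpoint(), otherwise the device's pmem and ram DPA
 * ranges are queried directly.
 *
 * Return: 0 on success, negative error code otherwise.
 */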
int cxl_trigger_poison_list(struct cxl_memdev *cxlmd)
{
	struct cxl_port *port;
	int rc;

	port = dev_get_drvdata(&cxlmd->dev);
	if (!port || !is_cxl_endpoint(port))
		return -EINVAL;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	if (port->commit_end == -1) {
		/* No regions mapped to this memdev */
		rc = cxl_get_poison_by_memdev(cxlmd);
	} else {
		/* Regions mapped, collect poison by endpoint */
		rc = cxl_get_poison_by_endpoint(port);
	}
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_trigger_poison_list, CXL);

struct cxl_dpa_to_region_context {
	struct cxl_region *cxlr;
	u64 dpa;
};

static int __cxl_dpa_to_region(struct device *dev, void *arg)
{
	struct cxl_dpa_to_region_context *ctx = arg;
	struct cxl_endpoint_decoder *cxled;
	u64 dpa = ctx->dpa;

	if (!is_endpoint_decoder(dev))
		return 0;

	cxled = to_cxl_endpoint_decoder(dev);
	if (!cxled->dpa_res || !resource_size(cxled->dpa_res))
		return 0;

	if (dpa > cxled->dpa_res->end || dpa < cxled->dpa_res->start)
		return 0;

	dev_dbg(dev, "dpa:0x%llx mapped in region:%s\n", dpa,
		dev_name(&cxled->cxld.region->dev));

	ctx->cxlr = cxled->cxld.region;

	return 1;
}

static struct cxl_region *cxl_dpa_to_region(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dpa_to_region_context ctx;
	struct cxl_port *port;

	ctx = (struct cxl_dpa_to_region_context) {
		.dpa = dpa,
	};
	port = dev_get_drvdata(&cxlmd->dev);
	if (port && is_cxl_endpoint(port) && port->commit_end != -1)
		device_for_each_child(&port->dev, &ctx, __cxl_dpa_to_region);

	return ctx.cxlr;
}

static int cxl_validate_poison_dpa(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	if (!resource_size(&cxlds->dpa_res)) {
		dev_dbg(cxlds->dev, "device has no dpa resource\n");
		return -EINVAL;
	}
	if (dpa < cxlds->dpa_res.start || dpa > cxlds->dpa_res.end) {
		dev_dbg(cxlds->dev, "dpa:0x%llx not in resource:%pR\n",
			dpa, &cxlds->dpa_res);
		return -EINVAL;
	}
	if (!IS_ALIGNED(dpa, 64)) {
		dev_dbg(cxlds->dev, "dpa:0x%llx is not 64-byte aligned\n", dpa);
		return -EINVAL;
	}

	return 0;
}

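/**
 * cxl_inject_poison() - Inject poison at a device physical address
 * @cxlmd: the memory device to operate on
 * @dpa: the 64-byte aligned device physical address to poison
 *
 * A no-op unless CONFIG_DEBUG_FS is enabled. Validates @dpa against the
 * device's DPA resource, sends the Inject Poison mailbox command, warns
 * once if the address maps into a committed region, and emits a
 * cxl_poison trace event for the injected record.
 *
 * Return: 0 on success, negative error code otherwise.
 */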
int cxl_inject_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_mbox_inject_poison inject;
	struct cxl_poison_record record;
	struct cxl_mbox_cmd mbox_cmd;
	struct cxl_region *cxlr;
	int rc;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	rc = cxl_validate_poison_dpa(cxlmd, dpa);
	if (rc)
		goto out;

	inject.address = cpu_to_le64(dpa);
	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_INJECT_POISON,
		.size_in = sizeof(inject),
		.payload_in = &inject,
	};
	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
	if (rc)
		goto out;

	cxlr = cxl_dpa_to_region(cxlmd, dpa);
	if (cxlr)
		dev_warn_once(cxlds->dev,
			      "poison inject dpa:%#llx region: %s\n", dpa,
			      dev_name(&cxlr->dev));

	record = (struct cxl_poison_record) {
		.address = cpu_to_le64(dpa),
		.length = cpu_to_le32(1),
	};
	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_INJECT);
out:
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_inject_poison, CXL);

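/**
 * cxl_clear_poison() - Clear poison at a device physical address
 * @cxlmd: the memory device to operate on
 * @dpa: the 64-byte aligned device physical address to clear
 *
 * A no-op unless CONFIG_DEBUG_FS is enabled. Validates @dpa, sends the
 * Clear Poison mailbox command with zeroed write-data, warns once if the
 * address maps into a committed region, and emits a cxl_poison trace
 * event for the cleared record.
 *
 * Return: 0 on success, negative error code otherwise.
 */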
int cxl_clear_poison(struct cxl_memdev *cxlmd, u64 dpa)
{
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_mbox_clear_poison clear;
	struct cxl_poison_record record;
	struct cxl_mbox_cmd mbox_cmd;
	struct cxl_region *cxlr;
	int rc;

	if (!IS_ENABLED(CONFIG_DEBUG_FS))
		return 0;

	rc = down_read_interruptible(&cxl_dpa_rwsem);
	if (rc)
		return rc;

	rc = cxl_validate_poison_dpa(cxlmd, dpa);
	if (rc)
		goto out;

	/*
	 * In CXL 3.0 Spec 8.2.9.8.4.3, the Clear Poison mailbox command
	 * is defined to accept 64 bytes of write-data, along with the
	 * address to clear. This driver uses zeroes as write-data.
	 */
	clear = (struct cxl_mbox_clear_poison) {
		.address = cpu_to_le64(dpa)
	};

	mbox_cmd = (struct cxl_mbox_cmd) {
		.opcode = CXL_MBOX_OP_CLEAR_POISON,
		.size_in = sizeof(clear),
		.payload_in = &clear,
	};

	rc = cxl_internal_send_cmd(cxlds, &mbox_cmd);
	if (rc)
		goto out;

	cxlr = cxl_dpa_to_region(cxlmd, dpa);
	if (cxlr)
		dev_warn_once(cxlds->dev, "poison clear dpa:%#llx region: %s\n",
			      dpa, dev_name(&cxlr->dev));

	record = (struct cxl_poison_record) {
		.address = cpu_to_le64(dpa),
		.length = cpu_to_le32(1),
	};
	trace_cxl_poison(cxlmd, cxlr, &record, 0, 0, CXL_POISON_TRACE_CLEAR);
out:
	up_read(&cxl_dpa_rwsem);

	return rc;
}
EXPORT_SYMBOL_NS_GPL(cxl_clear_poison, CXL);

static struct attribute *cxl_memdev_attributes[] = {
	&dev_attr_serial.attr,
	&dev_attr_firmware_version.attr,
	&dev_attr_payload_max.attr,
	&dev_attr_label_storage_size.attr,
	&dev_attr_numa_node.attr,
	NULL,
};

static struct attribute *cxl_memdev_pmem_attributes[] = {
	&dev_attr_pmem_size.attr,
	NULL,
};

static struct attribute *cxl_memdev_ram_attributes[] = {
	&dev_attr_ram_size.attr,
	NULL,
};

static umode_t cxl_memdev_visible(struct kobject *kobj, struct attribute *a,
				  int n)
{
	if (!IS_ENABLED(CONFIG_NUMA) && a == &dev_attr_numa_node.attr)
		return 0;
	return a->mode;
}

static struct attribute_group cxl_memdev_attribute_group = {
	.attrs = cxl_memdev_attributes,
	.is_visible = cxl_memdev_visible,
};

static struct attribute_group cxl_memdev_ram_attribute_group = {
	.name = "ram",
	.attrs = cxl_memdev_ram_attributes,
};

static struct attribute_group cxl_memdev_pmem_attribute_group = {
	.name = "pmem",
	.attrs = cxl_memdev_pmem_attributes,
};

static const struct attribute_group *cxl_memdev_attribute_groups[] = {
	&cxl_memdev_attribute_group,
	&cxl_memdev_ram_attribute_group,
	&cxl_memdev_pmem_attribute_group,
	NULL,
};

static const struct device_type cxl_memdev_type = {
	.name = "cxl_memdev",
	.release = cxl_memdev_release,
	.devnode = cxl_memdev_devnode,
	.groups = cxl_memdev_attribute_groups,
};

bool is_cxl_memdev(const struct device *dev)
{
	return dev->type == &cxl_memdev_type;
}
EXPORT_SYMBOL_NS_GPL(is_cxl_memdev, CXL);

/**
 * set_exclusive_cxl_commands() - atomically disable user cxl commands
 * @cxlds: The device state to operate on
 * @cmds: bitmap of commands to mark exclusive
 *
 * Grab the cxl_memdev_rwsem in write mode to flush in-flight
 * invocations of the ioctl path and then disable future execution of
 * commands with the command ids set in @cmds.
 */
void set_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
{
	down_write(&cxl_memdev_rwsem);
	bitmap_or(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
		  CXL_MEM_COMMAND_ID_MAX);
	up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(set_exclusive_cxl_commands, CXL);

/**
 * clear_exclusive_cxl_commands() - atomically enable user cxl commands
 * @cxlds: The device state to modify
 * @cmds: bitmap of commands to mark available for userspace
 */
void clear_exclusive_cxl_commands(struct cxl_dev_state *cxlds, unsigned long *cmds)
{
	down_write(&cxl_memdev_rwsem);
	bitmap_andnot(cxlds->exclusive_cmds, cxlds->exclusive_cmds, cmds,
		      CXL_MEM_COMMAND_ID_MAX);
	up_write(&cxl_memdev_rwsem);
}
EXPORT_SYMBOL_NS_GPL(clear_exclusive_cxl_commands, CXL);

static void cxl_memdev_shutdown(struct device *dev)
{
	struct cxl_memdev *cxlmd = to_cxl_memdev(dev);

	down_write(&cxl_memdev_rwsem);
	cxlmd->cxlds = NULL;
	up_write(&cxl_memdev_rwsem);
}

static void cxl_memdev_unregister(void *_cxlmd)
{
	struct cxl_memdev *cxlmd = _cxlmd;
	struct device *dev = &cxlmd->dev;

	cxl_memdev_shutdown(dev);
	cdev_device_del(&cxlmd->cdev, dev);
	put_device(dev);
}

static void detach_memdev(struct work_struct *work)
{
	struct cxl_memdev *cxlmd;

	cxlmd = container_of(work, typeof(*cxlmd), detach_work);
	device_release_driver(&cxlmd->dev);
	put_device(&cxlmd->dev);
}

static struct lock_class_key cxl_memdev_key;

static struct cxl_memdev *cxl_memdev_alloc(struct cxl_dev_state *cxlds,
					   const struct file_operations *fops)
{
	struct cxl_memdev *cxlmd;
	struct device *dev;
	struct cdev *cdev;
	int rc;

	cxlmd = kzalloc(sizeof(*cxlmd), GFP_KERNEL);
	if (!cxlmd)
		return ERR_PTR(-ENOMEM);

	rc = ida_alloc_max(&cxl_memdev_ida, CXL_MEM_MAX_DEVS - 1, GFP_KERNEL);
	if (rc < 0)
		goto err;
	cxlmd->id = rc;
	cxlmd->depth = -1;

	dev = &cxlmd->dev;
	device_initialize(dev);
	lockdep_set_class(&dev->mutex, &cxl_memdev_key);
	dev->parent = cxlds->dev;
	dev->bus = &cxl_bus_type;
	dev->devt = MKDEV(cxl_mem_major, cxlmd->id);
	dev->type = &cxl_memdev_type;
	device_set_pm_not_required(dev);
	INIT_WORK(&cxlmd->detach_work, detach_memdev);

	cdev = &cxlmd->cdev;
	cdev_init(cdev, fops);
	return cxlmd;

err:
	kfree(cxlmd);
	return ERR_PTR(rc);
}

static long __cxl_memdev_ioctl(struct cxl_memdev *cxlmd, unsigned int cmd,
			       unsigned long arg)
{
	switch (cmd) {
	case CXL_MEM_QUERY_COMMANDS:
		return cxl_query_cmd(cxlmd, (void __user *)arg);
	case CXL_MEM_SEND_COMMAND:
		return cxl_send_cmd(cxlmd, (void __user *)arg);
	default:
		return -ENOTTY;
	}
}

static long cxl_memdev_ioctl(struct file *file, unsigned int cmd,
			     unsigned long arg)
{
	struct cxl_memdev *cxlmd = file->private_data;
	int rc = -ENXIO;

	down_read(&cxl_memdev_rwsem);
	if (cxlmd->cxlds)
		rc = __cxl_memdev_ioctl(cxlmd, cmd, arg);
	up_read(&cxl_memdev_rwsem);

	return rc;
}

static int cxl_memdev_open(struct inode *inode, struct file *file)
{
	struct cxl_memdev *cxlmd =
		container_of(inode->i_cdev, typeof(*cxlmd), cdev);

	get_device(&cxlmd->dev);
	file->private_data = cxlmd;

	return 0;
}

static int cxl_memdev_release_file(struct inode *inode, struct file *file)
{
	struct cxl_memdev *cxlmd =
		container_of(inode->i_cdev, typeof(*cxlmd), cdev);

	put_device(&cxlmd->dev);

	return 0;
}

static const struct file_operations cxl_memdev_fops = {
	.owner = THIS_MODULE,
	.unlocked_ioctl = cxl_memdev_ioctl,
	.open = cxl_memdev_open,
	.release = cxl_memdev_release_file,
	.compat_ioctl = compat_ptr_ioctl,
	.llseek = noop_llseek,
};

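/**
 * devm_cxl_add_memdev() - Create and register a CXL memdev for @cxlds
 * @cxlds: the device state backing the new memdev
 *
 * Allocates a cxl_memdev, names it "mem%d", activates the ioctl path by
 * linking @cxlds, and publishes the character device via
 * cdev_device_add(). Unregistration is tied to the lifetime of
 * @cxlds->dev through devm_add_action_or_reset().
 *
 * Return: the new memdev on success, ERR_PTR() on failure.
 */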
struct cxl_memdev *devm_cxl_add_memdev(struct cxl_dev_state *cxlds)
{
	struct cxl_memdev *cxlmd;
	struct device *dev;
	struct cdev *cdev;
	int rc;

	cxlmd = cxl_memdev_alloc(cxlds, &cxl_memdev_fops);
	if (IS_ERR(cxlmd))
		return cxlmd;

	dev = &cxlmd->dev;
	rc = dev_set_name(dev, "mem%d", cxlmd->id);
	if (rc)
		goto err;

	/*
	 * Activate ioctl operations, no cxl_memdev_rwsem manipulation
	 * needed as this is ordered with cdev_add() publishing the device.
	 */
	cxlmd->cxlds = cxlds;
	cxlds->cxlmd = cxlmd;

	cdev = &cxlmd->cdev;
	rc = cdev_device_add(cdev, dev);
	if (rc)
		goto err;

	rc = devm_add_action_or_reset(cxlds->dev, cxl_memdev_unregister, cxlmd);
	if (rc)
		return ERR_PTR(rc);
	return cxlmd;

err:
	/*
	 * The cdev was briefly live, shutdown any ioctl operations that
	 * saw that state.
	 */
	cxl_memdev_shutdown(dev);
	put_device(dev);
	return ERR_PTR(rc);
}
EXPORT_SYMBOL_NS_GPL(devm_cxl_add_memdev, CXL);

__init int cxl_memdev_init(void)
{
	dev_t devt;
	int rc;

	rc = alloc_chrdev_region(&devt, 0, CXL_MEM_MAX_DEVS, "cxl");
	if (rc)
		return rc;

	cxl_mem_major = MAJOR(devt);

	return 0;
}

void cxl_memdev_exit(void)
{
	unregister_chrdev_region(MKDEV(cxl_mem_major, 0), CXL_MEM_MAX_DEVS);
}