// SPDX-License-Identifier: GPL-2.0-only
/* Copyright(c) 2021 Intel Corporation. All rights reserved. */
#include <linux/libnvdimm.h>
#include <asm/unaligned.h>
#include <linux/device.h>
#include <linux/module.h>
#include <linux/ndctl.h>
#include <linux/async.h>
#include <linux/slab.h>
#include <linux/nd.h>
#include "cxlmem.h"
#include "cxl.h"

/*
 * Ordered workqueue for cxl nvdimm device arrival and departure
 * to coordinate bus rescans when a bridge arrives and trigger remove
 * operations when the bridge is removed.
 */
static struct workqueue_struct *cxl_pmem_wq;

static __read_mostly DECLARE_BITMAP(exclusive_cmds, CXL_MEM_COMMAND_ID_MAX);

static void clear_exclusive(void *cxlds)
{
	clear_exclusive_cxl_commands(cxlds, exclusive_cmds);
}

static void unregister_nvdimm(void *nvdimm)
{
	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
	struct cxl_nvdimm_bridge *cxl_nvb = cxl_nvd->bridge;
	struct cxl_pmem_region *cxlr_pmem;

	device_lock(&cxl_nvb->dev);
	cxlr_pmem = cxl_nvd->region;
	dev_set_drvdata(&cxl_nvd->dev, NULL);
	cxl_nvd->region = NULL;
	device_unlock(&cxl_nvb->dev);

	if (cxlr_pmem) {
		device_release_driver(&cxlr_pmem->dev);
		put_device(&cxlr_pmem->dev);
	}

	nvdimm_delete(nvdimm);
	cxl_nvd->bridge = NULL;
}

static int cxl_nvdimm_probe(struct device *dev)
{
	struct cxl_nvdimm *cxl_nvd = to_cxl_nvdimm(dev);
	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
	unsigned long flags = 0, cmd_mask = 0;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;
	struct cxl_nvdimm_bridge *cxl_nvb;
	struct nvdimm *nvdimm;
	int rc;

	cxl_nvb = cxl_find_nvdimm_bridge(dev);
	if (!cxl_nvb)
		return -ENXIO;

	device_lock(&cxl_nvb->dev);
	if (!cxl_nvb->nvdimm_bus) {
		rc = -ENXIO;
		goto out;
	}

	set_exclusive_cxl_commands(cxlds, exclusive_cmds);
	rc = devm_add_action_or_reset(dev, clear_exclusive, cxlds);
	if (rc)
		goto out;

	set_bit(NDD_LABELING, &flags);
	set_bit(ND_CMD_GET_CONFIG_SIZE, &cmd_mask);
	set_bit(ND_CMD_GET_CONFIG_DATA, &cmd_mask);
	set_bit(ND_CMD_SET_CONFIG_DATA, &cmd_mask);
	nvdimm = nvdimm_create(cxl_nvb->nvdimm_bus, cxl_nvd, NULL, flags,
			       cmd_mask, 0, NULL);
	if (!nvdimm) {
		rc = -ENOMEM;
		goto out;
	}

	dev_set_drvdata(dev, nvdimm);
	cxl_nvd->bridge = cxl_nvb;
	rc = devm_add_action_or_reset(dev, unregister_nvdimm, nvdimm);
out:
	device_unlock(&cxl_nvb->dev);
	put_device(&cxl_nvb->dev);

	return rc;
}

static struct cxl_driver cxl_nvdimm_driver = {
	.name = "cxl_nvdimm",
	.probe = cxl_nvdimm_probe,
	.id = CXL_DEVICE_NVDIMM,
};

static int cxl_pmem_get_config_size(struct cxl_dev_state *cxlds,
				    struct nd_cmd_get_config_size *cmd,
				    unsigned int buf_len)
{
	if (sizeof(*cmd) > buf_len)
		return -EINVAL;

	*cmd = (struct nd_cmd_get_config_size) {
		.config_size = cxlds->lsa_size,
		.max_xfer = cxlds->payload_size,
	};

	return 0;
}

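/*
 * The LIBNVDIMM "config data" commands map directly onto the CXL Label
 * Storage Area (LSA) mailbox commands: ND_CMD_GET_CONFIG_DATA is
 * forwarded as Get LSA and ND_CMD_SET_CONFIG_DATA as Set LSA, with the
 * caller's offset and length carried through unchanged.
 */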
static int cxl_pmem_get_config_data(struct cxl_dev_state *cxlds,
				    struct nd_cmd_get_config_data_hdr *cmd,
				    unsigned int buf_len)
{
	struct cxl_mbox_get_lsa get_lsa;
	int rc;

	if (sizeof(*cmd) > buf_len)
		return -EINVAL;
	if (struct_size(cmd, out_buf, cmd->in_length) > buf_len)
		return -EINVAL;

	get_lsa = (struct cxl_mbox_get_lsa) {
		.offset = cpu_to_le32(cmd->in_offset),
		.length = cpu_to_le32(cmd->in_length),
	};

	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_GET_LSA, &get_lsa,
			       sizeof(get_lsa), cmd->out_buf, cmd->in_length);
	cmd->status = 0;

	return rc;
}

static int cxl_pmem_set_config_data(struct cxl_dev_state *cxlds,
				    struct nd_cmd_set_config_hdr *cmd,
				    unsigned int buf_len)
{
	struct cxl_mbox_set_lsa *set_lsa;
	int rc;

	if (sizeof(*cmd) > buf_len)
		return -EINVAL;

	/* 4-byte status follows the input data in the payload */
	if (struct_size(cmd, in_buf, cmd->in_length) + 4 > buf_len)
		return -EINVAL;

	set_lsa =
		kvzalloc(struct_size(set_lsa, data, cmd->in_length), GFP_KERNEL);
	if (!set_lsa)
		return -ENOMEM;

	*set_lsa = (struct cxl_mbox_set_lsa) {
		.offset = cpu_to_le32(cmd->in_offset),
	};
	memcpy(set_lsa->data, cmd->in_buf, cmd->in_length);

	rc = cxl_mbox_send_cmd(cxlds, CXL_MBOX_OP_SET_LSA, set_lsa,
			       struct_size(set_lsa, data, cmd->in_length),
			       NULL, 0);

	/*
	 * Set "firmware" status (4 packed bytes at the end of the input
	 * payload).
	 */
	put_unaligned(0, (u32 *) &cmd->in_buf[cmd->in_length]);
	kvfree(set_lsa);

	return rc;
}

static int cxl_pmem_nvdimm_ctl(struct nvdimm *nvdimm, unsigned int cmd,
			       void *buf, unsigned int buf_len)
{
	struct cxl_nvdimm *cxl_nvd = nvdimm_provider_data(nvdimm);
	unsigned long cmd_mask = nvdimm_cmd_mask(nvdimm);
	struct cxl_memdev *cxlmd = cxl_nvd->cxlmd;
	struct cxl_dev_state *cxlds = cxlmd->cxlds;

	if (!test_bit(cmd, &cmd_mask))
		return -ENOTTY;

	switch (cmd) {
	case ND_CMD_GET_CONFIG_SIZE:
		return cxl_pmem_get_config_size(cxlds, buf, buf_len);
	case ND_CMD_GET_CONFIG_DATA:
		return cxl_pmem_get_config_data(cxlds, buf, buf_len);
	case ND_CMD_SET_CONFIG_DATA:
		return cxl_pmem_set_config_data(cxlds, buf, buf_len);
	default:
		return -ENOTTY;
	}
}

static int cxl_pmem_ctl(struct nvdimm_bus_descriptor *nd_desc,
			struct nvdimm *nvdimm, unsigned int cmd, void *buf,
			unsigned int buf_len, int *cmd_rc)
{
	/*
	 * No firmware response to translate, let the transport error
	 * code take precedence.
	 */
	*cmd_rc = 0;

	if (!nvdimm)
		return -ENOTTY;
	return cxl_pmem_nvdimm_ctl(nvdimm, cmd, buf, buf_len);
}

static bool online_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb)
{
	if (cxl_nvb->nvdimm_bus)
		return true;
	cxl_nvb->nvdimm_bus =
		nvdimm_bus_register(&cxl_nvb->dev, &cxl_nvb->nd_desc);
	return cxl_nvb->nvdimm_bus != NULL;
}

static int cxl_nvdimm_release_driver(struct device *dev, void *cxl_nvb)
{
	struct cxl_nvdimm *cxl_nvd;

	if (!is_cxl_nvdimm(dev))
		return 0;

	cxl_nvd = to_cxl_nvdimm(dev);
	if (cxl_nvd->bridge != cxl_nvb)
		return 0;

	device_release_driver(dev);
	return 0;
}

static int cxl_pmem_region_release_driver(struct device *dev, void *cxl_nvb)
{
	struct cxl_pmem_region *cxlr_pmem;

	if (!is_cxl_pmem_region(dev))
		return 0;

	cxlr_pmem = to_cxl_pmem_region(dev);
	if (cxlr_pmem->bridge != cxl_nvb)
		return 0;

	device_release_driver(dev);
	return 0;
}

static void offline_nvdimm_bus(struct cxl_nvdimm_bridge *cxl_nvb,
			       struct nvdimm_bus *nvdimm_bus)
{
	if (!nvdimm_bus)
		return;

	/*
	 * Set the state of cxl_nvdimm devices to unbound / idle before
	 * nvdimm_bus_unregister() rips the nvdimm objects out from
	 * underneath them.
	 */
	bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
			 cxl_pmem_region_release_driver);
	bus_for_each_dev(&cxl_bus_type, NULL, cxl_nvb,
			 cxl_nvdimm_release_driver);
	nvdimm_bus_unregister(nvdimm_bus);
}

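/*
 * Bridge state machine, run from the ordered cxl_pmem_wq: an ONLINE
 * bridge registers its nvdimm_bus (if not already present) and rescans
 * the CXL bus so that cxl_nvdimm and cxl_pmem_region devices that
 * probed before the bus existed can attach; an OFFLINE or DEAD bridge
 * hands its nvdimm_bus to offline_nvdimm_bus() for teardown outside
 * the device lock.
 */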
static void cxl_nvb_update_state(struct work_struct *work)
{
	struct cxl_nvdimm_bridge *cxl_nvb =
		container_of(work, typeof(*cxl_nvb), state_work);
	struct nvdimm_bus *victim_bus = NULL;
	bool release = false, rescan = false;

	device_lock(&cxl_nvb->dev);
	switch (cxl_nvb->state) {
	case CXL_NVB_ONLINE:
		if (!online_nvdimm_bus(cxl_nvb)) {
			dev_err(&cxl_nvb->dev,
				"failed to establish nvdimm bus\n");
			release = true;
		} else
			rescan = true;
		break;
	case CXL_NVB_OFFLINE:
	case CXL_NVB_DEAD:
		victim_bus = cxl_nvb->nvdimm_bus;
		cxl_nvb->nvdimm_bus = NULL;
		break;
	default:
		break;
	}
	device_unlock(&cxl_nvb->dev);

	if (release)
		device_release_driver(&cxl_nvb->dev);
	if (rescan) {
		int rc = bus_rescan_devices(&cxl_bus_type);

		dev_dbg(&cxl_nvb->dev, "rescan: %d\n", rc);
	}
	offline_nvdimm_bus(cxl_nvb, victim_bus);

	put_device(&cxl_nvb->dev);
}

static void cxl_nvdimm_bridge_state_work(struct cxl_nvdimm_bridge *cxl_nvb)
{
	/*
	 * Take a reference that the workqueue will drop if new work
	 * gets queued.
	 */
	get_device(&cxl_nvb->dev);
	if (!queue_work(cxl_pmem_wq, &cxl_nvb->state_work))
		put_device(&cxl_nvb->dev);
}

static void cxl_nvdimm_bridge_remove(struct device *dev)
{
	struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);

	if (cxl_nvb->state == CXL_NVB_ONLINE)
		cxl_nvb->state = CXL_NVB_OFFLINE;
	cxl_nvdimm_bridge_state_work(cxl_nvb);
}

static int cxl_nvdimm_bridge_probe(struct device *dev)
{
	struct cxl_nvdimm_bridge *cxl_nvb = to_cxl_nvdimm_bridge(dev);

	if (cxl_nvb->state == CXL_NVB_DEAD)
		return -ENXIO;

	if (cxl_nvb->state == CXL_NVB_NEW) {
		cxl_nvb->nd_desc = (struct nvdimm_bus_descriptor) {
			.provider_name = "CXL",
			.module = THIS_MODULE,
			.ndctl = cxl_pmem_ctl,
		};

		INIT_WORK(&cxl_nvb->state_work, cxl_nvb_update_state);
	}

	cxl_nvb->state = CXL_NVB_ONLINE;
	cxl_nvdimm_bridge_state_work(cxl_nvb);

	return 0;
}

static struct cxl_driver cxl_nvdimm_bridge_driver = {
	.name = "cxl_nvdimm_bridge",
	.probe = cxl_nvdimm_bridge_probe,
	.remove = cxl_nvdimm_bridge_remove,
	.id = CXL_DEVICE_NVDIMM_BRIDGE,
};

static int match_cxl_nvdimm(struct device *dev, void *data)
{
	return is_cxl_nvdimm(dev);
}

static void unregister_nvdimm_region(void *nd_region)
{
	struct cxl_nvdimm_bridge *cxl_nvb;
	struct cxl_pmem_region *cxlr_pmem;
	int i;

	cxlr_pmem = nd_region_provider_data(nd_region);
	cxl_nvb = cxlr_pmem->bridge;
	device_lock(&cxl_nvb->dev);
	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
		struct cxl_nvdimm *cxl_nvd = m->cxl_nvd;

		if (cxl_nvd->region) {
			put_device(&cxlr_pmem->dev);
			cxl_nvd->region = NULL;
		}
	}
	device_unlock(&cxl_nvb->dev);

	nvdimm_region_delete(nd_region);
}

static void cxlr_pmem_remove_resource(void *res)
{
	remove_resource(res);
}

struct cxl_pmem_region_info {
	u64 offset;
	u64 serial;
};

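/*
 * Assemble a LIBNVDIMM pmem region from a CXL persistent memory region:
 * claim the host physical address range in iomem_resource, build one
 * nd_mapping_desc per endpoint mapping (taking a region reference per
 * cxl_nvdimm), derive an interleave-set cookie from the mapping offsets
 * and device serial numbers, and register the nd_region on the bridge's
 * nvdimm_bus.
 */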
static int cxl_pmem_region_probe(struct device *dev)
{
	struct nd_mapping_desc mappings[CXL_DECODER_MAX_INTERLEAVE];
	struct cxl_pmem_region *cxlr_pmem = to_cxl_pmem_region(dev);
	struct cxl_region *cxlr = cxlr_pmem->cxlr;
	struct cxl_pmem_region_info *info = NULL;
	struct cxl_nvdimm_bridge *cxl_nvb;
	struct nd_interleave_set *nd_set;
	struct nd_region_desc ndr_desc;
	struct cxl_nvdimm *cxl_nvd;
	struct nvdimm *nvdimm;
	struct resource *res;
	int rc, i = 0;

	cxl_nvb = cxl_find_nvdimm_bridge(&cxlr_pmem->mapping[0].cxlmd->dev);
	if (!cxl_nvb) {
		dev_dbg(dev, "bridge not found\n");
		return -ENXIO;
	}
	cxlr_pmem->bridge = cxl_nvb;

	device_lock(&cxl_nvb->dev);
	if (!cxl_nvb->nvdimm_bus) {
		dev_dbg(dev, "nvdimm bus not found\n");
		rc = -ENXIO;
		goto err;
	}

	memset(&mappings, 0, sizeof(mappings));
	memset(&ndr_desc, 0, sizeof(ndr_desc));

	res = devm_kzalloc(dev, sizeof(*res), GFP_KERNEL);
	if (!res) {
		rc = -ENOMEM;
		goto err;
	}

	res->name = "Persistent Memory";
	res->start = cxlr_pmem->hpa_range.start;
	res->end = cxlr_pmem->hpa_range.end;
	res->flags = IORESOURCE_MEM;
	res->desc = IORES_DESC_PERSISTENT_MEMORY;

	rc = insert_resource(&iomem_resource, res);
	if (rc)
		goto err;

	rc = devm_add_action_or_reset(dev, cxlr_pmem_remove_resource, res);
	if (rc)
		goto err;

	ndr_desc.res = res;
	ndr_desc.provider_data = cxlr_pmem;

	ndr_desc.numa_node = memory_add_physaddr_to_nid(res->start);
	ndr_desc.target_node = phys_to_target_node(res->start);
	if (ndr_desc.target_node == NUMA_NO_NODE) {
		ndr_desc.target_node = ndr_desc.numa_node;
		dev_dbg(&cxlr->dev, "changing target node from %d to %d",
			NUMA_NO_NODE, ndr_desc.target_node);
	}

	nd_set = devm_kzalloc(dev, sizeof(*nd_set), GFP_KERNEL);
	if (!nd_set) {
		rc = -ENOMEM;
		goto err;
	}

	ndr_desc.memregion = cxlr->id;
	set_bit(ND_REGION_CXL, &ndr_desc.flags);
	set_bit(ND_REGION_PERSIST_MEMCTRL, &ndr_desc.flags);

	info = kmalloc_array(cxlr_pmem->nr_mappings, sizeof(*info), GFP_KERNEL);
	if (!info) {
		rc = -ENOMEM;
		goto err;
	}

	for (i = 0; i < cxlr_pmem->nr_mappings; i++) {
		struct cxl_pmem_region_mapping *m = &cxlr_pmem->mapping[i];
		struct cxl_memdev *cxlmd = m->cxlmd;
		struct cxl_dev_state *cxlds = cxlmd->cxlds;
		struct device *d;

		d = device_find_child(&cxlmd->dev, NULL, match_cxl_nvdimm);
		if (!d) {
			dev_dbg(dev, "[%d]: %s: no cxl_nvdimm found\n", i,
				dev_name(&cxlmd->dev));
			rc = -ENODEV;
			goto err;
		}

		/* safe to drop ref now with bridge lock held */
		put_device(d);

		cxl_nvd = to_cxl_nvdimm(d);
		nvdimm = dev_get_drvdata(&cxl_nvd->dev);
		if (!nvdimm) {
			dev_dbg(dev, "[%d]: %s: no nvdimm found\n", i,
				dev_name(&cxlmd->dev));
			rc = -ENODEV;
			goto err;
		}
		cxl_nvd->region = cxlr_pmem;
		get_device(&cxlr_pmem->dev);
		m->cxl_nvd = cxl_nvd;
		mappings[i] = (struct nd_mapping_desc) {
			.nvdimm = nvdimm,
			.start = m->start,
			.size = m->size,
			.position = i,
		};
		info[i].offset = m->start;
		info[i].serial = cxlds->serial;
	}
	ndr_desc.num_mappings = cxlr_pmem->nr_mappings;
	ndr_desc.mapping = mappings;

	/*
	 * TODO enable CXL labels which skip the need for 'interleave-set cookie'
	 */
	nd_set->cookie1 =
		nd_fletcher64(info, sizeof(*info) * cxlr_pmem->nr_mappings, 0);
	nd_set->cookie2 = nd_set->cookie1;
	ndr_desc.nd_set = nd_set;

	cxlr_pmem->nd_region =
		nvdimm_pmem_region_create(cxl_nvb->nvdimm_bus, &ndr_desc);
	if (!cxlr_pmem->nd_region) {
		rc = -ENOMEM;
		goto err;
	}

	rc = devm_add_action_or_reset(dev, unregister_nvdimm_region,
				      cxlr_pmem->nd_region);
out:
	kfree(info);
	device_unlock(&cxl_nvb->dev);
	put_device(&cxl_nvb->dev);

	return rc;

err:
	dev_dbg(dev, "failed to create nvdimm region\n");
	for (i--; i >= 0; i--) {
		nvdimm = mappings[i].nvdimm;
		cxl_nvd = nvdimm_provider_data(nvdimm);
		put_device(&cxl_nvd->region->dev);
		cxl_nvd->region = NULL;
	}
	goto out;
}

static struct cxl_driver cxl_pmem_region_driver = {
	.name = "cxl_pmem_region",
	.probe = cxl_pmem_region_probe,
	.id = CXL_DEVICE_PMEM_REGION,
};

/*
 * Return all bridges to the CXL_NVB_NEW state to invalidate any
 * ->state_work referring to the now destroyed cxl_pmem_wq.
 */
static int cxl_nvdimm_bridge_reset(struct device *dev, void *data)
{
	struct cxl_nvdimm_bridge *cxl_nvb;

	if (!is_cxl_nvdimm_bridge(dev))
		return 0;

	cxl_nvb = to_cxl_nvdimm_bridge(dev);
	device_lock(dev);
	cxl_nvb->state = CXL_NVB_NEW;
	device_unlock(dev);

	return 0;
}

static void destroy_cxl_pmem_wq(void)
{
	destroy_workqueue(cxl_pmem_wq);
	bus_for_each_dev(&cxl_bus_type, NULL, NULL, cxl_nvdimm_bridge_reset);
}

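/*
 * Module init: populate the set of mailbox commands (Set Shutdown
 * State, Set LSA) that cxl_nvdimm_probe() will claim as exclusive,
 * allocate the ordered workqueue used by the bridge state machine,
 * then register the bridge, nvdimm, and region drivers.
 * cxl_pmem_exit() unwinds in the reverse order.
 */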
static __init int cxl_pmem_init(void)
{
	int rc;

	set_bit(CXL_MEM_COMMAND_ID_SET_SHUTDOWN_STATE, exclusive_cmds);
	set_bit(CXL_MEM_COMMAND_ID_SET_LSA, exclusive_cmds);

	cxl_pmem_wq = alloc_ordered_workqueue("cxl_pmem", 0);
	if (!cxl_pmem_wq)
		return -ENXIO;

	rc = cxl_driver_register(&cxl_nvdimm_bridge_driver);
	if (rc)
		goto err_bridge;

	rc = cxl_driver_register(&cxl_nvdimm_driver);
	if (rc)
		goto err_nvdimm;

	rc = cxl_driver_register(&cxl_pmem_region_driver);
	if (rc)
		goto err_region;

	return 0;

err_region:
	cxl_driver_unregister(&cxl_nvdimm_driver);
err_nvdimm:
	cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
err_bridge:
	destroy_cxl_pmem_wq();
	return rc;
}

static __exit void cxl_pmem_exit(void)
{
	cxl_driver_unregister(&cxl_pmem_region_driver);
	cxl_driver_unregister(&cxl_nvdimm_driver);
	cxl_driver_unregister(&cxl_nvdimm_bridge_driver);
	destroy_cxl_pmem_wq();
}

MODULE_LICENSE("GPL v2");
module_init(cxl_pmem_init);
module_exit(cxl_pmem_exit);
MODULE_IMPORT_NS(CXL);
MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM_BRIDGE);
MODULE_ALIAS_CXL(CXL_DEVICE_NVDIMM);
MODULE_ALIAS_CXL(CXL_DEVICE_PMEM_REGION);