1 // SPDX-License-Identifier: GPL-2.0 2 /* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */ 3 #include <linux/memremap.h> 4 #include <linux/device.h> 5 #include <linux/mutex.h> 6 #include <linux/list.h> 7 #include <linux/slab.h> 8 #include <linux/dax.h> 9 #include <linux/io.h> 10 #include "dax-private.h" 11 #include "bus.h" 12 13 static struct resource dax_regions = DEFINE_RES_MEM_NAMED(0, -1, "DAX Regions"); 14 static DEFINE_MUTEX(dax_bus_lock); 15 16 /* 17 * All changes to the dax region configuration occur with this lock held 18 * for write. 19 */ 20 DECLARE_RWSEM(dax_region_rwsem); 21 22 /* 23 * All changes to the dax device configuration occur with this lock held 24 * for write. 25 */ 26 DECLARE_RWSEM(dax_dev_rwsem); 27 28 #define DAX_NAME_LEN 30 29 struct dax_id { 30 struct list_head list; 31 char dev_name[DAX_NAME_LEN]; 32 }; 33 34 static int dax_bus_uevent(const struct device *dev, struct kobj_uevent_env *env) 35 { 36 /* 37 * We only ever expect to handle device-dax instances, i.e. the 38 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero 39 */ 40 return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0); 41 } 42 43 static struct dax_id *__dax_match_id(const struct dax_device_driver *dax_drv, 44 const char *dev_name) 45 { 46 struct dax_id *dax_id; 47 48 lockdep_assert_held(&dax_bus_lock); 49 50 list_for_each_entry(dax_id, &dax_drv->ids, list) 51 if (sysfs_streq(dax_id->dev_name, dev_name)) 52 return dax_id; 53 return NULL; 54 } 55 56 static int dax_match_id(const struct dax_device_driver *dax_drv, struct device *dev) 57 { 58 int match; 59 60 mutex_lock(&dax_bus_lock); 61 match = !!__dax_match_id(dax_drv, dev_name(dev)); 62 mutex_unlock(&dax_bus_lock); 63 64 return match; 65 } 66 67 static int dax_match_type(const struct dax_device_driver *dax_drv, struct device *dev) 68 { 69 enum dax_driver_type type = DAXDRV_DEVICE_TYPE; 70 struct dev_dax *dev_dax = to_dev_dax(dev); 71 72 if (dev_dax->region->res.flags & IORESOURCE_DAX_KMEM) 73 type = DAXDRV_KMEM_TYPE; 74 75 if (dax_drv->type == type) 76 return 1; 77 78 /* default to device mode if dax_kmem is disabled */ 79 if (dax_drv->type == DAXDRV_DEVICE_TYPE && 80 !IS_ENABLED(CONFIG_DEV_DAX_KMEM)) 81 return 1; 82 83 return 0; 84 } 85 86 enum id_action { 87 ID_REMOVE, 88 ID_ADD, 89 }; 90 91 static ssize_t do_id_store(struct device_driver *drv, const char *buf, 92 size_t count, enum id_action action) 93 { 94 struct dax_device_driver *dax_drv = to_dax_drv(drv); 95 unsigned int region_id, id; 96 char devname[DAX_NAME_LEN]; 97 struct dax_id *dax_id; 98 ssize_t rc = count; 99 int fields; 100 101 fields = sscanf(buf, "dax%d.%d", ®ion_id, &id); 102 if (fields != 2) 103 return -EINVAL; 104 sprintf(devname, "dax%d.%d", region_id, id); 105 if (!sysfs_streq(buf, devname)) 106 return -EINVAL; 107 108 mutex_lock(&dax_bus_lock); 109 dax_id = __dax_match_id(dax_drv, buf); 110 if (!dax_id) { 111 if (action == ID_ADD) { 112 dax_id = kzalloc_obj(*dax_id); 113 if (dax_id) { 114 strscpy(dax_id->dev_name, buf, DAX_NAME_LEN); 115 list_add(&dax_id->list, &dax_drv->ids); 116 } else 117 rc = -ENOMEM; 118 } 119 } else if (action == ID_REMOVE) { 120 list_del(&dax_id->list); 121 kfree(dax_id); 122 } 123 mutex_unlock(&dax_bus_lock); 124 125 if (rc < 0) 126 return rc; 127 if (action == ID_ADD) 128 rc = driver_attach(drv); 129 if (rc) 130 return rc; 131 return count; 132 } 133 134 static ssize_t new_id_store(struct device_driver *drv, const char *buf, 135 size_t count) 136 { 137 return do_id_store(drv, buf, count, ID_ADD); 138 } 139 static DRIVER_ATTR_WO(new_id); 140 141 static ssize_t remove_id_store(struct device_driver *drv, const char *buf, 142 size_t count) 143 { 144 return do_id_store(drv, buf, count, ID_REMOVE); 145 } 146 static DRIVER_ATTR_WO(remove_id); 147 148 static struct attribute *dax_drv_attrs[] = { 149 &driver_attr_new_id.attr, 150 &driver_attr_remove_id.attr, 151 NULL, 152 }; 153 ATTRIBUTE_GROUPS(dax_drv); 154 155 static int dax_bus_match(struct device *dev, const struct device_driver *drv); 156 157 /* 158 * Static dax regions are regions created by an external subsystem 159 * nvdimm where a single range is assigned. Its boundaries are by the external 160 * subsystem and are usually limited to one physical memory range. For example, 161 * for PMEM it is usually defined by NVDIMM Namespace boundaries (i.e. a 162 * single contiguous range) 163 * 164 * On dynamic dax regions, the assigned region can be partitioned by dax core 165 * into multiple subdivisions. A subdivision is represented into one 166 * /dev/daxN.M device composed by one or more potentially discontiguous ranges. 167 * 168 * When allocating a dax region, drivers must set whether it's static 169 * (IORESOURCE_DAX_STATIC). On static dax devices, the @pgmap is pre-assigned 170 * to dax core when calling devm_create_dev_dax(), whereas in dynamic dax 171 * devices it is NULL but afterwards allocated by dax core on device ->probe(). 172 * Care is needed to make sure that dynamic dax devices are torn down with a 173 * cleared @pgmap field (see kill_dev_dax()). 174 */ 175 static bool is_static(struct dax_region *dax_region) 176 { 177 return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0; 178 } 179 180 bool static_dev_dax(struct dev_dax *dev_dax) 181 { 182 return is_static(dev_dax->region); 183 } 184 EXPORT_SYMBOL_GPL(static_dev_dax); 185 186 static u64 dev_dax_size(struct dev_dax *dev_dax) 187 { 188 u64 size = 0; 189 int i; 190 191 lockdep_assert_held(&dax_dev_rwsem); 192 193 for (i = 0; i < dev_dax->nr_range; i++) 194 size += range_len(&dev_dax->ranges[i].range); 195 196 return size; 197 } 198 199 static int dax_bus_probe(struct device *dev) 200 { 201 struct dax_device_driver *dax_drv = to_dax_drv(dev->driver); 202 struct dev_dax *dev_dax = to_dev_dax(dev); 203 struct dax_region *dax_region = dev_dax->region; 204 int rc; 205 u64 size; 206 207 rc = down_read_interruptible(&dax_dev_rwsem); 208 if (rc) 209 return rc; 210 size = dev_dax_size(dev_dax); 211 up_read(&dax_dev_rwsem); 212 213 if (size == 0 || dev_dax->id < 0) 214 return -ENXIO; 215 216 rc = dax_drv->probe(dev_dax); 217 218 if (rc || is_static(dax_region)) 219 return rc; 220 221 /* 222 * Track new seed creation only after successful probe of the 223 * previous seed. 224 */ 225 if (dax_region->seed == dev) 226 dax_region->seed = NULL; 227 228 return 0; 229 } 230 231 static void dax_bus_remove(struct device *dev) 232 { 233 struct dax_device_driver *dax_drv = to_dax_drv(dev->driver); 234 struct dev_dax *dev_dax = to_dev_dax(dev); 235 236 if (dax_drv->remove) 237 dax_drv->remove(dev_dax); 238 } 239 240 static const struct bus_type dax_bus_type = { 241 .name = "dax", 242 .uevent = dax_bus_uevent, 243 .match = dax_bus_match, 244 .probe = dax_bus_probe, 245 .remove = dax_bus_remove, 246 .drv_groups = dax_drv_groups, 247 }; 248 249 static int dax_bus_match(struct device *dev, const struct device_driver *drv) 250 { 251 const struct dax_device_driver *dax_drv = to_dax_drv(drv); 252 253 if (dax_match_id(dax_drv, dev)) 254 return 1; 255 return dax_match_type(dax_drv, dev); 256 } 257 258 /* 259 * Rely on the fact that drvdata is set before the attributes are 260 * registered, and that the attributes are unregistered before drvdata 261 * is cleared to assume that drvdata is always valid. 262 */ 263 static ssize_t id_show(struct device *dev, 264 struct device_attribute *attr, char *buf) 265 { 266 struct dax_region *dax_region = dev_get_drvdata(dev); 267 268 return sysfs_emit(buf, "%d\n", dax_region->id); 269 } 270 static DEVICE_ATTR_RO(id); 271 272 static ssize_t region_size_show(struct device *dev, 273 struct device_attribute *attr, char *buf) 274 { 275 struct dax_region *dax_region = dev_get_drvdata(dev); 276 277 return sysfs_emit(buf, "%llu\n", 278 (unsigned long long)resource_size(&dax_region->res)); 279 } 280 static struct device_attribute dev_attr_region_size = __ATTR(size, 0444, 281 region_size_show, NULL); 282 283 static ssize_t region_align_show(struct device *dev, 284 struct device_attribute *attr, char *buf) 285 { 286 struct dax_region *dax_region = dev_get_drvdata(dev); 287 288 return sysfs_emit(buf, "%u\n", dax_region->align); 289 } 290 static struct device_attribute dev_attr_region_align = 291 __ATTR(align, 0400, region_align_show, NULL); 292 293 #define for_each_dax_region_resource(dax_region, res) \ 294 for (res = (dax_region)->res.child; res; res = res->sibling) 295 296 static unsigned long long dax_region_avail_size(struct dax_region *dax_region) 297 { 298 resource_size_t size = resource_size(&dax_region->res); 299 struct resource *res; 300 301 lockdep_assert_held(&dax_region_rwsem); 302 303 for_each_dax_region_resource(dax_region, res) 304 size -= resource_size(res); 305 return size; 306 } 307 308 static ssize_t available_size_show(struct device *dev, 309 struct device_attribute *attr, char *buf) 310 { 311 struct dax_region *dax_region = dev_get_drvdata(dev); 312 unsigned long long size; 313 int rc; 314 315 rc = down_read_interruptible(&dax_region_rwsem); 316 if (rc) 317 return rc; 318 size = dax_region_avail_size(dax_region); 319 up_read(&dax_region_rwsem); 320 321 return sysfs_emit(buf, "%llu\n", size); 322 } 323 static DEVICE_ATTR_RO(available_size); 324 325 static ssize_t seed_show(struct device *dev, 326 struct device_attribute *attr, char *buf) 327 { 328 struct dax_region *dax_region = dev_get_drvdata(dev); 329 struct device *seed; 330 ssize_t rc; 331 332 if (is_static(dax_region)) 333 return -EINVAL; 334 335 rc = down_read_interruptible(&dax_region_rwsem); 336 if (rc) 337 return rc; 338 seed = dax_region->seed; 339 rc = sysfs_emit(buf, "%s\n", seed ? dev_name(seed) : ""); 340 up_read(&dax_region_rwsem); 341 342 return rc; 343 } 344 static DEVICE_ATTR_RO(seed); 345 346 static ssize_t create_show(struct device *dev, 347 struct device_attribute *attr, char *buf) 348 { 349 struct dax_region *dax_region = dev_get_drvdata(dev); 350 struct device *youngest; 351 ssize_t rc; 352 353 if (is_static(dax_region)) 354 return -EINVAL; 355 356 rc = down_read_interruptible(&dax_region_rwsem); 357 if (rc) 358 return rc; 359 youngest = dax_region->youngest; 360 rc = sysfs_emit(buf, "%s\n", youngest ? dev_name(youngest) : ""); 361 up_read(&dax_region_rwsem); 362 363 return rc; 364 } 365 366 static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data); 367 368 static ssize_t create_store(struct device *dev, struct device_attribute *attr, 369 const char *buf, size_t len) 370 { 371 struct dax_region *dax_region = dev_get_drvdata(dev); 372 unsigned long long avail; 373 ssize_t rc; 374 int val; 375 376 if (is_static(dax_region)) 377 return -EINVAL; 378 379 rc = kstrtoint(buf, 0, &val); 380 if (rc) 381 return rc; 382 if (val != 1) 383 return -EINVAL; 384 385 rc = down_write_killable(&dax_region_rwsem); 386 if (rc) 387 return rc; 388 avail = dax_region_avail_size(dax_region); 389 if (avail == 0) 390 rc = -ENOSPC; 391 else { 392 struct dev_dax_data data = { 393 .dax_region = dax_region, 394 .size = 0, 395 .id = -1, 396 .memmap_on_memory = false, 397 }; 398 struct dev_dax *dev_dax = __devm_create_dev_dax(&data); 399 400 if (IS_ERR(dev_dax)) 401 rc = PTR_ERR(dev_dax); 402 else { 403 /* 404 * In support of crafting multiple new devices 405 * simultaneously multiple seeds can be created, 406 * but only the first one that has not been 407 * successfully bound is tracked as the region 408 * seed. 409 */ 410 if (!dax_region->seed) 411 dax_region->seed = &dev_dax->dev; 412 dax_region->youngest = &dev_dax->dev; 413 rc = len; 414 } 415 } 416 up_write(&dax_region_rwsem); 417 418 return rc; 419 } 420 static DEVICE_ATTR_RW(create); 421 422 void kill_dev_dax(struct dev_dax *dev_dax) 423 { 424 struct dax_device *dax_dev = dev_dax->dax_dev; 425 struct inode *inode = dax_inode(dax_dev); 426 427 kill_dax(dax_dev); 428 unmap_mapping_range(inode->i_mapping, 0, 0, 1); 429 430 /* 431 * Dynamic dax region have the pgmap allocated via dev_kzalloc() 432 * and thus freed by devm. Clear the pgmap to not have stale pgmap 433 * ranges on probe() from previous reconfigurations of region devices. 434 */ 435 if (!static_dev_dax(dev_dax)) 436 dev_dax->pgmap = NULL; 437 } 438 EXPORT_SYMBOL_GPL(kill_dev_dax); 439 440 static void trim_dev_dax_range(struct dev_dax *dev_dax) 441 { 442 int i = dev_dax->nr_range - 1; 443 struct range *range = &dev_dax->ranges[i].range; 444 struct dax_region *dax_region = dev_dax->region; 445 446 lockdep_assert_held_write(&dax_region_rwsem); 447 dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i, 448 (unsigned long long)range->start, 449 (unsigned long long)range->end); 450 451 __release_region(&dax_region->res, range->start, range_len(range)); 452 if (--dev_dax->nr_range == 0) { 453 kfree(dev_dax->ranges); 454 dev_dax->ranges = NULL; 455 } 456 } 457 458 static void free_dev_dax_ranges(struct dev_dax *dev_dax) 459 { 460 while (dev_dax->nr_range) 461 trim_dev_dax_range(dev_dax); 462 } 463 464 static void unregister_dev_dax(void *dev) 465 { 466 struct dev_dax *dev_dax = to_dev_dax(dev); 467 468 dev_dbg(dev, "%s\n", __func__); 469 470 down_write(&dax_region_rwsem); 471 kill_dev_dax(dev_dax); 472 device_del(dev); 473 free_dev_dax_ranges(dev_dax); 474 put_device(dev); 475 up_write(&dax_region_rwsem); 476 } 477 478 static void dax_region_free(struct kref *kref) 479 { 480 struct dax_region *dax_region; 481 482 dax_region = container_of(kref, struct dax_region, kref); 483 kfree(dax_region); 484 } 485 486 static void dax_region_put(struct dax_region *dax_region) 487 { 488 kref_put(&dax_region->kref, dax_region_free); 489 } 490 491 /* a return value >= 0 indicates this invocation invalidated the id */ 492 static int __free_dev_dax_id(struct dev_dax *dev_dax) 493 { 494 struct dax_region *dax_region; 495 int rc = dev_dax->id; 496 497 lockdep_assert_held_write(&dax_dev_rwsem); 498 499 if (!dev_dax->dyn_id || dev_dax->id < 0) 500 return -1; 501 dax_region = dev_dax->region; 502 ida_free(&dax_region->ida, dev_dax->id); 503 dax_region_put(dax_region); 504 dev_dax->id = -1; 505 return rc; 506 } 507 508 static int free_dev_dax_id(struct dev_dax *dev_dax) 509 { 510 int rc; 511 512 rc = down_write_killable(&dax_dev_rwsem); 513 if (rc) 514 return rc; 515 rc = __free_dev_dax_id(dev_dax); 516 up_write(&dax_dev_rwsem); 517 return rc; 518 } 519 520 static int alloc_dev_dax_id(struct dev_dax *dev_dax) 521 { 522 struct dax_region *dax_region = dev_dax->region; 523 int id; 524 525 id = ida_alloc(&dax_region->ida, GFP_KERNEL); 526 if (id < 0) 527 return id; 528 kref_get(&dax_region->kref); 529 dev_dax->dyn_id = true; 530 dev_dax->id = id; 531 return id; 532 } 533 534 static ssize_t delete_store(struct device *dev, struct device_attribute *attr, 535 const char *buf, size_t len) 536 { 537 struct dax_region *dax_region = dev_get_drvdata(dev); 538 struct dev_dax *dev_dax; 539 struct device *victim; 540 bool do_del = false; 541 int rc; 542 543 if (is_static(dax_region)) 544 return -EINVAL; 545 546 victim = device_find_child_by_name(dax_region->dev, buf); 547 if (!victim) 548 return -ENXIO; 549 550 device_lock(dev); 551 device_lock(victim); 552 dev_dax = to_dev_dax(victim); 553 down_write(&dax_dev_rwsem); 554 if (victim->driver || dev_dax_size(dev_dax)) 555 rc = -EBUSY; 556 else { 557 /* 558 * Invalidate the device so it does not become active 559 * again, but always preserve device-id-0 so that 560 * /sys/bus/dax/ is guaranteed to be populated while any 561 * dax_region is registered. 562 */ 563 if (dev_dax->id > 0) { 564 do_del = __free_dev_dax_id(dev_dax) >= 0; 565 rc = len; 566 if (dax_region->seed == victim) 567 dax_region->seed = NULL; 568 if (dax_region->youngest == victim) 569 dax_region->youngest = NULL; 570 } else 571 rc = -EBUSY; 572 } 573 up_write(&dax_dev_rwsem); 574 device_unlock(victim); 575 576 /* won the race to invalidate the device, clean it up */ 577 if (do_del) 578 devm_release_action(dev, unregister_dev_dax, victim); 579 device_unlock(dev); 580 put_device(victim); 581 582 return rc; 583 } 584 static DEVICE_ATTR_WO(delete); 585 586 static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a, 587 int n) 588 { 589 struct device *dev = container_of(kobj, struct device, kobj); 590 struct dax_region *dax_region = dev_get_drvdata(dev); 591 592 if (is_static(dax_region)) 593 if (a == &dev_attr_available_size.attr 594 || a == &dev_attr_create.attr 595 || a == &dev_attr_seed.attr 596 || a == &dev_attr_delete.attr) 597 return 0; 598 return a->mode; 599 } 600 601 static struct attribute *dax_region_attributes[] = { 602 &dev_attr_available_size.attr, 603 &dev_attr_region_size.attr, 604 &dev_attr_region_align.attr, 605 &dev_attr_create.attr, 606 &dev_attr_seed.attr, 607 &dev_attr_delete.attr, 608 &dev_attr_id.attr, 609 NULL, 610 }; 611 612 static const struct attribute_group dax_region_attribute_group = { 613 .name = "dax_region", 614 .attrs = dax_region_attributes, 615 .is_visible = dax_region_visible, 616 }; 617 618 static const struct attribute_group *dax_region_attribute_groups[] = { 619 &dax_region_attribute_group, 620 NULL, 621 }; 622 623 static void dax_region_unregister(void *region) 624 { 625 struct dax_region *dax_region = region; 626 627 sysfs_remove_groups(&dax_region->dev->kobj, 628 dax_region_attribute_groups); 629 release_resource(&dax_region->res); 630 dax_region_put(dax_region); 631 } 632 633 struct dax_region *alloc_dax_region(struct device *parent, int region_id, 634 struct range *range, int target_node, unsigned int align, 635 unsigned long flags) 636 { 637 struct dax_region *dax_region; 638 int rc; 639 640 /* 641 * The DAX core assumes that it can store its private data in 642 * parent->driver_data. This WARN is a reminder / safeguard for 643 * developers of device-dax drivers. 644 */ 645 if (dev_get_drvdata(parent)) { 646 dev_WARN(parent, "dax core failed to setup private data\n"); 647 return NULL; 648 } 649 650 if (!IS_ALIGNED(range->start, align) 651 || !IS_ALIGNED(range_len(range), align)) 652 return NULL; 653 654 dax_region = kzalloc_obj(*dax_region); 655 if (!dax_region) 656 return NULL; 657 658 dev_set_drvdata(parent, dax_region); 659 kref_init(&dax_region->kref); 660 dax_region->id = region_id; 661 dax_region->align = align; 662 dax_region->dev = parent; 663 dax_region->target_node = target_node; 664 ida_init(&dax_region->ida); 665 dax_region->res = (struct resource) { 666 .start = range->start, 667 .end = range->end, 668 .flags = IORESOURCE_MEM | flags, 669 }; 670 671 rc = request_resource(&dax_regions, &dax_region->res); 672 if (rc) { 673 dev_dbg(parent, "dax_region resource conflict for %pR\n", 674 &dax_region->res); 675 goto err_res; 676 } 677 678 if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) 679 goto err_sysfs; 680 681 if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region)) 682 return NULL; 683 return dax_region; 684 685 err_sysfs: 686 release_resource(&dax_region->res); 687 err_res: 688 dax_region_put(dax_region); 689 return NULL; 690 } 691 EXPORT_SYMBOL_GPL(alloc_dax_region); 692 693 static void dax_mapping_release(struct device *dev) 694 { 695 struct dax_mapping *mapping = to_dax_mapping(dev); 696 struct device *parent = dev->parent; 697 struct dev_dax *dev_dax = to_dev_dax(parent); 698 699 ida_free(&dev_dax->ida, mapping->id); 700 kfree(mapping); 701 put_device(parent); 702 } 703 704 static void unregister_dax_mapping(void *data) 705 { 706 struct device *dev = data; 707 struct dax_mapping *mapping = to_dax_mapping(dev); 708 struct dev_dax *dev_dax = to_dev_dax(dev->parent); 709 710 dev_dbg(dev, "%s\n", __func__); 711 712 dev_dax->ranges[mapping->range_id].mapping = NULL; 713 mapping->range_id = -1; 714 715 device_unregister(dev); 716 } 717 718 static struct dev_dax_range *get_dax_range(struct device *dev) 719 { 720 struct dax_mapping *mapping = to_dax_mapping(dev); 721 struct dev_dax *dev_dax = to_dev_dax(dev->parent); 722 int rc; 723 724 rc = down_write_killable(&dax_region_rwsem); 725 if (rc) 726 return NULL; 727 if (mapping->range_id < 0) { 728 up_write(&dax_region_rwsem); 729 return NULL; 730 } 731 732 return &dev_dax->ranges[mapping->range_id]; 733 } 734 735 static void put_dax_range(void) 736 { 737 up_write(&dax_region_rwsem); 738 } 739 740 static ssize_t start_show(struct device *dev, 741 struct device_attribute *attr, char *buf) 742 { 743 struct dev_dax_range *dax_range; 744 ssize_t rc; 745 746 dax_range = get_dax_range(dev); 747 if (!dax_range) 748 return -ENXIO; 749 rc = sysfs_emit(buf, "%#llx\n", dax_range->range.start); 750 put_dax_range(); 751 752 return rc; 753 } 754 static DEVICE_ATTR(start, 0400, start_show, NULL); 755 756 static ssize_t end_show(struct device *dev, 757 struct device_attribute *attr, char *buf) 758 { 759 struct dev_dax_range *dax_range; 760 ssize_t rc; 761 762 dax_range = get_dax_range(dev); 763 if (!dax_range) 764 return -ENXIO; 765 rc = sysfs_emit(buf, "%#llx\n", dax_range->range.end); 766 put_dax_range(); 767 768 return rc; 769 } 770 static DEVICE_ATTR(end, 0400, end_show, NULL); 771 772 static ssize_t pgoff_show(struct device *dev, 773 struct device_attribute *attr, char *buf) 774 { 775 struct dev_dax_range *dax_range; 776 ssize_t rc; 777 778 dax_range = get_dax_range(dev); 779 if (!dax_range) 780 return -ENXIO; 781 rc = sysfs_emit(buf, "%#lx\n", dax_range->pgoff); 782 put_dax_range(); 783 784 return rc; 785 } 786 static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL); 787 788 static struct attribute *dax_mapping_attributes[] = { 789 &dev_attr_start.attr, 790 &dev_attr_end.attr, 791 &dev_attr_page_offset.attr, 792 NULL, 793 }; 794 795 static const struct attribute_group dax_mapping_attribute_group = { 796 .attrs = dax_mapping_attributes, 797 }; 798 799 static const struct attribute_group *dax_mapping_attribute_groups[] = { 800 &dax_mapping_attribute_group, 801 NULL, 802 }; 803 804 static const struct device_type dax_mapping_type = { 805 .release = dax_mapping_release, 806 .groups = dax_mapping_attribute_groups, 807 }; 808 809 static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id) 810 { 811 struct dax_region *dax_region = dev_dax->region; 812 struct dax_mapping *mapping; 813 struct device *dev; 814 int rc; 815 816 lockdep_assert_held_write(&dax_region_rwsem); 817 818 if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver, 819 "region disabled\n")) 820 return -ENXIO; 821 822 mapping = kzalloc_obj(*mapping); 823 if (!mapping) 824 return -ENOMEM; 825 mapping->range_id = range_id; 826 mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL); 827 if (mapping->id < 0) { 828 kfree(mapping); 829 return -ENOMEM; 830 } 831 dev_dax->ranges[range_id].mapping = mapping; 832 dev = &mapping->dev; 833 device_initialize(dev); 834 dev->parent = &dev_dax->dev; 835 get_device(dev->parent); 836 dev->type = &dax_mapping_type; 837 dev_set_name(dev, "mapping%d", mapping->id); 838 rc = device_add(dev); 839 if (rc) { 840 put_device(dev); 841 return rc; 842 } 843 844 rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping, 845 dev); 846 if (rc) 847 return rc; 848 return 0; 849 } 850 851 static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start, 852 resource_size_t size) 853 { 854 struct dax_region *dax_region = dev_dax->region; 855 struct resource *res = &dax_region->res; 856 struct device *dev = &dev_dax->dev; 857 struct dev_dax_range *ranges; 858 unsigned long pgoff = 0; 859 struct resource *alloc; 860 int i, rc; 861 862 lockdep_assert_held_write(&dax_region_rwsem); 863 864 /* handle the seed alloc special case */ 865 if (!size) { 866 if (dev_WARN_ONCE(dev, dev_dax->nr_range, 867 "0-size allocation must be first\n")) 868 return -EBUSY; 869 /* nr_range == 0 is elsewhere special cased as 0-size device */ 870 return 0; 871 } 872 873 alloc = __request_region(res, start, size, dev_name(dev), 0); 874 if (!alloc) 875 return -ENOMEM; 876 877 ranges = krealloc(dev_dax->ranges, sizeof(*ranges) 878 * (dev_dax->nr_range + 1), GFP_KERNEL); 879 if (!ranges) { 880 __release_region(res, alloc->start, resource_size(alloc)); 881 return -ENOMEM; 882 } 883 884 for (i = 0; i < dev_dax->nr_range; i++) 885 pgoff += PHYS_PFN(range_len(&ranges[i].range)); 886 dev_dax->ranges = ranges; 887 ranges[dev_dax->nr_range++] = (struct dev_dax_range) { 888 .pgoff = pgoff, 889 .range = { 890 .start = alloc->start, 891 .end = alloc->end, 892 }, 893 }; 894 895 dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1, 896 &alloc->start, &alloc->end); 897 /* 898 * A dev_dax instance must be registered before mapping device 899 * children can be added. Defer to devm_create_dev_dax() to add 900 * the initial mapping device. 901 */ 902 if (!device_is_registered(&dev_dax->dev)) 903 return 0; 904 905 rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1); 906 if (rc) 907 trim_dev_dax_range(dev_dax); 908 909 return rc; 910 } 911 912 static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size) 913 { 914 int last_range = dev_dax->nr_range - 1; 915 struct dev_dax_range *dax_range = &dev_dax->ranges[last_range]; 916 bool is_shrink = resource_size(res) > size; 917 struct range *range = &dax_range->range; 918 struct device *dev = &dev_dax->dev; 919 int rc; 920 921 lockdep_assert_held_write(&dax_region_rwsem); 922 923 if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n")) 924 return -EINVAL; 925 926 rc = adjust_resource(res, range->start, size); 927 if (rc) 928 return rc; 929 930 *range = (struct range) { 931 .start = range->start, 932 .end = range->start + size - 1, 933 }; 934 935 dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend", 936 last_range, (unsigned long long) range->start, 937 (unsigned long long) range->end); 938 939 return 0; 940 } 941 942 static ssize_t size_show(struct device *dev, 943 struct device_attribute *attr, char *buf) 944 { 945 struct dev_dax *dev_dax = to_dev_dax(dev); 946 unsigned long long size; 947 int rc; 948 949 rc = down_read_interruptible(&dax_dev_rwsem); 950 if (rc) 951 return rc; 952 size = dev_dax_size(dev_dax); 953 up_read(&dax_dev_rwsem); 954 955 return sysfs_emit(buf, "%llu\n", size); 956 } 957 958 static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size) 959 { 960 /* 961 * The minimum mapping granularity for a device instance is a 962 * single subsection, unless the arch says otherwise. 963 */ 964 return IS_ALIGNED(size, max_t(unsigned long, dev_dax->align, memremap_compat_align())); 965 } 966 967 static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size) 968 { 969 resource_size_t to_shrink = dev_dax_size(dev_dax) - size; 970 struct dax_region *dax_region = dev_dax->region; 971 struct device *dev = &dev_dax->dev; 972 int i; 973 974 for (i = dev_dax->nr_range - 1; i >= 0; i--) { 975 struct range *range = &dev_dax->ranges[i].range; 976 struct dax_mapping *mapping = dev_dax->ranges[i].mapping; 977 struct resource *adjust = NULL, *res; 978 resource_size_t shrink; 979 980 shrink = min_t(u64, to_shrink, range_len(range)); 981 if (shrink >= range_len(range)) { 982 devm_release_action(dax_region->dev, 983 unregister_dax_mapping, &mapping->dev); 984 trim_dev_dax_range(dev_dax); 985 to_shrink -= shrink; 986 if (!to_shrink) 987 break; 988 continue; 989 } 990 991 for_each_dax_region_resource(dax_region, res) 992 if (strcmp(res->name, dev_name(dev)) == 0 993 && res->start == range->start) { 994 adjust = res; 995 break; 996 } 997 998 if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1, 999 "failed to find matching resource\n")) 1000 return -ENXIO; 1001 return adjust_dev_dax_range(dev_dax, adjust, range_len(range) 1002 - shrink); 1003 } 1004 return 0; 1005 } 1006 1007 /* 1008 * Only allow adjustments that preserve the relative pgoff of existing 1009 * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff. 1010 */ 1011 static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res) 1012 { 1013 struct dev_dax_range *last; 1014 int i; 1015 1016 if (dev_dax->nr_range == 0) 1017 return false; 1018 if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0) 1019 return false; 1020 last = &dev_dax->ranges[dev_dax->nr_range - 1]; 1021 if (last->range.start != res->start || last->range.end != res->end) 1022 return false; 1023 for (i = 0; i < dev_dax->nr_range - 1; i++) { 1024 struct dev_dax_range *dax_range = &dev_dax->ranges[i]; 1025 1026 if (dax_range->pgoff > last->pgoff) 1027 return false; 1028 } 1029 1030 return true; 1031 } 1032 1033 static ssize_t dev_dax_resize(struct dax_region *dax_region, 1034 struct dev_dax *dev_dax, resource_size_t size) 1035 { 1036 resource_size_t avail = dax_region_avail_size(dax_region), to_alloc; 1037 resource_size_t dev_size = dev_dax_size(dev_dax); 1038 struct resource *region_res = &dax_region->res; 1039 struct device *dev = &dev_dax->dev; 1040 struct resource *res, *first; 1041 resource_size_t alloc = 0; 1042 int rc; 1043 1044 if (dev->driver) 1045 return -EBUSY; 1046 if (size == dev_size) 1047 return 0; 1048 if (size > dev_size && size - dev_size > avail) 1049 return -ENOSPC; 1050 if (size < dev_size) 1051 return dev_dax_shrink(dev_dax, size); 1052 1053 to_alloc = size - dev_size; 1054 if (dev_WARN_ONCE(dev, !alloc_is_aligned(dev_dax, to_alloc), 1055 "resize of %pa misaligned\n", &to_alloc)) 1056 return -ENXIO; 1057 1058 /* 1059 * Expand the device into the unused portion of the region. This 1060 * may involve adjusting the end of an existing resource, or 1061 * allocating a new resource. 1062 */ 1063 retry: 1064 first = region_res->child; 1065 if (!first) 1066 return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc); 1067 1068 rc = -ENOSPC; 1069 for (res = first; res; res = res->sibling) { 1070 struct resource *next = res->sibling; 1071 1072 /* space at the beginning of the region */ 1073 if (res == first && res->start > dax_region->res.start) { 1074 alloc = min(res->start - dax_region->res.start, to_alloc); 1075 rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc); 1076 break; 1077 } 1078 1079 alloc = 0; 1080 /* space between allocations */ 1081 if (next && next->start > res->end + 1) 1082 alloc = min(next->start - (res->end + 1), to_alloc); 1083 1084 /* space at the end of the region */ 1085 if (!alloc && !next && res->end < region_res->end) 1086 alloc = min(region_res->end - res->end, to_alloc); 1087 1088 if (!alloc) 1089 continue; 1090 1091 if (adjust_ok(dev_dax, res)) { 1092 rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc); 1093 break; 1094 } 1095 rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc); 1096 break; 1097 } 1098 if (rc) 1099 return rc; 1100 to_alloc -= alloc; 1101 if (to_alloc) 1102 goto retry; 1103 return 0; 1104 } 1105 1106 static ssize_t size_store(struct device *dev, struct device_attribute *attr, 1107 const char *buf, size_t len) 1108 { 1109 ssize_t rc; 1110 unsigned long long val; 1111 struct dev_dax *dev_dax = to_dev_dax(dev); 1112 struct dax_region *dax_region = dev_dax->region; 1113 1114 rc = kstrtoull(buf, 0, &val); 1115 if (rc) 1116 return rc; 1117 1118 if (!alloc_is_aligned(dev_dax, val)) { 1119 dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val); 1120 return -EINVAL; 1121 } 1122 1123 rc = down_write_killable(&dax_region_rwsem); 1124 if (rc) 1125 return rc; 1126 if (!dax_region->dev->driver) { 1127 rc = -ENXIO; 1128 goto err_region; 1129 } 1130 rc = down_write_killable(&dax_dev_rwsem); 1131 if (rc) 1132 goto err_dev; 1133 1134 rc = dev_dax_resize(dax_region, dev_dax, val); 1135 1136 err_dev: 1137 up_write(&dax_dev_rwsem); 1138 err_region: 1139 up_write(&dax_region_rwsem); 1140 1141 if (rc == 0) 1142 return len; 1143 return rc; 1144 } 1145 static DEVICE_ATTR_RW(size); 1146 1147 static ssize_t range_parse(const char *opt, size_t len, struct range *range) 1148 { 1149 unsigned long long addr = 0; 1150 char *start, *end, *str; 1151 ssize_t rc = -EINVAL; 1152 1153 str = kstrdup(opt, GFP_KERNEL); 1154 if (!str) 1155 return rc; 1156 1157 end = str; 1158 start = strsep(&end, "-"); 1159 if (!start || !end) 1160 goto err; 1161 1162 rc = kstrtoull(start, 16, &addr); 1163 if (rc) 1164 goto err; 1165 range->start = addr; 1166 1167 rc = kstrtoull(end, 16, &addr); 1168 if (rc) 1169 goto err; 1170 range->end = addr; 1171 1172 err: 1173 kfree(str); 1174 return rc; 1175 } 1176 1177 static ssize_t mapping_store(struct device *dev, struct device_attribute *attr, 1178 const char *buf, size_t len) 1179 { 1180 struct dev_dax *dev_dax = to_dev_dax(dev); 1181 struct dax_region *dax_region = dev_dax->region; 1182 size_t to_alloc; 1183 struct range r; 1184 ssize_t rc; 1185 1186 rc = range_parse(buf, len, &r); 1187 if (rc) 1188 return rc; 1189 1190 rc = down_write_killable(&dax_region_rwsem); 1191 if (rc) 1192 return rc; 1193 if (!dax_region->dev->driver) { 1194 up_write(&dax_region_rwsem); 1195 return rc; 1196 } 1197 rc = down_write_killable(&dax_dev_rwsem); 1198 if (rc) { 1199 up_write(&dax_region_rwsem); 1200 return rc; 1201 } 1202 1203 to_alloc = range_len(&r); 1204 if (alloc_is_aligned(dev_dax, to_alloc)) 1205 rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc); 1206 up_write(&dax_dev_rwsem); 1207 up_write(&dax_region_rwsem); 1208 1209 return rc == 0 ? len : rc; 1210 } 1211 static DEVICE_ATTR_WO(mapping); 1212 1213 static ssize_t align_show(struct device *dev, 1214 struct device_attribute *attr, char *buf) 1215 { 1216 struct dev_dax *dev_dax = to_dev_dax(dev); 1217 1218 return sysfs_emit(buf, "%d\n", dev_dax->align); 1219 } 1220 1221 static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax) 1222 { 1223 struct device *dev = &dev_dax->dev; 1224 int i; 1225 1226 for (i = 0; i < dev_dax->nr_range; i++) { 1227 size_t len = range_len(&dev_dax->ranges[i].range); 1228 1229 if (!alloc_is_aligned(dev_dax, len)) { 1230 dev_dbg(dev, "%s: align %u invalid for range %d\n", 1231 __func__, dev_dax->align, i); 1232 return -EINVAL; 1233 } 1234 } 1235 1236 return 0; 1237 } 1238 1239 static ssize_t align_store(struct device *dev, struct device_attribute *attr, 1240 const char *buf, size_t len) 1241 { 1242 struct dev_dax *dev_dax = to_dev_dax(dev); 1243 struct dax_region *dax_region = dev_dax->region; 1244 unsigned long val, align_save; 1245 ssize_t rc; 1246 1247 rc = kstrtoul(buf, 0, &val); 1248 if (rc) 1249 return -ENXIO; 1250 1251 if (!dax_align_valid(val)) 1252 return -EINVAL; 1253 1254 rc = down_write_killable(&dax_region_rwsem); 1255 if (rc) 1256 return rc; 1257 if (!dax_region->dev->driver) { 1258 up_write(&dax_region_rwsem); 1259 return -ENXIO; 1260 } 1261 1262 rc = down_write_killable(&dax_dev_rwsem); 1263 if (rc) { 1264 up_write(&dax_region_rwsem); 1265 return rc; 1266 } 1267 if (dev->driver) { 1268 rc = -EBUSY; 1269 goto out_unlock; 1270 } 1271 1272 align_save = dev_dax->align; 1273 dev_dax->align = val; 1274 rc = dev_dax_validate_align(dev_dax); 1275 if (rc) 1276 dev_dax->align = align_save; 1277 out_unlock: 1278 up_write(&dax_dev_rwsem); 1279 up_write(&dax_region_rwsem); 1280 return rc == 0 ? len : rc; 1281 } 1282 static DEVICE_ATTR_RW(align); 1283 1284 static int dev_dax_target_node(struct dev_dax *dev_dax) 1285 { 1286 struct dax_region *dax_region = dev_dax->region; 1287 1288 return dax_region->target_node; 1289 } 1290 1291 static ssize_t target_node_show(struct device *dev, 1292 struct device_attribute *attr, char *buf) 1293 { 1294 struct dev_dax *dev_dax = to_dev_dax(dev); 1295 1296 return sysfs_emit(buf, "%d\n", dev_dax_target_node(dev_dax)); 1297 } 1298 static DEVICE_ATTR_RO(target_node); 1299 1300 static ssize_t resource_show(struct device *dev, 1301 struct device_attribute *attr, char *buf) 1302 { 1303 struct dev_dax *dev_dax = to_dev_dax(dev); 1304 struct dax_region *dax_region = dev_dax->region; 1305 unsigned long long start; 1306 1307 if (dev_dax->nr_range < 1) 1308 start = dax_region->res.start; 1309 else 1310 start = dev_dax->ranges[0].range.start; 1311 1312 return sysfs_emit(buf, "%#llx\n", start); 1313 } 1314 static DEVICE_ATTR(resource, 0400, resource_show, NULL); 1315 1316 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, 1317 char *buf) 1318 { 1319 /* 1320 * We only ever expect to handle device-dax instances, i.e. the 1321 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero 1322 */ 1323 return sysfs_emit(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0); 1324 } 1325 static DEVICE_ATTR_RO(modalias); 1326 1327 static ssize_t numa_node_show(struct device *dev, 1328 struct device_attribute *attr, char *buf) 1329 { 1330 return sysfs_emit(buf, "%d\n", dev_to_node(dev)); 1331 } 1332 static DEVICE_ATTR_RO(numa_node); 1333 1334 static ssize_t memmap_on_memory_show(struct device *dev, 1335 struct device_attribute *attr, char *buf) 1336 { 1337 struct dev_dax *dev_dax = to_dev_dax(dev); 1338 1339 return sysfs_emit(buf, "%d\n", dev_dax->memmap_on_memory); 1340 } 1341 1342 static ssize_t memmap_on_memory_store(struct device *dev, 1343 struct device_attribute *attr, 1344 const char *buf, size_t len) 1345 { 1346 struct dev_dax *dev_dax = to_dev_dax(dev); 1347 bool val; 1348 int rc; 1349 1350 rc = kstrtobool(buf, &val); 1351 if (rc) 1352 return rc; 1353 1354 if (val == true && !mhp_supports_memmap_on_memory()) { 1355 dev_dbg(dev, "memmap_on_memory is not available\n"); 1356 return -EOPNOTSUPP; 1357 } 1358 1359 rc = down_write_killable(&dax_dev_rwsem); 1360 if (rc) 1361 return rc; 1362 1363 if (dev_dax->memmap_on_memory != val && dev->driver && 1364 to_dax_drv(dev->driver)->type == DAXDRV_KMEM_TYPE) { 1365 up_write(&dax_dev_rwsem); 1366 return -EBUSY; 1367 } 1368 1369 dev_dax->memmap_on_memory = val; 1370 up_write(&dax_dev_rwsem); 1371 1372 return len; 1373 } 1374 static DEVICE_ATTR_RW(memmap_on_memory); 1375 1376 static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n) 1377 { 1378 struct device *dev = container_of(kobj, struct device, kobj); 1379 struct dev_dax *dev_dax = to_dev_dax(dev); 1380 struct dax_region *dax_region = dev_dax->region; 1381 1382 if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0) 1383 return 0; 1384 if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA)) 1385 return 0; 1386 if (a == &dev_attr_mapping.attr && is_static(dax_region)) 1387 return 0; 1388 if ((a == &dev_attr_align.attr || 1389 a == &dev_attr_size.attr) && is_static(dax_region)) 1390 return 0444; 1391 return a->mode; 1392 } 1393 1394 static struct attribute *dev_dax_attributes[] = { 1395 &dev_attr_modalias.attr, 1396 &dev_attr_size.attr, 1397 &dev_attr_mapping.attr, 1398 &dev_attr_target_node.attr, 1399 &dev_attr_align.attr, 1400 &dev_attr_resource.attr, 1401 &dev_attr_numa_node.attr, 1402 &dev_attr_memmap_on_memory.attr, 1403 NULL, 1404 }; 1405 1406 static const struct attribute_group dev_dax_attribute_group = { 1407 .attrs = dev_dax_attributes, 1408 .is_visible = dev_dax_visible, 1409 }; 1410 1411 static const struct attribute_group *dax_attribute_groups[] = { 1412 &dev_dax_attribute_group, 1413 NULL, 1414 }; 1415 1416 static void dev_dax_release(struct device *dev) 1417 { 1418 struct dev_dax *dev_dax = to_dev_dax(dev); 1419 struct dax_device *dax_dev = dev_dax->dax_dev; 1420 1421 put_dax(dax_dev); 1422 free_dev_dax_id(dev_dax); 1423 kfree(dev_dax->pgmap); 1424 kfree(dev_dax); 1425 } 1426 1427 static const struct device_type dev_dax_type = { 1428 .release = dev_dax_release, 1429 .groups = dax_attribute_groups, 1430 }; 1431 1432 /* see "strong" declaration in tools/testing/nvdimm/dax-dev.c */ 1433 __weak phys_addr_t dax_pgoff_to_phys(struct dev_dax *dev_dax, pgoff_t pgoff, 1434 unsigned long size) 1435 { 1436 for (int i = 0; i < dev_dax->nr_range; i++) { 1437 struct dev_dax_range *dax_range = &dev_dax->ranges[i]; 1438 struct range *range = &dax_range->range; 1439 phys_addr_t phys; 1440 1441 if (!in_range(pgoff, dax_range->pgoff, PHYS_PFN(range_len(range)))) 1442 continue; 1443 phys = PFN_PHYS(pgoff - dax_range->pgoff) + range->start; 1444 if (phys + size - 1 <= range->end) 1445 return phys; 1446 break; 1447 } 1448 return -1; 1449 } 1450 EXPORT_SYMBOL_GPL(dax_pgoff_to_phys); 1451 1452 static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data) 1453 { 1454 struct dax_region *dax_region = data->dax_region; 1455 struct device *parent = dax_region->dev; 1456 struct dax_device *dax_dev; 1457 struct dev_dax *dev_dax; 1458 struct inode *inode; 1459 struct device *dev; 1460 int rc; 1461 1462 dev_dax = kzalloc_obj(*dev_dax); 1463 if (!dev_dax) 1464 return ERR_PTR(-ENOMEM); 1465 1466 dev_dax->region = dax_region; 1467 if (is_static(dax_region)) { 1468 if (dev_WARN_ONCE(parent, data->id < 0, 1469 "dynamic id specified to static region\n")) { 1470 rc = -EINVAL; 1471 goto err_id; 1472 } 1473 1474 dev_dax->id = data->id; 1475 } else { 1476 if (dev_WARN_ONCE(parent, data->id >= 0, 1477 "static id specified to dynamic region\n")) { 1478 rc = -EINVAL; 1479 goto err_id; 1480 } 1481 1482 rc = alloc_dev_dax_id(dev_dax); 1483 if (rc < 0) 1484 goto err_id; 1485 } 1486 1487 dev = &dev_dax->dev; 1488 device_initialize(dev); 1489 dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id); 1490 1491 rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size); 1492 if (rc) 1493 goto err_range; 1494 1495 if (data->pgmap) { 1496 dev_WARN_ONCE(parent, !is_static(dax_region), 1497 "custom dev_pagemap requires a static dax_region\n"); 1498 1499 dev_dax->pgmap = kmemdup(data->pgmap, 1500 sizeof(struct dev_pagemap), GFP_KERNEL); 1501 if (!dev_dax->pgmap) { 1502 rc = -ENOMEM; 1503 goto err_pgmap; 1504 } 1505 } 1506 1507 /* 1508 * No dax_operations since there is no access to this device outside of 1509 * mmap of the resulting character device. 1510 */ 1511 dax_dev = alloc_dax(dev_dax, NULL); 1512 if (IS_ERR(dax_dev)) { 1513 rc = PTR_ERR(dax_dev); 1514 goto err_alloc_dax; 1515 } 1516 set_dax_synchronous(dax_dev); 1517 set_dax_nocache(dax_dev); 1518 set_dax_nomc(dax_dev); 1519 1520 /* a device_dax instance is dead while the driver is not attached */ 1521 kill_dax(dax_dev); 1522 1523 dev_dax->dax_dev = dax_dev; 1524 dev_dax->target_node = dax_region->target_node; 1525 dev_dax->align = dax_region->align; 1526 ida_init(&dev_dax->ida); 1527 1528 dev_dax->memmap_on_memory = data->memmap_on_memory; 1529 1530 inode = dax_inode(dax_dev); 1531 dev->devt = inode->i_rdev; 1532 dev->bus = &dax_bus_type; 1533 dev->parent = parent; 1534 dev->type = &dev_dax_type; 1535 1536 rc = device_add(dev); 1537 if (rc) { 1538 kill_dev_dax(dev_dax); 1539 put_device(dev); 1540 return ERR_PTR(rc); 1541 } 1542 1543 rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev); 1544 if (rc) 1545 return ERR_PTR(rc); 1546 1547 /* register mapping device for the initial allocation range */ 1548 if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) { 1549 rc = devm_register_dax_mapping(dev_dax, 0); 1550 if (rc) 1551 return ERR_PTR(rc); 1552 } 1553 1554 return dev_dax; 1555 1556 err_alloc_dax: 1557 kfree(dev_dax->pgmap); 1558 err_pgmap: 1559 free_dev_dax_ranges(dev_dax); 1560 err_range: 1561 free_dev_dax_id(dev_dax); 1562 err_id: 1563 kfree(dev_dax); 1564 1565 return ERR_PTR(rc); 1566 } 1567 1568 struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) 1569 { 1570 struct dev_dax *dev_dax; 1571 1572 down_write(&dax_region_rwsem); 1573 dev_dax = __devm_create_dev_dax(data); 1574 up_write(&dax_region_rwsem); 1575 1576 return dev_dax; 1577 } 1578 EXPORT_SYMBOL_GPL(devm_create_dev_dax); 1579 1580 int __dax_driver_register(struct dax_device_driver *dax_drv, 1581 struct module *module, const char *mod_name) 1582 { 1583 struct device_driver *drv = &dax_drv->drv; 1584 1585 /* 1586 * dax_bus_probe() calls dax_drv->probe() unconditionally. 1587 * So better be safe than sorry and ensure it is provided. 1588 */ 1589 if (!dax_drv->probe) 1590 return -EINVAL; 1591 1592 INIT_LIST_HEAD(&dax_drv->ids); 1593 drv->owner = module; 1594 drv->name = mod_name; 1595 drv->mod_name = mod_name; 1596 drv->bus = &dax_bus_type; 1597 1598 return driver_register(drv); 1599 } 1600 EXPORT_SYMBOL_GPL(__dax_driver_register); 1601 1602 void dax_driver_unregister(struct dax_device_driver *dax_drv) 1603 { 1604 struct device_driver *drv = &dax_drv->drv; 1605 struct dax_id *dax_id, *_id; 1606 1607 mutex_lock(&dax_bus_lock); 1608 list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) { 1609 list_del(&dax_id->list); 1610 kfree(dax_id); 1611 } 1612 mutex_unlock(&dax_bus_lock); 1613 driver_unregister(drv); 1614 } 1615 EXPORT_SYMBOL_GPL(dax_driver_unregister); 1616 1617 int __init dax_bus_init(void) 1618 { 1619 return bus_register(&dax_bus_type); 1620 } 1621 1622 void __exit dax_bus_exit(void) 1623 { 1624 bus_unregister(&dax_bus_type); 1625 } 1626