// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
#include <linux/memremap.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/io.h>
#include "dax-private.h"
#include "bus.h"

/* Root resource under which all dax region physical ranges are claimed. */
static struct resource dax_regions = DEFINE_RES_MEM_NAMED(0, -1, "DAX Regions");
/* Serializes access to each driver's ->ids list (see __dax_match_id()). */
static DEFINE_MUTEX(dax_bus_lock);

/*
 * All changes to the dax region configuration occur with this lock held
 * for write.
 */
DECLARE_RWSEM(dax_region_rwsem);

/*
 * All changes to the dax device configuration occur with this lock held
 * for write.
 */
DECLARE_RWSEM(dax_dev_rwsem);

#define DAX_NAME_LEN 30
/* One manually-bound device name ("daxX.Y") on a dax_device_driver's ids list. */
struct dax_id {
	struct list_head list;		/* node on dax_drv->ids */
	char dev_name[DAX_NAME_LEN];	/* e.g. "dax0.1" */
};

/* Bus uevent handler: advertise the device-dax modalias for hotplug. */
static int dax_bus_uevent(const struct device *dev, struct kobj_uevent_env *env)
{
	/*
	 * We only ever expect to handle device-dax instances, i.e. 
the 38 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero 39 */ 40 return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0); 41 } 42 43 #define to_dax_drv(__drv) container_of_const(__drv, struct dax_device_driver, drv) 44 45 static struct dax_id *__dax_match_id(const struct dax_device_driver *dax_drv, 46 const char *dev_name) 47 { 48 struct dax_id *dax_id; 49 50 lockdep_assert_held(&dax_bus_lock); 51 52 list_for_each_entry(dax_id, &dax_drv->ids, list) 53 if (sysfs_streq(dax_id->dev_name, dev_name)) 54 return dax_id; 55 return NULL; 56 } 57 58 static int dax_match_id(const struct dax_device_driver *dax_drv, struct device *dev) 59 { 60 int match; 61 62 mutex_lock(&dax_bus_lock); 63 match = !!__dax_match_id(dax_drv, dev_name(dev)); 64 mutex_unlock(&dax_bus_lock); 65 66 return match; 67 } 68 69 static int dax_match_type(const struct dax_device_driver *dax_drv, struct device *dev) 70 { 71 enum dax_driver_type type = DAXDRV_DEVICE_TYPE; 72 struct dev_dax *dev_dax = to_dev_dax(dev); 73 74 if (dev_dax->region->res.flags & IORESOURCE_DAX_KMEM) 75 type = DAXDRV_KMEM_TYPE; 76 77 if (dax_drv->type == type) 78 return 1; 79 80 /* default to device mode if dax_kmem is disabled */ 81 if (dax_drv->type == DAXDRV_DEVICE_TYPE && 82 !IS_ENABLED(CONFIG_DEV_DAX_KMEM)) 83 return 1; 84 85 return 0; 86 } 87 88 enum id_action { 89 ID_REMOVE, 90 ID_ADD, 91 }; 92 93 static ssize_t do_id_store(struct device_driver *drv, const char *buf, 94 size_t count, enum id_action action) 95 { 96 struct dax_device_driver *dax_drv = to_dax_drv(drv); 97 unsigned int region_id, id; 98 char devname[DAX_NAME_LEN]; 99 struct dax_id *dax_id; 100 ssize_t rc = count; 101 int fields; 102 103 fields = sscanf(buf, "dax%d.%d", ®ion_id, &id); 104 if (fields != 2) 105 return -EINVAL; 106 sprintf(devname, "dax%d.%d", region_id, id); 107 if (!sysfs_streq(buf, devname)) 108 return -EINVAL; 109 110 mutex_lock(&dax_bus_lock); 111 dax_id = __dax_match_id(dax_drv, buf); 112 if (!dax_id) { 113 if (action == 
ID_ADD) { 114 dax_id = kzalloc_obj(*dax_id); 115 if (dax_id) { 116 strscpy(dax_id->dev_name, buf, DAX_NAME_LEN); 117 list_add(&dax_id->list, &dax_drv->ids); 118 } else 119 rc = -ENOMEM; 120 } 121 } else if (action == ID_REMOVE) { 122 list_del(&dax_id->list); 123 kfree(dax_id); 124 } 125 mutex_unlock(&dax_bus_lock); 126 127 if (rc < 0) 128 return rc; 129 if (action == ID_ADD) 130 rc = driver_attach(drv); 131 if (rc) 132 return rc; 133 return count; 134 } 135 136 static ssize_t new_id_store(struct device_driver *drv, const char *buf, 137 size_t count) 138 { 139 return do_id_store(drv, buf, count, ID_ADD); 140 } 141 static DRIVER_ATTR_WO(new_id); 142 143 static ssize_t remove_id_store(struct device_driver *drv, const char *buf, 144 size_t count) 145 { 146 return do_id_store(drv, buf, count, ID_REMOVE); 147 } 148 static DRIVER_ATTR_WO(remove_id); 149 150 static struct attribute *dax_drv_attrs[] = { 151 &driver_attr_new_id.attr, 152 &driver_attr_remove_id.attr, 153 NULL, 154 }; 155 ATTRIBUTE_GROUPS(dax_drv); 156 157 static int dax_bus_match(struct device *dev, const struct device_driver *drv); 158 159 /* 160 * Static dax regions are regions created by an external subsystem 161 * nvdimm where a single range is assigned. Its boundaries are by the external 162 * subsystem and are usually limited to one physical memory range. For example, 163 * for PMEM it is usually defined by NVDIMM Namespace boundaries (i.e. a 164 * single contiguous range) 165 * 166 * On dynamic dax regions, the assigned region can be partitioned by dax core 167 * into multiple subdivisions. A subdivision is represented into one 168 * /dev/daxN.M device composed by one or more potentially discontiguous ranges. 169 * 170 * When allocating a dax region, drivers must set whether it's static 171 * (IORESOURCE_DAX_STATIC). 
On static dax devices, the @pgmap is pre-assigned
 * to dax core when calling devm_create_dev_dax(), whereas in dynamic dax
 * devices it is NULL but afterwards allocated by dax core on device ->probe().
 * Care is needed to make sure that dynamic dax devices are torn down with a
 * cleared @pgmap field (see kill_dev_dax()).
 */
static bool is_static(struct dax_region *dax_region)
{
	return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
}

/* Exported form of is_static() keyed off the device rather than the region. */
bool static_dev_dax(struct dev_dax *dev_dax)
{
	return is_static(dev_dax->region);
}
EXPORT_SYMBOL_GPL(static_dev_dax);

/* Total bytes allocated to @dev_dax; caller must hold dax_dev_rwsem. */
static u64 dev_dax_size(struct dev_dax *dev_dax)
{
	u64 size = 0;
	int i;

	lockdep_assert_held(&dax_dev_rwsem);

	for (i = 0; i < dev_dax->nr_range; i++)
		size += range_len(&dev_dax->ranges[i].range);

	return size;
}

/*
 * Bus ->probe(): refuse zero-sized or id-invalidated devices, then hand
 * off to the matched driver. On dynamic regions a successful probe of
 * the current seed clears it so a new seed can be tracked.
 */
static int dax_bus_probe(struct device *dev)
{
	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	int rc;
	u64 size;

	rc = down_read_interruptible(&dax_dev_rwsem);
	if (rc)
		return rc;
	size = dev_dax_size(dev_dax);
	up_read(&dax_dev_rwsem);

	/* id < 0 marks a device invalidated by delete_store() */
	if (size == 0 || dev_dax->id < 0)
		return -ENXIO;

	rc = dax_drv->probe(dev_dax);

	if (rc || is_static(dax_region))
		return rc;

	/*
	 * Track new seed creation only after successful probe of the
	 * previous seed.
	 */
	if (dax_region->seed == dev)
		dax_region->seed = NULL;

	return 0;
}

/* Bus ->remove(): ->remove is optional for dax drivers. */
static void dax_bus_remove(struct device *dev)
{
	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
	struct dev_dax *dev_dax = to_dev_dax(dev);

	if (dax_drv->remove)
		dax_drv->remove(dev_dax);
}

static const struct bus_type dax_bus_type = {
	.name = "dax",
	.uevent = dax_bus_uevent,
	.match = dax_bus_match,
	.probe = dax_bus_probe,
	.remove = dax_bus_remove,
	.drv_groups = dax_drv_groups,
};

/* Manual id bindings take precedence over type-based matching. */
static int dax_bus_match(struct device *dev, const struct device_driver *drv)
{
	const struct dax_device_driver *dax_drv = to_dax_drv(drv);

	if (dax_match_id(dax_drv, dev))
		return 1;
	return dax_match_type(dax_drv, dev);
}

/*
 * Rely on the fact that drvdata is set before the attributes are
 * registered, and that the attributes are unregistered before drvdata
 * is cleared to assume that drvdata is always valid. 

 */
static ssize_t id_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%d\n", dax_region->id);
}
static DEVICE_ATTR_RO(id);

/* sysfs dax_region/size: total bytes spanned by the region resource. */
static ssize_t region_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%llu\n",
			(unsigned long long)resource_size(&dax_region->res));
}
static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
		region_size_show, NULL);

/* sysfs dax_region/align: allocation granularity of the region. */
static ssize_t region_align_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%u\n", dax_region->align);
}
static struct device_attribute dev_attr_region_align =
		__ATTR(align, 0400, region_align_show, NULL);

/* Walk the immediate child allocations of the region resource. */
#define for_each_dax_region_resource(dax_region, res) \
	for (res = (dax_region)->res.child; res; res = res->sibling)

/* Region size minus all child allocations; caller holds dax_region_rwsem. */
static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
{
	resource_size_t size = resource_size(&dax_region->res);
	struct resource *res;

	lockdep_assert_held(&dax_region_rwsem);

	for_each_dax_region_resource(dax_region, res)
		size -= resource_size(res);
	return size;
}

static ssize_t available_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	unsigned long long size;
	int rc;

	rc = down_read_interruptible(&dax_region_rwsem);
	if (rc)
		return rc;
	size = dax_region_avail_size(dax_region);
	up_read(&dax_region_rwsem);

	return sysfs_emit(buf, "%llu\n", size);
}
static DEVICE_ATTR_RO(available_size);

/* sysfs seed: name of the next unbound device in a dynamic region. */
static ssize_t seed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct device *seed;
	ssize_t rc;

	if (is_static(dax_region))
		return -EINVAL;

	rc = down_read_interruptible(&dax_region_rwsem);
	if (rc)
		return rc;
	seed = dax_region->seed;
	rc = sysfs_emit(buf, "%s\n", seed ? dev_name(seed) : "");
	up_read(&dax_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RO(seed);

/* sysfs create (read): name of the most recently created device. */
static ssize_t create_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct device *youngest;
	ssize_t rc;

	if (is_static(dax_region))
		return -EINVAL;

	rc = down_read_interruptible(&dax_region_rwsem);
	if (rc)
		return rc;
	youngest = dax_region->youngest;
	rc = sysfs_emit(buf, "%s\n", youngest ? dev_name(youngest) : "");
	up_read(&dax_region_rwsem);

	return rc;
}

static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data);

/*
 * sysfs create (write): writing "1" allocates a new zero-sized seed
 * device in a dynamic region, to be sized via its size attribute.
 */
static ssize_t create_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	unsigned long long avail;
	ssize_t rc;
	int val;

	if (is_static(dax_region))
		return -EINVAL;

	rc = kstrtoint(buf, 0, &val);
	if (rc)
		return rc;
	if (val != 1)
		return -EINVAL;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	avail = dax_region_avail_size(dax_region);
	if (avail == 0)
		rc = -ENOSPC;
	else {
		struct dev_dax_data data = {
			.dax_region = dax_region,
			.size = 0,
			.id = -1,
			.memmap_on_memory = false,
		};
		struct dev_dax *dev_dax = __devm_create_dev_dax(&data);

		if (IS_ERR(dev_dax))
			rc = PTR_ERR(dev_dax);
		else {
			/*
			 * In support of crafting multiple new devices
			 * simultaneously multiple seeds can be created, 
but only the first one that has not been
			 * successfully bound is tracked as the region
			 * seed.
			 */
			if (!dax_region->seed)
				dax_region->seed = &dev_dax->dev;
			dax_region->youngest = &dev_dax->dev;
			rc = len;
		}
	}
	up_write(&dax_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RW(create);

/* Shut down the dax_device and invalidate any existing user mappings. */
void kill_dev_dax(struct dev_dax *dev_dax)
{
	struct dax_device *dax_dev = dev_dax->dax_dev;
	struct inode *inode = dax_inode(dax_dev);

	kill_dax(dax_dev);
	unmap_mapping_range(inode->i_mapping, 0, 0, 1);

	/*
	 * Dynamic dax region have the pgmap allocated via dev_kzalloc()
	 * and thus freed by devm. Clear the pgmap to not have stale pgmap
	 * ranges on probe() from previous reconfigurations of region devices.
	 */
	if (!static_dev_dax(dev_dax))
		dev_dax->pgmap = NULL;
}
EXPORT_SYMBOL_GPL(kill_dev_dax);

/* Release the device's last range back to the region resource tree. */
static void trim_dev_dax_range(struct dev_dax *dev_dax)
{
	int i = dev_dax->nr_range - 1;
	struct range *range = &dev_dax->ranges[i].range;
	struct dax_region *dax_region = dev_dax->region;

	lockdep_assert_held_write(&dax_region_rwsem);
	dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
		(unsigned long long)range->start,
		(unsigned long long)range->end);

	__release_region(&dax_region->res, range->start, range_len(range));
	if (--dev_dax->nr_range == 0) {
		kfree(dev_dax->ranges);
		dev_dax->ranges = NULL;
	}
}

static void free_dev_dax_ranges(struct dev_dax *dev_dax)
{
	while (dev_dax->nr_range)
		trim_dev_dax_range(dev_dax);
}

/* devm action: full teardown of a dev_dax under the region lock. */
static void unregister_dev_dax(void *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	dev_dbg(dev, "%s\n", __func__);

	down_write(&dax_region_rwsem);
	kill_dev_dax(dev_dax);
	device_del(dev);
	free_dev_dax_ranges(dev_dax);
	put_device(dev);
	up_write(&dax_region_rwsem);
}

/* kref release callback for dax_region_put(). */
static void dax_region_free(struct kref *kref)
{
	struct dax_region *dax_region;

	dax_region = container_of(kref, struct dax_region, kref);
	kfree(dax_region);
}

static void dax_region_put(struct dax_region *dax_region)
{
	kref_put(&dax_region->kref, dax_region_free);
}

/* a return value >= 0 indicates this invocation invalidated the id */
static int __free_dev_dax_id(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region;
	int rc = dev_dax->id;

	lockdep_assert_held_write(&dax_dev_rwsem);

	/* only dynamically-allocated ids are owned (and refcounted) here */
	if (!dev_dax->dyn_id || dev_dax->id < 0)
		return -1;
	dax_region = dev_dax->region;
	ida_free(&dax_region->ida, dev_dax->id);
	dax_region_put(dax_region);
	dev_dax->id = -1;
	return rc;
}

static int free_dev_dax_id(struct dev_dax *dev_dax)
{
	int rc;

	rc = down_write_killable(&dax_dev_rwsem);
	if (rc)
		return rc;
	rc = __free_dev_dax_id(dev_dax);
	up_write(&dax_dev_rwsem);
	return rc;
}

/* Allocate a dynamic device id; takes a region reference paired with
 * the dax_region_put() in __free_dev_dax_id(). */
static int alloc_dev_dax_id(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region = dev_dax->region;
	int id;

	id = ida_alloc(&dax_region->ida, GFP_KERNEL);
	if (id < 0)
		return id;
	kref_get(&dax_region->kref);
	dev_dax->dyn_id = true;
	dev_dax->id = id;
	return id;
}

/*
 * sysfs delete: tear down a named, unbound, zero-sized child device of
 * a dynamic region.
 */
static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct dev_dax *dev_dax;
	struct device *victim;
	bool do_del = false;
	int rc;

	if (is_static(dax_region))
		return -EINVAL;

	victim = device_find_child_by_name(dax_region->dev, buf);
	if (!victim)
		return -ENXIO;

	/* lock parent before child to serialize against concurrent deletes */
	device_lock(dev);
	device_lock(victim);
	dev_dax = to_dev_dax(victim);
	down_write(&dax_dev_rwsem);
	if (victim->driver || dev_dax_size(dev_dax))
		rc = -EBUSY;
	else {
		/*
		 * Invalidate the device 
so it does not become active
		 * again, but always preserve device-id-0 so that
		 * /sys/bus/dax/ is guaranteed to be populated while any
		 * dax_region is registered.
		 */
		if (dev_dax->id > 0) {
			do_del = __free_dev_dax_id(dev_dax) >= 0;
			rc = len;
			if (dax_region->seed == victim)
				dax_region->seed = NULL;
			if (dax_region->youngest == victim)
				dax_region->youngest = NULL;
		} else
			rc = -EBUSY;
	}
	up_write(&dax_dev_rwsem);
	device_unlock(victim);

	/* won the race to invalidate the device, clean it up */
	if (do_del)
		devm_release_action(dev, unregister_dev_dax, victim);
	device_unlock(dev);
	put_device(victim);

	return rc;
}
static DEVICE_ATTR_WO(delete);

/* Hide the dynamic-only attributes (create/seed/delete/...) on static regions. */
static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a,
		int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct dax_region *dax_region = dev_get_drvdata(dev);

	if (is_static(dax_region))
		if (a == &dev_attr_available_size.attr
				|| a == &dev_attr_create.attr
				|| a == &dev_attr_seed.attr
				|| a == &dev_attr_delete.attr)
			return 0;
	return a->mode;
}

static struct attribute *dax_region_attributes[] = {
	&dev_attr_available_size.attr,
	&dev_attr_region_size.attr,
	&dev_attr_region_align.attr,
	&dev_attr_create.attr,
	&dev_attr_seed.attr,
	&dev_attr_delete.attr,
	&dev_attr_id.attr,
	NULL,
};

static const struct attribute_group dax_region_attribute_group = {
	.name = "dax_region",
	.attrs = dax_region_attributes,
	.is_visible = dax_region_visible,
};

static const struct attribute_group *dax_region_attribute_groups[] = {
	&dax_region_attribute_group,
	NULL,
};

/* devm action: undo alloc_dax_region() (sysfs, resource, refcount). */
static void dax_region_unregister(void *region)
{
	struct dax_region *dax_region = region;

	sysfs_remove_groups(&dax_region->dev->kobj,
			dax_region_attribute_groups);
	release_resource(&dax_region->res);
	dax_region_put(dax_region);
}

/*
 * alloc_dax_region() - create and register a dax_region for @parent.
 * @parent: external subsystem device (e.g. an nvdimm namespace) hosting
 *          the region; its drvdata is claimed by the dax core
 * @region_id: id used in the daxX.Y device names
 * @range: physical address span of the region
 * @target_node: numa node to associate with onlined memory
 * @align: allocation alignment; @range must be aligned to it
 * @flags: IORESOURCE_DAX_* flags (e.g. IORESOURCE_DAX_STATIC)
 *
 * Returns the new region, or NULL on any failure (cleanup is complete
 * on the error paths; steady-state teardown is via devm).
 */
struct dax_region *alloc_dax_region(struct device *parent, int region_id,
		struct range *range, int target_node, unsigned int align,
		unsigned long flags)
{
	struct dax_region *dax_region;
	int rc;

	/*
	 * The DAX core assumes that it can store its private data in
	 * parent->driver_data. This WARN is a reminder / safeguard for
	 * developers of device-dax drivers.
	 */
	if (dev_get_drvdata(parent)) {
		dev_WARN(parent, "dax core failed to setup private data\n");
		return NULL;
	}

	if (!IS_ALIGNED(range->start, align)
			|| !IS_ALIGNED(range_len(range), align))
		return NULL;

	dax_region = kzalloc_obj(*dax_region);
	if (!dax_region)
		return NULL;

	dev_set_drvdata(parent, dax_region);
	kref_init(&dax_region->kref);
	dax_region->id = region_id;
	dax_region->align = align;
	dax_region->dev = parent;
	dax_region->target_node = target_node;
	ida_init(&dax_region->ida);
	dax_region->res = (struct resource) {
		.start = range->start,
		.end = range->end,
		.flags = IORESOURCE_MEM | flags,
	};

	rc = request_resource(&dax_regions, &dax_region->res);
	if (rc) {
		dev_dbg(parent, "dax_region resource conflict for %pR\n",
				&dax_region->res);
		goto err_res;
	}

	if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups))
		goto err_sysfs;

	if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
		return NULL;
	return dax_region;

err_sysfs:
	release_resource(&dax_region->res);
err_res:
	dax_region_put(dax_region);
	return NULL;
}
EXPORT_SYMBOL_GPL(alloc_dax_region);

/* device ->release() for mapping children; drops the parent reference
 * taken in devm_register_dax_mapping(). */
static void dax_mapping_release(struct device *dev)
{
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct device *parent = dev->parent;
	struct dev_dax *dev_dax = to_dev_dax(parent);

	ida_free(&dev_dax->ida, 
mapping->id);
	kfree(mapping);
	put_device(parent);
}

/* devm action: detach a mapping child from its range and unregister it. */
static void unregister_dax_mapping(void *data)
{
	struct device *dev = data;
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct dev_dax *dev_dax = to_dev_dax(dev->parent);

	dev_dbg(dev, "%s\n", __func__);

	dev_dax->ranges[mapping->range_id].mapping = NULL;
	mapping->range_id = -1;

	device_unregister(dev);
}

/*
 * Resolve a mapping device to its backing range, leaving
 * dax_region_rwsem held for write on success (see put_dax_range()).
 * Returns NULL if the lock was interrupted or the mapping is defunct.
 */
static struct dev_dax_range *get_dax_range(struct device *dev)
{
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct dev_dax *dev_dax = to_dev_dax(dev->parent);
	int rc;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return NULL;
	/* range_id == -1 means unregister_dax_mapping() already ran */
	if (mapping->range_id < 0) {
		up_write(&dax_region_rwsem);
		return NULL;
	}

	return &dev_dax->ranges[mapping->range_id];
}

/* Pair of get_dax_range(): drop the lock it left held. */
static void put_dax_range(void)
{
	up_write(&dax_region_rwsem);
}

static ssize_t start_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sysfs_emit(buf, "%#llx\n", dax_range->range.start);
	put_dax_range();

	return rc;
}
static DEVICE_ATTR(start, 0400, start_show, NULL);

static ssize_t end_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sysfs_emit(buf, "%#llx\n", dax_range->range.end);
	put_dax_range();

	return rc;
}
static DEVICE_ATTR(end, 0400, end_show, NULL);

static ssize_t pgoff_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sysfs_emit(buf, "%#lx\n", dax_range->pgoff);
	put_dax_range();

	return rc;
}
static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL);

static struct attribute *dax_mapping_attributes[] = {
	&dev_attr_start.attr,
	&dev_attr_end.attr,
	&dev_attr_page_offset.attr,
	NULL,
};

static const struct attribute_group dax_mapping_attribute_group = {
	.attrs = dax_mapping_attributes,
};

static const struct attribute_group *dax_mapping_attribute_groups[] = {
	&dax_mapping_attribute_group,
	NULL,
};

static const struct device_type dax_mapping_type = {
	.release = dax_mapping_release,
	.groups = dax_mapping_attribute_groups,
};

/*
 * Create the "mappingN" child device exposing range @range_id of
 * @dev_dax. Caller holds dax_region_rwsem for write.
 */
static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
{
	struct dax_region *dax_region = dev_dax->region;
	struct dax_mapping *mapping;
	struct device *dev;
	int rc;

	lockdep_assert_held_write(&dax_region_rwsem);

	if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
				"region disabled\n"))
		return -ENXIO;

	mapping = kzalloc_obj(*mapping);
	if (!mapping)
		return -ENOMEM;
	mapping->range_id = range_id;
	mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL);
	if (mapping->id < 0) {
		kfree(mapping);
		return -ENOMEM;
	}
	dev_dax->ranges[range_id].mapping = mapping;
	dev = &mapping->dev;
	device_initialize(dev);
	dev->parent = &dev_dax->dev;
	/* parent ref dropped in dax_mapping_release() */
	get_device(dev->parent);
	dev->type = &dax_mapping_type;
	dev_set_name(dev, "mapping%d", mapping->id);
	rc = device_add(dev);
	if (rc) {
		put_device(dev);
		return rc;
	}

	rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping,
			dev);
	if (rc)
		return rc;
	return 0;
}

/*
 * Claim [@start, @start + @size) from the region resource and append it
 * to @dev_dax's range list. Caller holds dax_region_rwsem for write.
 */
static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
		resource_size_t size)
{
	struct dax_region *dax_region = dev_dax->region;
	struct resource *res = &dax_region->res;
	struct 
device *dev = &dev_dax->dev;
	struct dev_dax_range *ranges;
	unsigned long pgoff = 0;
	struct resource *alloc;
	int i, rc;

	lockdep_assert_held_write(&dax_region_rwsem);

	/* handle the seed alloc special case */
	if (!size) {
		if (dev_WARN_ONCE(dev, dev_dax->nr_range,
					"0-size allocation must be first\n"))
			return -EBUSY;
		/* nr_range == 0 is elsewhere special cased as 0-size device */
		return 0;
	}

	alloc = __request_region(res, start, size, dev_name(dev), 0);
	if (!alloc)
		return -ENOMEM;

	ranges = krealloc(dev_dax->ranges, sizeof(*ranges)
			* (dev_dax->nr_range + 1), GFP_KERNEL);
	if (!ranges) {
		__release_region(res, alloc->start, resource_size(alloc));
		return -ENOMEM;
	}

	/* new range's pgoff starts where the existing ranges end */
	for (i = 0; i < dev_dax->nr_range; i++)
		pgoff += PHYS_PFN(range_len(&ranges[i].range));
	dev_dax->ranges = ranges;
	ranges[dev_dax->nr_range++] = (struct dev_dax_range) {
		.pgoff = pgoff,
		.range = {
			.start = alloc->start,
			.end = alloc->end,
		},
	};

	dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
			&alloc->start, &alloc->end);
	/*
	 * A dev_dax instance must be registered before mapping device
	 * children can be added. Defer to devm_create_dev_dax() to add
	 * the initial mapping device.
	 */
	if (!device_is_registered(&dev_dax->dev))
		return 0;

	rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1);
	if (rc)
		trim_dev_dax_range(dev_dax);

	return rc;
}

/*
 * Resize the device's last range (and its backing resource) to @size.
 * Caller holds dax_region_rwsem for write; @size must be non-zero
 * (full deletion goes through dev_dax_shrink()/trim_dev_dax_range()).
 */
static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size)
{
	int last_range = dev_dax->nr_range - 1;
	struct dev_dax_range *dax_range = &dev_dax->ranges[last_range];
	bool is_shrink = resource_size(res) > size;
	struct range *range = &dax_range->range;
	struct device *dev = &dev_dax->dev;
	int rc;

	lockdep_assert_held_write(&dax_region_rwsem);

	if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
		return -EINVAL;

	rc = adjust_resource(res, range->start, size);
	if (rc)
		return rc;

	*range = (struct range) {
		.start = range->start,
		.end = range->start + size - 1,
	};

	dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend",
			last_range, (unsigned long long) range->start,
			(unsigned long long) range->end);

	return 0;
}

/* sysfs size (read): total bytes currently allocated to the device. */
static ssize_t size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	unsigned long long size;
	int rc;

	rc = down_read_interruptible(&dax_dev_rwsem);
	if (rc)
		return rc;
	size = dev_dax_size(dev_dax);
	up_read(&dax_dev_rwsem);

	return sysfs_emit(buf, "%llu\n", size);
}

static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size)
{
	/*
	 * The minimum mapping granularity for a device instance is a
	 * single subsection, unless the arch says otherwise. 

	 */
	return IS_ALIGNED(size, max_t(unsigned long, dev_dax->align, memremap_compat_align()));
}

/*
 * Reduce the device to @size bytes by trimming ranges last-to-first,
 * partially adjusting the final (highest-pgoff) range if needed.
 */
static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t to_shrink = dev_dax_size(dev_dax) - size;
	struct dax_region *dax_region = dev_dax->region;
	struct device *dev = &dev_dax->dev;
	int i;

	for (i = dev_dax->nr_range - 1; i >= 0; i--) {
		struct range *range = &dev_dax->ranges[i].range;
		struct dax_mapping *mapping = dev_dax->ranges[i].mapping;
		struct resource *adjust = NULL, *res;
		resource_size_t shrink;

		shrink = min_t(u64, to_shrink, range_len(range));
		if (shrink >= range_len(range)) {
			/* whole range goes away: drop its mapping child too */
			devm_release_action(dax_region->dev,
					unregister_dax_mapping, &mapping->dev);
			trim_dev_dax_range(dev_dax);
			to_shrink -= shrink;
			if (!to_shrink)
				break;
			continue;
		}

		/* partial shrink: find the region resource backing this range */
		for_each_dax_region_resource(dax_region, res)
			if (strcmp(res->name, dev_name(dev)) == 0
					&& res->start == range->start) {
				adjust = res;
				break;
			}

		if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1,
					"failed to find matching resource\n"))
			return -ENXIO;
		return adjust_dev_dax_range(dev_dax, adjust, range_len(range)
				- shrink);
	}
	return 0;
}

/*
 * Only allow adjustments that preserve the relative pgoff of existing
 * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff.
 */
static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res)
{
	struct dev_dax_range *last;
	int i;

	if (dev_dax->nr_range == 0)
		return false;
	if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0)
		return false;
	last = &dev_dax->ranges[dev_dax->nr_range - 1];
	if (last->range.start != res->start || last->range.end != res->end)
		return false;
	for (i = 0; i < dev_dax->nr_range - 1; i++) {
		struct dev_dax_range *dax_range = &dev_dax->ranges[i];

		if (dax_range->pgoff > last->pgoff)
			return false;
	}

	return true;
}

/*
 * Grow or shrink @dev_dax to @size bytes within @dax_region. Caller
 * holds both dax_region_rwsem and dax_dev_rwsem for write.
 */
static ssize_t dev_dax_resize(struct dax_region *dax_region,
		struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t avail = dax_region_avail_size(dax_region), to_alloc;
	resource_size_t dev_size = dev_dax_size(dev_dax);
	struct resource *region_res = &dax_region->res;
	struct device *dev = &dev_dax->dev;
	struct resource *res, *first;
	resource_size_t alloc = 0;
	int rc;

	if (dev->driver)
		return -EBUSY;
	if (size == dev_size)
		return 0;
	if (size > dev_size && size - dev_size > avail)
		return -ENOSPC;
	if (size < dev_size)
		return dev_dax_shrink(dev_dax, size);

	to_alloc = size - dev_size;
	if (dev_WARN_ONCE(dev, !alloc_is_aligned(dev_dax, to_alloc),
			"resize of %pa misaligned\n", &to_alloc))
		return -ENXIO;

	/*
	 * Expand the device into the unused portion of the region. This
	 * may involve adjusting the end of an existing resource, or
	 * allocating a new resource. 

	 */
retry:
	first = region_res->child;
	if (!first)
		return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc);

	rc = -ENOSPC;
	for (res = first; res; res = res->sibling) {
		struct resource *next = res->sibling;

		/* space at the beginning of the region */
		if (res == first && res->start > dax_region->res.start) {
			alloc = min(res->start - dax_region->res.start, to_alloc);
			rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc);
			break;
		}

		alloc = 0;
		/* space between allocations */
		if (next && next->start > res->end + 1)
			alloc = min(next->start - (res->end + 1), to_alloc);

		/* space at the end of the region */
		if (!alloc && !next && res->end < region_res->end)
			alloc = min(region_res->end - res->end, to_alloc);

		if (!alloc)
			continue;

		/* extend our own trailing resource in place when possible */
		if (adjust_ok(dev_dax, res)) {
			rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc);
			break;
		}
		rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc);
		break;
	}
	if (rc)
		return rc;
	to_alloc -= alloc;
	/* keep filling gaps until the full request is satisfied */
	if (to_alloc)
		goto retry;
	return 0;
}

/* sysfs size (write): resize the device; fails with -EBUSY while bound. */
static ssize_t size_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	ssize_t rc;
	unsigned long long val;
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;

	rc = kstrtoull(buf, 0, &val);
	if (rc)
		return rc;

	if (!alloc_is_aligned(dev_dax, val)) {
		dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val);
		return -EINVAL;
	}

	/* lock order: region rwsem, then device rwsem */
	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	if (!dax_region->dev->driver) {
		rc = -ENXIO;
		goto err_region;
	}
	rc = down_write_killable(&dax_dev_rwsem);
	if (rc)
		goto err_dev;

	rc = dev_dax_resize(dax_region, dev_dax, val);

err_dev:
	up_write(&dax_dev_rwsem);
err_region:
	up_write(&dax_region_rwsem);

	if (rc == 0)
		return len;
	return rc;
}
static DEVICE_ATTR_RW(size);

/*
 * Parse "<start>-<end>" (hex, inclusive) into @range.
 * Returns 0 on success, negative errno otherwise. NOTE(review): @len is
 * currently unused; input is bounded by the NUL in @opt.
 */
static ssize_t range_parse(const char *opt, size_t len, struct range *range)
{
	unsigned long long addr = 0;
	char *start, *end, *str;
	ssize_t rc = -EINVAL;

	str = kstrdup(opt, GFP_KERNEL);
	if (!str)
		return rc;

	end = str;
	start = strsep(&end, "-");
	if (!start || !end)
		goto err;

	rc = kstrtoull(start, 16, &addr);
	if (rc)
		goto err;
	range->start = addr;

	rc = kstrtoull(end, 16, &addr);
	if (rc)
		goto err;
	range->end = addr;

err:
	kfree(str);
	return rc;
}

/* sysfs mapping (write): claim an explicit "<start>-<end>" range. */
static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	size_t to_alloc;
	struct range r;
	ssize_t rc;

	rc = range_parse(buf, len, &r);
	if (rc)
		return rc;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	if (!dax_region->dev->driver) {
		up_write(&dax_region_rwsem);
		return rc;
	}
	rc = down_write_killable(&dax_dev_rwsem);
	if (rc) {
		up_write(&dax_region_rwsem);
		return rc;
	}

	to_alloc = range_len(&r);
	if (alloc_is_aligned(dev_dax, to_alloc))
		rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
	up_write(&dax_dev_rwsem);
	up_write(&dax_region_rwsem);

	return rc == 0 ? 
len : rc; 1212 } 1213 static DEVICE_ATTR_WO(mapping); 1214 1215 static ssize_t align_show(struct device *dev, 1216 struct device_attribute *attr, char *buf) 1217 { 1218 struct dev_dax *dev_dax = to_dev_dax(dev); 1219 1220 return sysfs_emit(buf, "%d\n", dev_dax->align); 1221 } 1222 1223 static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax) 1224 { 1225 struct device *dev = &dev_dax->dev; 1226 int i; 1227 1228 for (i = 0; i < dev_dax->nr_range; i++) { 1229 size_t len = range_len(&dev_dax->ranges[i].range); 1230 1231 if (!alloc_is_aligned(dev_dax, len)) { 1232 dev_dbg(dev, "%s: align %u invalid for range %d\n", 1233 __func__, dev_dax->align, i); 1234 return -EINVAL; 1235 } 1236 } 1237 1238 return 0; 1239 } 1240 1241 static ssize_t align_store(struct device *dev, struct device_attribute *attr, 1242 const char *buf, size_t len) 1243 { 1244 struct dev_dax *dev_dax = to_dev_dax(dev); 1245 struct dax_region *dax_region = dev_dax->region; 1246 unsigned long val, align_save; 1247 ssize_t rc; 1248 1249 rc = kstrtoul(buf, 0, &val); 1250 if (rc) 1251 return -ENXIO; 1252 1253 if (!dax_align_valid(val)) 1254 return -EINVAL; 1255 1256 rc = down_write_killable(&dax_region_rwsem); 1257 if (rc) 1258 return rc; 1259 if (!dax_region->dev->driver) { 1260 up_write(&dax_region_rwsem); 1261 return -ENXIO; 1262 } 1263 1264 rc = down_write_killable(&dax_dev_rwsem); 1265 if (rc) { 1266 up_write(&dax_region_rwsem); 1267 return rc; 1268 } 1269 if (dev->driver) { 1270 rc = -EBUSY; 1271 goto out_unlock; 1272 } 1273 1274 align_save = dev_dax->align; 1275 dev_dax->align = val; 1276 rc = dev_dax_validate_align(dev_dax); 1277 if (rc) 1278 dev_dax->align = align_save; 1279 out_unlock: 1280 up_write(&dax_dev_rwsem); 1281 up_write(&dax_region_rwsem); 1282 return rc == 0 ? 
len : rc; 1283 } 1284 static DEVICE_ATTR_RW(align); 1285 1286 static int dev_dax_target_node(struct dev_dax *dev_dax) 1287 { 1288 struct dax_region *dax_region = dev_dax->region; 1289 1290 return dax_region->target_node; 1291 } 1292 1293 static ssize_t target_node_show(struct device *dev, 1294 struct device_attribute *attr, char *buf) 1295 { 1296 struct dev_dax *dev_dax = to_dev_dax(dev); 1297 1298 return sysfs_emit(buf, "%d\n", dev_dax_target_node(dev_dax)); 1299 } 1300 static DEVICE_ATTR_RO(target_node); 1301 1302 static ssize_t resource_show(struct device *dev, 1303 struct device_attribute *attr, char *buf) 1304 { 1305 struct dev_dax *dev_dax = to_dev_dax(dev); 1306 struct dax_region *dax_region = dev_dax->region; 1307 unsigned long long start; 1308 1309 if (dev_dax->nr_range < 1) 1310 start = dax_region->res.start; 1311 else 1312 start = dev_dax->ranges[0].range.start; 1313 1314 return sysfs_emit(buf, "%#llx\n", start); 1315 } 1316 static DEVICE_ATTR(resource, 0400, resource_show, NULL); 1317 1318 static ssize_t modalias_show(struct device *dev, struct device_attribute *attr, 1319 char *buf) 1320 { 1321 /* 1322 * We only ever expect to handle device-dax instances, i.e. 
the 1323 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero 1324 */ 1325 return sysfs_emit(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0); 1326 } 1327 static DEVICE_ATTR_RO(modalias); 1328 1329 static ssize_t numa_node_show(struct device *dev, 1330 struct device_attribute *attr, char *buf) 1331 { 1332 return sysfs_emit(buf, "%d\n", dev_to_node(dev)); 1333 } 1334 static DEVICE_ATTR_RO(numa_node); 1335 1336 static ssize_t memmap_on_memory_show(struct device *dev, 1337 struct device_attribute *attr, char *buf) 1338 { 1339 struct dev_dax *dev_dax = to_dev_dax(dev); 1340 1341 return sysfs_emit(buf, "%d\n", dev_dax->memmap_on_memory); 1342 } 1343 1344 static ssize_t memmap_on_memory_store(struct device *dev, 1345 struct device_attribute *attr, 1346 const char *buf, size_t len) 1347 { 1348 struct dev_dax *dev_dax = to_dev_dax(dev); 1349 bool val; 1350 int rc; 1351 1352 rc = kstrtobool(buf, &val); 1353 if (rc) 1354 return rc; 1355 1356 if (val == true && !mhp_supports_memmap_on_memory()) { 1357 dev_dbg(dev, "memmap_on_memory is not available\n"); 1358 return -EOPNOTSUPP; 1359 } 1360 1361 rc = down_write_killable(&dax_dev_rwsem); 1362 if (rc) 1363 return rc; 1364 1365 if (dev_dax->memmap_on_memory != val && dev->driver && 1366 to_dax_drv(dev->driver)->type == DAXDRV_KMEM_TYPE) { 1367 up_write(&dax_dev_rwsem); 1368 return -EBUSY; 1369 } 1370 1371 dev_dax->memmap_on_memory = val; 1372 up_write(&dax_dev_rwsem); 1373 1374 return len; 1375 } 1376 static DEVICE_ATTR_RW(memmap_on_memory); 1377 1378 static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n) 1379 { 1380 struct device *dev = container_of(kobj, struct device, kobj); 1381 struct dev_dax *dev_dax = to_dev_dax(dev); 1382 struct dax_region *dax_region = dev_dax->region; 1383 1384 if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0) 1385 return 0; 1386 if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA)) 1387 return 0; 1388 if (a == &dev_attr_mapping.attr && 
is_static(dax_region)) 1389 return 0; 1390 if ((a == &dev_attr_align.attr || 1391 a == &dev_attr_size.attr) && is_static(dax_region)) 1392 return 0444; 1393 return a->mode; 1394 } 1395 1396 static struct attribute *dev_dax_attributes[] = { 1397 &dev_attr_modalias.attr, 1398 &dev_attr_size.attr, 1399 &dev_attr_mapping.attr, 1400 &dev_attr_target_node.attr, 1401 &dev_attr_align.attr, 1402 &dev_attr_resource.attr, 1403 &dev_attr_numa_node.attr, 1404 &dev_attr_memmap_on_memory.attr, 1405 NULL, 1406 }; 1407 1408 static const struct attribute_group dev_dax_attribute_group = { 1409 .attrs = dev_dax_attributes, 1410 .is_visible = dev_dax_visible, 1411 }; 1412 1413 static const struct attribute_group *dax_attribute_groups[] = { 1414 &dev_dax_attribute_group, 1415 NULL, 1416 }; 1417 1418 static void dev_dax_release(struct device *dev) 1419 { 1420 struct dev_dax *dev_dax = to_dev_dax(dev); 1421 struct dax_device *dax_dev = dev_dax->dax_dev; 1422 1423 put_dax(dax_dev); 1424 free_dev_dax_id(dev_dax); 1425 kfree(dev_dax->pgmap); 1426 kfree(dev_dax); 1427 } 1428 1429 static const struct device_type dev_dax_type = { 1430 .release = dev_dax_release, 1431 .groups = dax_attribute_groups, 1432 }; 1433 1434 static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data) 1435 { 1436 struct dax_region *dax_region = data->dax_region; 1437 struct device *parent = dax_region->dev; 1438 struct dax_device *dax_dev; 1439 struct dev_dax *dev_dax; 1440 struct inode *inode; 1441 struct device *dev; 1442 int rc; 1443 1444 dev_dax = kzalloc_obj(*dev_dax); 1445 if (!dev_dax) 1446 return ERR_PTR(-ENOMEM); 1447 1448 dev_dax->region = dax_region; 1449 if (is_static(dax_region)) { 1450 if (dev_WARN_ONCE(parent, data->id < 0, 1451 "dynamic id specified to static region\n")) { 1452 rc = -EINVAL; 1453 goto err_id; 1454 } 1455 1456 dev_dax->id = data->id; 1457 } else { 1458 if (dev_WARN_ONCE(parent, data->id >= 0, 1459 "static id specified to dynamic region\n")) { 1460 rc = -EINVAL; 1461 goto 
err_id; 1462 } 1463 1464 rc = alloc_dev_dax_id(dev_dax); 1465 if (rc < 0) 1466 goto err_id; 1467 } 1468 1469 dev = &dev_dax->dev; 1470 device_initialize(dev); 1471 dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id); 1472 1473 rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size); 1474 if (rc) 1475 goto err_range; 1476 1477 if (data->pgmap) { 1478 dev_WARN_ONCE(parent, !is_static(dax_region), 1479 "custom dev_pagemap requires a static dax_region\n"); 1480 1481 dev_dax->pgmap = kmemdup(data->pgmap, 1482 sizeof(struct dev_pagemap), GFP_KERNEL); 1483 if (!dev_dax->pgmap) { 1484 rc = -ENOMEM; 1485 goto err_pgmap; 1486 } 1487 } 1488 1489 /* 1490 * No dax_operations since there is no access to this device outside of 1491 * mmap of the resulting character device. 1492 */ 1493 dax_dev = alloc_dax(dev_dax, NULL); 1494 if (IS_ERR(dax_dev)) { 1495 rc = PTR_ERR(dax_dev); 1496 goto err_alloc_dax; 1497 } 1498 set_dax_synchronous(dax_dev); 1499 set_dax_nocache(dax_dev); 1500 set_dax_nomc(dax_dev); 1501 1502 /* a device_dax instance is dead while the driver is not attached */ 1503 kill_dax(dax_dev); 1504 1505 dev_dax->dax_dev = dax_dev; 1506 dev_dax->target_node = dax_region->target_node; 1507 dev_dax->align = dax_region->align; 1508 ida_init(&dev_dax->ida); 1509 1510 dev_dax->memmap_on_memory = data->memmap_on_memory; 1511 1512 inode = dax_inode(dax_dev); 1513 dev->devt = inode->i_rdev; 1514 dev->bus = &dax_bus_type; 1515 dev->parent = parent; 1516 dev->type = &dev_dax_type; 1517 1518 rc = device_add(dev); 1519 if (rc) { 1520 kill_dev_dax(dev_dax); 1521 put_device(dev); 1522 return ERR_PTR(rc); 1523 } 1524 1525 rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev); 1526 if (rc) 1527 return ERR_PTR(rc); 1528 1529 /* register mapping device for the initial allocation range */ 1530 if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) { 1531 rc = devm_register_dax_mapping(dev_dax, 0); 1532 if (rc) 1533 return ERR_PTR(rc); 1534 } 1535 
1536 return dev_dax; 1537 1538 err_alloc_dax: 1539 kfree(dev_dax->pgmap); 1540 err_pgmap: 1541 free_dev_dax_ranges(dev_dax); 1542 err_range: 1543 free_dev_dax_id(dev_dax); 1544 err_id: 1545 kfree(dev_dax); 1546 1547 return ERR_PTR(rc); 1548 } 1549 1550 struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data) 1551 { 1552 struct dev_dax *dev_dax; 1553 1554 down_write(&dax_region_rwsem); 1555 dev_dax = __devm_create_dev_dax(data); 1556 up_write(&dax_region_rwsem); 1557 1558 return dev_dax; 1559 } 1560 EXPORT_SYMBOL_GPL(devm_create_dev_dax); 1561 1562 int __dax_driver_register(struct dax_device_driver *dax_drv, 1563 struct module *module, const char *mod_name) 1564 { 1565 struct device_driver *drv = &dax_drv->drv; 1566 1567 /* 1568 * dax_bus_probe() calls dax_drv->probe() unconditionally. 1569 * So better be safe than sorry and ensure it is provided. 1570 */ 1571 if (!dax_drv->probe) 1572 return -EINVAL; 1573 1574 INIT_LIST_HEAD(&dax_drv->ids); 1575 drv->owner = module; 1576 drv->name = mod_name; 1577 drv->mod_name = mod_name; 1578 drv->bus = &dax_bus_type; 1579 1580 return driver_register(drv); 1581 } 1582 EXPORT_SYMBOL_GPL(__dax_driver_register); 1583 1584 void dax_driver_unregister(struct dax_device_driver *dax_drv) 1585 { 1586 struct device_driver *drv = &dax_drv->drv; 1587 struct dax_id *dax_id, *_id; 1588 1589 mutex_lock(&dax_bus_lock); 1590 list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) { 1591 list_del(&dax_id->list); 1592 kfree(dax_id); 1593 } 1594 mutex_unlock(&dax_bus_lock); 1595 driver_unregister(drv); 1596 } 1597 EXPORT_SYMBOL_GPL(dax_driver_unregister); 1598 1599 int __init dax_bus_init(void) 1600 { 1601 return bus_register(&dax_bus_type); 1602 } 1603 1604 void __exit dax_bus_exit(void) 1605 { 1606 bus_unregister(&dax_bus_type); 1607 } 1608