// SPDX-License-Identifier: GPL-2.0
/* Copyright(c) 2017-2018 Intel Corporation. All rights reserved. */
#include <linux/memremap.h>
#include <linux/device.h>
#include <linux/mutex.h>
#include <linux/list.h>
#include <linux/slab.h>
#include <linux/dax.h>
#include <linux/io.h>
#include "dax-private.h"
#include "bus.h"

static DEFINE_MUTEX(dax_bus_lock);

/*
 * All changes to the dax region configuration occur with this lock held
 * for write.
 */
DECLARE_RWSEM(dax_region_rwsem);

/*
 * All changes to the dax device configuration occur with this lock held
 * for write.
 */
DECLARE_RWSEM(dax_dev_rwsem);

#define DAX_NAME_LEN 30
struct dax_id {
	struct list_head list;
	char dev_name[DAX_NAME_LEN];
};

static int dax_bus_uevent(const struct device *dev, struct kobj_uevent_env *env)
{
	/*
	 * We only ever expect to handle device-dax instances, i.e. the
	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
	 */
	return add_uevent_var(env, "MODALIAS=" DAX_DEVICE_MODALIAS_FMT, 0);
}

#define to_dax_drv(__drv) container_of_const(__drv, struct dax_device_driver, drv)

static struct dax_id *__dax_match_id(const struct dax_device_driver *dax_drv,
		const char *dev_name)
{
	struct dax_id *dax_id;

	lockdep_assert_held(&dax_bus_lock);

	list_for_each_entry(dax_id, &dax_drv->ids, list)
		if (sysfs_streq(dax_id->dev_name, dev_name))
			return dax_id;
	return NULL;
}

static int dax_match_id(const struct dax_device_driver *dax_drv, struct device *dev)
{
	int match;

	mutex_lock(&dax_bus_lock);
	match = !!__dax_match_id(dax_drv, dev_name(dev));
	mutex_unlock(&dax_bus_lock);

	return match;
}

static int dax_match_type(const struct dax_device_driver *dax_drv, struct device *dev)
{
	enum dax_driver_type type = DAXDRV_DEVICE_TYPE;
	struct dev_dax *dev_dax = to_dev_dax(dev);

	if (dev_dax->region->res.flags & IORESOURCE_DAX_KMEM)
		type = DAXDRV_KMEM_TYPE;

	if (dax_drv->type == type)
		return 1;

	/* default to device mode if dax_kmem is disabled */
	if (dax_drv->type == DAXDRV_DEVICE_TYPE &&
	    !IS_ENABLED(CONFIG_DEV_DAX_KMEM))
		return 1;

	return 0;
}

enum id_action {
	ID_REMOVE,
	ID_ADD,
};

static ssize_t do_id_store(struct device_driver *drv, const char *buf,
		size_t count, enum id_action action)
{
	struct dax_device_driver *dax_drv = to_dax_drv(drv);
	unsigned int region_id, id;
	char devname[DAX_NAME_LEN];
	struct dax_id *dax_id;
	ssize_t rc = count;
	int fields;

	fields = sscanf(buf, "dax%d.%d", &region_id, &id);
	if (fields != 2)
		return -EINVAL;
	sprintf(devname, "dax%d.%d", region_id, id);
	if (!sysfs_streq(buf, devname))
		return -EINVAL;

	mutex_lock(&dax_bus_lock);
	dax_id = __dax_match_id(dax_drv, buf);
	if (!dax_id) {
		if (action == ID_ADD) {
			dax_id = kzalloc(sizeof(*dax_id), GFP_KERNEL);
			if (dax_id) {
				strscpy(dax_id->dev_name, buf, DAX_NAME_LEN);
				list_add(&dax_id->list, &dax_drv->ids);
			} else
				rc = -ENOMEM;
		}
	} else if (action == ID_REMOVE) {
		list_del(&dax_id->list);
		kfree(dax_id);
	}
	mutex_unlock(&dax_bus_lock);

	if (rc < 0)
		return rc;
	if (action == ID_ADD)
		rc = driver_attach(drv);
	if (rc)
		return rc;
	return count;
}

static ssize_t new_id_store(struct device_driver *drv, const char *buf,
		size_t count)
{
	return do_id_store(drv, buf, count, ID_ADD);
}
static DRIVER_ATTR_WO(new_id);

static ssize_t remove_id_store(struct device_driver *drv, const char *buf,
		size_t count)
{
	return do_id_store(drv, buf, count, ID_REMOVE);
}
static DRIVER_ATTR_WO(remove_id);

static struct attribute *dax_drv_attrs[] = {
	&driver_attr_new_id.attr,
	&driver_attr_remove_id.attr,
	NULL,
};
ATTRIBUTE_GROUPS(dax_drv);
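
/*
 * Example (sketch; device and driver names are illustrative): the
 * new_id/remove_id attributes implemented above let userspace override
 * the default driver for a given device. The typical use is handing a
 * device from device_dax to dax_kmem:
 *
 *	echo dax0.0 > /sys/bus/dax/drivers/device_dax/unbind
 *	echo dax0.0 > /sys/bus/dax/drivers/kmem/new_id
 *
 * do_id_store() validates the "daxX.Y" name, records it on the
 * driver's id list, and, for ID_ADD, calls driver_attach() so the
 * device is rebound immediately.
 */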

static int dax_bus_match(struct device *dev, const struct device_driver *drv);

/*
 * Static dax regions are regions created by an external subsystem, such
 * as nvdimm, where a single range is assigned. Their boundaries are
 * established by the external subsystem and are usually limited to one
 * physical memory range. For example, for PMEM the region is usually
 * defined by NVDIMM Namespace boundaries (i.e. a single contiguous
 * range).
 *
 * In dynamic dax regions, the assigned region can be partitioned by the
 * dax core into multiple subdivisions. A subdivision is represented by
 * one /dev/daxN.M device composed of one or more potentially
 * discontiguous ranges.
 *
 * When allocating a dax region, drivers must set whether it's static
 * (IORESOURCE_DAX_STATIC). On static dax devices, the @pgmap is
 * pre-assigned to the dax core when calling devm_create_dev_dax(),
 * whereas in dynamic dax devices it is NULL but afterwards allocated by
 * the dax core on device ->probe(). Care is needed to make sure that
 * dynamic dax devices are torn down with a cleared @pgmap field (see
 * kill_dev_dax()).
 */
static bool is_static(struct dax_region *dax_region)
{
	return (dax_region->res.flags & IORESOURCE_DAX_STATIC) != 0;
}

bool static_dev_dax(struct dev_dax *dev_dax)
{
	return is_static(dev_dax->region);
}
EXPORT_SYMBOL_GPL(static_dev_dax);
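
/*
 * A minimal sketch (illustrative, not lifted from an in-tree driver) of
 * how a provider of a static region would tie the above together; the
 * range, region_id, numa_node and pgmap values are assumptions:
 *
 *	struct dax_region *dax_region;
 *	struct dev_dax_data data;
 *
 *	dax_region = alloc_dax_region(dev, region_id, &range, numa_node,
 *				      align, IORESOURCE_DAX_STATIC);
 *	data = (struct dev_dax_data) {
 *		.dax_region = dax_region,
 *		.id = 0,
 *		.size = range_len(&range),
 *		.pgmap = &pgmap,
 *	};
 *	dev_dax = devm_create_dev_dax(&data);
 */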

static u64 dev_dax_size(struct dev_dax *dev_dax)
{
	u64 size = 0;
	int i;

	lockdep_assert_held(&dax_dev_rwsem);

	for (i = 0; i < dev_dax->nr_range; i++)
		size += range_len(&dev_dax->ranges[i].range);

	return size;
}

static int dax_bus_probe(struct device *dev)
{
	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	int rc;
	u64 size;

	rc = down_read_interruptible(&dax_dev_rwsem);
	if (rc)
		return rc;
	size = dev_dax_size(dev_dax);
	up_read(&dax_dev_rwsem);

	if (size == 0 || dev_dax->id < 0)
		return -ENXIO;

	rc = dax_drv->probe(dev_dax);

	if (rc || is_static(dax_region))
		return rc;

	/*
	 * Track new seed creation only after successful probe of the
	 * previous seed.
	 */
	if (dax_region->seed == dev)
		dax_region->seed = NULL;

	return 0;
}

static void dax_bus_remove(struct device *dev)
{
	struct dax_device_driver *dax_drv = to_dax_drv(dev->driver);
	struct dev_dax *dev_dax = to_dev_dax(dev);

	if (dax_drv->remove)
		dax_drv->remove(dev_dax);
}

static const struct bus_type dax_bus_type = {
	.name = "dax",
	.uevent = dax_bus_uevent,
	.match = dax_bus_match,
	.probe = dax_bus_probe,
	.remove = dax_bus_remove,
	.drv_groups = dax_drv_groups,
};

static int dax_bus_match(struct device *dev, const struct device_driver *drv)
{
	const struct dax_device_driver *dax_drv = to_dax_drv(drv);

	if (dax_match_id(dax_drv, dev))
		return 1;
	return dax_match_type(dax_drv, dev);
}

/*
 * Rely on the fact that drvdata is set before the attributes are
 * registered, and that the attributes are unregistered before drvdata
 * is cleared to assume that drvdata is always valid.
 */
static ssize_t id_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%d\n", dax_region->id);
}
static DEVICE_ATTR_RO(id);

static ssize_t region_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%llu\n",
			  (unsigned long long)resource_size(&dax_region->res));
}
static struct device_attribute dev_attr_region_size = __ATTR(size, 0444,
		region_size_show, NULL);

static ssize_t region_align_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);

	return sysfs_emit(buf, "%u\n", dax_region->align);
}
static struct device_attribute dev_attr_region_align =
		__ATTR(align, 0400, region_align_show, NULL);

#define for_each_dax_region_resource(dax_region, res) \
	for (res = (dax_region)->res.child; res; res = res->sibling)

static unsigned long long dax_region_avail_size(struct dax_region *dax_region)
{
	resource_size_t size = resource_size(&dax_region->res);
	struct resource *res;

	lockdep_assert_held(&dax_region_rwsem);

	for_each_dax_region_resource(dax_region, res)
		size -= resource_size(res);
	return size;
}

static ssize_t available_size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	unsigned long long size;
	int rc;

	rc = down_read_interruptible(&dax_region_rwsem);
	if (rc)
		return rc;
	size = dax_region_avail_size(dax_region);
	up_read(&dax_region_rwsem);

	return sysfs_emit(buf, "%llu\n", size);
}
static DEVICE_ATTR_RO(available_size);

static ssize_t seed_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct device *seed;
	ssize_t rc;

	if (is_static(dax_region))
		return -EINVAL;

	rc = down_read_interruptible(&dax_region_rwsem);
	if (rc)
		return rc;
	seed = dax_region->seed;
	rc = sysfs_emit(buf, "%s\n", seed ? dev_name(seed) : "");
	up_read(&dax_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RO(seed);

static ssize_t create_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct device *youngest;
	ssize_t rc;

	if (is_static(dax_region))
		return -EINVAL;

	rc = down_read_interruptible(&dax_region_rwsem);
	if (rc)
		return rc;
	youngest = dax_region->youngest;
	rc = sysfs_emit(buf, "%s\n", youngest ? dev_name(youngest) : "");
	up_read(&dax_region_rwsem);

	return rc;
}

static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data);

static ssize_t create_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	unsigned long long avail;
	ssize_t rc;
	int val;

	if (is_static(dax_region))
		return -EINVAL;

	rc = kstrtoint(buf, 0, &val);
	if (rc)
		return rc;
	if (val != 1)
		return -EINVAL;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	avail = dax_region_avail_size(dax_region);
	if (avail == 0)
		rc = -ENOSPC;
	else {
		struct dev_dax_data data = {
			.dax_region = dax_region,
			.size = 0,
			.id = -1,
			.memmap_on_memory = false,
		};
		struct dev_dax *dev_dax = __devm_create_dev_dax(&data);

		if (IS_ERR(dev_dax))
			rc = PTR_ERR(dev_dax);
		else {
			/*
			 * In support of crafting multiple new devices
			 * simultaneously, multiple seeds can be created,
			 * but only the first one that has not been
			 * successfully bound is tracked as the region
			 * seed.
			 */
			if (!dax_region->seed)
				dax_region->seed = &dev_dax->dev;
			dax_region->youngest = &dev_dax->dev;
			rc = len;
		}
	}
	up_write(&dax_region_rwsem);

	return rc;
}
static DEVICE_ATTR_RW(create);
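
/*
 * Example (illustrative paths, assuming a dynamic region published by a
 * platform device named "hmem0"): a new, zero-sized device is created
 * by writing "1" to the region's "create" attribute; the resulting seed
 * device is then reported by "seed" and sized separately:
 *
 *	echo 1 > /sys/devices/platform/hmem0/dax_region/create
 *	cat /sys/devices/platform/hmem0/dax_region/seed
 */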

void kill_dev_dax(struct dev_dax *dev_dax)
{
	struct dax_device *dax_dev = dev_dax->dax_dev;
	struct inode *inode = dax_inode(dax_dev);

	kill_dax(dax_dev);
	unmap_mapping_range(inode->i_mapping, 0, 0, 1);

	/*
	 * Dynamic dax regions have the pgmap allocated via devm_kzalloc()
	 * and thus freed by devm. Clear the pgmap so that probe() does not
	 * see stale pgmap ranges left over from previous reconfigurations
	 * of region devices.
	 */
	if (!static_dev_dax(dev_dax))
		dev_dax->pgmap = NULL;
}
EXPORT_SYMBOL_GPL(kill_dev_dax);

static void trim_dev_dax_range(struct dev_dax *dev_dax)
{
	int i = dev_dax->nr_range - 1;
	struct range *range = &dev_dax->ranges[i].range;
	struct dax_region *dax_region = dev_dax->region;

	lockdep_assert_held_write(&dax_region_rwsem);
	dev_dbg(&dev_dax->dev, "delete range[%d]: %#llx:%#llx\n", i,
		(unsigned long long)range->start,
		(unsigned long long)range->end);

	__release_region(&dax_region->res, range->start, range_len(range));
	if (--dev_dax->nr_range == 0) {
		kfree(dev_dax->ranges);
		dev_dax->ranges = NULL;
	}
}

static void free_dev_dax_ranges(struct dev_dax *dev_dax)
{
	while (dev_dax->nr_range)
		trim_dev_dax_range(dev_dax);
}

static void unregister_dev_dax(void *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	dev_dbg(dev, "%s\n", __func__);

	down_write(&dax_region_rwsem);
	kill_dev_dax(dev_dax);
	device_del(dev);
	free_dev_dax_ranges(dev_dax);
	put_device(dev);
	up_write(&dax_region_rwsem);
}

static void dax_region_free(struct kref *kref)
{
	struct dax_region *dax_region;

	dax_region = container_of(kref, struct dax_region, kref);
	kfree(dax_region);
}

static void dax_region_put(struct dax_region *dax_region)
{
	kref_put(&dax_region->kref, dax_region_free);
}

/* a return value >= 0 indicates this invocation invalidated the id */
static int __free_dev_dax_id(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region;
	int rc = dev_dax->id;

	lockdep_assert_held_write(&dax_dev_rwsem);

	if (!dev_dax->dyn_id || dev_dax->id < 0)
		return -1;
	dax_region = dev_dax->region;
	ida_free(&dax_region->ida, dev_dax->id);
	dax_region_put(dax_region);
	dev_dax->id = -1;
	return rc;
}

static int free_dev_dax_id(struct dev_dax *dev_dax)
{
	int rc;

	rc = down_write_killable(&dax_dev_rwsem);
	if (rc)
		return rc;
	rc = __free_dev_dax_id(dev_dax);
	up_write(&dax_dev_rwsem);
	return rc;
}

static int alloc_dev_dax_id(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region = dev_dax->region;
	int id;

	id = ida_alloc(&dax_region->ida, GFP_KERNEL);
	if (id < 0)
		return id;
	kref_get(&dax_region->kref);
	dev_dax->dyn_id = true;
	dev_dax->id = id;
	return id;
}

static ssize_t delete_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dax_region *dax_region = dev_get_drvdata(dev);
	struct dev_dax *dev_dax;
	struct device *victim;
	bool do_del = false;
	int rc;

	if (is_static(dax_region))
		return -EINVAL;

	victim = device_find_child_by_name(dax_region->dev, buf);
	if (!victim)
		return -ENXIO;

	device_lock(dev);
	device_lock(victim);
	dev_dax = to_dev_dax(victim);
	down_write(&dax_dev_rwsem);
	if (victim->driver || dev_dax_size(dev_dax))
		rc = -EBUSY;
	else {
		/*
		 * Invalidate the device so it does not become active
		 * again, but always preserve device-id-0 so that
		 * /sys/bus/dax/ is guaranteed to be populated while any
		 * dax_region is registered.
		 */
		if (dev_dax->id > 0) {
			do_del = __free_dev_dax_id(dev_dax) >= 0;
			rc = len;
			if (dax_region->seed == victim)
				dax_region->seed = NULL;
			if (dax_region->youngest == victim)
				dax_region->youngest = NULL;
		} else
			rc = -EBUSY;
	}
	up_write(&dax_dev_rwsem);
	device_unlock(victim);

	/* won the race to invalidate the device, clean it up */
	if (do_del)
		devm_release_action(dev, unregister_dev_dax, victim);
	device_unlock(dev);
	put_device(victim);

	return rc;
}
static DEVICE_ATTR_WO(delete);
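
/*
 * Example (illustrative path): a dynamic device that is unbound and has
 * been shrunk to size 0 can be deleted by writing its name to the
 * region's "delete" attribute:
 *
 *	echo 0 > /sys/bus/dax/devices/dax0.1/size
 *	echo dax0.1 > /sys/devices/platform/hmem0/dax_region/delete
 *
 * A bound or non-empty victim fails with -EBUSY, and device-id-0 is
 * always preserved per the comment in delete_store().
 */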

static umode_t dax_region_visible(struct kobject *kobj, struct attribute *a,
		int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct dax_region *dax_region = dev_get_drvdata(dev);

	if (is_static(dax_region))
		if (a == &dev_attr_available_size.attr
				|| a == &dev_attr_create.attr
				|| a == &dev_attr_seed.attr
				|| a == &dev_attr_delete.attr)
			return 0;
	return a->mode;
}

static struct attribute *dax_region_attributes[] = {
	&dev_attr_available_size.attr,
	&dev_attr_region_size.attr,
	&dev_attr_region_align.attr,
	&dev_attr_create.attr,
	&dev_attr_seed.attr,
	&dev_attr_delete.attr,
	&dev_attr_id.attr,
	NULL,
};

static const struct attribute_group dax_region_attribute_group = {
	.name = "dax_region",
	.attrs = dax_region_attributes,
	.is_visible = dax_region_visible,
};

static const struct attribute_group *dax_region_attribute_groups[] = {
	&dax_region_attribute_group,
	NULL,
};

static void dax_region_unregister(void *region)
{
	struct dax_region *dax_region = region;

	sysfs_remove_groups(&dax_region->dev->kobj,
			dax_region_attribute_groups);
	dax_region_put(dax_region);
}

struct dax_region *alloc_dax_region(struct device *parent, int region_id,
		struct range *range, int target_node, unsigned int align,
		unsigned long flags)
{
	struct dax_region *dax_region;

	/*
	 * The DAX core assumes that it can store its private data in
	 * parent->driver_data. This WARN is a reminder / safeguard for
	 * developers of device-dax drivers.
	 */
	if (dev_get_drvdata(parent)) {
		dev_WARN(parent, "dax core failed to setup private data\n");
		return NULL;
	}

	if (!IS_ALIGNED(range->start, align)
			|| !IS_ALIGNED(range_len(range), align))
		return NULL;

	dax_region = kzalloc(sizeof(*dax_region), GFP_KERNEL);
	if (!dax_region)
		return NULL;

	dev_set_drvdata(parent, dax_region);
	kref_init(&dax_region->kref);
	dax_region->id = region_id;
	dax_region->align = align;
	dax_region->dev = parent;
	dax_region->target_node = target_node;
	ida_init(&dax_region->ida);
	dax_region->res = (struct resource) {
		.start = range->start,
		.end = range->end,
		.flags = IORESOURCE_MEM | flags,
	};

	if (sysfs_create_groups(&parent->kobj, dax_region_attribute_groups)) {
		kfree(dax_region);
		return NULL;
	}

	if (devm_add_action_or_reset(parent, dax_region_unregister, dax_region))
		return NULL;
	return dax_region;
}
EXPORT_SYMBOL_GPL(alloc_dax_region);

static void dax_mapping_release(struct device *dev)
{
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct device *parent = dev->parent;
	struct dev_dax *dev_dax = to_dev_dax(parent);

	ida_free(&dev_dax->ida, mapping->id);
	kfree(mapping);
	put_device(parent);
}

static void unregister_dax_mapping(void *data)
{
	struct device *dev = data;
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct dev_dax *dev_dax = to_dev_dax(dev->parent);

	dev_dbg(dev, "%s\n", __func__);

	dev_dax->ranges[mapping->range_id].mapping = NULL;
	mapping->range_id = -1;

	device_unregister(dev);
}

static struct dev_dax_range *get_dax_range(struct device *dev)
{
	struct dax_mapping *mapping = to_dax_mapping(dev);
	struct dev_dax *dev_dax = to_dev_dax(dev->parent);
	int rc;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return NULL;
	if (mapping->range_id < 0) {
		up_write(&dax_region_rwsem);
		return NULL;
	}

	return &dev_dax->ranges[mapping->range_id];
}

static void put_dax_range(void)
{
	up_write(&dax_region_rwsem);
}

static ssize_t start_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sysfs_emit(buf, "%#llx\n", dax_range->range.start);
	put_dax_range();

	return rc;
}
static DEVICE_ATTR(start, 0400, start_show, NULL);

static ssize_t end_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sysfs_emit(buf, "%#llx\n", dax_range->range.end);
	put_dax_range();

	return rc;
}
static DEVICE_ATTR(end, 0400, end_show, NULL);

static ssize_t pgoff_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax_range *dax_range;
	ssize_t rc;

	dax_range = get_dax_range(dev);
	if (!dax_range)
		return -ENXIO;
	rc = sysfs_emit(buf, "%#lx\n", dax_range->pgoff);
	put_dax_range();

	return rc;
}
static DEVICE_ATTR(page_offset, 0400, pgoff_show, NULL);

static struct attribute *dax_mapping_attributes[] = {
	&dev_attr_start.attr,
	&dev_attr_end.attr,
	&dev_attr_page_offset.attr,
	NULL,
};
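
/*
 * The attributes above surface each allocated range as a child
 * "mappingN" device of the dax device, e.g. (illustrative, for a device
 * with a single range):
 *
 *	/sys/bus/dax/devices/dax0.1/mapping0/start
 *	/sys/bus/dax/devices/dax0.1/mapping0/end
 *	/sys/bus/dax/devices/dax0.1/mapping0/page_offset
 */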

static const struct attribute_group dax_mapping_attribute_group = {
	.attrs = dax_mapping_attributes,
};

static const struct attribute_group *dax_mapping_attribute_groups[] = {
	&dax_mapping_attribute_group,
	NULL,
};

static const struct device_type dax_mapping_type = {
	.release = dax_mapping_release,
	.groups = dax_mapping_attribute_groups,
};

static int devm_register_dax_mapping(struct dev_dax *dev_dax, int range_id)
{
	struct dax_region *dax_region = dev_dax->region;
	struct dax_mapping *mapping;
	struct device *dev;
	int rc;

	lockdep_assert_held_write(&dax_region_rwsem);

	if (dev_WARN_ONCE(&dev_dax->dev, !dax_region->dev->driver,
				"region disabled\n"))
		return -ENXIO;

	mapping = kzalloc(sizeof(*mapping), GFP_KERNEL);
	if (!mapping)
		return -ENOMEM;
	mapping->range_id = range_id;
	mapping->id = ida_alloc(&dev_dax->ida, GFP_KERNEL);
	if (mapping->id < 0) {
		kfree(mapping);
		return -ENOMEM;
	}
	dev_dax->ranges[range_id].mapping = mapping;
	dev = &mapping->dev;
	device_initialize(dev);
	dev->parent = &dev_dax->dev;
	get_device(dev->parent);
	dev->type = &dax_mapping_type;
	dev_set_name(dev, "mapping%d", mapping->id);
	rc = device_add(dev);
	if (rc) {
		put_device(dev);
		return rc;
	}

	rc = devm_add_action_or_reset(dax_region->dev, unregister_dax_mapping,
			dev);
	if (rc)
		return rc;
	return 0;
}

static int alloc_dev_dax_range(struct dev_dax *dev_dax, u64 start,
		resource_size_t size)
{
	struct dax_region *dax_region = dev_dax->region;
	struct resource *res = &dax_region->res;
	struct device *dev = &dev_dax->dev;
	struct dev_dax_range *ranges;
	unsigned long pgoff = 0;
	struct resource *alloc;
	int i, rc;

	lockdep_assert_held_write(&dax_region_rwsem);

	/* handle the seed alloc special case */
	if (!size) {
		if (dev_WARN_ONCE(dev, dev_dax->nr_range,
					"0-size allocation must be first\n"))
			return -EBUSY;
		/* nr_range == 0 is elsewhere special cased as 0-size device */
		return 0;
	}

	alloc = __request_region(res, start, size, dev_name(dev), 0);
	if (!alloc)
		return -ENOMEM;

	ranges = krealloc(dev_dax->ranges, sizeof(*ranges)
			* (dev_dax->nr_range + 1), GFP_KERNEL);
	if (!ranges) {
		__release_region(res, alloc->start, resource_size(alloc));
		return -ENOMEM;
	}

	for (i = 0; i < dev_dax->nr_range; i++)
		pgoff += PHYS_PFN(range_len(&ranges[i].range));
	dev_dax->ranges = ranges;
	ranges[dev_dax->nr_range++] = (struct dev_dax_range) {
		.pgoff = pgoff,
		.range = {
			.start = alloc->start,
			.end = alloc->end,
		},
	};

	dev_dbg(dev, "alloc range[%d]: %pa:%pa\n", dev_dax->nr_range - 1,
			&alloc->start, &alloc->end);
	/*
	 * A dev_dax instance must be registered before mapping device
	 * children can be added. Defer to devm_create_dev_dax() to add
	 * the initial mapping device.
	 */
	if (!device_is_registered(&dev_dax->dev))
		return 0;

	rc = devm_register_dax_mapping(dev_dax, dev_dax->nr_range - 1);
	if (rc)
		trim_dev_dax_range(dev_dax);

	return rc;
}

static int adjust_dev_dax_range(struct dev_dax *dev_dax, struct resource *res, resource_size_t size)
{
	int last_range = dev_dax->nr_range - 1;
	struct dev_dax_range *dax_range = &dev_dax->ranges[last_range];
	bool is_shrink = resource_size(res) > size;
	struct range *range = &dax_range->range;
	struct device *dev = &dev_dax->dev;
	int rc;

	lockdep_assert_held_write(&dax_region_rwsem);

	if (dev_WARN_ONCE(dev, !size, "deletion is handled by dev_dax_shrink\n"))
		return -EINVAL;

	rc = adjust_resource(res, range->start, size);
	if (rc)
		return rc;

	*range = (struct range) {
		.start = range->start,
		.end = range->start + size - 1,
	};

	dev_dbg(dev, "%s range[%d]: %#llx:%#llx\n", is_shrink ? "shrink" : "extend",
			last_range, (unsigned long long) range->start,
			(unsigned long long) range->end);

	return 0;
}

static ssize_t size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	unsigned long long size;
	int rc;

	rc = down_read_interruptible(&dax_dev_rwsem);
	if (rc)
		return rc;
	size = dev_dax_size(dev_dax);
	up_read(&dax_dev_rwsem);

	return sysfs_emit(buf, "%llu\n", size);
}

static bool alloc_is_aligned(struct dev_dax *dev_dax, resource_size_t size)
{
	/*
	 * The minimum mapping granularity for a device instance is a
	 * single subsection, unless the arch says otherwise.
	 */
	return IS_ALIGNED(size, max_t(unsigned long, dev_dax->align, memremap_compat_align()));
}
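
/*
 * For example, in the common configuration where memremap_compat_align()
 * resolves to the 2 MiB subsection size and dev_dax->align is 2 MiB, a
 * 1 GiB allocation passes alloc_is_aligned() while a 1 MiB allocation
 * does not.
 */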

static int dev_dax_shrink(struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t to_shrink = dev_dax_size(dev_dax) - size;
	struct dax_region *dax_region = dev_dax->region;
	struct device *dev = &dev_dax->dev;
	int i;

	for (i = dev_dax->nr_range - 1; i >= 0; i--) {
		struct range *range = &dev_dax->ranges[i].range;
		struct dax_mapping *mapping = dev_dax->ranges[i].mapping;
		struct resource *adjust = NULL, *res;
		resource_size_t shrink;

		shrink = min_t(u64, to_shrink, range_len(range));
		if (shrink >= range_len(range)) {
			devm_release_action(dax_region->dev,
					unregister_dax_mapping, &mapping->dev);
			trim_dev_dax_range(dev_dax);
			to_shrink -= shrink;
			if (!to_shrink)
				break;
			continue;
		}

		for_each_dax_region_resource(dax_region, res)
			if (strcmp(res->name, dev_name(dev)) == 0
					&& res->start == range->start) {
				adjust = res;
				break;
			}

		if (dev_WARN_ONCE(dev, !adjust || i != dev_dax->nr_range - 1,
					"failed to find matching resource\n"))
			return -ENXIO;
		return adjust_dev_dax_range(dev_dax, adjust, range_len(range)
				- shrink);
	}
	return 0;
}

/*
 * Only allow adjustments that preserve the relative pgoff of existing
 * allocations. I.e. the dev_dax->ranges array is ordered by increasing pgoff.
 */
static bool adjust_ok(struct dev_dax *dev_dax, struct resource *res)
{
	struct dev_dax_range *last;
	int i;

	if (dev_dax->nr_range == 0)
		return false;
	if (strcmp(res->name, dev_name(&dev_dax->dev)) != 0)
		return false;
	last = &dev_dax->ranges[dev_dax->nr_range - 1];
	if (last->range.start != res->start || last->range.end != res->end)
		return false;
	for (i = 0; i < dev_dax->nr_range - 1; i++) {
		struct dev_dax_range *dax_range = &dev_dax->ranges[i];

		if (dax_range->pgoff > last->pgoff)
			return false;
	}

	return true;
}

static ssize_t dev_dax_resize(struct dax_region *dax_region,
		struct dev_dax *dev_dax, resource_size_t size)
{
	resource_size_t avail = dax_region_avail_size(dax_region), to_alloc;
	resource_size_t dev_size = dev_dax_size(dev_dax);
	struct resource *region_res = &dax_region->res;
	struct device *dev = &dev_dax->dev;
	struct resource *res, *first;
	resource_size_t alloc = 0;
	int rc;

	if (dev->driver)
		return -EBUSY;
	if (size == dev_size)
		return 0;
	if (size > dev_size && size - dev_size > avail)
		return -ENOSPC;
	if (size < dev_size)
		return dev_dax_shrink(dev_dax, size);

	to_alloc = size - dev_size;
	if (dev_WARN_ONCE(dev, !alloc_is_aligned(dev_dax, to_alloc),
			"resize of %pa misaligned\n", &to_alloc))
		return -ENXIO;

	/*
	 * Expand the device into the unused portion of the region. This
	 * may involve adjusting the end of an existing resource, or
	 * allocating a new resource.
	 */
retry:
	first = region_res->child;
	if (!first)
		return alloc_dev_dax_range(dev_dax, dax_region->res.start, to_alloc);

	rc = -ENOSPC;
	for (res = first; res; res = res->sibling) {
		struct resource *next = res->sibling;

		/* space at the beginning of the region */
		if (res == first && res->start > dax_region->res.start) {
			alloc = min(res->start - dax_region->res.start, to_alloc);
			rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, alloc);
			break;
		}

		alloc = 0;
		/* space between allocations */
		if (next && next->start > res->end + 1)
			alloc = min(next->start - (res->end + 1), to_alloc);

		/* space at the end of the region */
		if (!alloc && !next && res->end < region_res->end)
			alloc = min(region_res->end - res->end, to_alloc);

		if (!alloc)
			continue;

		if (adjust_ok(dev_dax, res)) {
			rc = adjust_dev_dax_range(dev_dax, res, resource_size(res) + alloc);
			break;
		}
		rc = alloc_dev_dax_range(dev_dax, res->end + 1, alloc);
		break;
	}
	if (rc)
		return rc;
	to_alloc -= alloc;
	if (to_alloc)
		goto retry;
	return 0;
}

static ssize_t size_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	ssize_t rc;
	unsigned long long val;
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;

	rc = kstrtoull(buf, 0, &val);
	if (rc)
		return rc;

	if (!alloc_is_aligned(dev_dax, val)) {
		dev_dbg(dev, "%s: size: %lld misaligned\n", __func__, val);
		return -EINVAL;
	}

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	if (!dax_region->dev->driver) {
		rc = -ENXIO;
		goto err_region;
	}
	rc = down_write_killable(&dax_dev_rwsem);
	if (rc)
		goto err_dev;

	rc = dev_dax_resize(dax_region, dev_dax, val);

err_dev:
	up_write(&dax_dev_rwsem);
err_region:
	up_write(&dax_region_rwsem);

	if (rc == 0)
		return len;
	return rc;
}
static DEVICE_ATTR_RW(size);
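
/*
 * Example (illustrative): growing an unbound dynamic device to 4 GiB
 * and then back to zero; dev_dax_resize() refuses with -EBUSY while a
 * driver is attached:
 *
 *	echo $((4 << 30)) > /sys/bus/dax/devices/dax0.1/size
 *	echo 0 > /sys/bus/dax/devices/dax0.1/size
 */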

static ssize_t range_parse(const char *opt, size_t len, struct range *range)
{
	unsigned long long addr = 0;
	char *start, *end, *str;
	ssize_t rc = -EINVAL;

	str = kstrdup(opt, GFP_KERNEL);
	if (!str)
		return rc;

	end = str;
	start = strsep(&end, "-");
	if (!start || !end)
		goto err;

	rc = kstrtoull(start, 16, &addr);
	if (rc)
		goto err;
	range->start = addr;

	rc = kstrtoull(end, 16, &addr);
	if (rc)
		goto err;
	range->end = addr;

err:
	kfree(str);
	return rc;
}

static ssize_t mapping_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	size_t to_alloc;
	struct range r;
	ssize_t rc;

	rc = range_parse(buf, len, &r);
	if (rc)
		return rc;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	if (!dax_region->dev->driver) {
		up_write(&dax_region_rwsem);
		return -ENXIO;
	}
	rc = down_write_killable(&dax_dev_rwsem);
	if (rc) {
		up_write(&dax_region_rwsem);
		return rc;
	}

	to_alloc = range_len(&r);
	if (alloc_is_aligned(dev_dax, to_alloc))
		rc = alloc_dev_dax_range(dev_dax, r.start, to_alloc);
	up_write(&dax_dev_rwsem);
	up_write(&dax_region_rwsem);

	return rc == 0 ? len : rc;
}
static DEVICE_ATTR_WO(mapping);
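
/*
 * Example (illustrative): the "mapping" attribute takes a hexadecimal,
 * inclusive "<start>-<end>" physical range, which allows a prior
 * allocation to be recreated after boot, e.g. a 1 GiB range at 64 GiB:
 *
 *	echo 0x1000000000-0x103fffffff > /sys/bus/dax/devices/dax0.1/mapping
 */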

static ssize_t align_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sysfs_emit(buf, "%d\n", dev_dax->align);
}

static ssize_t dev_dax_validate_align(struct dev_dax *dev_dax)
{
	struct device *dev = &dev_dax->dev;
	int i;

	for (i = 0; i < dev_dax->nr_range; i++) {
		size_t len = range_len(&dev_dax->ranges[i].range);

		if (!alloc_is_aligned(dev_dax, len)) {
			dev_dbg(dev, "%s: align %u invalid for range %d\n",
				__func__, dev_dax->align, i);
			return -EINVAL;
		}
	}

	return 0;
}

static ssize_t align_store(struct device *dev, struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	unsigned long val, align_save;
	ssize_t rc;

	rc = kstrtoul(buf, 0, &val);
	if (rc)
		return -ENXIO;

	if (!dax_align_valid(val))
		return -EINVAL;

	rc = down_write_killable(&dax_region_rwsem);
	if (rc)
		return rc;
	if (!dax_region->dev->driver) {
		up_write(&dax_region_rwsem);
		return -ENXIO;
	}

	rc = down_write_killable(&dax_dev_rwsem);
	if (rc) {
		up_write(&dax_region_rwsem);
		return rc;
	}
	if (dev->driver) {
		rc = -EBUSY;
		goto out_unlock;
	}

	align_save = dev_dax->align;
	dev_dax->align = val;
	rc = dev_dax_validate_align(dev_dax);
	if (rc)
		dev_dax->align = align_save;
out_unlock:
	up_write(&dax_dev_rwsem);
	up_write(&dax_region_rwsem);
	return rc == 0 ? len : rc;
}
static DEVICE_ATTR_RW(align);
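
/*
 * Example (illustrative): switching an unbound device to 1 GiB mapping
 * granularity; dax_align_valid() restricts the accepted values to the
 * supported page sizes (e.g. 4 KiB, 2 MiB and 1 GiB on x86_64):
 *
 *	echo 1073741824 > /sys/bus/dax/devices/dax0.1/align
 */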

static int dev_dax_target_node(struct dev_dax *dev_dax)
{
	struct dax_region *dax_region = dev_dax->region;

	return dax_region->target_node;
}

static ssize_t target_node_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sysfs_emit(buf, "%d\n", dev_dax_target_node(dev_dax));
}
static DEVICE_ATTR_RO(target_node);

static ssize_t resource_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;
	unsigned long long start;

	if (dev_dax->nr_range < 1)
		start = dax_region->res.start;
	else
		start = dev_dax->ranges[0].range.start;

	return sysfs_emit(buf, "%#llx\n", start);
}
static DEVICE_ATTR(resource, 0400, resource_show, NULL);

static ssize_t modalias_show(struct device *dev, struct device_attribute *attr,
		char *buf)
{
	/*
	 * We only ever expect to handle device-dax instances, i.e. the
	 * @type argument to MODULE_ALIAS_DAX_DEVICE() is always zero
	 */
	return sysfs_emit(buf, DAX_DEVICE_MODALIAS_FMT "\n", 0);
}
static DEVICE_ATTR_RO(modalias);

static ssize_t numa_node_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%d\n", dev_to_node(dev));
}
static DEVICE_ATTR_RO(numa_node);

static ssize_t memmap_on_memory_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);

	return sysfs_emit(buf, "%d\n", dev_dax->memmap_on_memory);
}

static ssize_t memmap_on_memory_store(struct device *dev,
		struct device_attribute *attr,
		const char *buf, size_t len)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	bool val;
	int rc;

	rc = kstrtobool(buf, &val);
	if (rc)
		return rc;

	if (val == true && !mhp_supports_memmap_on_memory()) {
		dev_dbg(dev, "memmap_on_memory is not available\n");
		return -EOPNOTSUPP;
	}

	rc = down_write_killable(&dax_dev_rwsem);
	if (rc)
		return rc;

	if (dev_dax->memmap_on_memory != val && dev->driver &&
	    to_dax_drv(dev->driver)->type == DAXDRV_KMEM_TYPE) {
		up_write(&dax_dev_rwsem);
		return -EBUSY;
	}

	dev_dax->memmap_on_memory = val;
	up_write(&dax_dev_rwsem);

	return len;
}
static DEVICE_ATTR_RW(memmap_on_memory);
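
/*
 * Example (illustrative): requesting that the memmap (struct page
 * array) for a device be placed on the hotplugged memory itself before
 * handing the device to dax_kmem; the flag cannot be toggled while the
 * device is bound to kmem:
 *
 *	echo 1 > /sys/bus/dax/devices/dax0.1/memmap_on_memory
 *	echo dax0.1 > /sys/bus/dax/drivers/kmem/new_id
 */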

static umode_t dev_dax_visible(struct kobject *kobj, struct attribute *a, int n)
{
	struct device *dev = container_of(kobj, struct device, kobj);
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_region *dax_region = dev_dax->region;

	if (a == &dev_attr_target_node.attr && dev_dax_target_node(dev_dax) < 0)
		return 0;
	if (a == &dev_attr_numa_node.attr && !IS_ENABLED(CONFIG_NUMA))
		return 0;
	if (a == &dev_attr_mapping.attr && is_static(dax_region))
		return 0;
	if ((a == &dev_attr_align.attr ||
	     a == &dev_attr_size.attr) && is_static(dax_region))
		return 0444;
	return a->mode;
}

static struct attribute *dev_dax_attributes[] = {
	&dev_attr_modalias.attr,
	&dev_attr_size.attr,
	&dev_attr_mapping.attr,
	&dev_attr_target_node.attr,
	&dev_attr_align.attr,
	&dev_attr_resource.attr,
	&dev_attr_numa_node.attr,
	&dev_attr_memmap_on_memory.attr,
	NULL,
};

static const struct attribute_group dev_dax_attribute_group = {
	.attrs = dev_dax_attributes,
	.is_visible = dev_dax_visible,
};

static const struct attribute_group *dax_attribute_groups[] = {
	&dev_dax_attribute_group,
	NULL,
};

static void dev_dax_release(struct device *dev)
{
	struct dev_dax *dev_dax = to_dev_dax(dev);
	struct dax_device *dax_dev = dev_dax->dax_dev;

	put_dax(dax_dev);
	free_dev_dax_id(dev_dax);
	kfree(dev_dax->pgmap);
	kfree(dev_dax);
}

static const struct device_type dev_dax_type = {
	.release = dev_dax_release,
	.groups = dax_attribute_groups,
};

static struct dev_dax *__devm_create_dev_dax(struct dev_dax_data *data)
{
	struct dax_region *dax_region = data->dax_region;
	struct device *parent = dax_region->dev;
	struct dax_device *dax_dev;
	struct dev_dax *dev_dax;
	struct inode *inode;
	struct device *dev;
	int rc;

	dev_dax = kzalloc(sizeof(*dev_dax), GFP_KERNEL);
	if (!dev_dax)
		return ERR_PTR(-ENOMEM);

	dev_dax->region = dax_region;
	if (is_static(dax_region)) {
		if (dev_WARN_ONCE(parent, data->id < 0,
				"dynamic id specified to static region\n")) {
			rc = -EINVAL;
			goto err_id;
		}

		dev_dax->id = data->id;
	} else {
		if (dev_WARN_ONCE(parent, data->id >= 0,
				"static id specified to dynamic region\n")) {
			rc = -EINVAL;
			goto err_id;
		}

		rc = alloc_dev_dax_id(dev_dax);
		if (rc < 0)
			goto err_id;
	}

	dev = &dev_dax->dev;
	device_initialize(dev);
	dev_set_name(dev, "dax%d.%d", dax_region->id, dev_dax->id);

	rc = alloc_dev_dax_range(dev_dax, dax_region->res.start, data->size);
	if (rc)
		goto err_range;

	if (data->pgmap) {
		dev_WARN_ONCE(parent, !is_static(dax_region),
			"custom dev_pagemap requires a static dax_region\n");

		dev_dax->pgmap = kmemdup(data->pgmap,
				sizeof(struct dev_pagemap), GFP_KERNEL);
		if (!dev_dax->pgmap) {
			rc = -ENOMEM;
			goto err_pgmap;
		}
	}

	/*
	 * No dax_operations since there is no access to this device outside of
	 * mmap of the resulting character device.
	 */
	dax_dev = alloc_dax(dev_dax, NULL);
	if (IS_ERR(dax_dev)) {
		rc = PTR_ERR(dax_dev);
		goto err_alloc_dax;
	}
	set_dax_synchronous(dax_dev);
	set_dax_nocache(dax_dev);
	set_dax_nomc(dax_dev);

	/* a device_dax instance is dead while the driver is not attached */
	kill_dax(dax_dev);

	dev_dax->dax_dev = dax_dev;
	dev_dax->target_node = dax_region->target_node;
	dev_dax->align = dax_region->align;
	ida_init(&dev_dax->ida);

	dev_dax->memmap_on_memory = data->memmap_on_memory;

	inode = dax_inode(dax_dev);
	dev->devt = inode->i_rdev;
	dev->bus = &dax_bus_type;
	dev->parent = parent;
	dev->type = &dev_dax_type;

	rc = device_add(dev);
	if (rc) {
		kill_dev_dax(dev_dax);
		put_device(dev);
		return ERR_PTR(rc);
	}

	rc = devm_add_action_or_reset(dax_region->dev, unregister_dev_dax, dev);
	if (rc)
		return ERR_PTR(rc);

	/* register mapping device for the initial allocation range */
	if (dev_dax->nr_range && range_len(&dev_dax->ranges[0].range)) {
		rc = devm_register_dax_mapping(dev_dax, 0);
		if (rc)
			return ERR_PTR(rc);
	}

	return dev_dax;

err_alloc_dax:
	kfree(dev_dax->pgmap);
err_pgmap:
	free_dev_dax_ranges(dev_dax);
err_range:
	free_dev_dax_id(dev_dax);
err_id:
	kfree(dev_dax);

	return ERR_PTR(rc);
}

struct dev_dax *devm_create_dev_dax(struct dev_dax_data *data)
{
	struct dev_dax *dev_dax;

	down_write(&dax_region_rwsem);
	dev_dax = __devm_create_dev_dax(data);
	up_write(&dax_region_rwsem);

	return dev_dax;
}
EXPORT_SYMBOL_GPL(devm_create_dev_dax);

int __dax_driver_register(struct dax_device_driver *dax_drv,
		struct module *module, const char *mod_name)
{
	struct device_driver *drv = &dax_drv->drv;

	/*
	 * dax_bus_probe() calls dax_drv->probe() unconditionally. So
	 * better be safe than sorry and ensure it is provided.
	 */
	if (!dax_drv->probe)
		return -EINVAL;

	INIT_LIST_HEAD(&dax_drv->ids);
	drv->owner = module;
	drv->name = mod_name;
	drv->mod_name = mod_name;
	drv->bus = &dax_bus_type;

	return driver_register(drv);
}
EXPORT_SYMBOL_GPL(__dax_driver_register);

void dax_driver_unregister(struct dax_device_driver *dax_drv)
{
	struct device_driver *drv = &dax_drv->drv;
	struct dax_id *dax_id, *_id;

	mutex_lock(&dax_bus_lock);
	list_for_each_entry_safe(dax_id, _id, &dax_drv->ids, list) {
		list_del(&dax_id->list);
		kfree(dax_id);
	}
	mutex_unlock(&dax_bus_lock);
	driver_unregister(drv);
}
EXPORT_SYMBOL_GPL(dax_driver_unregister);

int __init dax_bus_init(void)
{
	return bus_register(&dax_bus_type);
}

void __exit dax_bus_exit(void)
{
	bus_unregister(&dax_bus_type);
}
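
/*
 * A minimal sketch (illustrative; assumes the dax_driver_register()
 * convenience macro from bus.h) of how a client driver registers with
 * this bus:
 *
 *	static int example_probe(struct dev_dax *dev_dax)
 *	{
 *		return 0;
 *	}
 *
 *	static struct dax_device_driver example_dax_driver = {
 *		.probe = example_probe,
 *		.type = DAXDRV_DEVICE_TYPE,
 *	};
 *
 *	static int __init example_init(void)
 *	{
 *		return dax_driver_register(&example_dax_driver);
 *	}
 *	module_init(example_init);
 */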