1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. 4 */ 5 #include <linux/memremap.h> 6 #include <linux/blkdev.h> 7 #include <linux/device.h> 8 #include <linux/sizes.h> 9 #include <linux/slab.h> 10 #include <linux/fs.h> 11 #include <linux/mm.h> 12 #include "nd-core.h" 13 #include "pfn.h" 14 #include "nd.h" 15 16 static const bool page_struct_override = IS_ENABLED(CONFIG_NVDIMM_KMSAN); 17 18 static void nd_pfn_release(struct device *dev) 19 { 20 struct nd_region *nd_region = to_nd_region(dev->parent); 21 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 22 23 dev_dbg(dev, "trace\n"); 24 nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns); 25 ida_free(&nd_region->pfn_ida, nd_pfn->id); 26 kfree(nd_pfn->uuid); 27 kfree(nd_pfn); 28 } 29 30 struct nd_pfn *to_nd_pfn(struct device *dev) 31 { 32 struct nd_pfn *nd_pfn = container_of(dev, struct nd_pfn, dev); 33 34 WARN_ON(!is_nd_pfn(dev)); 35 return nd_pfn; 36 } 37 EXPORT_SYMBOL(to_nd_pfn); 38 39 static ssize_t mode_show(struct device *dev, 40 struct device_attribute *attr, char *buf) 41 { 42 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 43 44 switch (nd_pfn->mode) { 45 case PFN_MODE_RAM: 46 return sprintf(buf, "ram\n"); 47 case PFN_MODE_PMEM: 48 return sprintf(buf, "pmem\n"); 49 default: 50 return sprintf(buf, "none\n"); 51 } 52 } 53 54 static ssize_t mode_store(struct device *dev, 55 struct device_attribute *attr, const char *buf, size_t len) 56 { 57 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 58 ssize_t rc = 0; 59 60 device_lock(dev); 61 nvdimm_bus_lock(dev); 62 if (dev->driver) 63 rc = -EBUSY; 64 else { 65 size_t n = len - 1; 66 67 if (strncmp(buf, "pmem\n", n) == 0 68 || strncmp(buf, "pmem", n) == 0) { 69 nd_pfn->mode = PFN_MODE_PMEM; 70 } else if (strncmp(buf, "ram\n", n) == 0 71 || strncmp(buf, "ram", n) == 0) 72 nd_pfn->mode = PFN_MODE_RAM; 73 else if (strncmp(buf, "none\n", n) == 0 74 || strncmp(buf, "none", n) == 0) 75 nd_pfn->mode = PFN_MODE_NONE; 76 else 77 rc = -EINVAL; 78 } 79 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 80 buf[len - 1] == '\n' ? "" : "\n"); 81 nvdimm_bus_unlock(dev); 82 device_unlock(dev); 83 84 return rc ? rc : len; 85 } 86 static DEVICE_ATTR_RW(mode); 87 88 static ssize_t align_show(struct device *dev, 89 struct device_attribute *attr, char *buf) 90 { 91 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 92 93 return sprintf(buf, "%ld\n", nd_pfn->align); 94 } 95 96 static unsigned long *nd_pfn_supported_alignments(unsigned long *alignments) 97 { 98 99 alignments[0] = PAGE_SIZE; 100 101 if (has_transparent_hugepage()) { 102 alignments[1] = HPAGE_PMD_SIZE; 103 if (has_transparent_pud_hugepage()) 104 alignments[2] = HPAGE_PUD_SIZE; 105 } 106 107 return alignments; 108 } 109 110 /* 111 * Use pmd mapping if supported as default alignment 112 */ 113 static unsigned long nd_pfn_default_alignment(void) 114 { 115 116 if (has_transparent_hugepage()) 117 return HPAGE_PMD_SIZE; 118 return PAGE_SIZE; 119 } 120 121 static ssize_t align_store(struct device *dev, 122 struct device_attribute *attr, const char *buf, size_t len) 123 { 124 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 125 unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; 126 ssize_t rc; 127 128 device_lock(dev); 129 nvdimm_bus_lock(dev); 130 rc = nd_size_select_store(dev, buf, &nd_pfn->align, 131 nd_pfn_supported_alignments(aligns)); 132 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 133 buf[len - 1] == '\n' ? "" : "\n"); 134 nvdimm_bus_unlock(dev); 135 device_unlock(dev); 136 137 return rc ? rc : len; 138 } 139 static DEVICE_ATTR_RW(align); 140 141 static ssize_t uuid_show(struct device *dev, 142 struct device_attribute *attr, char *buf) 143 { 144 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 145 146 if (nd_pfn->uuid) 147 return sprintf(buf, "%pUb\n", nd_pfn->uuid); 148 return sprintf(buf, "\n"); 149 } 150 151 static ssize_t uuid_store(struct device *dev, 152 struct device_attribute *attr, const char *buf, size_t len) 153 { 154 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 155 ssize_t rc; 156 157 device_lock(dev); 158 rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); 159 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 160 buf[len - 1] == '\n' ? "" : "\n"); 161 device_unlock(dev); 162 163 return rc ? rc : len; 164 } 165 static DEVICE_ATTR_RW(uuid); 166 167 static ssize_t namespace_show(struct device *dev, 168 struct device_attribute *attr, char *buf) 169 { 170 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 171 ssize_t rc; 172 173 nvdimm_bus_lock(dev); 174 rc = sprintf(buf, "%s\n", nd_pfn->ndns 175 ? dev_name(&nd_pfn->ndns->dev) : ""); 176 nvdimm_bus_unlock(dev); 177 return rc; 178 } 179 180 static ssize_t namespace_store(struct device *dev, 181 struct device_attribute *attr, const char *buf, size_t len) 182 { 183 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 184 ssize_t rc; 185 186 device_lock(dev); 187 nvdimm_bus_lock(dev); 188 rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); 189 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 190 buf[len - 1] == '\n' ? "" : "\n"); 191 nvdimm_bus_unlock(dev); 192 device_unlock(dev); 193 194 return rc; 195 } 196 static DEVICE_ATTR_RW(namespace); 197 198 static ssize_t resource_show(struct device *dev, 199 struct device_attribute *attr, char *buf) 200 { 201 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 202 ssize_t rc; 203 204 device_lock(dev); 205 if (dev->driver) { 206 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 207 u64 offset = __le64_to_cpu(pfn_sb->dataoff); 208 struct nd_namespace_common *ndns = nd_pfn->ndns; 209 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); 210 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 211 212 rc = sprintf(buf, "%#llx\n", (unsigned long long) nsio->res.start 213 + start_pad + offset); 214 } else { 215 /* no address to convey if the pfn instance is disabled */ 216 rc = -ENXIO; 217 } 218 device_unlock(dev); 219 220 return rc; 221 } 222 static DEVICE_ATTR_ADMIN_RO(resource); 223 224 static ssize_t size_show(struct device *dev, 225 struct device_attribute *attr, char *buf) 226 { 227 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 228 ssize_t rc; 229 230 device_lock(dev); 231 if (dev->driver) { 232 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 233 u64 offset = __le64_to_cpu(pfn_sb->dataoff); 234 struct nd_namespace_common *ndns = nd_pfn->ndns; 235 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); 236 u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); 237 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 238 239 rc = sprintf(buf, "%llu\n", (unsigned long long) 240 resource_size(&nsio->res) - start_pad 241 - end_trunc - offset); 242 } else { 243 /* no size to convey if the pfn instance is disabled */ 244 rc = -ENXIO; 245 } 246 device_unlock(dev); 247 248 return rc; 249 } 250 static DEVICE_ATTR_RO(size); 251 252 static ssize_t supported_alignments_show(struct device *dev, 253 struct device_attribute *attr, char *buf) 254 { 255 unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; 256 257 return nd_size_select_show(0, 258 nd_pfn_supported_alignments(aligns), buf); 259 } 260 static DEVICE_ATTR_RO(supported_alignments); 261 262 static struct attribute *nd_pfn_attributes[] = { 263 &dev_attr_mode.attr, 264 &dev_attr_namespace.attr, 265 &dev_attr_uuid.attr, 266 &dev_attr_align.attr, 267 &dev_attr_resource.attr, 268 &dev_attr_size.attr, 269 &dev_attr_supported_alignments.attr, 270 NULL, 271 }; 272 273 static struct attribute_group nd_pfn_attribute_group = { 274 .attrs = nd_pfn_attributes, 275 }; 276 277 const struct attribute_group *nd_pfn_attribute_groups[] = { 278 &nd_pfn_attribute_group, 279 &nd_device_attribute_group, 280 &nd_numa_attribute_group, 281 NULL, 282 }; 283 284 static const struct device_type nd_pfn_device_type = { 285 .name = "nd_pfn", 286 .release = nd_pfn_release, 287 .groups = nd_pfn_attribute_groups, 288 }; 289 290 bool is_nd_pfn(struct device *dev) 291 { 292 return dev ? dev->type == &nd_pfn_device_type : false; 293 } 294 EXPORT_SYMBOL(is_nd_pfn); 295 296 static struct lock_class_key nvdimm_pfn_key; 297 298 struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, 299 struct nd_namespace_common *ndns) 300 { 301 struct device *dev; 302 303 if (!nd_pfn) 304 return NULL; 305 306 nd_pfn->mode = PFN_MODE_NONE; 307 nd_pfn->align = nd_pfn_default_alignment(); 308 dev = &nd_pfn->dev; 309 device_initialize(&nd_pfn->dev); 310 lockdep_set_class(&nd_pfn->dev.mutex, &nvdimm_pfn_key); 311 if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { 312 dev_dbg(&ndns->dev, "failed, already claimed by %s\n", 313 dev_name(ndns->claim)); 314 put_device(dev); 315 return NULL; 316 } 317 return dev; 318 } 319 320 static struct nd_pfn *nd_pfn_alloc(struct nd_region *nd_region) 321 { 322 struct nd_pfn *nd_pfn; 323 struct device *dev; 324 325 nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL); 326 if (!nd_pfn) 327 return NULL; 328 329 nd_pfn->id = ida_alloc(&nd_region->pfn_ida, GFP_KERNEL); 330 if (nd_pfn->id < 0) { 331 kfree(nd_pfn); 332 return NULL; 333 } 334 335 dev = &nd_pfn->dev; 336 dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id); 337 dev->type = &nd_pfn_device_type; 338 dev->parent = &nd_region->dev; 339 340 return nd_pfn; 341 } 342 343 struct device *nd_pfn_create(struct nd_region *nd_region) 344 { 345 struct nd_pfn *nd_pfn; 346 struct device *dev; 347 348 if (!is_memory(&nd_region->dev)) 349 return NULL; 350 351 nd_pfn = nd_pfn_alloc(nd_region); 352 dev = nd_pfn_devinit(nd_pfn, NULL); 353 354 nd_device_register(dev); 355 return dev; 356 } 357 358 /* 359 * nd_pfn_clear_memmap_errors() clears any errors in the volatile memmap 360 * space associated with the namespace. If the memmap is set to DRAM, then 361 * this is a no-op. Since the memmap area is freshly initialized during 362 * probe, we have an opportunity to clear any badblocks in this area. 363 */ 364 static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn) 365 { 366 struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent); 367 struct nd_namespace_common *ndns = nd_pfn->ndns; 368 void *zero_page = page_address(ZERO_PAGE(0)); 369 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 370 int meta_num, rc, bb_present; 371 sector_t first_bad, meta_start; 372 struct nd_namespace_io *nsio; 373 sector_t num_bad; 374 375 if (nd_pfn->mode != PFN_MODE_PMEM) 376 return 0; 377 378 nsio = to_nd_namespace_io(&ndns->dev); 379 meta_start = (SZ_4K + sizeof(*pfn_sb)) >> 9; 380 meta_num = (le64_to_cpu(pfn_sb->dataoff) >> 9) - meta_start; 381 382 /* 383 * re-enable the namespace with correct size so that we can access 384 * the device memmap area. 385 */ 386 devm_namespace_disable(&nd_pfn->dev, ndns); 387 rc = devm_namespace_enable(&nd_pfn->dev, ndns, le64_to_cpu(pfn_sb->dataoff)); 388 if (rc) 389 return rc; 390 391 do { 392 unsigned long zero_len; 393 u64 nsoff; 394 395 bb_present = badblocks_check(&nd_region->bb, meta_start, 396 meta_num, &first_bad, &num_bad); 397 if (bb_present) { 398 dev_dbg(&nd_pfn->dev, "meta: %llx badblocks at %llx\n", 399 num_bad, first_bad); 400 nsoff = ALIGN_DOWN((nd_region->ndr_start 401 + (first_bad << 9)) - nsio->res.start, 402 PAGE_SIZE); 403 zero_len = ALIGN(num_bad << 9, PAGE_SIZE); 404 while (zero_len) { 405 unsigned long chunk = min(zero_len, PAGE_SIZE); 406 407 rc = nvdimm_write_bytes(ndns, nsoff, zero_page, 408 chunk, 0); 409 if (rc) 410 break; 411 412 zero_len -= chunk; 413 nsoff += chunk; 414 } 415 if (rc) { 416 dev_err(&nd_pfn->dev, 417 "error clearing %llx badblocks at %llx\n", 418 num_bad, first_bad); 419 return rc; 420 } 421 } 422 } while (bb_present); 423 424 return 0; 425 } 426 427 static bool nd_supported_alignment(unsigned long align) 428 { 429 int i; 430 unsigned long supported[MAX_NVDIMM_ALIGN] = { [0] = 0, }; 431 432 if (align == 0) 433 return false; 434 435 nd_pfn_supported_alignments(supported); 436 for (i = 0; supported[i]; i++) 437 if (align == supported[i]) 438 return true; 439 return false; 440 } 441 442 /** 443 * nd_pfn_validate - read and validate info-block 444 * @nd_pfn: fsdax namespace runtime state / properties 445 * @sig: 'devdax' or 'fsdax' signature 446 * 447 * Upon return the info-block buffer contents (->pfn_sb) are 448 * indeterminate when validation fails, and a coherent info-block 449 * otherwise. 450 */ 451 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) 452 { 453 u64 checksum, offset; 454 struct resource *res; 455 enum nd_pfn_mode mode; 456 resource_size_t res_size; 457 struct nd_namespace_io *nsio; 458 unsigned long align, start_pad, end_trunc; 459 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 460 struct nd_namespace_common *ndns = nd_pfn->ndns; 461 const uuid_t *parent_uuid = nd_dev_to_uuid(&ndns->dev); 462 463 if (!pfn_sb || !ndns) 464 return -ENODEV; 465 466 if (!is_memory(nd_pfn->dev.parent)) 467 return -ENODEV; 468 469 if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0)) 470 return -ENXIO; 471 472 if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0) 473 return -ENODEV; 474 475 checksum = le64_to_cpu(pfn_sb->checksum); 476 pfn_sb->checksum = 0; 477 if (checksum != nd_sb_checksum((struct nd_gen_sb *) pfn_sb)) 478 return -ENODEV; 479 pfn_sb->checksum = cpu_to_le64(checksum); 480 481 if (memcmp(pfn_sb->parent_uuid, parent_uuid, 16) != 0) 482 return -ENODEV; 483 484 if (__le16_to_cpu(pfn_sb->version_minor) < 1) { 485 pfn_sb->start_pad = 0; 486 pfn_sb->end_trunc = 0; 487 } 488 489 if (__le16_to_cpu(pfn_sb->version_minor) < 2) 490 pfn_sb->align = 0; 491 492 if (__le16_to_cpu(pfn_sb->version_minor) < 4) { 493 pfn_sb->page_struct_size = cpu_to_le16(64); 494 pfn_sb->page_size = cpu_to_le32(PAGE_SIZE); 495 } 496 497 switch (le32_to_cpu(pfn_sb->mode)) { 498 case PFN_MODE_RAM: 499 case PFN_MODE_PMEM: 500 break; 501 default: 502 return -ENXIO; 503 } 504 505 align = le32_to_cpu(pfn_sb->align); 506 offset = le64_to_cpu(pfn_sb->dataoff); 507 start_pad = le32_to_cpu(pfn_sb->start_pad); 508 end_trunc = le32_to_cpu(pfn_sb->end_trunc); 509 if (align == 0) 510 align = 1UL << ilog2(offset); 511 mode = le32_to_cpu(pfn_sb->mode); 512 513 if ((le32_to_cpu(pfn_sb->page_size) > PAGE_SIZE) && 514 (mode == PFN_MODE_PMEM)) { 515 dev_err(&nd_pfn->dev, 516 "init failed, page size mismatch %d\n", 517 le32_to_cpu(pfn_sb->page_size)); 518 return -EOPNOTSUPP; 519 } 520 521 if ((le16_to_cpu(pfn_sb->page_struct_size) < sizeof(struct page)) && 522 (mode == PFN_MODE_PMEM)) { 523 dev_err(&nd_pfn->dev, 524 "init failed, struct page size mismatch %d\n", 525 le16_to_cpu(pfn_sb->page_struct_size)); 526 return -EOPNOTSUPP; 527 } 528 529 /* 530 * Check whether the we support the alignment. For Dax if the 531 * superblock alignment is not matching, we won't initialize 532 * the device. 533 */ 534 if (!nd_supported_alignment(align) && 535 !memcmp(pfn_sb->signature, DAX_SIG, PFN_SIG_LEN)) { 536 dev_err(&nd_pfn->dev, "init failed, alignment mismatch: " 537 "%ld:%ld\n", nd_pfn->align, align); 538 return -EOPNOTSUPP; 539 } 540 541 if (!nd_pfn->uuid) { 542 /* 543 * When probing a namespace via nd_pfn_probe() the uuid 544 * is NULL (see: nd_pfn_devinit()) we init settings from 545 * pfn_sb 546 */ 547 nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL); 548 if (!nd_pfn->uuid) 549 return -ENOMEM; 550 nd_pfn->align = align; 551 nd_pfn->mode = mode; 552 } else { 553 /* 554 * When probing a pfn / dax instance we validate the 555 * live settings against the pfn_sb 556 */ 557 if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0) 558 return -ENODEV; 559 560 /* 561 * If the uuid validates, but other settings mismatch 562 * return EINVAL because userspace has managed to change 563 * the configuration without specifying new 564 * identification. 565 */ 566 if (nd_pfn->align != align || nd_pfn->mode != mode) { 567 dev_err(&nd_pfn->dev, 568 "init failed, settings mismatch\n"); 569 dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n", 570 nd_pfn->align, align, nd_pfn->mode, 571 mode); 572 return -EOPNOTSUPP; 573 } 574 } 575 576 if (align > nvdimm_namespace_capacity(ndns)) { 577 dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n", 578 align, nvdimm_namespace_capacity(ndns)); 579 return -EOPNOTSUPP; 580 } 581 582 /* 583 * These warnings are verbose because they can only trigger in 584 * the case where the physical address alignment of the 585 * namespace has changed since the pfn superblock was 586 * established. 587 */ 588 nsio = to_nd_namespace_io(&ndns->dev); 589 res = &nsio->res; 590 res_size = resource_size(res); 591 if (offset >= res_size) { 592 dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n", 593 dev_name(&ndns->dev)); 594 return -EOPNOTSUPP; 595 } 596 597 if ((align && !IS_ALIGNED(res->start + offset + start_pad, align)) 598 || !IS_ALIGNED(offset, PAGE_SIZE)) { 599 dev_err(&nd_pfn->dev, 600 "bad offset: %#llx dax disabled align: %#lx\n", 601 offset, align); 602 return -EOPNOTSUPP; 603 } 604 605 if (!IS_ALIGNED(res->start + start_pad, memremap_compat_align())) { 606 dev_err(&nd_pfn->dev, "resource start misaligned\n"); 607 return -EOPNOTSUPP; 608 } 609 610 if (!IS_ALIGNED(res->end + 1 - end_trunc, memremap_compat_align())) { 611 dev_err(&nd_pfn->dev, "resource end misaligned\n"); 612 return -EOPNOTSUPP; 613 } 614 615 if (offset >= (res_size - start_pad - end_trunc)) { 616 dev_err(&nd_pfn->dev, "bad offset with small namespace\n"); 617 return -EOPNOTSUPP; 618 } 619 return 0; 620 } 621 EXPORT_SYMBOL(nd_pfn_validate); 622 623 int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) 624 { 625 int rc; 626 struct nd_pfn *nd_pfn; 627 struct device *pfn_dev; 628 struct nd_pfn_sb *pfn_sb; 629 struct nd_region *nd_region = to_nd_region(ndns->dev.parent); 630 631 if (ndns->force_raw) 632 return -ENODEV; 633 634 switch (ndns->claim_class) { 635 case NVDIMM_CCLASS_NONE: 636 case NVDIMM_CCLASS_PFN: 637 break; 638 default: 639 return -ENODEV; 640 } 641 642 nvdimm_bus_lock(&ndns->dev); 643 nd_pfn = nd_pfn_alloc(nd_region); 644 pfn_dev = nd_pfn_devinit(nd_pfn, ndns); 645 nvdimm_bus_unlock(&ndns->dev); 646 if (!pfn_dev) 647 return -ENOMEM; 648 pfn_sb = devm_kmalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); 649 nd_pfn = to_nd_pfn(pfn_dev); 650 nd_pfn->pfn_sb = pfn_sb; 651 rc = nd_pfn_validate(nd_pfn, PFN_SIG); 652 dev_dbg(dev, "pfn: %s\n", rc == 0 ? dev_name(pfn_dev) : "<none>"); 653 if (rc < 0) { 654 nd_detach_ndns(pfn_dev, &nd_pfn->ndns); 655 put_device(pfn_dev); 656 } else 657 nd_device_register(pfn_dev); 658 659 return rc; 660 } 661 EXPORT_SYMBOL(nd_pfn_probe); 662 663 /* 664 * We hotplug memory at sub-section granularity, pad the reserved area 665 * from the previous section base to the namespace base address. 666 */ 667 static unsigned long init_altmap_base(resource_size_t base) 668 { 669 unsigned long base_pfn = PHYS_PFN(base); 670 671 return SUBSECTION_ALIGN_DOWN(base_pfn); 672 } 673 674 static unsigned long init_altmap_reserve(resource_size_t base) 675 { 676 unsigned long reserve = nd_info_block_reserve() >> PAGE_SHIFT; 677 unsigned long base_pfn = PHYS_PFN(base); 678 679 reserve += base_pfn - SUBSECTION_ALIGN_DOWN(base_pfn); 680 return reserve; 681 } 682 683 static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) 684 { 685 struct range *range = &pgmap->range; 686 struct vmem_altmap *altmap = &pgmap->altmap; 687 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 688 u64 offset = le64_to_cpu(pfn_sb->dataoff); 689 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); 690 u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); 691 u32 reserve = nd_info_block_reserve(); 692 struct nd_namespace_common *ndns = nd_pfn->ndns; 693 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 694 resource_size_t base = nsio->res.start + start_pad; 695 resource_size_t end = nsio->res.end - end_trunc; 696 struct vmem_altmap __altmap = { 697 .base_pfn = init_altmap_base(base), 698 .reserve = init_altmap_reserve(base), 699 .end_pfn = PHYS_PFN(end), 700 }; 701 702 *range = (struct range) { 703 .start = nsio->res.start + start_pad, 704 .end = nsio->res.end - end_trunc, 705 }; 706 pgmap->nr_range = 1; 707 if (nd_pfn->mode == PFN_MODE_RAM) { 708 if (offset < reserve) 709 return -EINVAL; 710 nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); 711 } else if (nd_pfn->mode == PFN_MODE_PMEM) { 712 nd_pfn->npfns = PHYS_PFN((range_len(range) - offset)); 713 if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) 714 dev_info(&nd_pfn->dev, 715 "number of pfns truncated from %lld to %ld\n", 716 le64_to_cpu(nd_pfn->pfn_sb->npfns), 717 nd_pfn->npfns); 718 memcpy(altmap, &__altmap, sizeof(*altmap)); 719 altmap->free = PHYS_PFN(offset - reserve); 720 altmap->alloc = 0; 721 pgmap->flags |= PGMAP_ALTMAP_VALID; 722 } else 723 return -ENXIO; 724 725 return 0; 726 } 727 728 static int nd_pfn_init(struct nd_pfn *nd_pfn) 729 { 730 struct nd_namespace_common *ndns = nd_pfn->ndns; 731 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 732 resource_size_t start, size; 733 struct nd_region *nd_region; 734 unsigned long npfns, align; 735 u32 end_trunc; 736 struct nd_pfn_sb *pfn_sb; 737 phys_addr_t offset; 738 const char *sig; 739 u64 checksum; 740 int rc; 741 742 pfn_sb = devm_kmalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL); 743 if (!pfn_sb) 744 return -ENOMEM; 745 746 nd_pfn->pfn_sb = pfn_sb; 747 if (is_nd_dax(&nd_pfn->dev)) 748 sig = DAX_SIG; 749 else 750 sig = PFN_SIG; 751 752 rc = nd_pfn_validate(nd_pfn, sig); 753 if (rc == 0) 754 return nd_pfn_clear_memmap_errors(nd_pfn); 755 if (rc != -ENODEV) 756 return rc; 757 758 /* no info block, do init */; 759 memset(pfn_sb, 0, sizeof(*pfn_sb)); 760 761 nd_region = to_nd_region(nd_pfn->dev.parent); 762 if (nd_region->ro) { 763 dev_info(&nd_pfn->dev, 764 "%s is read-only, unable to init metadata\n", 765 dev_name(&nd_region->dev)); 766 return -ENXIO; 767 } 768 769 start = nsio->res.start; 770 size = resource_size(&nsio->res); 771 npfns = PHYS_PFN(size - SZ_8K); 772 align = max(nd_pfn->align, memremap_compat_align()); 773 774 /* 775 * When @start is misaligned fail namespace creation. See 776 * the 'struct nd_pfn_sb' commentary on why ->start_pad is not 777 * an option. 778 */ 779 if (!IS_ALIGNED(start, memremap_compat_align())) { 780 dev_err(&nd_pfn->dev, "%s: start %pa misaligned to %#lx\n", 781 dev_name(&ndns->dev), &start, 782 memremap_compat_align()); 783 return -EINVAL; 784 } 785 end_trunc = start + size - ALIGN_DOWN(start + size, align); 786 if (nd_pfn->mode == PFN_MODE_PMEM) { 787 unsigned long page_map_size = MAX_STRUCT_PAGE_SIZE * npfns; 788 789 /* 790 * The altmap should be padded out to the block size used 791 * when populating the vmemmap. This *should* be equal to 792 * PMD_SIZE for most architectures. 793 * 794 * Also make sure size of struct page is less than 795 * MAX_STRUCT_PAGE_SIZE. The goal here is compatibility in the 796 * face of production kernel configurations that reduce the 797 * 'struct page' size below MAX_STRUCT_PAGE_SIZE. For debug 798 * kernel configurations that increase the 'struct page' size 799 * above MAX_STRUCT_PAGE_SIZE, the page_struct_override allows 800 * for continuing with the capacity that will be wasted when 801 * reverting to a production kernel configuration. Otherwise, 802 * those configurations are blocked by default. 803 */ 804 if (sizeof(struct page) > MAX_STRUCT_PAGE_SIZE) { 805 if (page_struct_override) 806 page_map_size = sizeof(struct page) * npfns; 807 else { 808 dev_err(&nd_pfn->dev, 809 "Memory debug options prevent using pmem for the page map\n"); 810 return -EINVAL; 811 } 812 } 813 offset = ALIGN(start + SZ_8K + page_map_size, align) - start; 814 } else if (nd_pfn->mode == PFN_MODE_RAM) 815 offset = ALIGN(start + SZ_8K, align) - start; 816 else 817 return -ENXIO; 818 819 if (offset >= (size - end_trunc)) { 820 /* This results in zero size devices */ 821 dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", 822 dev_name(&ndns->dev)); 823 return -ENXIO; 824 } 825 826 npfns = PHYS_PFN(size - offset - end_trunc); 827 pfn_sb->mode = cpu_to_le32(nd_pfn->mode); 828 pfn_sb->dataoff = cpu_to_le64(offset); 829 pfn_sb->npfns = cpu_to_le64(npfns); 830 memcpy(pfn_sb->signature, sig, PFN_SIG_LEN); 831 memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); 832 memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); 833 pfn_sb->version_major = cpu_to_le16(1); 834 pfn_sb->version_minor = cpu_to_le16(4); 835 pfn_sb->end_trunc = cpu_to_le32(end_trunc); 836 pfn_sb->align = cpu_to_le32(nd_pfn->align); 837 if (sizeof(struct page) > MAX_STRUCT_PAGE_SIZE && page_struct_override) 838 pfn_sb->page_struct_size = cpu_to_le16(sizeof(struct page)); 839 else 840 pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE); 841 pfn_sb->page_size = cpu_to_le32(PAGE_SIZE); 842 checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); 843 pfn_sb->checksum = cpu_to_le64(checksum); 844 845 rc = nd_pfn_clear_memmap_errors(nd_pfn); 846 if (rc) 847 return rc; 848 849 return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0); 850 } 851 852 /* 853 * Determine the effective resource range and vmem_altmap from an nd_pfn 854 * instance. 855 */ 856 int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) 857 { 858 int rc; 859 860 if (!nd_pfn->uuid || !nd_pfn->ndns) 861 return -ENODEV; 862 863 rc = nd_pfn_init(nd_pfn); 864 if (rc) 865 return rc; 866 867 /* we need a valid pfn_sb before we can init a dev_pagemap */ 868 return __nvdimm_setup_pfn(nd_pfn, pgmap); 869 } 870 EXPORT_SYMBOL_GPL(nvdimm_setup_pfn); 871