// SPDX-License-Identifier: GPL-2.0-only
/*
 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved.
 */
#include <linux/memremap.h>
#include <linux/blkdev.h>
#include <linux/device.h>
#include <linux/sizes.h>
#include <linux/slab.h>
#include <linux/fs.h>
#include <linux/mm.h>
#include "nd-core.h"
#include "pfn.h"
#include "nd.h"

static const bool page_struct_override = IS_ENABLED(CONFIG_NVDIMM_KMSAN);

static void nd_pfn_release(struct device *dev)
{
	struct nd_region *nd_region = to_nd_region(dev->parent);
	struct nd_pfn *nd_pfn = to_nd_pfn(dev);

	dev_dbg(dev, "trace\n");
	nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns);
	ida_free(&nd_region->pfn_ida, nd_pfn->id);
	kfree(nd_pfn->uuid);
	kfree(nd_pfn);
}

struct nd_pfn *to_nd_pfn(struct device *dev)
{
	struct nd_pfn *nd_pfn = container_of(dev, struct nd_pfn, dev);

	WARN_ON(!is_nd_pfn(dev));
	return nd_pfn;
}
EXPORT_SYMBOL(to_nd_pfn);

static ssize_t mode_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);

	switch (nd_pfn->mode) {
	case PFN_MODE_RAM:
		return sprintf(buf, "ram\n");
	case PFN_MODE_PMEM:
		return sprintf(buf, "pmem\n");
	default:
		return sprintf(buf, "none\n");
	}
}

static ssize_t mode_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
	ssize_t rc = 0;
	size_t n = len - 1;

	guard(device)(dev);
	guard(nvdimm_bus)(dev);
	if (dev->driver)
		return -EBUSY;

	if (strncmp(buf, "pmem\n", n) == 0
			|| strncmp(buf, "pmem", n) == 0)
		nd_pfn->mode = PFN_MODE_PMEM;
	else if (strncmp(buf, "ram\n", n) == 0
			|| strncmp(buf, "ram", n) == 0)
		nd_pfn->mode = PFN_MODE_RAM;
	else if (strncmp(buf, "none\n", n) == 0
			|| strncmp(buf, "none", n) == 0)
		nd_pfn->mode = PFN_MODE_NONE;
	else
		rc = -EINVAL;
	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
			buf[len - 1] == '\n' ? "" : "\n");

	return rc ? rc : len;
}
static DEVICE_ATTR_RW(mode);

static ssize_t align_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);

	return sprintf(buf, "%ld\n", nd_pfn->align);
}

static unsigned long *nd_pfn_supported_alignments(unsigned long *alignments)
{
	alignments[0] = PAGE_SIZE;

	if (has_transparent_hugepage()) {
		alignments[1] = HPAGE_PMD_SIZE;
		if (has_transparent_pud_hugepage())
			alignments[2] = HPAGE_PUD_SIZE;
	}

	return alignments;
}

/*
 * Use pmd mapping if supported as default alignment
 */
static unsigned long nd_pfn_default_alignment(void)
{
	if (has_transparent_hugepage())
		return HPAGE_PMD_SIZE;
	return PAGE_SIZE;
}

static ssize_t align_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
	unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, };
	ssize_t rc;

	guard(device)(dev);
	guard(nvdimm_bus)(dev);
	rc = nd_size_select_store(dev, buf, &nd_pfn->align,
			nd_pfn_supported_alignments(aligns));
	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
			buf[len - 1] == '\n' ? "" : "\n");

	return rc ? rc : len;
}
static DEVICE_ATTR_RW(align);
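/*
 * Illustrative usage of the 'mode' and 'align' attributes (device names
 * and values are examples only and depend on the platform and on
 * transparent-hugepage support):
 *
 *   # cat /sys/bus/nd/devices/pfn0.0/supported_alignments
 *   4096 2097152 1073741824
 *   # echo 2097152 > /sys/bus/nd/devices/pfn0.0/align
 *   # echo pmem > /sys/bus/nd/devices/pfn0.0/mode
 *
 * 'mode' writes fail with -EBUSY while a driver is attached to the device.
 */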
static ssize_t uuid_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);

	if (nd_pfn->uuid)
		return sprintf(buf, "%pUb\n", nd_pfn->uuid);
	return sprintf(buf, "\n");
}

static ssize_t uuid_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
	ssize_t rc;

	device_lock(dev);
	rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len);
	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
			buf[len - 1] == '\n' ? "" : "\n");
	device_unlock(dev);

	return rc ? rc : len;
}
static DEVICE_ATTR_RW(uuid);

static ssize_t namespace_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);

	guard(nvdimm_bus)(dev);
	return sprintf(buf, "%s\n", nd_pfn->ndns
			? dev_name(&nd_pfn->ndns->dev) : "");
}

static ssize_t namespace_store(struct device *dev,
		struct device_attribute *attr, const char *buf, size_t len)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
	ssize_t rc;

	guard(device)(dev);
	guard(nvdimm_bus)(dev);
	rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len);
	dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf,
			buf[len - 1] == '\n' ? "" : "\n");

	return rc;
}
static DEVICE_ATTR_RW(namespace);

static ssize_t resource_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
	ssize_t rc;

	device_lock(dev);
	if (dev->driver) {
		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
		u64 offset = __le64_to_cpu(pfn_sb->dataoff);
		struct nd_namespace_common *ndns = nd_pfn->ndns;
		u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
		struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);

		rc = sprintf(buf, "%#llx\n", (unsigned long long) nsio->res.start
				+ start_pad + offset);
	} else {
		/* no address to convey if the pfn instance is disabled */
		rc = -ENXIO;
	}
	device_unlock(dev);

	return rc;
}
static DEVICE_ATTR_ADMIN_RO(resource);

static ssize_t size_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev);
	ssize_t rc;

	device_lock(dev);
	if (dev->driver) {
		struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
		u64 offset = __le64_to_cpu(pfn_sb->dataoff);
		struct nd_namespace_common *ndns = nd_pfn->ndns;
		u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
		u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
		struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);

		rc = sprintf(buf, "%llu\n", (unsigned long long)
				resource_size(&nsio->res) - start_pad
				- end_trunc - offset);
	} else {
		/* no size to convey if the pfn instance is disabled */
		rc = -ENXIO;
	}
	device_unlock(dev);

	return rc;
}
static DEVICE_ATTR_RO(size);

static ssize_t supported_alignments_show(struct device *dev,
		struct device_attribute *attr, char *buf)
{
	unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, };

	return nd_size_select_show(0,
			nd_pfn_supported_alignments(aligns), buf);
}
static DEVICE_ATTR_RO(supported_alignments);
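/*
 * Attributes surfaced in sysfs for each pfn device. Note that 'resource'
 * and 'size' only have values to report while the device is enabled
 * (dev->driver set); otherwise the show routines above return -ENXIO.
 */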
static struct attribute *nd_pfn_attributes[] = {
	&dev_attr_mode.attr,
	&dev_attr_namespace.attr,
	&dev_attr_uuid.attr,
	&dev_attr_align.attr,
	&dev_attr_resource.attr,
	&dev_attr_size.attr,
	&dev_attr_supported_alignments.attr,
	NULL,
};

static struct attribute_group nd_pfn_attribute_group = {
	.attrs = nd_pfn_attributes,
};

const struct attribute_group *nd_pfn_attribute_groups[] = {
	&nd_pfn_attribute_group,
	&nd_device_attribute_group,
	&nd_numa_attribute_group,
	NULL,
};

static const struct device_type nd_pfn_device_type = {
	.name = "nd_pfn",
	.release = nd_pfn_release,
	.groups = nd_pfn_attribute_groups,
};

bool is_nd_pfn(struct device *dev)
{
	return dev ? dev->type == &nd_pfn_device_type : false;
}
EXPORT_SYMBOL(is_nd_pfn);

static struct lock_class_key nvdimm_pfn_key;

struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn,
		struct nd_namespace_common *ndns)
{
	struct device *dev;

	if (!nd_pfn)
		return NULL;

	nd_pfn->mode = PFN_MODE_NONE;
	nd_pfn->align = nd_pfn_default_alignment();
	dev = &nd_pfn->dev;
	device_initialize(&nd_pfn->dev);
	lockdep_set_class(&nd_pfn->dev.mutex, &nvdimm_pfn_key);
	if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) {
		dev_dbg(&ndns->dev, "failed, already claimed by %s\n",
				dev_name(ndns->claim));
		put_device(dev);
		return NULL;
	}
	return dev;
}

static struct nd_pfn *nd_pfn_alloc(struct nd_region *nd_region)
{
	struct nd_pfn *nd_pfn;
	struct device *dev;

	nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL);
	if (!nd_pfn)
		return NULL;

	nd_pfn->id = ida_alloc(&nd_region->pfn_ida, GFP_KERNEL);
	if (nd_pfn->id < 0) {
		kfree(nd_pfn);
		return NULL;
	}

	dev = &nd_pfn->dev;
	dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id);
	dev->type = &nd_pfn_device_type;
	dev->parent = &nd_region->dev;

	return nd_pfn;
}

struct device *nd_pfn_create(struct nd_region *nd_region)
{
	struct nd_pfn *nd_pfn;
	struct device *dev;

	if (!is_memory(&nd_region->dev))
		return NULL;

	nd_pfn = nd_pfn_alloc(nd_region);
	dev = nd_pfn_devinit(nd_pfn, NULL);

	nd_device_register(dev);
	return dev;
}
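/*
 * nd_pfn_create() deliberately passes a NULL namespace to nd_pfn_devinit():
 * the resulting "seed" pfn device is registered without a backing
 * namespace and only becomes usable once one is associated via the
 * 'namespace' attribute and the device is (re)enabled.
 */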
/*
 * nd_pfn_clear_memmap_errors() clears any errors in the volatile memmap
 * space associated with the namespace. If the memmap is set to DRAM, then
 * this is a no-op. Since the memmap area is freshly initialized during
 * probe, we have an opportunity to clear any badblocks in this area.
 */
static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn)
{
	struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent);
	struct nd_namespace_common *ndns = nd_pfn->ndns;
	void *zero_page = page_address(ZERO_PAGE(0));
	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
	int meta_num, rc, bb_present;
	sector_t first_bad, meta_start;
	struct nd_namespace_io *nsio;
	sector_t num_bad;

	if (nd_pfn->mode != PFN_MODE_PMEM)
		return 0;

	nsio = to_nd_namespace_io(&ndns->dev);
	meta_start = (SZ_4K + sizeof(*pfn_sb)) >> 9;
	meta_num = (le64_to_cpu(pfn_sb->dataoff) >> 9) - meta_start;

	/*
	 * re-enable the namespace with correct size so that we can access
	 * the device memmap area.
	 */
	devm_namespace_disable(&nd_pfn->dev, ndns);
	rc = devm_namespace_enable(&nd_pfn->dev, ndns, le64_to_cpu(pfn_sb->dataoff));
	if (rc)
		return rc;

	do {
		unsigned long zero_len;
		u64 nsoff;

		bb_present = badblocks_check(&nd_region->bb, meta_start,
				meta_num, &first_bad, &num_bad);
		if (bb_present) {
			dev_dbg(&nd_pfn->dev, "meta: %llx badblocks at %llx\n",
					num_bad, first_bad);
			nsoff = ALIGN_DOWN((nd_region->ndr_start
					+ (first_bad << 9)) - nsio->res.start,
					PAGE_SIZE);
			zero_len = ALIGN(num_bad << 9, PAGE_SIZE);
			while (zero_len) {
				unsigned long chunk = min(zero_len, PAGE_SIZE);

				rc = nvdimm_write_bytes(ndns, nsoff, zero_page,
						chunk, 0);
				if (rc)
					break;

				zero_len -= chunk;
				nsoff += chunk;
			}
			if (rc) {
				dev_err(&nd_pfn->dev,
					"error clearing %llx badblocks at %llx\n",
					num_bad, first_bad);
				return rc;
			}
		}
	} while (bb_present);

	return 0;
}

static bool nd_supported_alignment(unsigned long align)
{
	int i;
	unsigned long supported[MAX_NVDIMM_ALIGN] = { [0] = 0, };

	if (align == 0)
		return false;

	nd_pfn_supported_alignments(supported);
	for (i = 0; supported[i]; i++)
		if (align == supported[i])
			return true;
	return false;
}
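/*
 * Media layout assumed by the validation below (offsets relative to the
 * namespace base; see nd_pfn_init() for how the values are chosen):
 *
 *   0       - 4K:      reserved
 *   4K:                struct nd_pfn_sb info block (checksummed)
 *   8K      - dataoff: page map / altmap reservation (PFN_MODE_PMEM only)
 *   dataoff - end:     data exposed through the pmem/dax device
 */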
/**
 * nd_pfn_validate - read and validate info-block
 * @nd_pfn: fsdax namespace runtime state / properties
 * @sig: 'devdax' or 'fsdax' signature
 *
 * Upon return the info-block buffer contents (->pfn_sb) are
 * indeterminate when validation fails, and a coherent info-block
 * otherwise.
 */
int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig)
{
	u64 checksum, offset;
	struct resource *res;
	enum nd_pfn_mode mode;
	resource_size_t res_size;
	struct nd_namespace_io *nsio;
	unsigned long align, start_pad, end_trunc;
	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
	struct nd_namespace_common *ndns = nd_pfn->ndns;
	const uuid_t *parent_uuid = nd_dev_to_uuid(&ndns->dev);

	if (!pfn_sb || !ndns)
		return -ENODEV;

	if (!is_memory(nd_pfn->dev.parent))
		return -ENODEV;

	if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0))
		return -ENXIO;

	if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0)
		return -ENODEV;

	checksum = le64_to_cpu(pfn_sb->checksum);
	pfn_sb->checksum = 0;
	if (checksum != nd_sb_checksum((struct nd_gen_sb *) pfn_sb))
		return -ENODEV;
	pfn_sb->checksum = cpu_to_le64(checksum);

	if (memcmp(pfn_sb->parent_uuid, parent_uuid, 16) != 0)
		return -ENODEV;

	if (__le16_to_cpu(pfn_sb->version_minor) < 1) {
		pfn_sb->start_pad = 0;
		pfn_sb->end_trunc = 0;
	}

	if (__le16_to_cpu(pfn_sb->version_minor) < 2)
		pfn_sb->align = 0;

	if (__le16_to_cpu(pfn_sb->version_minor) < 4) {
		pfn_sb->page_struct_size = cpu_to_le16(64);
		pfn_sb->page_size = cpu_to_le32(PAGE_SIZE);
	}

	switch (le32_to_cpu(pfn_sb->mode)) {
	case PFN_MODE_RAM:
	case PFN_MODE_PMEM:
		break;
	default:
		return -ENXIO;
	}

	align = le32_to_cpu(pfn_sb->align);
	offset = le64_to_cpu(pfn_sb->dataoff);
	start_pad = le32_to_cpu(pfn_sb->start_pad);
	end_trunc = le32_to_cpu(pfn_sb->end_trunc);
	if (align == 0)
		align = 1UL << ilog2(offset);
	mode = le32_to_cpu(pfn_sb->mode);

	if ((le32_to_cpu(pfn_sb->page_size) > PAGE_SIZE) &&
			(mode == PFN_MODE_PMEM)) {
		dev_err(&nd_pfn->dev,
				"init failed, page size mismatch %d\n",
				le32_to_cpu(pfn_sb->page_size));
		return -EOPNOTSUPP;
	}

	if ((le16_to_cpu(pfn_sb->page_struct_size) < sizeof(struct page)) &&
			(mode == PFN_MODE_PMEM)) {
		dev_err(&nd_pfn->dev,
				"init failed, struct page size mismatch %d\n",
				le16_to_cpu(pfn_sb->page_struct_size));
		return -EOPNOTSUPP;
	}

	/*
	 * Check whether we support the alignment. For Dax if the
	 * superblock alignment is not matching, we won't initialize
	 * the device.
	 */
	if (!nd_supported_alignment(align) &&
			!memcmp(pfn_sb->signature, DAX_SIG, PFN_SIG_LEN)) {
		dev_err(&nd_pfn->dev, "init failed, alignment mismatch: "
				"%ld:%ld\n", nd_pfn->align, align);
		return -EOPNOTSUPP;
	}

	if (!nd_pfn->uuid) {
		/*
		 * When probing a namespace via nd_pfn_probe() the uuid
		 * is NULL (see: nd_pfn_devinit()); we init settings from
		 * pfn_sb
		 */
		nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL);
		if (!nd_pfn->uuid)
			return -ENOMEM;
		nd_pfn->align = align;
		nd_pfn->mode = mode;
	} else {
		/*
		 * When probing a pfn / dax instance we validate the
		 * live settings against the pfn_sb
		 */
		if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0)
			return -ENODEV;

		/*
		 * If the uuid validates, but other settings mismatch
		 * return EOPNOTSUPP because userspace has managed to
		 * change the configuration without specifying new
		 * identification.
		 */
		if (nd_pfn->align != align || nd_pfn->mode != mode) {
			dev_err(&nd_pfn->dev,
					"init failed, settings mismatch\n");
			dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n",
					nd_pfn->align, align, nd_pfn->mode,
					mode);
			return -EOPNOTSUPP;
		}
	}

	if (align > nvdimm_namespace_capacity(ndns)) {
		dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n",
				align, nvdimm_namespace_capacity(ndns));
		return -EOPNOTSUPP;
	}

	/*
	 * These warnings are verbose because they can only trigger in
	 * the case where the physical address alignment of the
	 * namespace has changed since the pfn superblock was
	 * established.
	 */
	nsio = to_nd_namespace_io(&ndns->dev);
	res = &nsio->res;
	res_size = resource_size(res);
	if (offset >= res_size) {
		dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n",
				dev_name(&ndns->dev));
		return -EOPNOTSUPP;
	}

	if ((align && !IS_ALIGNED(res->start + offset + start_pad, align))
			|| !IS_ALIGNED(offset, PAGE_SIZE)) {
		dev_err(&nd_pfn->dev,
				"bad offset: %#llx dax disabled align: %#lx\n",
				offset, align);
		return -EOPNOTSUPP;
	}

	if (!IS_ALIGNED(res->start + start_pad, memremap_compat_align())) {
		dev_err(&nd_pfn->dev, "resource start misaligned\n");
		return -EOPNOTSUPP;
	}

	if (!IS_ALIGNED(res->end + 1 - end_trunc, memremap_compat_align())) {
		dev_err(&nd_pfn->dev, "resource end misaligned\n");
		return -EOPNOTSUPP;
	}

	if (offset >= (res_size - start_pad - end_trunc)) {
		dev_err(&nd_pfn->dev, "bad offset with small namespace\n");
		return -EOPNOTSUPP;
	}
	return 0;
}
EXPORT_SYMBOL(nd_pfn_validate);

int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns)
{
	int rc;
	struct nd_pfn *nd_pfn;
	struct device *pfn_dev;
	struct nd_pfn_sb *pfn_sb;
	struct nd_region *nd_region = to_nd_region(ndns->dev.parent);

	if (ndns->force_raw)
		return -ENODEV;

	switch (ndns->claim_class) {
	case NVDIMM_CCLASS_NONE:
	case NVDIMM_CCLASS_PFN:
		break;
	default:
		return -ENODEV;
	}

	scoped_guard(nvdimm_bus, &ndns->dev) {
		nd_pfn = nd_pfn_alloc(nd_region);
		pfn_dev = nd_pfn_devinit(nd_pfn, ndns);
	}
	if (!pfn_dev)
		return -ENOMEM;
	pfn_sb = devm_kmalloc(dev, sizeof(*pfn_sb), GFP_KERNEL);
	nd_pfn = to_nd_pfn(pfn_dev);
	nd_pfn->pfn_sb = pfn_sb;
	rc = nd_pfn_validate(nd_pfn, PFN_SIG);
	dev_dbg(dev, "pfn: %s\n", rc == 0 ? dev_name(pfn_dev) : "<none>");
	if (rc < 0) {
		nd_detach_ndns(pfn_dev, &nd_pfn->ndns);
		put_device(pfn_dev);
	} else
		nd_device_register(pfn_dev);

	return rc;
}
EXPORT_SYMBOL(nd_pfn_probe);
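/*
 * nd_pfn_probe() is the autodetect path for an existing info block: given
 * a raw namespace it allocates a candidate pfn device, validates the
 * on-media PFN signature, and either registers the device (claiming the
 * namespace) or tears it down again when no valid info block is found.
 */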
/*
 * We hotplug memory at sub-section granularity, pad the reserved area
 * from the previous section base to the namespace base address.
 */
static unsigned long init_altmap_base(resource_size_t base)
{
	unsigned long base_pfn = PHYS_PFN(base);

	return SUBSECTION_ALIGN_DOWN(base_pfn);
}

static unsigned long init_altmap_reserve(resource_size_t base)
{
	unsigned long reserve = nd_info_block_reserve() >> PAGE_SHIFT;
	unsigned long base_pfn = PHYS_PFN(base);

	reserve += base_pfn - SUBSECTION_ALIGN_DOWN(base_pfn);
	return reserve;
}

static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
{
	struct range *range = &pgmap->range;
	struct vmem_altmap *altmap = &pgmap->altmap;
	struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb;
	u64 offset = le64_to_cpu(pfn_sb->dataoff);
	u32 start_pad = __le32_to_cpu(pfn_sb->start_pad);
	u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc);
	u32 reserve = nd_info_block_reserve();
	struct nd_namespace_common *ndns = nd_pfn->ndns;
	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
	resource_size_t base = nsio->res.start + start_pad;
	resource_size_t end = nsio->res.end - end_trunc;
	struct vmem_altmap __altmap = {
		.base_pfn = init_altmap_base(base),
		.reserve = init_altmap_reserve(base),
		.end_pfn = PHYS_PFN(end),
	};

	*range = (struct range) {
		.start = nsio->res.start + start_pad,
		.end = nsio->res.end - end_trunc,
	};
	pgmap->nr_range = 1;
	if (nd_pfn->mode == PFN_MODE_RAM) {
		if (offset < reserve)
			return -EINVAL;
		nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns);
	} else if (nd_pfn->mode == PFN_MODE_PMEM) {
		nd_pfn->npfns = PHYS_PFN((range_len(range) - offset));
		if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns)
			dev_info(&nd_pfn->dev,
					"number of pfns truncated from %lld to %ld\n",
					le64_to_cpu(nd_pfn->pfn_sb->npfns),
					nd_pfn->npfns);
		memcpy(altmap, &__altmap, sizeof(*altmap));
		altmap->free = PHYS_PFN(offset - reserve);
		altmap->alloc = 0;
		pgmap->flags |= PGMAP_ALTMAP_VALID;
	} else
		return -ENXIO;

	return 0;
}

static int nd_pfn_init(struct nd_pfn *nd_pfn)
{
	struct nd_namespace_common *ndns = nd_pfn->ndns;
	struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev);
	resource_size_t start, size;
	struct nd_region *nd_region;
	unsigned long npfns, align;
	u32 end_trunc;
	struct nd_pfn_sb *pfn_sb;
	phys_addr_t offset;
	const char *sig;
	u64 checksum;
	int rc;

	pfn_sb = devm_kmalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL);
	if (!pfn_sb)
		return -ENOMEM;

	nd_pfn->pfn_sb = pfn_sb;
	if (is_nd_dax(&nd_pfn->dev))
		sig = DAX_SIG;
	else
		sig = PFN_SIG;

	rc = nd_pfn_validate(nd_pfn, sig);
	if (rc == 0)
		return nd_pfn_clear_memmap_errors(nd_pfn);
	if (rc != -ENODEV)
		return rc;

	/* no info block, do init */
	memset(pfn_sb, 0, sizeof(*pfn_sb));

	nd_region = to_nd_region(nd_pfn->dev.parent);
	if (nd_region->ro) {
		dev_info(&nd_pfn->dev,
				"%s is read-only, unable to init metadata\n",
				dev_name(&nd_region->dev));
		return -ENXIO;
	}

	start = nsio->res.start;
	size = resource_size(&nsio->res);
	npfns = PHYS_PFN(size - SZ_8K);
	align = max(nd_pfn->align, memremap_compat_align());
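	/*
	 * Rough sizing for the PFN_MODE_PMEM branch below, assuming a 128GiB
	 * namespace, 2MiB alignment, and the worst case of 64 bytes per page
	 * reserved via MAX_STRUCT_PAGE_SIZE: npfns is ~33.5 million, so
	 * page_map_size is ~2GiB and 'offset' (pfn_sb->dataoff) lands just
	 * past the 2GiB mark, i.e. roughly 1/64th of the capacity is set
	 * aside for page metadata.
	 */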
	/*
	 * When @start is misaligned fail namespace creation. See
	 * the 'struct nd_pfn_sb' commentary on why ->start_pad is not
	 * an option.
	 */
	if (!IS_ALIGNED(start, memremap_compat_align())) {
		dev_err(&nd_pfn->dev, "%s: start %pa misaligned to %#lx\n",
				dev_name(&ndns->dev), &start,
				memremap_compat_align());
		return -EINVAL;
	}
	end_trunc = start + size - ALIGN_DOWN(start + size, align);
	if (nd_pfn->mode == PFN_MODE_PMEM) {
		unsigned long page_map_size = MAX_STRUCT_PAGE_SIZE * npfns;

		/*
		 * The altmap should be padded out to the block size used
		 * when populating the vmemmap. This *should* be equal to
		 * PMD_SIZE for most architectures.
		 *
		 * Also make sure size of struct page is less than
		 * MAX_STRUCT_PAGE_SIZE. The goal here is compatibility in the
		 * face of production kernel configurations that reduce the
		 * 'struct page' size below MAX_STRUCT_PAGE_SIZE. For debug
		 * kernel configurations that increase the 'struct page' size
		 * above MAX_STRUCT_PAGE_SIZE, the page_struct_override allows
		 * for continuing with the capacity that will be wasted when
		 * reverting to a production kernel configuration. Otherwise,
		 * those configurations are blocked by default.
		 */
		if (sizeof(struct page) > MAX_STRUCT_PAGE_SIZE) {
			if (page_struct_override)
				page_map_size = sizeof(struct page) * npfns;
			else {
				dev_err(&nd_pfn->dev,
					"Memory debug options prevent using pmem for the page map\n");
				return -EINVAL;
			}
		}
		offset = ALIGN(start + SZ_8K + page_map_size, align) - start;
	} else if (nd_pfn->mode == PFN_MODE_RAM)
		offset = ALIGN(start + SZ_8K, align) - start;
	else
		return -ENXIO;

	if (offset >= (size - end_trunc)) {
		/* This results in zero size devices */
		dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n",
				dev_name(&ndns->dev));
		return -ENXIO;
	}

	npfns = PHYS_PFN(size - offset - end_trunc);
	pfn_sb->mode = cpu_to_le32(nd_pfn->mode);
	pfn_sb->dataoff = cpu_to_le64(offset);
	pfn_sb->npfns = cpu_to_le64(npfns);
	memcpy(pfn_sb->signature, sig, PFN_SIG_LEN);
	memcpy(pfn_sb->uuid, nd_pfn->uuid, 16);
	memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16);
	pfn_sb->version_major = cpu_to_le16(1);
	pfn_sb->version_minor = cpu_to_le16(4);
	pfn_sb->end_trunc = cpu_to_le32(end_trunc);
	pfn_sb->align = cpu_to_le32(nd_pfn->align);
	if (sizeof(struct page) > MAX_STRUCT_PAGE_SIZE && page_struct_override)
		pfn_sb->page_struct_size = cpu_to_le16(sizeof(struct page));
	else
		pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE);
	pfn_sb->page_size = cpu_to_le32(PAGE_SIZE);
	checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb);
	pfn_sb->checksum = cpu_to_le64(checksum);

	rc = nd_pfn_clear_memmap_errors(nd_pfn);
	if (rc)
		return rc;

	return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0);
}

/*
 * Determine the effective resource range and vmem_altmap from an nd_pfn
 * instance.
 */
int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap)
{
	int rc;

	if (!nd_pfn->uuid || !nd_pfn->ndns)
		return -ENODEV;

	rc = nd_pfn_init(nd_pfn);
	if (rc)
		return rc;

	/* we need a valid pfn_sb before we can init a dev_pagemap */
	return __nvdimm_setup_pfn(nd_pfn, pgmap);
}
EXPORT_SYMBOL_GPL(nvdimm_setup_pfn);