1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Copyright(c) 2013-2016 Intel Corporation. All rights reserved. 4 */ 5 #include <linux/memremap.h> 6 #include <linux/blkdev.h> 7 #include <linux/device.h> 8 #include <linux/genhd.h> 9 #include <linux/sizes.h> 10 #include <linux/slab.h> 11 #include <linux/fs.h> 12 #include <linux/mm.h> 13 #include "nd-core.h" 14 #include "pfn.h" 15 #include "nd.h" 16 17 static void nd_pfn_release(struct device *dev) 18 { 19 struct nd_region *nd_region = to_nd_region(dev->parent); 20 struct nd_pfn *nd_pfn = to_nd_pfn(dev); 21 22 dev_dbg(dev, "trace\n"); 23 nd_detach_ndns(&nd_pfn->dev, &nd_pfn->ndns); 24 ida_simple_remove(&nd_region->pfn_ida, nd_pfn->id); 25 kfree(nd_pfn->uuid); 26 kfree(nd_pfn); 27 } 28 29 static struct device_type nd_pfn_device_type = { 30 .name = "nd_pfn", 31 .release = nd_pfn_release, 32 }; 33 34 bool is_nd_pfn(struct device *dev) 35 { 36 return dev ? dev->type == &nd_pfn_device_type : false; 37 } 38 EXPORT_SYMBOL(is_nd_pfn); 39 40 struct nd_pfn *to_nd_pfn(struct device *dev) 41 { 42 struct nd_pfn *nd_pfn = container_of(dev, struct nd_pfn, dev); 43 44 WARN_ON(!is_nd_pfn(dev)); 45 return nd_pfn; 46 } 47 EXPORT_SYMBOL(to_nd_pfn); 48 49 static ssize_t mode_show(struct device *dev, 50 struct device_attribute *attr, char *buf) 51 { 52 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 53 54 switch (nd_pfn->mode) { 55 case PFN_MODE_RAM: 56 return sprintf(buf, "ram\n"); 57 case PFN_MODE_PMEM: 58 return sprintf(buf, "pmem\n"); 59 default: 60 return sprintf(buf, "none\n"); 61 } 62 } 63 64 static ssize_t mode_store(struct device *dev, 65 struct device_attribute *attr, const char *buf, size_t len) 66 { 67 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 68 ssize_t rc = 0; 69 70 nd_device_lock(dev); 71 nvdimm_bus_lock(dev); 72 if (dev->driver) 73 rc = -EBUSY; 74 else { 75 size_t n = len - 1; 76 77 if (strncmp(buf, "pmem\n", n) == 0 78 || strncmp(buf, "pmem", n) == 0) { 79 nd_pfn->mode = PFN_MODE_PMEM; 80 } else if (strncmp(buf, "ram\n", n) == 0 81 || strncmp(buf, "ram", n) == 0) 82 nd_pfn->mode = PFN_MODE_RAM; 83 else if (strncmp(buf, "none\n", n) == 0 84 || strncmp(buf, "none", n) == 0) 85 nd_pfn->mode = PFN_MODE_NONE; 86 else 87 rc = -EINVAL; 88 } 89 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 90 buf[len - 1] == '\n' ? "" : "\n"); 91 nvdimm_bus_unlock(dev); 92 nd_device_unlock(dev); 93 94 return rc ? rc : len; 95 } 96 static DEVICE_ATTR_RW(mode); 97 98 static ssize_t align_show(struct device *dev, 99 struct device_attribute *attr, char *buf) 100 { 101 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 102 103 return sprintf(buf, "%ld\n", nd_pfn->align); 104 } 105 106 static unsigned long *nd_pfn_supported_alignments(unsigned long *alignments) 107 { 108 109 alignments[0] = PAGE_SIZE; 110 111 if (has_transparent_hugepage()) { 112 alignments[1] = HPAGE_PMD_SIZE; 113 if (IS_ENABLED(CONFIG_HAVE_ARCH_TRANSPARENT_HUGEPAGE_PUD)) 114 alignments[2] = HPAGE_PUD_SIZE; 115 } 116 117 return alignments; 118 } 119 120 /* 121 * Use pmd mapping if supported as default alignment 122 */ 123 static unsigned long nd_pfn_default_alignment(void) 124 { 125 126 if (has_transparent_hugepage()) 127 return HPAGE_PMD_SIZE; 128 return PAGE_SIZE; 129 } 130 131 static ssize_t align_store(struct device *dev, 132 struct device_attribute *attr, const char *buf, size_t len) 133 { 134 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 135 unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; 136 ssize_t rc; 137 138 nd_device_lock(dev); 139 nvdimm_bus_lock(dev); 140 rc = nd_size_select_store(dev, buf, &nd_pfn->align, 141 nd_pfn_supported_alignments(aligns)); 142 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 143 buf[len - 1] == '\n' ? "" : "\n"); 144 nvdimm_bus_unlock(dev); 145 nd_device_unlock(dev); 146 147 return rc ? rc : len; 148 } 149 static DEVICE_ATTR_RW(align); 150 151 static ssize_t uuid_show(struct device *dev, 152 struct device_attribute *attr, char *buf) 153 { 154 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 155 156 if (nd_pfn->uuid) 157 return sprintf(buf, "%pUb\n", nd_pfn->uuid); 158 return sprintf(buf, "\n"); 159 } 160 161 static ssize_t uuid_store(struct device *dev, 162 struct device_attribute *attr, const char *buf, size_t len) 163 { 164 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 165 ssize_t rc; 166 167 nd_device_lock(dev); 168 rc = nd_uuid_store(dev, &nd_pfn->uuid, buf, len); 169 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 170 buf[len - 1] == '\n' ? "" : "\n"); 171 nd_device_unlock(dev); 172 173 return rc ? rc : len; 174 } 175 static DEVICE_ATTR_RW(uuid); 176 177 static ssize_t namespace_show(struct device *dev, 178 struct device_attribute *attr, char *buf) 179 { 180 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 181 ssize_t rc; 182 183 nvdimm_bus_lock(dev); 184 rc = sprintf(buf, "%s\n", nd_pfn->ndns 185 ? dev_name(&nd_pfn->ndns->dev) : ""); 186 nvdimm_bus_unlock(dev); 187 return rc; 188 } 189 190 static ssize_t namespace_store(struct device *dev, 191 struct device_attribute *attr, const char *buf, size_t len) 192 { 193 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 194 ssize_t rc; 195 196 nd_device_lock(dev); 197 nvdimm_bus_lock(dev); 198 rc = nd_namespace_store(dev, &nd_pfn->ndns, buf, len); 199 dev_dbg(dev, "result: %zd wrote: %s%s", rc, buf, 200 buf[len - 1] == '\n' ? "" : "\n"); 201 nvdimm_bus_unlock(dev); 202 nd_device_unlock(dev); 203 204 return rc; 205 } 206 static DEVICE_ATTR_RW(namespace); 207 208 static ssize_t resource_show(struct device *dev, 209 struct device_attribute *attr, char *buf) 210 { 211 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 212 ssize_t rc; 213 214 nd_device_lock(dev); 215 if (dev->driver) { 216 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 217 u64 offset = __le64_to_cpu(pfn_sb->dataoff); 218 struct nd_namespace_common *ndns = nd_pfn->ndns; 219 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); 220 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 221 222 rc = sprintf(buf, "%#llx\n", (unsigned long long) nsio->res.start 223 + start_pad + offset); 224 } else { 225 /* no address to convey if the pfn instance is disabled */ 226 rc = -ENXIO; 227 } 228 nd_device_unlock(dev); 229 230 return rc; 231 } 232 static DEVICE_ATTR_RO(resource); 233 234 static ssize_t size_show(struct device *dev, 235 struct device_attribute *attr, char *buf) 236 { 237 struct nd_pfn *nd_pfn = to_nd_pfn_safe(dev); 238 ssize_t rc; 239 240 nd_device_lock(dev); 241 if (dev->driver) { 242 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 243 u64 offset = __le64_to_cpu(pfn_sb->dataoff); 244 struct nd_namespace_common *ndns = nd_pfn->ndns; 245 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); 246 u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); 247 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 248 249 rc = sprintf(buf, "%llu\n", (unsigned long long) 250 resource_size(&nsio->res) - start_pad 251 - end_trunc - offset); 252 } else { 253 /* no size to convey if the pfn instance is disabled */ 254 rc = -ENXIO; 255 } 256 nd_device_unlock(dev); 257 258 return rc; 259 } 260 static DEVICE_ATTR_RO(size); 261 262 static ssize_t supported_alignments_show(struct device *dev, 263 struct device_attribute *attr, char *buf) 264 { 265 unsigned long aligns[MAX_NVDIMM_ALIGN] = { [0] = 0, }; 266 267 return nd_size_select_show(0, 268 nd_pfn_supported_alignments(aligns), buf); 269 } 270 static DEVICE_ATTR_RO(supported_alignments); 271 272 static struct attribute *nd_pfn_attributes[] = { 273 &dev_attr_mode.attr, 274 &dev_attr_namespace.attr, 275 &dev_attr_uuid.attr, 276 &dev_attr_align.attr, 277 &dev_attr_resource.attr, 278 &dev_attr_size.attr, 279 &dev_attr_supported_alignments.attr, 280 NULL, 281 }; 282 283 static umode_t pfn_visible(struct kobject *kobj, struct attribute *a, int n) 284 { 285 if (a == &dev_attr_resource.attr) 286 return 0400; 287 return a->mode; 288 } 289 290 struct attribute_group nd_pfn_attribute_group = { 291 .attrs = nd_pfn_attributes, 292 .is_visible = pfn_visible, 293 }; 294 295 static const struct attribute_group *nd_pfn_attribute_groups[] = { 296 &nd_pfn_attribute_group, 297 &nd_device_attribute_group, 298 &nd_numa_attribute_group, 299 NULL, 300 }; 301 302 struct device *nd_pfn_devinit(struct nd_pfn *nd_pfn, 303 struct nd_namespace_common *ndns) 304 { 305 struct device *dev; 306 307 if (!nd_pfn) 308 return NULL; 309 310 nd_pfn->mode = PFN_MODE_NONE; 311 nd_pfn->align = nd_pfn_default_alignment(); 312 dev = &nd_pfn->dev; 313 device_initialize(&nd_pfn->dev); 314 if (ndns && !__nd_attach_ndns(&nd_pfn->dev, ndns, &nd_pfn->ndns)) { 315 dev_dbg(&ndns->dev, "failed, already claimed by %s\n", 316 dev_name(ndns->claim)); 317 put_device(dev); 318 return NULL; 319 } 320 return dev; 321 } 322 323 static struct nd_pfn *nd_pfn_alloc(struct nd_region *nd_region) 324 { 325 struct nd_pfn *nd_pfn; 326 struct device *dev; 327 328 nd_pfn = kzalloc(sizeof(*nd_pfn), GFP_KERNEL); 329 if (!nd_pfn) 330 return NULL; 331 332 nd_pfn->id = ida_simple_get(&nd_region->pfn_ida, 0, 0, GFP_KERNEL); 333 if (nd_pfn->id < 0) { 334 kfree(nd_pfn); 335 return NULL; 336 } 337 338 dev = &nd_pfn->dev; 339 dev_set_name(dev, "pfn%d.%d", nd_region->id, nd_pfn->id); 340 dev->groups = nd_pfn_attribute_groups; 341 dev->type = &nd_pfn_device_type; 342 dev->parent = &nd_region->dev; 343 344 return nd_pfn; 345 } 346 347 struct device *nd_pfn_create(struct nd_region *nd_region) 348 { 349 struct nd_pfn *nd_pfn; 350 struct device *dev; 351 352 if (!is_memory(&nd_region->dev)) 353 return NULL; 354 355 nd_pfn = nd_pfn_alloc(nd_region); 356 dev = nd_pfn_devinit(nd_pfn, NULL); 357 358 __nd_device_register(dev); 359 return dev; 360 } 361 362 /* 363 * nd_pfn_clear_memmap_errors() clears any errors in the volatile memmap 364 * space associated with the namespace. If the memmap is set to DRAM, then 365 * this is a no-op. Since the memmap area is freshly initialized during 366 * probe, we have an opportunity to clear any badblocks in this area. 367 */ 368 static int nd_pfn_clear_memmap_errors(struct nd_pfn *nd_pfn) 369 { 370 struct nd_region *nd_region = to_nd_region(nd_pfn->dev.parent); 371 struct nd_namespace_common *ndns = nd_pfn->ndns; 372 void *zero_page = page_address(ZERO_PAGE(0)); 373 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 374 int num_bad, meta_num, rc, bb_present; 375 sector_t first_bad, meta_start; 376 struct nd_namespace_io *nsio; 377 378 if (nd_pfn->mode != PFN_MODE_PMEM) 379 return 0; 380 381 nsio = to_nd_namespace_io(&ndns->dev); 382 meta_start = (SZ_4K + sizeof(*pfn_sb)) >> 9; 383 meta_num = (le64_to_cpu(pfn_sb->dataoff) >> 9) - meta_start; 384 385 do { 386 unsigned long zero_len; 387 u64 nsoff; 388 389 bb_present = badblocks_check(&nd_region->bb, meta_start, 390 meta_num, &first_bad, &num_bad); 391 if (bb_present) { 392 dev_dbg(&nd_pfn->dev, "meta: %x badblocks at %llx\n", 393 num_bad, first_bad); 394 nsoff = ALIGN_DOWN((nd_region->ndr_start 395 + (first_bad << 9)) - nsio->res.start, 396 PAGE_SIZE); 397 zero_len = ALIGN(num_bad << 9, PAGE_SIZE); 398 while (zero_len) { 399 unsigned long chunk = min(zero_len, PAGE_SIZE); 400 401 rc = nvdimm_write_bytes(ndns, nsoff, zero_page, 402 chunk, 0); 403 if (rc) 404 break; 405 406 zero_len -= chunk; 407 nsoff += chunk; 408 } 409 if (rc) { 410 dev_err(&nd_pfn->dev, 411 "error clearing %x badblocks at %llx\n", 412 num_bad, first_bad); 413 return rc; 414 } 415 } 416 } while (bb_present); 417 418 return 0; 419 } 420 421 static bool nd_supported_alignment(unsigned long align) 422 { 423 int i; 424 unsigned long supported[MAX_NVDIMM_ALIGN] = { [0] = 0, }; 425 426 if (align == 0) 427 return false; 428 429 nd_pfn_supported_alignments(supported); 430 for (i = 0; supported[i]; i++) 431 if (align == supported[i]) 432 return true; 433 return false; 434 } 435 436 /** 437 * nd_pfn_validate - read and validate info-block 438 * @nd_pfn: fsdax namespace runtime state / properties 439 * @sig: 'devdax' or 'fsdax' signature 440 * 441 * Upon return the info-block buffer contents (->pfn_sb) are 442 * indeterminate when validation fails, and a coherent info-block 443 * otherwise. 444 */ 445 int nd_pfn_validate(struct nd_pfn *nd_pfn, const char *sig) 446 { 447 u64 checksum, offset; 448 enum nd_pfn_mode mode; 449 struct nd_namespace_io *nsio; 450 unsigned long align, start_pad; 451 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 452 struct nd_namespace_common *ndns = nd_pfn->ndns; 453 const u8 *parent_uuid = nd_dev_to_uuid(&ndns->dev); 454 455 if (!pfn_sb || !ndns) 456 return -ENODEV; 457 458 if (!is_memory(nd_pfn->dev.parent)) 459 return -ENODEV; 460 461 if (nvdimm_read_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0)) 462 return -ENXIO; 463 464 if (memcmp(pfn_sb->signature, sig, PFN_SIG_LEN) != 0) 465 return -ENODEV; 466 467 checksum = le64_to_cpu(pfn_sb->checksum); 468 pfn_sb->checksum = 0; 469 if (checksum != nd_sb_checksum((struct nd_gen_sb *) pfn_sb)) 470 return -ENODEV; 471 pfn_sb->checksum = cpu_to_le64(checksum); 472 473 if (memcmp(pfn_sb->parent_uuid, parent_uuid, 16) != 0) 474 return -ENODEV; 475 476 if (__le16_to_cpu(pfn_sb->version_minor) < 1) { 477 pfn_sb->start_pad = 0; 478 pfn_sb->end_trunc = 0; 479 } 480 481 if (__le16_to_cpu(pfn_sb->version_minor) < 2) 482 pfn_sb->align = 0; 483 484 if (__le16_to_cpu(pfn_sb->version_minor) < 4) { 485 pfn_sb->page_struct_size = cpu_to_le16(64); 486 pfn_sb->page_size = cpu_to_le32(PAGE_SIZE); 487 } 488 489 switch (le32_to_cpu(pfn_sb->mode)) { 490 case PFN_MODE_RAM: 491 case PFN_MODE_PMEM: 492 break; 493 default: 494 return -ENXIO; 495 } 496 497 align = le32_to_cpu(pfn_sb->align); 498 offset = le64_to_cpu(pfn_sb->dataoff); 499 start_pad = le32_to_cpu(pfn_sb->start_pad); 500 if (align == 0) 501 align = 1UL << ilog2(offset); 502 mode = le32_to_cpu(pfn_sb->mode); 503 504 if ((le32_to_cpu(pfn_sb->page_size) > PAGE_SIZE) && 505 (mode == PFN_MODE_PMEM)) { 506 dev_err(&nd_pfn->dev, 507 "init failed, page size mismatch %d\n", 508 le32_to_cpu(pfn_sb->page_size)); 509 return -EOPNOTSUPP; 510 } 511 512 if ((le16_to_cpu(pfn_sb->page_struct_size) < sizeof(struct page)) && 513 (mode == PFN_MODE_PMEM)) { 514 dev_err(&nd_pfn->dev, 515 "init failed, struct page size mismatch %d\n", 516 le16_to_cpu(pfn_sb->page_struct_size)); 517 return -EOPNOTSUPP; 518 } 519 520 /* 521 * Check whether the we support the alignment. For Dax if the 522 * superblock alignment is not matching, we won't initialize 523 * the device. 524 */ 525 if (!nd_supported_alignment(align) && 526 !memcmp(pfn_sb->signature, DAX_SIG, PFN_SIG_LEN)) { 527 dev_err(&nd_pfn->dev, "init failed, alignment mismatch: " 528 "%ld:%ld\n", nd_pfn->align, align); 529 return -EOPNOTSUPP; 530 } 531 532 if (!nd_pfn->uuid) { 533 /* 534 * When probing a namepace via nd_pfn_probe() the uuid 535 * is NULL (see: nd_pfn_devinit()) we init settings from 536 * pfn_sb 537 */ 538 nd_pfn->uuid = kmemdup(pfn_sb->uuid, 16, GFP_KERNEL); 539 if (!nd_pfn->uuid) 540 return -ENOMEM; 541 nd_pfn->align = align; 542 nd_pfn->mode = mode; 543 } else { 544 /* 545 * When probing a pfn / dax instance we validate the 546 * live settings against the pfn_sb 547 */ 548 if (memcmp(nd_pfn->uuid, pfn_sb->uuid, 16) != 0) 549 return -ENODEV; 550 551 /* 552 * If the uuid validates, but other settings mismatch 553 * return EINVAL because userspace has managed to change 554 * the configuration without specifying new 555 * identification. 556 */ 557 if (nd_pfn->align != align || nd_pfn->mode != mode) { 558 dev_err(&nd_pfn->dev, 559 "init failed, settings mismatch\n"); 560 dev_dbg(&nd_pfn->dev, "align: %lx:%lx mode: %d:%d\n", 561 nd_pfn->align, align, nd_pfn->mode, 562 mode); 563 return -EINVAL; 564 } 565 } 566 567 if (align > nvdimm_namespace_capacity(ndns)) { 568 dev_err(&nd_pfn->dev, "alignment: %lx exceeds capacity %llx\n", 569 align, nvdimm_namespace_capacity(ndns)); 570 return -EINVAL; 571 } 572 573 /* 574 * These warnings are verbose because they can only trigger in 575 * the case where the physical address alignment of the 576 * namespace has changed since the pfn superblock was 577 * established. 578 */ 579 nsio = to_nd_namespace_io(&ndns->dev); 580 if (offset >= resource_size(&nsio->res)) { 581 dev_err(&nd_pfn->dev, "pfn array size exceeds capacity of %s\n", 582 dev_name(&ndns->dev)); 583 return -EBUSY; 584 } 585 586 if ((align && !IS_ALIGNED(nsio->res.start + offset + start_pad, align)) 587 || !IS_ALIGNED(offset, PAGE_SIZE)) { 588 dev_err(&nd_pfn->dev, 589 "bad offset: %#llx dax disabled align: %#lx\n", 590 offset, align); 591 return -ENXIO; 592 } 593 594 return nd_pfn_clear_memmap_errors(nd_pfn); 595 } 596 EXPORT_SYMBOL(nd_pfn_validate); 597 598 int nd_pfn_probe(struct device *dev, struct nd_namespace_common *ndns) 599 { 600 int rc; 601 struct nd_pfn *nd_pfn; 602 struct device *pfn_dev; 603 struct nd_pfn_sb *pfn_sb; 604 struct nd_region *nd_region = to_nd_region(ndns->dev.parent); 605 606 if (ndns->force_raw) 607 return -ENODEV; 608 609 switch (ndns->claim_class) { 610 case NVDIMM_CCLASS_NONE: 611 case NVDIMM_CCLASS_PFN: 612 break; 613 default: 614 return -ENODEV; 615 } 616 617 nvdimm_bus_lock(&ndns->dev); 618 nd_pfn = nd_pfn_alloc(nd_region); 619 pfn_dev = nd_pfn_devinit(nd_pfn, ndns); 620 nvdimm_bus_unlock(&ndns->dev); 621 if (!pfn_dev) 622 return -ENOMEM; 623 pfn_sb = devm_kmalloc(dev, sizeof(*pfn_sb), GFP_KERNEL); 624 nd_pfn = to_nd_pfn(pfn_dev); 625 nd_pfn->pfn_sb = pfn_sb; 626 rc = nd_pfn_validate(nd_pfn, PFN_SIG); 627 dev_dbg(dev, "pfn: %s\n", rc == 0 ? dev_name(pfn_dev) : "<none>"); 628 if (rc < 0) { 629 nd_detach_ndns(pfn_dev, &nd_pfn->ndns); 630 put_device(pfn_dev); 631 } else 632 __nd_device_register(pfn_dev); 633 634 return rc; 635 } 636 EXPORT_SYMBOL(nd_pfn_probe); 637 638 static u32 info_block_reserve(void) 639 { 640 return ALIGN(SZ_8K, PAGE_SIZE); 641 } 642 643 /* 644 * We hotplug memory at sub-section granularity, pad the reserved area 645 * from the previous section base to the namespace base address. 646 */ 647 static unsigned long init_altmap_base(resource_size_t base) 648 { 649 unsigned long base_pfn = PHYS_PFN(base); 650 651 return SUBSECTION_ALIGN_DOWN(base_pfn); 652 } 653 654 static unsigned long init_altmap_reserve(resource_size_t base) 655 { 656 unsigned long reserve = info_block_reserve() >> PAGE_SHIFT; 657 unsigned long base_pfn = PHYS_PFN(base); 658 659 reserve += base_pfn - SUBSECTION_ALIGN_DOWN(base_pfn); 660 return reserve; 661 } 662 663 static int __nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) 664 { 665 struct resource *res = &pgmap->res; 666 struct vmem_altmap *altmap = &pgmap->altmap; 667 struct nd_pfn_sb *pfn_sb = nd_pfn->pfn_sb; 668 u64 offset = le64_to_cpu(pfn_sb->dataoff); 669 u32 start_pad = __le32_to_cpu(pfn_sb->start_pad); 670 u32 end_trunc = __le32_to_cpu(pfn_sb->end_trunc); 671 u32 reserve = info_block_reserve(); 672 struct nd_namespace_common *ndns = nd_pfn->ndns; 673 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 674 resource_size_t base = nsio->res.start + start_pad; 675 resource_size_t end = nsio->res.end - end_trunc; 676 struct vmem_altmap __altmap = { 677 .base_pfn = init_altmap_base(base), 678 .reserve = init_altmap_reserve(base), 679 .end_pfn = PHYS_PFN(end), 680 }; 681 682 memcpy(res, &nsio->res, sizeof(*res)); 683 res->start += start_pad; 684 res->end -= end_trunc; 685 686 if (nd_pfn->mode == PFN_MODE_RAM) { 687 if (offset < reserve) 688 return -EINVAL; 689 nd_pfn->npfns = le64_to_cpu(pfn_sb->npfns); 690 } else if (nd_pfn->mode == PFN_MODE_PMEM) { 691 nd_pfn->npfns = PHYS_PFN((resource_size(res) - offset)); 692 if (le64_to_cpu(nd_pfn->pfn_sb->npfns) > nd_pfn->npfns) 693 dev_info(&nd_pfn->dev, 694 "number of pfns truncated from %lld to %ld\n", 695 le64_to_cpu(nd_pfn->pfn_sb->npfns), 696 nd_pfn->npfns); 697 memcpy(altmap, &__altmap, sizeof(*altmap)); 698 altmap->free = PHYS_PFN(offset - reserve); 699 altmap->alloc = 0; 700 pgmap->flags |= PGMAP_ALTMAP_VALID; 701 } else 702 return -ENXIO; 703 704 return 0; 705 } 706 707 static int nd_pfn_init(struct nd_pfn *nd_pfn) 708 { 709 struct nd_namespace_common *ndns = nd_pfn->ndns; 710 struct nd_namespace_io *nsio = to_nd_namespace_io(&ndns->dev); 711 resource_size_t start, size; 712 struct nd_region *nd_region; 713 unsigned long npfns, align; 714 u32 end_trunc; 715 struct nd_pfn_sb *pfn_sb; 716 phys_addr_t offset; 717 const char *sig; 718 u64 checksum; 719 int rc; 720 721 pfn_sb = devm_kmalloc(&nd_pfn->dev, sizeof(*pfn_sb), GFP_KERNEL); 722 if (!pfn_sb) 723 return -ENOMEM; 724 725 nd_pfn->pfn_sb = pfn_sb; 726 if (is_nd_dax(&nd_pfn->dev)) 727 sig = DAX_SIG; 728 else 729 sig = PFN_SIG; 730 731 rc = nd_pfn_validate(nd_pfn, sig); 732 if (rc != -ENODEV) 733 return rc; 734 735 /* no info block, do init */; 736 memset(pfn_sb, 0, sizeof(*pfn_sb)); 737 738 nd_region = to_nd_region(nd_pfn->dev.parent); 739 if (nd_region->ro) { 740 dev_info(&nd_pfn->dev, 741 "%s is read-only, unable to init metadata\n", 742 dev_name(&nd_region->dev)); 743 return -ENXIO; 744 } 745 746 /* 747 * Note, we use 64 here for the standard size of struct page, 748 * debugging options may cause it to be larger in which case the 749 * implementation will limit the pfns advertised through 750 * ->direct_access() to those that are included in the memmap. 751 */ 752 start = nsio->res.start; 753 size = resource_size(&nsio->res); 754 npfns = PHYS_PFN(size - SZ_8K); 755 align = max(nd_pfn->align, (1UL << SUBSECTION_SHIFT)); 756 end_trunc = start + size - ALIGN_DOWN(start + size, align); 757 if (nd_pfn->mode == PFN_MODE_PMEM) { 758 /* 759 * The altmap should be padded out to the block size used 760 * when populating the vmemmap. This *should* be equal to 761 * PMD_SIZE for most architectures. 762 * 763 * Also make sure size of struct page is less than 64. We 764 * want to make sure we use large enough size here so that 765 * we don't have a dynamic reserve space depending on 766 * struct page size. But we also want to make sure we notice 767 * when we end up adding new elements to struct page. 768 */ 769 BUILD_BUG_ON(sizeof(struct page) > MAX_STRUCT_PAGE_SIZE); 770 offset = ALIGN(start + SZ_8K + MAX_STRUCT_PAGE_SIZE * npfns, align) 771 - start; 772 } else if (nd_pfn->mode == PFN_MODE_RAM) 773 offset = ALIGN(start + SZ_8K, align) - start; 774 else 775 return -ENXIO; 776 777 if (offset >= size) { 778 dev_err(&nd_pfn->dev, "%s unable to satisfy requested alignment\n", 779 dev_name(&ndns->dev)); 780 return -ENXIO; 781 } 782 783 npfns = PHYS_PFN(size - offset - end_trunc); 784 pfn_sb->mode = cpu_to_le32(nd_pfn->mode); 785 pfn_sb->dataoff = cpu_to_le64(offset); 786 pfn_sb->npfns = cpu_to_le64(npfns); 787 memcpy(pfn_sb->signature, sig, PFN_SIG_LEN); 788 memcpy(pfn_sb->uuid, nd_pfn->uuid, 16); 789 memcpy(pfn_sb->parent_uuid, nd_dev_to_uuid(&ndns->dev), 16); 790 pfn_sb->version_major = cpu_to_le16(1); 791 pfn_sb->version_minor = cpu_to_le16(4); 792 pfn_sb->end_trunc = cpu_to_le32(end_trunc); 793 pfn_sb->align = cpu_to_le32(nd_pfn->align); 794 pfn_sb->page_struct_size = cpu_to_le16(MAX_STRUCT_PAGE_SIZE); 795 pfn_sb->page_size = cpu_to_le32(PAGE_SIZE); 796 checksum = nd_sb_checksum((struct nd_gen_sb *) pfn_sb); 797 pfn_sb->checksum = cpu_to_le64(checksum); 798 799 return nvdimm_write_bytes(ndns, SZ_4K, pfn_sb, sizeof(*pfn_sb), 0); 800 } 801 802 /* 803 * Determine the effective resource range and vmem_altmap from an nd_pfn 804 * instance. 805 */ 806 int nvdimm_setup_pfn(struct nd_pfn *nd_pfn, struct dev_pagemap *pgmap) 807 { 808 int rc; 809 810 if (!nd_pfn->uuid || !nd_pfn->ndns) 811 return -ENODEV; 812 813 rc = nd_pfn_init(nd_pfn); 814 if (rc) 815 return rc; 816 817 /* we need a valid pfn_sb before we can init a dev_pagemap */ 818 return __nvdimm_setup_pfn(nd_pfn, pgmap); 819 } 820 EXPORT_SYMBOL_GPL(nvdimm_setup_pfn); 821