// SPDX-License-Identifier: GPL-2.0
/*
 * Memory subsystem support
 *
 * Written by Matt Tolentino <matthew.e.tolentino@intel.com>
 *            Dave Hansen <haveblue@us.ibm.com>
 *
 * This file provides the necessary infrastructure to represent
 * a SPARSEMEM-memory-model system's physical memory in /sysfs.
 * All arch-independent code that assumes MEMORY_HOTPLUG requires
 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
 */

#include <linux/module.h>
#include <linux/init.h>
#include <linux/topology.h>
#include <linux/capability.h>
#include <linux/device.h>
#include <linux/memory.h>
#include <linux/memory_hotplug.h>
#include <linux/mm.h>
#include <linux/stat.h>
#include <linux/slab.h>
#include <linux/xarray.h>

#include <linux/atomic.h>
#include <linux/uaccess.h>

#define MEMORY_CLASS_NAME	"memory"

static const char *const online_type_to_str[] = {
	[MMOP_OFFLINE] = "offline",
	[MMOP_ONLINE] = "online",
	[MMOP_ONLINE_KERNEL] = "online_kernel",
	[MMOP_ONLINE_MOVABLE] = "online_movable",
};

int mhp_online_type_from_str(const char *str)
{
	int i;

	for (i = 0; i < ARRAY_SIZE(online_type_to_str); i++) {
		if (sysfs_streq(str, online_type_to_str[i]))
			return i;
	}
	return -EINVAL;
}

#define to_memory_block(dev) container_of(dev, struct memory_block, dev)

static int sections_per_block;

static inline unsigned long memory_block_id(unsigned long section_nr)
{
	return section_nr / sections_per_block;
}

static inline unsigned long pfn_to_block_id(unsigned long pfn)
{
	return memory_block_id(pfn_to_section_nr(pfn));
}

static inline unsigned long phys_to_block_id(unsigned long phys)
{
	return pfn_to_block_id(PFN_DOWN(phys));
}

static int memory_subsys_online(struct device *dev);
static int memory_subsys_offline(struct device *dev);

static struct bus_type memory_subsys = {
	.name = MEMORY_CLASS_NAME,
	.dev_name = MEMORY_CLASS_NAME,
	.online = memory_subsys_online,
	.offline = memory_subsys_offline,
};

/*
 * Memory blocks are cached in a local xarray to avoid
 * a costly linear search for the corresponding device on
 * the subsystem bus.
 */
static DEFINE_XARRAY(memory_blocks);

static BLOCKING_NOTIFIER_HEAD(memory_chain);

int register_memory_notifier(struct notifier_block *nb)
{
	return blocking_notifier_chain_register(&memory_chain, nb);
}
EXPORT_SYMBOL(register_memory_notifier);

void unregister_memory_notifier(struct notifier_block *nb)
{
	blocking_notifier_chain_unregister(&memory_chain, nb);
}
EXPORT_SYMBOL(unregister_memory_notifier);

static void memory_block_release(struct device *dev)
{
	struct memory_block *mem = to_memory_block(dev);

	kfree(mem);
}

unsigned long __weak memory_block_size_bytes(void)
{
	return MIN_MEMORY_BLOCK_SIZE;
}
EXPORT_SYMBOL_GPL(memory_block_size_bytes);

/*
 * Show the first physical section index (number) of this memory block.
 */
static ssize_t phys_index_show(struct device *dev,
			       struct device_attribute *attr, char *buf)
{
	struct memory_block *mem = to_memory_block(dev);
	unsigned long phys_index;

	phys_index = mem->start_section_nr / sections_per_block;

	return sysfs_emit(buf, "%08lx\n", phys_index);
}

/*
 * Legacy interface that we cannot remove. Always indicate "removable"
 * with CONFIG_MEMORY_HOTREMOVE - a bad heuristic.
 */
static ssize_t removable_show(struct device *dev, struct device_attribute *attr,
			      char *buf)
{
	return sysfs_emit(buf, "%d\n", (int)IS_ENABLED(CONFIG_MEMORY_HOTREMOVE));
}

/*
 * online, offline, going offline, etc.
 */
static ssize_t state_show(struct device *dev, struct device_attribute *attr,
			  char *buf)
{
	struct memory_block *mem = to_memory_block(dev);
	const char *output;

	/*
	 * We can probably put these states in a nice little array
	 * so that they're not open-coded
	 */
	switch (mem->state) {
	case MEM_ONLINE:
		output = "online";
		break;
	case MEM_OFFLINE:
		output = "offline";
		break;
	case MEM_GOING_OFFLINE:
		output = "going-offline";
		break;
	default:
		WARN_ON(1);
		return sysfs_emit(buf, "ERROR-UNKNOWN-%ld\n", mem->state);
	}

	return sysfs_emit(buf, "%s\n", output);
}

int memory_notify(unsigned long val, void *v)
{
	return blocking_notifier_call_chain(&memory_chain, val, v);
}

static int memory_block_online(struct memory_block *mem)
{
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
	struct zone *zone;
	int ret;

	zone = zone_for_pfn_range(mem->online_type, mem->nid, start_pfn, nr_pages);

	/*
	 * Although vmemmap pages have a different lifecycle than the pages
	 * they describe (they remain until the memory is unplugged), doing
	 * their initialization and accounting at memory onlining/offlining
	 * stage helps to keep accounting easier to follow - e.g. vmemmap
	 * pages belong to the same zone as the memory they describe.
	 */
	if (nr_vmemmap_pages) {
		ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages, zone);
		if (ret)
			return ret;
	}

	ret = online_pages(start_pfn + nr_vmemmap_pages,
			   nr_pages - nr_vmemmap_pages, zone);
	if (ret) {
		if (nr_vmemmap_pages)
			mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);
		return ret;
	}

	/*
	 * Account once onlining succeeded. If the zone was unpopulated, it is
	 * now already properly populated.
	 */
	if (nr_vmemmap_pages)
		adjust_present_page_count(pfn_to_page(start_pfn),
					  nr_vmemmap_pages);

	return ret;
}
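
/*
 * Counterpart to memory_block_online(): unaccount the vmemmap pages, offline
 * the remaining pages and, on success, tear the vmemmap down again.
 */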
static int memory_block_offline(struct memory_block *mem)
{
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	unsigned long nr_vmemmap_pages = mem->nr_vmemmap_pages;
	int ret;

	/*
	 * Unaccount before offlining, such that unpopulated zone and kthreads
	 * can properly be torn down in offline_pages().
	 */
	if (nr_vmemmap_pages)
		adjust_present_page_count(pfn_to_page(start_pfn),
					  -nr_vmemmap_pages);

	ret = offline_pages(start_pfn + nr_vmemmap_pages,
			    nr_pages - nr_vmemmap_pages);
	if (ret) {
		/* offline_pages() failed. Account back. */
		if (nr_vmemmap_pages)
			adjust_present_page_count(pfn_to_page(start_pfn),
						  nr_vmemmap_pages);
		return ret;
	}

	if (nr_vmemmap_pages)
		mhp_deinit_memmap_on_memory(start_pfn, nr_vmemmap_pages);

	return ret;
}

/*
 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
 * OK to have direct references to sparsemem variables in here.
 */
static int
memory_block_action(struct memory_block *mem, unsigned long action)
{
	int ret;

	switch (action) {
	case MEM_ONLINE:
		ret = memory_block_online(mem);
		break;
	case MEM_OFFLINE:
		ret = memory_block_offline(mem);
		break;
	default:
		WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: "
		     "%ld\n", __func__, mem->start_section_nr, action, action);
		ret = -EINVAL;
	}

	return ret;
}

static int memory_block_change_state(struct memory_block *mem,
		unsigned long to_state, unsigned long from_state_req)
{
	int ret = 0;

	if (mem->state != from_state_req)
		return -EINVAL;

	if (to_state == MEM_OFFLINE)
		mem->state = MEM_GOING_OFFLINE;

	ret = memory_block_action(mem, to_state);
	mem->state = ret ? from_state_req : to_state;

	return ret;
}

/* The device lock serializes operations on memory_subsys_[online|offline] */
static int memory_subsys_online(struct device *dev)
{
	struct memory_block *mem = to_memory_block(dev);
	int ret;

	if (mem->state == MEM_ONLINE)
		return 0;

	/*
	 * When called via device_online() without configuring the online_type,
	 * we want to default to MMOP_ONLINE.
	 */
	if (mem->online_type == MMOP_OFFLINE)
		mem->online_type = MMOP_ONLINE;

	ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE);
	mem->online_type = MMOP_OFFLINE;

	return ret;
}

static int memory_subsys_offline(struct device *dev)
{
	struct memory_block *mem = to_memory_block(dev);

	if (mem->state == MEM_OFFLINE)
		return 0;

	return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE);
}

static ssize_t state_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
{
	const int online_type = mhp_online_type_from_str(buf);
	struct memory_block *mem = to_memory_block(dev);
	int ret;

	if (online_type < 0)
		return -EINVAL;

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	switch (online_type) {
	case MMOP_ONLINE_KERNEL:
	case MMOP_ONLINE_MOVABLE:
	case MMOP_ONLINE:
		/* mem->online_type is protected by device_hotplug_lock */
		mem->online_type = online_type;
		ret = device_online(&mem->dev);
		break;
	case MMOP_OFFLINE:
		ret = device_offline(&mem->dev);
		break;
	default:
		ret = -EINVAL; /* should never happen */
	}

	unlock_device_hotplug();

	if (ret < 0)
		return ret;
	if (ret)
		return -EINVAL;

	return count;
}
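
/*
 * For reference, the "state" attribute above is the knob userspace uses to
 * online/offline a block by hand, e.g. (the block number is illustrative):
 *
 *	# echo online_movable > /sys/devices/system/memory/memory32/state
 *	# echo offline > /sys/devices/system/memory/memory32/state
 *
 * Accepted strings are the entries of online_type_to_str[].
 */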

/*
 * Legacy interface that we cannot remove: s390x exposes the storage increment
 * covered by a memory block, allowing for identifying which memory blocks
 * comprise a storage increment. Since a memory block spans complete
 * storage increments nowadays, this interface is basically unused. Other
 * architectures never exposed a value != 0.
 */
static ssize_t phys_device_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct memory_block *mem = to_memory_block(dev);
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);

	return sysfs_emit(buf, "%d\n",
			  arch_get_memory_phys_device(start_pfn));
}

#ifdef CONFIG_MEMORY_HOTREMOVE
static int print_allowed_zone(char *buf, int len, int nid,
			      unsigned long start_pfn, unsigned long nr_pages,
			      int online_type, struct zone *default_zone)
{
	struct zone *zone;

	zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages);
	if (zone == default_zone)
		return 0;

	return sysfs_emit_at(buf, len, " %s", zone->name);
}

static ssize_t valid_zones_show(struct device *dev,
				struct device_attribute *attr, char *buf)
{
	struct memory_block *mem = to_memory_block(dev);
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block;
	struct zone *default_zone;
	int len = 0;
	int nid;

	/*
	 * Check the existing zone. Make sure that we do that only on the
	 * online nodes, otherwise page_zone() is not reliable.
	 */
	if (mem->state == MEM_ONLINE) {
		/*
		 * A block that contains more than one zone cannot be offlined.
		 * This can happen e.g. for ZONE_DMA and ZONE_DMA32.
		 */
		default_zone = test_pages_in_a_zone(start_pfn,
						    start_pfn + nr_pages);
		if (!default_zone)
			return sysfs_emit(buf, "%s\n", "none");
		len += sysfs_emit_at(buf, len, "%s", default_zone->name);
		goto out;
	}

	nid = mem->nid;
	default_zone = zone_for_pfn_range(MMOP_ONLINE, nid, start_pfn,
					  nr_pages);

	len += sysfs_emit_at(buf, len, "%s", default_zone->name);
	len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages,
				  MMOP_ONLINE_KERNEL, default_zone);
	len += print_allowed_zone(buf, len, nid, start_pfn, nr_pages,
				  MMOP_ONLINE_MOVABLE, default_zone);
out:
	len += sysfs_emit_at(buf, len, "\n");
	return len;
}
static DEVICE_ATTR_RO(valid_zones);
#endif

static DEVICE_ATTR_RO(phys_index);
static DEVICE_ATTR_RW(state);
static DEVICE_ATTR_RO(phys_device);
static DEVICE_ATTR_RO(removable);

/*
 * Show the memory block size (shared by all memory blocks).
 */
static ssize_t block_size_bytes_show(struct device *dev,
				     struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%lx\n", memory_block_size_bytes());
}

static DEVICE_ATTR_RO(block_size_bytes);

/*
 * Memory auto online policy.
 */

static ssize_t auto_online_blocks_show(struct device *dev,
				       struct device_attribute *attr, char *buf)
{
	return sysfs_emit(buf, "%s\n",
			  online_type_to_str[mhp_default_online_type]);
}

static ssize_t auto_online_blocks_store(struct device *dev,
					struct device_attribute *attr,
					const char *buf, size_t count)
{
	const int online_type = mhp_online_type_from_str(buf);

	if (online_type < 0)
		return -EINVAL;

	mhp_default_online_type = online_type;
	return count;
}

static DEVICE_ATTR_RW(auto_online_blocks);
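
/*
 * For reference, auto_online_blocks is a subsystem-wide policy file, e.g.:
 *
 *	# echo online_movable > /sys/devices/system/memory/auto_online_blocks
 *
 * Memory blocks added afterwards are onlined by the hotplug core according
 * to mhp_default_online_type instead of waiting for userspace.
 */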

/*
 * Some architectures will have custom drivers to do this, and
 * will not need to do it from userspace. The fake hot-add code
 * as well as ppc64 will do all of their discovery in userspace
 * and will require this interface.
 */
#ifdef CONFIG_ARCH_MEMORY_PROBE
static ssize_t probe_store(struct device *dev, struct device_attribute *attr,
			   const char *buf, size_t count)
{
	u64 phys_addr;
	int nid, ret;
	unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block;

	ret = kstrtoull(buf, 0, &phys_addr);
	if (ret)
		return ret;

	if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
		return -EINVAL;

	ret = lock_device_hotplug_sysfs();
	if (ret)
		return ret;

	nid = memory_add_physaddr_to_nid(phys_addr);
	ret = __add_memory(nid, phys_addr,
			   MIN_MEMORY_BLOCK_SIZE * sections_per_block,
			   MHP_NONE);

	if (ret)
		goto out;

	ret = count;
out:
	unlock_device_hotplug();
	return ret;
}

static DEVICE_ATTR_WO(probe);
#endif

#ifdef CONFIG_MEMORY_FAILURE
/*
 * Support for offlining pages of memory
 */

/* Soft offline a page */
static ssize_t soft_offline_page_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t count)
{
	int ret;
	u64 pfn;
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (kstrtoull(buf, 0, &pfn) < 0)
		return -EINVAL;
	pfn >>= PAGE_SHIFT;
	ret = soft_offline_page(pfn, 0);
	return ret == 0 ? count : ret;
}

/* Forcibly offline a page, including killing processes. */
static ssize_t hard_offline_page_store(struct device *dev,
				       struct device_attribute *attr,
				       const char *buf, size_t count)
{
	int ret;
	u64 pfn;
	if (!capable(CAP_SYS_ADMIN))
		return -EPERM;
	if (kstrtoull(buf, 0, &pfn) < 0)
		return -EINVAL;
	pfn >>= PAGE_SHIFT;
	ret = memory_failure(pfn, 0);
	return ret ? ret : count;
}

static DEVICE_ATTR_WO(soft_offline_page);
static DEVICE_ATTR_WO(hard_offline_page);
#endif

/* See phys_device_show(). */
int __weak arch_get_memory_phys_device(unsigned long start_pfn)
{
	return 0;
}

/*
 * A reference for the returned memory block device is acquired.
 *
 * Called under device_hotplug_lock.
 */
static struct memory_block *find_memory_block_by_id(unsigned long block_id)
{
	struct memory_block *mem;

	mem = xa_load(&memory_blocks, block_id);
	if (mem)
		get_device(&mem->dev);
	return mem;
}
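
/*
 * Typical lookup pattern (see walk_memory_blocks() below): the caller is
 * responsible for dropping the reference acquired above, e.g.:
 *
 *	mem = find_memory_block_by_id(block_id);
 *	if (mem) {
 *		...
 *		put_device(&mem->dev);
 *	}
 */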

/*
 * Called under device_hotplug_lock.
 */
struct memory_block *find_memory_block(struct mem_section *section)
{
	unsigned long block_id = memory_block_id(__section_nr(section));

	return find_memory_block_by_id(block_id);
}

static struct attribute *memory_memblk_attrs[] = {
	&dev_attr_phys_index.attr,
	&dev_attr_state.attr,
	&dev_attr_phys_device.attr,
	&dev_attr_removable.attr,
#ifdef CONFIG_MEMORY_HOTREMOVE
	&dev_attr_valid_zones.attr,
#endif
	NULL
};

static const struct attribute_group memory_memblk_attr_group = {
	.attrs = memory_memblk_attrs,
};

static const struct attribute_group *memory_memblk_attr_groups[] = {
	&memory_memblk_attr_group,
	NULL,
};

/*
 * register_memory - Setup a sysfs device for a memory block
 */
static
int register_memory(struct memory_block *memory)
{
	int ret;

	memory->dev.bus = &memory_subsys;
	memory->dev.id = memory->start_section_nr / sections_per_block;
	memory->dev.release = memory_block_release;
	memory->dev.groups = memory_memblk_attr_groups;
	memory->dev.offline = memory->state == MEM_OFFLINE;

	ret = device_register(&memory->dev);
	if (ret) {
		put_device(&memory->dev);
		return ret;
	}
	ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory,
			      GFP_KERNEL));
	if (ret) {
		put_device(&memory->dev);
		device_unregister(&memory->dev);
	}
	return ret;
}

static int init_memory_block(unsigned long block_id, unsigned long state,
			     unsigned long nr_vmemmap_pages)
{
	struct memory_block *mem;
	int ret = 0;

	mem = find_memory_block_by_id(block_id);
	if (mem) {
		put_device(&mem->dev);
		return -EEXIST;
	}
	mem = kzalloc(sizeof(*mem), GFP_KERNEL);
	if (!mem)
		return -ENOMEM;

	mem->start_section_nr = block_id * sections_per_block;
	mem->state = state;
	mem->nid = NUMA_NO_NODE;
	mem->nr_vmemmap_pages = nr_vmemmap_pages;

	ret = register_memory(mem);

	return ret;
}

static int add_memory_block(unsigned long base_section_nr)
{
	int section_count = 0;
	unsigned long nr;

	for (nr = base_section_nr; nr < base_section_nr + sections_per_block;
	     nr++)
		if (present_section_nr(nr))
			section_count++;

	if (section_count == 0)
		return 0;
	return init_memory_block(memory_block_id(base_section_nr),
				 MEM_ONLINE, 0);
}

static void unregister_memory(struct memory_block *memory)
{
	if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
		return;

	WARN_ON(xa_erase(&memory_blocks, memory->dev.id) == NULL);

	/* drop the ref. we got via find_memory_block_by_id() */
	put_device(&memory->dev);
	device_unregister(&memory->dev);
}

/*
 * Create memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * will be initialized as offline.
 *
 * Called under device_hotplug_lock.
 */
int create_memory_block_devices(unsigned long start, unsigned long size,
				unsigned long vmemmap_pages)
{
	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
	unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
	struct memory_block *mem;
	unsigned long block_id;
	int ret = 0;

	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
			 !IS_ALIGNED(size, memory_block_size_bytes())))
		return -EINVAL;

	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
		ret = init_memory_block(block_id, MEM_OFFLINE, vmemmap_pages);
		if (ret)
			break;
	}
	if (ret) {
		end_block_id = block_id;
		for (block_id = start_block_id; block_id != end_block_id;
		     block_id++) {
			mem = find_memory_block_by_id(block_id);
			if (WARN_ON_ONCE(!mem))
				continue;
			unregister_memory(mem);
		}
	}
	return ret;
}

/*
 * Remove memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * have to be offline.
 *
 * Called under device_hotplug_lock.
 */
void remove_memory_block_devices(unsigned long start, unsigned long size)
{
	const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start));
	const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size));
	struct memory_block *mem;
	unsigned long block_id;

	if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) ||
			 !IS_ALIGNED(size, memory_block_size_bytes())))
		return;

	for (block_id = start_block_id; block_id != end_block_id; block_id++) {
		mem = find_memory_block_by_id(block_id);
		if (WARN_ON_ONCE(!mem))
			continue;
		unregister_memory_block_under_nodes(mem);
		unregister_memory(mem);
	}
}

/* return true if the memory block is offlined, otherwise return false */
bool is_memblock_offlined(struct memory_block *mem)
{
	return mem->state == MEM_OFFLINE;
}

static struct attribute *memory_root_attrs[] = {
#ifdef CONFIG_ARCH_MEMORY_PROBE
	&dev_attr_probe.attr,
#endif

#ifdef CONFIG_MEMORY_FAILURE
	&dev_attr_soft_offline_page.attr,
	&dev_attr_hard_offline_page.attr,
#endif

	&dev_attr_block_size_bytes.attr,
	&dev_attr_auto_online_blocks.attr,
	NULL
};

static const struct attribute_group memory_root_attr_group = {
	.attrs = memory_root_attrs,
};

static const struct attribute_group *memory_root_attr_groups[] = {
	&memory_root_attr_group,
	NULL,
};
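
/*
 * The root attributes above appear directly under /sys/devices/system/memory/
 * once memory_dev_init() below has registered the subsystem; the per-block
 * attributes appear under /sys/devices/system/memory/memoryN/.
 */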

/*
 * Initialize the sysfs support for memory devices. At the time this function
 * is called, we cannot have concurrent creation/deletion of memory block
 * devices, so the device_hotplug_lock is not needed.
 */
void __init memory_dev_init(void)
{
	int ret;
	unsigned long block_sz, nr;

	/* Validate the configured memory block size */
	block_sz = memory_block_size_bytes();
	if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE)
		panic("Memory block size not suitable: 0x%lx\n", block_sz);
	sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE;

	ret = subsys_system_register(&memory_subsys, memory_root_attr_groups);
	if (ret)
		panic("%s() failed to register subsystem: %d\n", __func__, ret);

	/*
	 * Create entries for memory sections that were found
	 * during boot and have been initialized
	 */
	for (nr = 0; nr <= __highest_present_section_nr;
	     nr += sections_per_block) {
		ret = add_memory_block(nr);
		if (ret)
			panic("%s() failed to add memory block: %d\n", __func__,
			      ret);
	}
}

/**
 * walk_memory_blocks - walk through all present memory blocks overlapped
 *			by the range [start, start + size)
 *
 * @start: start address of the memory range
 * @size: size of the memory range
 * @arg: argument passed to func
 * @func: callback for each memory block walked
 *
 * This function walks through all present memory blocks overlapped by the
 * range [start, start + size), calling func on each memory block.
 *
 * In case func() returns an error, walking is aborted and the error is
 * returned.
 *
 * Called under device_hotplug_lock.
 */
int walk_memory_blocks(unsigned long start, unsigned long size,
		       void *arg, walk_memory_blocks_func_t func)
{
	const unsigned long start_block_id = phys_to_block_id(start);
	const unsigned long end_block_id = phys_to_block_id(start + size - 1);
	struct memory_block *mem;
	unsigned long block_id;
	int ret = 0;

	if (!size)
		return 0;

	for (block_id = start_block_id; block_id <= end_block_id; block_id++) {
		mem = find_memory_block_by_id(block_id);
		if (!mem)
			continue;

		ret = func(mem, arg);
		put_device(&mem->dev);
		if (ret)
			break;
	}
	return ret;
}

struct for_each_memory_block_cb_data {
	walk_memory_blocks_func_t func;
	void *arg;
};

static int for_each_memory_block_cb(struct device *dev, void *data)
{
	struct memory_block *mem = to_memory_block(dev);
	struct for_each_memory_block_cb_data *cb_data = data;

	return cb_data->func(mem, cb_data->arg);
}

/**
 * for_each_memory_block - walk through all present memory blocks
 *
 * @arg: argument passed to func
 * @func: callback for each memory block walked
 *
 * This function walks through all present memory blocks, calling func on
 * each memory block.
 *
 * In case func() returns an error, walking is aborted and the error is
 * returned.
 */
int for_each_memory_block(void *arg, walk_memory_blocks_func_t func)
{
	struct for_each_memory_block_cb_data cb_data = {
		.func = func,
		.arg = arg,
	};

	return bus_for_each_dev(&memory_subsys, NULL, &cb_data,
				for_each_memory_block_cb);
}