1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * Memory subsystem support 4 * 5 * Written by Matt Tolentino <matthew.e.tolentino@intel.com> 6 * Dave Hansen <haveblue@us.ibm.com> 7 * 8 * This file provides the necessary infrastructure to represent 9 * a SPARSEMEM-memory-model system's physical memory in /sysfs. 10 * All arch-independent code that assumes MEMORY_HOTPLUG requires 11 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c. 12 */ 13 14 #include <linux/module.h> 15 #include <linux/init.h> 16 #include <linux/topology.h> 17 #include <linux/capability.h> 18 #include <linux/device.h> 19 #include <linux/memory.h> 20 #include <linux/memory_hotplug.h> 21 #include <linux/mm.h> 22 #include <linux/stat.h> 23 #include <linux/slab.h> 24 25 #include <linux/atomic.h> 26 #include <linux/uaccess.h> 27 28 #define MEMORY_CLASS_NAME "memory" 29 30 #define to_memory_block(dev) container_of(dev, struct memory_block, dev) 31 32 static int sections_per_block; 33 34 static inline unsigned long base_memory_block_id(unsigned long section_nr) 35 { 36 return section_nr / sections_per_block; 37 } 38 39 static inline unsigned long pfn_to_block_id(unsigned long pfn) 40 { 41 return base_memory_block_id(pfn_to_section_nr(pfn)); 42 } 43 44 static inline unsigned long phys_to_block_id(unsigned long phys) 45 { 46 return pfn_to_block_id(PFN_DOWN(phys)); 47 } 48 49 static int memory_subsys_online(struct device *dev); 50 static int memory_subsys_offline(struct device *dev); 51 52 static struct bus_type memory_subsys = { 53 .name = MEMORY_CLASS_NAME, 54 .dev_name = MEMORY_CLASS_NAME, 55 .online = memory_subsys_online, 56 .offline = memory_subsys_offline, 57 }; 58 59 static BLOCKING_NOTIFIER_HEAD(memory_chain); 60 61 int register_memory_notifier(struct notifier_block *nb) 62 { 63 return blocking_notifier_chain_register(&memory_chain, nb); 64 } 65 EXPORT_SYMBOL(register_memory_notifier); 66 67 void unregister_memory_notifier(struct notifier_block *nb) 68 { 69 blocking_notifier_chain_unregister(&memory_chain, nb); 70 } 71 EXPORT_SYMBOL(unregister_memory_notifier); 72 73 static void memory_block_release(struct device *dev) 74 { 75 struct memory_block *mem = to_memory_block(dev); 76 77 kfree(mem); 78 } 79 80 unsigned long __weak memory_block_size_bytes(void) 81 { 82 return MIN_MEMORY_BLOCK_SIZE; 83 } 84 EXPORT_SYMBOL_GPL(memory_block_size_bytes); 85 86 /* 87 * Show the first physical section index (number) of this memory block. 88 */ 89 static ssize_t phys_index_show(struct device *dev, 90 struct device_attribute *attr, char *buf) 91 { 92 struct memory_block *mem = to_memory_block(dev); 93 unsigned long phys_index; 94 95 phys_index = mem->start_section_nr / sections_per_block; 96 return sprintf(buf, "%08lx\n", phys_index); 97 } 98 99 /* 100 * Show whether the memory block is likely to be offlineable (or is already 101 * offline). Once offline, the memory block could be removed. The return 102 * value does, however, not indicate that there is a way to remove the 103 * memory block. 104 */ 105 static ssize_t removable_show(struct device *dev, struct device_attribute *attr, 106 char *buf) 107 { 108 struct memory_block *mem = to_memory_block(dev); 109 unsigned long pfn; 110 int ret = 1, i; 111 112 if (mem->state != MEM_ONLINE) 113 goto out; 114 115 for (i = 0; i < sections_per_block; i++) { 116 if (!present_section_nr(mem->start_section_nr + i)) 117 continue; 118 pfn = section_nr_to_pfn(mem->start_section_nr + i); 119 ret &= is_mem_section_removable(pfn, PAGES_PER_SECTION); 120 } 121 122 out: 123 return sprintf(buf, "%d\n", ret); 124 } 125 126 /* 127 * online, offline, going offline, etc. 128 */ 129 static ssize_t state_show(struct device *dev, struct device_attribute *attr, 130 char *buf) 131 { 132 struct memory_block *mem = to_memory_block(dev); 133 ssize_t len = 0; 134 135 /* 136 * We can probably put these states in a nice little array 137 * so that they're not open-coded 138 */ 139 switch (mem->state) { 140 case MEM_ONLINE: 141 len = sprintf(buf, "online\n"); 142 break; 143 case MEM_OFFLINE: 144 len = sprintf(buf, "offline\n"); 145 break; 146 case MEM_GOING_OFFLINE: 147 len = sprintf(buf, "going-offline\n"); 148 break; 149 default: 150 len = sprintf(buf, "ERROR-UNKNOWN-%ld\n", 151 mem->state); 152 WARN_ON(1); 153 break; 154 } 155 156 return len; 157 } 158 159 int memory_notify(unsigned long val, void *v) 160 { 161 return blocking_notifier_call_chain(&memory_chain, val, v); 162 } 163 164 /* 165 * The probe routines leave the pages uninitialized, just as the bootmem code 166 * does. Make sure we do not access them, but instead use only information from 167 * within sections. 168 */ 169 static bool pages_correctly_probed(unsigned long start_pfn) 170 { 171 unsigned long section_nr = pfn_to_section_nr(start_pfn); 172 unsigned long section_nr_end = section_nr + sections_per_block; 173 unsigned long pfn = start_pfn; 174 175 /* 176 * memmap between sections is not contiguous except with 177 * SPARSEMEM_VMEMMAP. We lookup the page once per section 178 * and assume memmap is contiguous within each section 179 */ 180 for (; section_nr < section_nr_end; section_nr++) { 181 if (WARN_ON_ONCE(!pfn_valid(pfn))) 182 return false; 183 184 if (!present_section_nr(section_nr)) { 185 pr_warn("section %ld pfn[%lx, %lx) not present\n", 186 section_nr, pfn, pfn + PAGES_PER_SECTION); 187 return false; 188 } else if (!valid_section_nr(section_nr)) { 189 pr_warn("section %ld pfn[%lx, %lx) no valid memmap\n", 190 section_nr, pfn, pfn + PAGES_PER_SECTION); 191 return false; 192 } else if (online_section_nr(section_nr)) { 193 pr_warn("section %ld pfn[%lx, %lx) is already online\n", 194 section_nr, pfn, pfn + PAGES_PER_SECTION); 195 return false; 196 } 197 pfn += PAGES_PER_SECTION; 198 } 199 200 return true; 201 } 202 203 /* 204 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is 205 * OK to have direct references to sparsemem variables in here. 206 */ 207 static int 208 memory_block_action(unsigned long start_section_nr, unsigned long action, 209 int online_type, int nid) 210 { 211 unsigned long start_pfn; 212 unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 213 int ret; 214 215 start_pfn = section_nr_to_pfn(start_section_nr); 216 217 switch (action) { 218 case MEM_ONLINE: 219 if (!pages_correctly_probed(start_pfn)) 220 return -EBUSY; 221 222 ret = online_pages(start_pfn, nr_pages, online_type, nid); 223 break; 224 case MEM_OFFLINE: 225 ret = offline_pages(start_pfn, nr_pages); 226 break; 227 default: 228 WARN(1, KERN_WARNING "%s(%ld, %ld) unknown action: " 229 "%ld\n", __func__, start_section_nr, action, action); 230 ret = -EINVAL; 231 } 232 233 return ret; 234 } 235 236 static int memory_block_change_state(struct memory_block *mem, 237 unsigned long to_state, unsigned long from_state_req) 238 { 239 int ret = 0; 240 241 if (mem->state != from_state_req) 242 return -EINVAL; 243 244 if (to_state == MEM_OFFLINE) 245 mem->state = MEM_GOING_OFFLINE; 246 247 ret = memory_block_action(mem->start_section_nr, to_state, 248 mem->online_type, mem->nid); 249 250 mem->state = ret ? from_state_req : to_state; 251 252 return ret; 253 } 254 255 /* The device lock serializes operations on memory_subsys_[online|offline] */ 256 static int memory_subsys_online(struct device *dev) 257 { 258 struct memory_block *mem = to_memory_block(dev); 259 int ret; 260 261 if (mem->state == MEM_ONLINE) 262 return 0; 263 264 /* 265 * If we are called from state_store(), online_type will be 266 * set >= 0 Otherwise we were called from the device online 267 * attribute and need to set the online_type. 268 */ 269 if (mem->online_type < 0) 270 mem->online_type = MMOP_ONLINE_KEEP; 271 272 ret = memory_block_change_state(mem, MEM_ONLINE, MEM_OFFLINE); 273 274 /* clear online_type */ 275 mem->online_type = -1; 276 277 return ret; 278 } 279 280 static int memory_subsys_offline(struct device *dev) 281 { 282 struct memory_block *mem = to_memory_block(dev); 283 284 if (mem->state == MEM_OFFLINE) 285 return 0; 286 287 /* Can't offline block with non-present sections */ 288 if (mem->section_count != sections_per_block) 289 return -EINVAL; 290 291 return memory_block_change_state(mem, MEM_OFFLINE, MEM_ONLINE); 292 } 293 294 static ssize_t state_store(struct device *dev, struct device_attribute *attr, 295 const char *buf, size_t count) 296 { 297 struct memory_block *mem = to_memory_block(dev); 298 int ret, online_type; 299 300 ret = lock_device_hotplug_sysfs(); 301 if (ret) 302 return ret; 303 304 if (sysfs_streq(buf, "online_kernel")) 305 online_type = MMOP_ONLINE_KERNEL; 306 else if (sysfs_streq(buf, "online_movable")) 307 online_type = MMOP_ONLINE_MOVABLE; 308 else if (sysfs_streq(buf, "online")) 309 online_type = MMOP_ONLINE_KEEP; 310 else if (sysfs_streq(buf, "offline")) 311 online_type = MMOP_OFFLINE; 312 else { 313 ret = -EINVAL; 314 goto err; 315 } 316 317 switch (online_type) { 318 case MMOP_ONLINE_KERNEL: 319 case MMOP_ONLINE_MOVABLE: 320 case MMOP_ONLINE_KEEP: 321 /* mem->online_type is protected by device_hotplug_lock */ 322 mem->online_type = online_type; 323 ret = device_online(&mem->dev); 324 break; 325 case MMOP_OFFLINE: 326 ret = device_offline(&mem->dev); 327 break; 328 default: 329 ret = -EINVAL; /* should never happen */ 330 } 331 332 err: 333 unlock_device_hotplug(); 334 335 if (ret < 0) 336 return ret; 337 if (ret) 338 return -EINVAL; 339 340 return count; 341 } 342 343 /* 344 * phys_device is a bad name for this. What I really want 345 * is a way to differentiate between memory ranges that 346 * are part of physical devices that constitute 347 * a complete removable unit or fru. 348 * i.e. do these ranges belong to the same physical device, 349 * s.t. if I offline all of these sections I can then 350 * remove the physical device? 351 */ 352 static ssize_t phys_device_show(struct device *dev, 353 struct device_attribute *attr, char *buf) 354 { 355 struct memory_block *mem = to_memory_block(dev); 356 return sprintf(buf, "%d\n", mem->phys_device); 357 } 358 359 #ifdef CONFIG_MEMORY_HOTREMOVE 360 static void print_allowed_zone(char *buf, int nid, unsigned long start_pfn, 361 unsigned long nr_pages, int online_type, 362 struct zone *default_zone) 363 { 364 struct zone *zone; 365 366 zone = zone_for_pfn_range(online_type, nid, start_pfn, nr_pages); 367 if (zone != default_zone) { 368 strcat(buf, " "); 369 strcat(buf, zone->name); 370 } 371 } 372 373 static ssize_t valid_zones_show(struct device *dev, 374 struct device_attribute *attr, char *buf) 375 { 376 struct memory_block *mem = to_memory_block(dev); 377 unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr); 378 unsigned long nr_pages = PAGES_PER_SECTION * sections_per_block; 379 unsigned long valid_start_pfn, valid_end_pfn; 380 struct zone *default_zone; 381 int nid; 382 383 /* 384 * Check the existing zone. Make sure that we do that only on the 385 * online nodes otherwise the page_zone is not reliable 386 */ 387 if (mem->state == MEM_ONLINE) { 388 /* 389 * The block contains more than one zone can not be offlined. 390 * This can happen e.g. for ZONE_DMA and ZONE_DMA32 391 */ 392 if (!test_pages_in_a_zone(start_pfn, start_pfn + nr_pages, 393 &valid_start_pfn, &valid_end_pfn)) 394 return sprintf(buf, "none\n"); 395 start_pfn = valid_start_pfn; 396 strcat(buf, page_zone(pfn_to_page(start_pfn))->name); 397 goto out; 398 } 399 400 nid = mem->nid; 401 default_zone = zone_for_pfn_range(MMOP_ONLINE_KEEP, nid, start_pfn, nr_pages); 402 strcat(buf, default_zone->name); 403 404 print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_KERNEL, 405 default_zone); 406 print_allowed_zone(buf, nid, start_pfn, nr_pages, MMOP_ONLINE_MOVABLE, 407 default_zone); 408 out: 409 strcat(buf, "\n"); 410 411 return strlen(buf); 412 } 413 static DEVICE_ATTR_RO(valid_zones); 414 #endif 415 416 static DEVICE_ATTR_RO(phys_index); 417 static DEVICE_ATTR_RW(state); 418 static DEVICE_ATTR_RO(phys_device); 419 static DEVICE_ATTR_RO(removable); 420 421 /* 422 * Show the memory block size (shared by all memory blocks). 423 */ 424 static ssize_t block_size_bytes_show(struct device *dev, 425 struct device_attribute *attr, char *buf) 426 { 427 return sprintf(buf, "%lx\n", memory_block_size_bytes()); 428 } 429 430 static DEVICE_ATTR_RO(block_size_bytes); 431 432 /* 433 * Memory auto online policy. 434 */ 435 436 static ssize_t auto_online_blocks_show(struct device *dev, 437 struct device_attribute *attr, char *buf) 438 { 439 if (memhp_auto_online) 440 return sprintf(buf, "online\n"); 441 else 442 return sprintf(buf, "offline\n"); 443 } 444 445 static ssize_t auto_online_blocks_store(struct device *dev, 446 struct device_attribute *attr, 447 const char *buf, size_t count) 448 { 449 if (sysfs_streq(buf, "online")) 450 memhp_auto_online = true; 451 else if (sysfs_streq(buf, "offline")) 452 memhp_auto_online = false; 453 else 454 return -EINVAL; 455 456 return count; 457 } 458 459 static DEVICE_ATTR_RW(auto_online_blocks); 460 461 /* 462 * Some architectures will have custom drivers to do this, and 463 * will not need to do it from userspace. The fake hot-add code 464 * as well as ppc64 will do all of their discovery in userspace 465 * and will require this interface. 466 */ 467 #ifdef CONFIG_ARCH_MEMORY_PROBE 468 static ssize_t probe_store(struct device *dev, struct device_attribute *attr, 469 const char *buf, size_t count) 470 { 471 u64 phys_addr; 472 int nid, ret; 473 unsigned long pages_per_block = PAGES_PER_SECTION * sections_per_block; 474 475 ret = kstrtoull(buf, 0, &phys_addr); 476 if (ret) 477 return ret; 478 479 if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1)) 480 return -EINVAL; 481 482 ret = lock_device_hotplug_sysfs(); 483 if (ret) 484 return ret; 485 486 nid = memory_add_physaddr_to_nid(phys_addr); 487 ret = __add_memory(nid, phys_addr, 488 MIN_MEMORY_BLOCK_SIZE * sections_per_block); 489 490 if (ret) 491 goto out; 492 493 ret = count; 494 out: 495 unlock_device_hotplug(); 496 return ret; 497 } 498 499 static DEVICE_ATTR_WO(probe); 500 #endif 501 502 #ifdef CONFIG_MEMORY_FAILURE 503 /* 504 * Support for offlining pages of memory 505 */ 506 507 /* Soft offline a page */ 508 static ssize_t soft_offline_page_store(struct device *dev, 509 struct device_attribute *attr, 510 const char *buf, size_t count) 511 { 512 int ret; 513 u64 pfn; 514 if (!capable(CAP_SYS_ADMIN)) 515 return -EPERM; 516 if (kstrtoull(buf, 0, &pfn) < 0) 517 return -EINVAL; 518 pfn >>= PAGE_SHIFT; 519 ret = soft_offline_page(pfn, 0); 520 return ret == 0 ? count : ret; 521 } 522 523 /* Forcibly offline a page, including killing processes. */ 524 static ssize_t hard_offline_page_store(struct device *dev, 525 struct device_attribute *attr, 526 const char *buf, size_t count) 527 { 528 int ret; 529 u64 pfn; 530 if (!capable(CAP_SYS_ADMIN)) 531 return -EPERM; 532 if (kstrtoull(buf, 0, &pfn) < 0) 533 return -EINVAL; 534 pfn >>= PAGE_SHIFT; 535 ret = memory_failure(pfn, 0); 536 return ret ? ret : count; 537 } 538 539 static DEVICE_ATTR_WO(soft_offline_page); 540 static DEVICE_ATTR_WO(hard_offline_page); 541 #endif 542 543 /* 544 * Note that phys_device is optional. It is here to allow for 545 * differentiation between which *physical* devices each 546 * section belongs to... 547 */ 548 int __weak arch_get_memory_phys_device(unsigned long start_pfn) 549 { 550 return 0; 551 } 552 553 /* A reference for the returned memory block device is acquired. */ 554 static struct memory_block *find_memory_block_by_id(unsigned long block_id) 555 { 556 struct device *dev; 557 558 dev = subsys_find_device_by_id(&memory_subsys, block_id, NULL); 559 return dev ? to_memory_block(dev) : NULL; 560 } 561 562 /* 563 * For now, we have a linear search to go find the appropriate 564 * memory_block corresponding to a particular phys_index. If 565 * this gets to be a real problem, we can always use a radix 566 * tree or something here. 567 * 568 * This could be made generic for all device subsystems. 569 */ 570 struct memory_block *find_memory_block(struct mem_section *section) 571 { 572 unsigned long block_id = base_memory_block_id(__section_nr(section)); 573 574 return find_memory_block_by_id(block_id); 575 } 576 577 static struct attribute *memory_memblk_attrs[] = { 578 &dev_attr_phys_index.attr, 579 &dev_attr_state.attr, 580 &dev_attr_phys_device.attr, 581 &dev_attr_removable.attr, 582 #ifdef CONFIG_MEMORY_HOTREMOVE 583 &dev_attr_valid_zones.attr, 584 #endif 585 NULL 586 }; 587 588 static struct attribute_group memory_memblk_attr_group = { 589 .attrs = memory_memblk_attrs, 590 }; 591 592 static const struct attribute_group *memory_memblk_attr_groups[] = { 593 &memory_memblk_attr_group, 594 NULL, 595 }; 596 597 /* 598 * register_memory - Setup a sysfs device for a memory block 599 */ 600 static 601 int register_memory(struct memory_block *memory) 602 { 603 int ret; 604 605 memory->dev.bus = &memory_subsys; 606 memory->dev.id = memory->start_section_nr / sections_per_block; 607 memory->dev.release = memory_block_release; 608 memory->dev.groups = memory_memblk_attr_groups; 609 memory->dev.offline = memory->state == MEM_OFFLINE; 610 611 ret = device_register(&memory->dev); 612 if (ret) 613 put_device(&memory->dev); 614 615 return ret; 616 } 617 618 static int init_memory_block(struct memory_block **memory, 619 unsigned long block_id, unsigned long state) 620 { 621 struct memory_block *mem; 622 unsigned long start_pfn; 623 int ret = 0; 624 625 mem = find_memory_block_by_id(block_id); 626 if (mem) { 627 put_device(&mem->dev); 628 return -EEXIST; 629 } 630 mem = kzalloc(sizeof(*mem), GFP_KERNEL); 631 if (!mem) 632 return -ENOMEM; 633 634 mem->start_section_nr = block_id * sections_per_block; 635 mem->state = state; 636 start_pfn = section_nr_to_pfn(mem->start_section_nr); 637 mem->phys_device = arch_get_memory_phys_device(start_pfn); 638 mem->nid = NUMA_NO_NODE; 639 640 ret = register_memory(mem); 641 642 *memory = mem; 643 return ret; 644 } 645 646 static int add_memory_block(unsigned long base_section_nr) 647 { 648 int ret, section_count = 0; 649 struct memory_block *mem; 650 unsigned long nr; 651 652 for (nr = base_section_nr; nr < base_section_nr + sections_per_block; 653 nr++) 654 if (present_section_nr(nr)) 655 section_count++; 656 657 if (section_count == 0) 658 return 0; 659 ret = init_memory_block(&mem, base_memory_block_id(base_section_nr), 660 MEM_ONLINE); 661 if (ret) 662 return ret; 663 mem->section_count = section_count; 664 return 0; 665 } 666 667 static void unregister_memory(struct memory_block *memory) 668 { 669 if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys)) 670 return; 671 672 /* drop the ref. we got via find_memory_block() */ 673 put_device(&memory->dev); 674 device_unregister(&memory->dev); 675 } 676 677 /* 678 * Create memory block devices for the given memory area. Start and size 679 * have to be aligned to memory block granularity. Memory block devices 680 * will be initialized as offline. 681 * 682 * Called under device_hotplug_lock. 683 */ 684 int create_memory_block_devices(unsigned long start, unsigned long size) 685 { 686 const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); 687 unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); 688 struct memory_block *mem; 689 unsigned long block_id; 690 int ret = 0; 691 692 if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || 693 !IS_ALIGNED(size, memory_block_size_bytes()))) 694 return -EINVAL; 695 696 for (block_id = start_block_id; block_id != end_block_id; block_id++) { 697 ret = init_memory_block(&mem, block_id, MEM_OFFLINE); 698 if (ret) 699 break; 700 mem->section_count = sections_per_block; 701 } 702 if (ret) { 703 end_block_id = block_id; 704 for (block_id = start_block_id; block_id != end_block_id; 705 block_id++) { 706 mem = find_memory_block_by_id(block_id); 707 if (WARN_ON_ONCE(!mem)) 708 continue; 709 mem->section_count = 0; 710 unregister_memory(mem); 711 } 712 } 713 return ret; 714 } 715 716 /* 717 * Remove memory block devices for the given memory area. Start and size 718 * have to be aligned to memory block granularity. Memory block devices 719 * have to be offline. 720 * 721 * Called under device_hotplug_lock. 722 */ 723 void remove_memory_block_devices(unsigned long start, unsigned long size) 724 { 725 const unsigned long start_block_id = pfn_to_block_id(PFN_DOWN(start)); 726 const unsigned long end_block_id = pfn_to_block_id(PFN_DOWN(start + size)); 727 struct memory_block *mem; 728 unsigned long block_id; 729 730 if (WARN_ON_ONCE(!IS_ALIGNED(start, memory_block_size_bytes()) || 731 !IS_ALIGNED(size, memory_block_size_bytes()))) 732 return; 733 734 for (block_id = start_block_id; block_id != end_block_id; block_id++) { 735 mem = find_memory_block_by_id(block_id); 736 if (WARN_ON_ONCE(!mem)) 737 continue; 738 mem->section_count = 0; 739 unregister_memory_block_under_nodes(mem); 740 unregister_memory(mem); 741 } 742 } 743 744 /* return true if the memory block is offlined, otherwise, return false */ 745 bool is_memblock_offlined(struct memory_block *mem) 746 { 747 return mem->state == MEM_OFFLINE; 748 } 749 750 static struct attribute *memory_root_attrs[] = { 751 #ifdef CONFIG_ARCH_MEMORY_PROBE 752 &dev_attr_probe.attr, 753 #endif 754 755 #ifdef CONFIG_MEMORY_FAILURE 756 &dev_attr_soft_offline_page.attr, 757 &dev_attr_hard_offline_page.attr, 758 #endif 759 760 &dev_attr_block_size_bytes.attr, 761 &dev_attr_auto_online_blocks.attr, 762 NULL 763 }; 764 765 static struct attribute_group memory_root_attr_group = { 766 .attrs = memory_root_attrs, 767 }; 768 769 static const struct attribute_group *memory_root_attr_groups[] = { 770 &memory_root_attr_group, 771 NULL, 772 }; 773 774 /* 775 * Initialize the sysfs support for memory devices. At the time this function 776 * is called, we cannot have concurrent creation/deletion of memory block 777 * devices, the device_hotplug_lock is not needed. 778 */ 779 void __init memory_dev_init(void) 780 { 781 int ret; 782 unsigned long block_sz, nr; 783 784 /* Validate the configured memory block size */ 785 block_sz = memory_block_size_bytes(); 786 if (!is_power_of_2(block_sz) || block_sz < MIN_MEMORY_BLOCK_SIZE) 787 panic("Memory block size not suitable: 0x%lx\n", block_sz); 788 sections_per_block = block_sz / MIN_MEMORY_BLOCK_SIZE; 789 790 ret = subsys_system_register(&memory_subsys, memory_root_attr_groups); 791 if (ret) 792 panic("%s() failed to register subsystem: %d\n", __func__, ret); 793 794 /* 795 * Create entries for memory sections that were found 796 * during boot and have been initialized 797 */ 798 for (nr = 0; nr <= __highest_present_section_nr; 799 nr += sections_per_block) { 800 ret = add_memory_block(nr); 801 if (ret) 802 panic("%s() failed to add memory block: %d\n", __func__, 803 ret); 804 } 805 } 806 807 /** 808 * walk_memory_blocks - walk through all present memory blocks overlapped 809 * by the range [start, start + size) 810 * 811 * @start: start address of the memory range 812 * @size: size of the memory range 813 * @arg: argument passed to func 814 * @func: callback for each memory section walked 815 * 816 * This function walks through all present memory blocks overlapped by the 817 * range [start, start + size), calling func on each memory block. 818 * 819 * In case func() returns an error, walking is aborted and the error is 820 * returned. 821 */ 822 int walk_memory_blocks(unsigned long start, unsigned long size, 823 void *arg, walk_memory_blocks_func_t func) 824 { 825 const unsigned long start_block_id = phys_to_block_id(start); 826 const unsigned long end_block_id = phys_to_block_id(start + size - 1); 827 struct memory_block *mem; 828 unsigned long block_id; 829 int ret = 0; 830 831 if (!size) 832 return 0; 833 834 for (block_id = start_block_id; block_id <= end_block_id; block_id++) { 835 mem = find_memory_block_by_id(block_id); 836 if (!mem) 837 continue; 838 839 ret = func(mem, arg); 840 put_device(&mem->dev); 841 if (ret) 842 break; 843 } 844 return ret; 845 } 846 847 struct for_each_memory_block_cb_data { 848 walk_memory_blocks_func_t func; 849 void *arg; 850 }; 851 852 static int for_each_memory_block_cb(struct device *dev, void *data) 853 { 854 struct memory_block *mem = to_memory_block(dev); 855 struct for_each_memory_block_cb_data *cb_data = data; 856 857 return cb_data->func(mem, cb_data->arg); 858 } 859 860 /** 861 * for_each_memory_block - walk through all present memory blocks 862 * 863 * @arg: argument passed to func 864 * @func: callback for each memory block walked 865 * 866 * This function walks through all present memory blocks, calling func on 867 * each memory block. 868 * 869 * In case func() returns an error, walking is aborted and the error is 870 * returned. 871 */ 872 int for_each_memory_block(void *arg, walk_memory_blocks_func_t func) 873 { 874 struct for_each_memory_block_cb_data cb_data = { 875 .func = func, 876 .arg = arg, 877 }; 878 879 return bus_for_each_dev(&memory_subsys, NULL, &cb_data, 880 for_each_memory_block_cb); 881 } 882