// SPDX-License-Identifier: GPL-2.0
/*
 * Memory subsystem support
 *
 * This file provides the necessary infrastructure to represent
 * a SPARSEMEM-memory-model system's physical memory in /sysfs.
 * All arch-independent code that assumes MEMORY_HOTPLUG requires
 * SPARSEMEM should be contained here, or in mm/memory_hotplug.c.
 */

#include <linux/memory.h>

#define MEMORY_CLASS_NAME	"memory"
	return -EINVAL;		/* in mhp_online_type_from_str() */
/*
 * Memory blocks are cached in a local radix tree to avoid a costly
 * linear search for the corresponding device on the subsystem bus.
 */

/* Memory groups, indexed by memory group id (mgid). */
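/*
 * The backing stores for both caches are elided in this excerpt. A minimal
 * sketch of what the two comments above describe, assuming the xarray API
 * (the "radix tree" wording predates it); the names memory_blocks and
 * memory_groups are confirmed by the xa_store()/xa_erase() calls below:
 */
static DEFINE_XARRAY(memory_blocks);				/* block_id -> memory_block */
static DEFINE_XARRAY_FLAGS(memory_groups, XA_FLAGS_ALLOC);	/* mgid -> memory_group */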
	WARN_ON(mem->altmap);	/* in memory_block_release() */

/* Show the memory block ID, relative to the memory block size */
/* in phys_index_show() */
	return sysfs_emit(buf, "%08lx\n", memory_block_id(mem->start_section_nr));
 * with CONFIG_MEMORY_HOTREMOVE - bad heuristic.
/* in state_show() */
	/*
	 * We can probably put these states in a nice little array
	 * so that they're not open-coded.
	 */
	switch (mem->state) {
	...
		output = "going-offline";
	...
	return sysfs_emit(buf, "ERROR-UNKNOWN-%ld\n", mem->state);
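/*
 * A minimal sketch of the "nice little array" the comment above suggests
 * (illustrative only, not part of this file). The MEM_* states from
 * <linux/memory.h> are single-bit flags, so the table is sparse; a NULL
 * entry corresponds to the ERROR-UNKNOWN case above.
 */
static const char * const example_state_names[] = {
	[MEM_ONLINE]		= "online",
	[MEM_GOING_OFFLINE]	= "going-offline",
	[MEM_OFFLINE]		= "offline",
};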
/* in memory_block_online() */
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	...
	if (memblk_nr_poison(mem))
		return -EHWPOISON;
	zone = zone_for_pfn_range(mem->online_type, mem->nid, mem->group,
				  start_pfn, nr_pages);
	/*
	 * Although vmemmap pages have a different lifecycle than the pages
	 * they describe (they remain until the memory is unplugged), doing
	 * their initialization and accounting at memory onlining/offlining
	 * stage helps to keep accounting easier to follow - e.g. vmemmap
	 * pages belong to the same zone as the memory they back.
	 */
	if (mem->altmap)
		nr_vmemmap_pages = mem->altmap->free;
	...
	arg.nr_pages = nr_pages - nr_vmemmap_pages;
	...
		ret = mhp_init_memmap_on_memory(start_pfn, nr_vmemmap_pages,
						zone, mem->altmap->inaccessible);
	...
	ret = online_pages(start_pfn + nr_vmemmap_pages,
			   nr_pages - nr_vmemmap_pages, zone, mem->group);
	...
		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
					  nr_vmemmap_pages);
	...
	mem->zone = zone;
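/*
 * Worked example of the accounting above (the numbers are assumptions, not
 * taken from this file): with 4 KiB pages and a 64-byte struct page, a
 * 128 MiB block holds 32768 pages, and a vmemmap carved out of the block
 * itself (altmap) occupies 2 MiB of it. Only the remainder reaches the buddy.
 */
static unsigned long example_buddy_pages(void)
{
	const unsigned long nr_pages = SZ_128M / SZ_4K;		/* 32768 */
	const unsigned long nr_vmemmap_pages =
		(nr_pages * sizeof(struct page)) / SZ_4K;	/* 512, assuming 64-byte struct page */

	return nr_pages - nr_vmemmap_pages;			/* 32256 pages for the buddy */
}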
/* in memory_block_offline() */
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	...
	if (!mem->zone)
		return -EINVAL;
	...
	if (mem->altmap)
		nr_vmemmap_pages = mem->altmap->free;
	...
		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
					  -nr_vmemmap_pages);
	...
	ret = offline_pages(start_pfn + nr_vmemmap_pages,
			    nr_pages - nr_vmemmap_pages, mem->zone, mem->group);
	...
		adjust_present_page_count(pfn_to_page(start_pfn), mem->group,
					  nr_vmemmap_pages);
	...
	mem->zone = NULL;
	...
	arg.nr_pages = nr_pages - nr_vmemmap_pages;
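/*
 * Hypothetical consumer sketch (driver-side, not part of this file): the
 * struct memory_notify filled in above travels the memory hotplug notifier
 * chain, so callbacks registered with register_memory_notifier() observe
 * PFN ranges that already exclude the altmap (vmemmap) pages.
 */
static int example_mem_notifier(struct notifier_block *nb,
				unsigned long action, void *data)
{
	struct memory_notify *mn = data;

	switch (action) {
	case MEM_ONLINE:
		pr_info("onlined %lu pages at PFN %lu\n",
			mn->nr_pages, mn->start_pfn);
		break;
	case MEM_OFFLINE:
		pr_info("offlined %lu pages at PFN %lu\n",
			mn->nr_pages, mn->start_pfn);
		break;
	}
	return NOTIFY_OK;
}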
/*
 * MEMORY_HOTPLUG depends on SPARSEMEM in mm/Kconfig, so it is
 * OK to have direct references to sparsemem variables in here.
 */

/* in memory_block_action() */
	...
		     "%ld\n", __func__, mem->start_section_nr, action, action);
		ret = -EINVAL;
/* in memory_block_change_state() */
	if (mem->state != from_state_req)
		return -EINVAL;

	if (to_state == MEM_OFFLINE)
		mem->state = MEM_GOING_OFFLINE;
	...
	mem->state = ret ? from_state_req : to_state;
/* in memory_subsys_online() */
	if (mem->state == MEM_ONLINE)
	...
	/*
	 * When called via device_online() without configuring the online_type,
	 * we want to default to MMOP_ONLINE.
	 */
	if (mem->online_type == MMOP_OFFLINE)
		mem->online_type = MMOP_ONLINE;
	...
	mem->online_type = MMOP_OFFLINE;

	if (mem->state == MEM_OFFLINE)	/* in memory_subsys_offline() */
/* in state_store() */
	if (online_type < 0)
		return -EINVAL;
	...
		/* mem->online_type is protected by device_hotplug_lock */
		mem->online_type = online_type;
		ret = device_online(&mem->dev);
	...
		ret = device_offline(&mem->dev);
	...
		ret = -EINVAL; /* should never happen */
	...
		return -EINVAL;
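/*
 * User-space view of state_store() (a sketch, not kernel code; the block
 * index 32 is a made-up example). Writing one of "online", "online_kernel",
 * "online_movable" or "offline" funnels into the switch above under
 * device_hotplug_lock:
 */
#include <stdio.h>

static int example_online_block(void)
{
	FILE *f = fopen("/sys/devices/system/memory/memory32/state", "w");

	if (!f)
		return -1;
	fputs("online", f);
	return fclose(f);	/* 0 on success */
}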
/*
 * Legacy interface that we cannot remove: s390x exposes the storage increment
 * covered by a memory block, allowing for identifying which memory blocks
 * comprise a storage increment. Since a memory block spans complete
 * storage increments, ...
 */

/* in phys_device_show() */
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);

	return sysfs_emit_at(buf, len, " %s", zone->name);	/* in print_allowed_zone() */
/* in valid_zones_show() */
	unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	struct memory_group *group = mem->group;
	int nid = mem->nid;
	...
	if (mem->state == MEM_ONLINE) {
		/*
		 * If !mem->zone, the memory block spans multiple zones and
		 * cannot get offlined.
		 */
		default_zone = mem->zone;
		...
		len += sysfs_emit_at(buf, len, "%s", default_zone->name);
	...
	len += sysfs_emit_at(buf, len, "%s", default_zone->name);
/* Show the memory block size (shared by all memory blocks). */

/* Memory auto online policy. */

		return -EINVAL;		/* in auto_online_blocks_store() */
/*
 * Some architectures will have custom drivers to do this, and
 * will not need to do it from userspace. The fake hot-add code
 * as well as ppc64 will do all of their discovery in userspace
 * and will require this interface.
 */

/* in probe_store() */
	if (phys_addr & ((pages_per_block << PAGE_SHIFT) - 1))
		return -EINVAL;
/* Support for offlining pages of memory */

/* in soft_offline_page_store() */
		return -EPERM;
	...
		return -EINVAL;

/* in hard_offline_page_store() */
		return -EPERM;
	...
		return -EINVAL;
	...
	if (ret == -EOPNOTSUPP)
/* A reference for the returned memory block device is acquired. */

		get_device(&mem->dev);	/* in find_memory_block_by_id() */
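/*
 * Hypothetical caller sketch (not in the original file): because the lookup
 * takes a reference via get_device() above, every successful
 * find_memory_block_by_id() must be balanced with a put_device().
 */
static void example_with_block(unsigned long block_id)
{
	struct memory_block *mem = find_memory_block_by_id(block_id);

	if (!mem)
		return;
	/* ... operate on mem ... */
	put_device(&mem->dev);
}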
static int __add_memory_block(struct memory_block *memory)
{
	...
	memory->dev.bus = &memory_subsys;
	memory->dev.id = memory->start_section_nr / sections_per_block;
	memory->dev.release = memory_block_release;
	memory->dev.groups = memory_memblk_attr_groups;
	memory->dev.offline = memory->state == MEM_OFFLINE;

	ret = device_register(&memory->dev);
	...
		put_device(&memory->dev);
	...
	ret = xa_err(xa_store(&memory_blocks, memory->dev.id, memory,
			      GFP_KERNEL));
	...
		device_unregister(&memory->dev);
/* in early_node_zone_for_memory_block() */
	const unsigned long start_pfn = section_nr_to_pfn(mem->start_section_nr);
	...
	/*
	 * This logic only works for early memory, when the applicable zones
	 * already span the memory block. We don't expect overlapping zones on
	 * a single node for early memory. So if we're told that some PFNs
	 * of a node fall into this memory block, we can assume that all node
	 * zones that intersect with the memory block are actually applicable.
	 * No need to look at the memmap.
	 */
	...
		zone = pgdat->node_zones + i;
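/*
 * Plausible reconstruction of the elided lookup loop (a sketch built from
 * the comment above, not copied verbatim from the file): return the single
 * populated node zone intersecting the block, or NULL if none or several do.
 */
static struct zone *example_single_intersecting_zone(pg_data_t *pgdat,
						     unsigned long start_pfn,
						     unsigned long nr_pages)
{
	struct zone *zone, *matching_zone = NULL;
	int i;

	for (i = 0; i < MAX_NR_ZONES; i++) {
		zone = pgdat->node_zones + i;
		if (!populated_zone(zone) ||
		    !zone_intersects(zone, start_pfn, nr_pages))
			continue;
		if (matching_zone)
			return NULL;	/* spans more than one zone */
		matching_zone = zone;
	}
	return matching_zone;
}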
/*
 * memory_block_add_nid() - Indicate that system RAM falling into this memory
 *			    block device (partially) belongs to the given node.
 * @mem: The memory block device.
 * @nid: The node id.
 * @context: The memory initialization context.
 *
 * Indicate that system RAM falling into this memory block (partially) belongs
 * to the given node. If the context indicates ("early") that we are adding the
 * node during node device subsystem initialization, this will also properly
 * set/adjust mem->zone based on the zone ranges of the given node.
 */
/* in memory_block_add_nid() */
	if (context == MEMINIT_EARLY && mem->nid != nid) {
		/*
		 * For early memory we have to determine the zone when setting
		 * the node id and handle multiple nodes spanning a single
		 * memory block by indicating via zone == NULL that we're not
		 * dealing with a single zone. So if we're setting the node id
		 * a second time to a different node, drop the zone.
		 */
		if (mem->nid == NUMA_NO_NODE)
			mem->zone = early_node_zone_for_memory_block(mem, nid);
		else
			mem->zone = NULL;
	}
	/*
	 * If this memory block spans multiple nodes, we only indicate
	 * the last processed node. If we span multiple nodes (applicable
	 * to hotplugged memory), zone == NULL will prohibit memory offlining
	 * and consequently unplug.
	 */
	mem->nid = nid;
/* in add_memory_block() */
		put_device(&mem->dev);
		return -EEXIST;
	...
		return -ENOMEM;
	...
	mem->start_section_nr = block_id * sections_per_block;
	mem->state = state;
	mem->nid = NUMA_NO_NODE;
	mem->altmap = altmap;
	INIT_LIST_HEAD(&mem->group_next);
	...
		/*
		 * MEM_ONLINE at this point implies early memory. With NUMA,
		 * ...
		 * memory_block_add_nid(). Memory hotplug updates the zone
		 * manually when memory onlining/offlining succeeds.
		 */
		mem->zone = early_node_zone_for_memory_block(mem, NUMA_NO_NODE);
	...
		mem->group = group;
		list_add(&mem->group_next, &group->memory_blocks);
static void remove_memory_block(struct memory_block *memory)
{
	if (WARN_ON_ONCE(memory->dev.bus != &memory_subsys))
		return;

	WARN_ON(xa_erase(&memory_blocks, memory->dev.id) == NULL);

	if (memory->group) {
		list_del(&memory->group_next);
		memory->group = NULL;
	}
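	/*
	 * put_device() below drops the reference the caller obtained via
	 * find_memory_block_by_id(); device_unregister() then drops its
	 * registration reference, and memory_block_release() runs once the
	 * last reference is gone.
	 */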
	put_device(&memory->dev);
	device_unregister(&memory->dev);
}
/*
 * Create memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * will be initialized as offline.
 */

		return -EINVAL;		/* in create_memory_block_devices() */
/*
 * Remove memory block devices for the given memory area. Start and size
 * have to be aligned to memory block granularity. Memory block devices
 * have to be offline.
 */

	num_poisoned_pages_sub(-1UL, memblk_nr_poison(mem));	/* in remove_memory_block_devices() */
/*
 * Initialize the sysfs support for memory devices. At the time this function
 * is called, we cannot have concurrent creation/deletion of memory block
 * devices; the device_hotplug_lock is not needed.
 */

/* in memory_dev_init() */
	/* Validate the configured memory block size */
	...
		panic("Memory block size not suitable: 0x%lx\n", block_sz);
	...
		panic("%s() failed to register subsystem: %d\n", __func__, ret);
	...
	/* Create entries for memory sections found during boot */
	...
		panic("%s() failed to add memory block: %d\n", __func__, ret);
/**
 * walk_memory_blocks - walk through all present memory blocks overlapped
 *			by the range [start, start + size)
 *
 * @start: start address of the memory range
 * @size: size of the memory range
 * @arg: argument passed to func
 * @func: callback for each memory block walked
 *
 * This function walks through all present memory blocks overlapped by the
 * range [start, start + size), calling func on each memory block.
 */

/* in walk_memory_blocks() */
	const unsigned long end_block_id = phys_to_block_id(start + size - 1);
	...
		put_device(&mem->dev);
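/*
 * Hypothetical usage sketch (not from this file): count the present memory
 * blocks overlapped by a range. Per the full kernel-doc, callers hold
 * device_hotplug_lock; the helper names here are made up.
 */
static int example_count_cb(struct memory_block *mem, void *arg)
{
	(*(unsigned long *)arg)++;
	return 0;
}

static unsigned long example_count_blocks(unsigned long start,
					  unsigned long size)
{
	unsigned long nr = 0;

	walk_memory_blocks(start, size, &nr, example_count_cb);
	return nr;
}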
	return cb_data->func(mem, cb_data->arg);	/* in for_each_memory_block_cb() */

/**
 * for_each_memory_block - walk through all present memory blocks
 *
 * @arg: argument passed to func
 * @func: callback for each memory block walked
 *
 * This function walks through all present memory blocks, calling func on
 * each memory block.
 */
/*
 * This is an internal helper to unify allocation and initialization of
 * memory groups. Note that the passed memory group will be copied to a
 * dynamically allocated memory group. After this call, the passed
 * memory group should no longer be used.
 */

/* in memory_group_register() */
	if (!node_possible(group.nid))
		return -EINVAL;
	...
	if (!new_group)
		return -ENOMEM;
	INIT_LIST_HEAD(&new_group->memory_blocks);
/**
 * memory_group_register_static() - Register a static memory group.
 * @nid: The node id.
 * @max_pages: The maximum number of pages we'll have in this static memory
 *	       group.
 *
 * Register a new static memory group and return the memory group id.
 * All memory in the group belongs to a single unit, such as a DIMM. All
 * memory belonging to a static memory group is added in one go to be removed
 * in one go -- it's static.
 *
 * Returns an error if out of memory, if the node id is invalid, if no new
 * memory groups can be registered, or if max_pages is invalid (0). Otherwise,
 * returns the new memory group id.
 */

		return -EINVAL;		/* in memory_group_register_static() */
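/*
 * Hypothetical driver sketch (modeled loosely on how dax/kmem uses this API;
 * not part of this file): register a static group for a DIMM-like device,
 * then hot-add its memory with the group id passed in place of the node id
 * via MHP_NID_IS_MGID. The function name, resource name and flag choice are
 * assumptions for illustration.
 */
static int example_add_dimm(int nid, u64 start, u64 size)
{
	int mgid = memory_group_register_static(nid, PFN_UP(size));

	if (mgid < 0)
		return mgid;
	return add_memory_driver_managed(mgid, start, size,
					 "System RAM (example)",
					 MHP_MERGE_RESOURCE | MHP_NID_IS_MGID);
}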
/**
 * memory_group_register_dynamic() - Register a dynamic memory group.
 * @nid: The node id.
 * @unit_pages: Unit in pages in which memory is added/removed in this
 *		dynamic memory group.
 *
 * Register a new dynamic memory group and return the memory group id.
 * Memory within a dynamic memory group is added/removed dynamically
 * in unit_pages.
 *
 * Returns an error if out of memory, if the node id is invalid, if no new
 * memory groups can be registered, or if unit_pages is invalid (0, not a
 * power of two, smaller than a single memory block). Otherwise, returns the
 * new memory group id.
 */

		return -EINVAL;		/* in memory_group_register_dynamic() */
/**
 * memory_group_unregister() - Unregister a memory group.
 * @mgid: the memory group id
 *
 * Unregister a memory group. If any memory block still belongs to this
 * memory group, unregistering will fail.
 *
 * Returns -EINVAL if the memory group id is invalid, returns -EBUSY if some
 * memory blocks still belong to this memory group and returns 0 if
 * unregistering succeeded.
 */

/* in memory_group_unregister() */
	if (mgid < 0)
		return -EINVAL;
	...
	if (!group)
		return -EINVAL;
	if (!list_empty(&group->memory_blocks))
		return -EBUSY;
/*
 * This is an internal helper only to be used in core memory hotplug code to
 * look up a memory group. We don't care about locking, as we don't expect a
 * memory group to get unregistered while adding memory to it -- because
 * the group and the memory are managed by the same driver.
 */
/*
 * This is an internal helper only to be used in core memory hotplug code to
 * walk all dynamic memory groups excluding a given memory group, either
 * belonging to a specific node, or belonging to any node.
 */

		if (nid != NUMA_NO_NODE && group->nid != nid)	/* in walk_dynamic_memory_groups() */

	atomic_long_inc(&mem->nr_hwpoison);		/* in memblk_nr_poison_inc() */
	atomic_long_sub(i, &mem->nr_hwpoison);		/* in memblk_nr_poison_sub() */
	return atomic_long_read(&mem->nr_hwpoison);	/* in memblk_nr_poison() */